author     root <root@artemis.panaceas.org>  2015-12-25 04:40:36 +0000
committer  root <root@artemis.panaceas.org>  2015-12-25 04:40:36 +0000
commit     849369d6c66d3054688672f97d31fceb8e8230fb (patch)
tree       6135abc790ca67dedbe07c39806591e70eda81ce /drivers/block
initial_commit
Diffstat (limited to 'drivers/block')
-rw-r--r--  drivers/block/DAC960.c  7243
-rw-r--r--  drivers/block/DAC960.h  4415
-rw-r--r--  drivers/block/Kconfig  530
-rw-r--r--  drivers/block/Makefile  43
-rw-r--r--  drivers/block/amiflop.c  1895
-rw-r--r--  drivers/block/aoe/Makefile  6
-rw-r--r--  drivers/block/aoe/aoe.h  203
-rw-r--r--  drivers/block/aoe/aoeblk.c  332
-rw-r--r--  drivers/block/aoe/aoechr.c  314
-rw-r--r--  drivers/block/aoe/aoecmd.c  1083
-rw-r--r--  drivers/block/aoe/aoedev.c  277
-rw-r--r--  drivers/block/aoe/aoemain.c  111
-rw-r--r--  drivers/block/aoe/aoenet.c  172
-rw-r--r--  drivers/block/ataflop.c  2057
-rw-r--r--  drivers/block/brd.c  658
-rw-r--r--  drivers/block/cciss.c  5331
-rw-r--r--  drivers/block/cciss.h  434
-rw-r--r--  drivers/block/cciss_cmd.h  269
-rw-r--r--  drivers/block/cciss_scsi.c  1725
-rw-r--r--  drivers/block/cciss_scsi.h  79
-rw-r--r--  drivers/block/cpqarray.c  1821
-rw-r--r--  drivers/block/cpqarray.h  126
-rw-r--r--  drivers/block/cryptoloop.c  216
-rw-r--r--  drivers/block/drbd/Kconfig  71
-rw-r--r--  drivers/block/drbd/Makefile  5
-rw-r--r--  drivers/block/drbd/drbd_actlog.c  1261
-rw-r--r--  drivers/block/drbd/drbd_bitmap.c  1589
-rw-r--r--  drivers/block/drbd/drbd_int.h  2465
-rw-r--r--  drivers/block/drbd/drbd_main.c  4203
-rw-r--r--  drivers/block/drbd/drbd_nl.c  2651
-rw-r--r--  drivers/block/drbd/drbd_proc.c  319
-rw-r--r--  drivers/block/drbd/drbd_receiver.c  4678
-rw-r--r--  drivers/block/drbd/drbd_req.c  1231
-rw-r--r--  drivers/block/drbd/drbd_req.h  383
-rw-r--r--  drivers/block/drbd/drbd_strings.c  115
-rw-r--r--  drivers/block/drbd/drbd_vli.h  351
-rw-r--r--  drivers/block/drbd/drbd_worker.c  1723
-rw-r--r--  drivers/block/drbd/drbd_wrappers.h  59
-rw-r--r--  drivers/block/floppy.c  4619
-rw-r--r--  drivers/block/hd.c  815
-rw-r--r--  drivers/block/ida_cmd.h  349
-rw-r--r--  drivers/block/ida_ioctl.h  87
-rw-r--r--  drivers/block/loop.c  1777
-rw-r--r--  drivers/block/mg_disk.c  1108
-rw-r--r--  drivers/block/nbd.c  868
-rw-r--r--  drivers/block/osdblk.c  700
-rw-r--r--  drivers/block/paride/Kconfig  300
-rw-r--r--  drivers/block/paride/Makefile  28
-rw-r--r--  drivers/block/paride/Transition-notes  128
-rw-r--r--  drivers/block/paride/aten.c  162
-rw-r--r--  drivers/block/paride/bpck.c  477
-rw-r--r--  drivers/block/paride/bpck6.c  268
-rw-r--r--  drivers/block/paride/comm.c  218
-rw-r--r--  drivers/block/paride/dstr.c  233
-rw-r--r--  drivers/block/paride/epat.c  340
-rw-r--r--  drivers/block/paride/epia.c  316
-rw-r--r--  drivers/block/paride/fit2.c  151
-rw-r--r--  drivers/block/paride/fit3.c  211
-rw-r--r--  drivers/block/paride/friq.c  276
-rw-r--r--  drivers/block/paride/frpw.c  313
-rw-r--r--  drivers/block/paride/kbic.c  305
-rw-r--r--  drivers/block/paride/ktti.c  128
-rw-r--r--  drivers/block/paride/mkd  30
-rw-r--r--  drivers/block/paride/on20.c  153
-rw-r--r--  drivers/block/paride/on26.c  319
-rw-r--r--  drivers/block/paride/paride.c  434
-rw-r--r--  drivers/block/paride/paride.h  170
-rw-r--r--  drivers/block/paride/pcd.c  992
-rw-r--r--  drivers/block/paride/pd.c  957
-rw-r--r--  drivers/block/paride/pf.c  1006
-rw-r--r--  drivers/block/paride/pg.c  724
-rw-r--r--  drivers/block/paride/ppc6lnx.c  726
-rw-r--r--  drivers/block/paride/pseudo.h  102
-rw-r--r--  drivers/block/paride/pt.c  1014
-rw-r--r--  drivers/block/pktcdvd.c  3117
-rw-r--r--  drivers/block/ps3disk.c  589
-rw-r--r--  drivers/block/ps3vram.c  879
-rw-r--r--  drivers/block/rbd.c  2499
-rw-r--r--  drivers/block/rbd_types.h  73
-rw-r--r--  drivers/block/smart1,2.h  278
-rw-r--r--  drivers/block/sunvdc.c  879
-rw-r--r--  drivers/block/swim.c  1000
-rw-r--r--  drivers/block/swim3.c  1213
-rw-r--r--  drivers/block/swim_asm.S  247
-rw-r--r--  drivers/block/sx8.c  1759
-rw-r--r--  drivers/block/ub.c  2509
-rw-r--r--  drivers/block/umem.c  1123
-rw-r--r--  drivers/block/umem.h  133
-rw-r--r--  drivers/block/viodasd.c  809
-rw-r--r--  drivers/block/virtio_blk.c  607
-rw-r--r--  drivers/block/xd.c  1124
-rw-r--r--  drivers/block/xd.h  134
-rw-r--r--  drivers/block/xen-blkback/Makefile  3
-rw-r--r--  drivers/block/xen-blkback/blkback.c  826
-rw-r--r--  drivers/block/xen-blkback/common.h  233
-rw-r--r--  drivers/block/xen-blkback/xenbus.c  767
-rw-r--r--  drivers/block/xen-blkfront.c  1412
-rw-r--r--  drivers/block/xsysace.c  1315
-rw-r--r--  drivers/block/z2ram.c  421
99 files changed, 95197 insertions, 0 deletions
diff --git a/drivers/block/DAC960.c b/drivers/block/DAC960.c
new file mode 100644
index 00000000..e086fbbb
--- /dev/null
+++ b/drivers/block/DAC960.c
@@ -0,0 +1,7243 @@
+/*
+
+ Linux Driver for Mylex DAC960/AcceleRAID/eXtremeRAID PCI RAID Controllers
+
+ Copyright 1998-2001 by Leonard N. Zubkoff <lnz@dandelion.com>
+ Portions Copyright 2002 by Mylex (An IBM Business Unit)
+
+ This program is free software; you may redistribute and/or modify it under
+ the terms of the GNU General Public License Version 2 as published by the
+ Free Software Foundation.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY, without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ for complete details.
+
+*/
+
+
+#define DAC960_DriverVersion "2.5.49"
+#define DAC960_DriverDate "21 Aug 2007"
+
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/miscdevice.h>
+#include <linux/blkdev.h>
+#include <linux/bio.h>
+#include <linux/completion.h>
+#include <linux/delay.h>
+#include <linux/genhd.h>
+#include <linux/hdreg.h>
+#include <linux/blkpg.h>
+#include <linux/dma-mapping.h>
+#include <linux/interrupt.h>
+#include <linux/ioport.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/mutex.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/reboot.h>
+#include <linux/spinlock.h>
+#include <linux/timer.h>
+#include <linux/pci.h>
+#include <linux/init.h>
+#include <linux/jiffies.h>
+#include <linux/random.h>
+#include <linux/scatterlist.h>
+#include <asm/io.h>
+#include <asm/uaccess.h>
+#include "DAC960.h"
+
+#define DAC960_GAM_MINOR 252
+
+
+static DEFINE_MUTEX(DAC960_mutex);
+static DAC960_Controller_T *DAC960_Controllers[DAC960_MaxControllers];
+static int DAC960_ControllerCount;
+static struct proc_dir_entry *DAC960_ProcDirectoryEntry;
+
+static long disk_size(DAC960_Controller_T *p, int drive_nr)
+{
+ if (p->FirmwareType == DAC960_V1_Controller) {
+ if (drive_nr >= p->LogicalDriveCount)
+ return 0;
+ return p->V1.LogicalDriveInformation[drive_nr].
+ LogicalDriveSize;
+ } else {
+ DAC960_V2_LogicalDeviceInfo_T *i =
+ p->V2.LogicalDeviceInformation[drive_nr];
+ if (i == NULL)
+ return 0;
+ return i->ConfigurableDeviceSize;
+ }
+}
+
+static int DAC960_open(struct block_device *bdev, fmode_t mode)
+{
+ struct gendisk *disk = bdev->bd_disk;
+ DAC960_Controller_T *p = disk->queue->queuedata;
+ int drive_nr = (long)disk->private_data;
+ int ret = -ENXIO;
+
+ mutex_lock(&DAC960_mutex);
+ if (p->FirmwareType == DAC960_V1_Controller) {
+ if (p->V1.LogicalDriveInformation[drive_nr].
+ LogicalDriveState == DAC960_V1_LogicalDrive_Offline)
+ goto out;
+ } else {
+ DAC960_V2_LogicalDeviceInfo_T *i =
+ p->V2.LogicalDeviceInformation[drive_nr];
+ if (!i || i->LogicalDeviceState == DAC960_V2_LogicalDevice_Offline)
+ goto out;
+ }
+
+ check_disk_change(bdev);
+
+ if (!get_capacity(p->disks[drive_nr]))
+ goto out;
+ ret = 0;
+out:
+ mutex_unlock(&DAC960_mutex);
+ return ret;
+}
+
+static int DAC960_getgeo(struct block_device *bdev, struct hd_geometry *geo)
+{
+ struct gendisk *disk = bdev->bd_disk;
+ DAC960_Controller_T *p = disk->queue->queuedata;
+ int drive_nr = (long)disk->private_data;
+
+ if (p->FirmwareType == DAC960_V1_Controller) {
+ geo->heads = p->V1.GeometryTranslationHeads;
+ geo->sectors = p->V1.GeometryTranslationSectors;
+ geo->cylinders = p->V1.LogicalDriveInformation[drive_nr].
+ LogicalDriveSize / (geo->heads * geo->sectors);
+ } else {
+ DAC960_V2_LogicalDeviceInfo_T *i =
+ p->V2.LogicalDeviceInformation[drive_nr];
+ switch (i->DriveGeometry) {
+ case DAC960_V2_Geometry_128_32:
+ geo->heads = 128;
+ geo->sectors = 32;
+ break;
+ case DAC960_V2_Geometry_255_63:
+ geo->heads = 255;
+ geo->sectors = 63;
+ break;
+ default:
+ DAC960_Error("Illegal Logical Device Geometry %d\n",
+ p, i->DriveGeometry);
+ return -EINVAL;
+ }
+
+ geo->cylinders = i->ConfigurableDeviceSize /
+ (geo->heads * geo->sectors);
+ }
+
+ return 0;
+}
+
+static unsigned int DAC960_check_events(struct gendisk *disk,
+ unsigned int clearing)
+{
+ DAC960_Controller_T *p = disk->queue->queuedata;
+ int drive_nr = (long)disk->private_data;
+
+ if (!p->LogicalDriveInitiallyAccessible[drive_nr])
+ return DISK_EVENT_MEDIA_CHANGE;
+ return 0;
+}
+
+static int DAC960_revalidate_disk(struct gendisk *disk)
+{
+ DAC960_Controller_T *p = disk->queue->queuedata;
+ int unit = (long)disk->private_data;
+
+ set_capacity(disk, disk_size(p, unit));
+ return 0;
+}
+
+static const struct block_device_operations DAC960_BlockDeviceOperations = {
+ .owner = THIS_MODULE,
+ .open = DAC960_open,
+ .getgeo = DAC960_getgeo,
+ .check_events = DAC960_check_events,
+ .revalidate_disk = DAC960_revalidate_disk,
+};
+
+
+/*
+ DAC960_AnnounceDriver announces the Driver Version and Date, Author's Name,
+ Copyright Notice, and Electronic Mail Address.
+*/
+
+static void DAC960_AnnounceDriver(DAC960_Controller_T *Controller)
+{
+ DAC960_Announce("***** DAC960 RAID Driver Version "
+ DAC960_DriverVersion " of "
+ DAC960_DriverDate " *****\n", Controller);
+ DAC960_Announce("Copyright 1998-2001 by Leonard N. Zubkoff "
+ "<lnz@dandelion.com>\n", Controller);
+}
+
+
+/*
+ DAC960_Failure prints a standardized error message, and then returns false.
+*/
+
+static bool DAC960_Failure(DAC960_Controller_T *Controller,
+ unsigned char *ErrorMessage)
+{
+ DAC960_Error("While configuring DAC960 PCI RAID Controller at\n",
+ Controller);
+ if (Controller->IO_Address == 0)
+ DAC960_Error("PCI Bus %d Device %d Function %d I/O Address N/A "
+ "PCI Address 0x%X\n", Controller,
+ Controller->Bus, Controller->Device,
+ Controller->Function, Controller->PCI_Address);
+ else DAC960_Error("PCI Bus %d Device %d Function %d I/O Address "
+ "0x%X PCI Address 0x%X\n", Controller,
+ Controller->Bus, Controller->Device,
+ Controller->Function, Controller->IO_Address,
+ Controller->PCI_Address);
+ DAC960_Error("%s FAILED - DETACHING\n", Controller, ErrorMessage);
+ return false;
+}
+
+/*
+ init_dma_loaf() and slice_dma_loaf() are helper functions for
+ aggregating the dma-mapped memory for a well-known collection of
+ data structures that are of different lengths.
+
+ These routines don't guarantee any alignment. The caller must
+ include any space needed for alignment in the sizes of the structures
+ that are passed in.
+ */
+
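+/*
+   A sketch of the calling pattern, as used by the EnableMemoryMailboxInterface
+   routines later in this file (variable names here are illustrative only):
+
+	if (!init_dma_loaf(PCI_Device, &loaf, total_size))
+		return false;
+	a = slice_dma_loaf(&loaf, sizeof(*a), &a_dma);
+	b = slice_dma_loaf(&loaf, sizeof(*b), &b_dma);
+	...
+	free_dma_loaf(PCI_Device, &loaf);
+ */
+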
+static bool init_dma_loaf(struct pci_dev *dev, struct dma_loaf *loaf,
+ size_t len)
+{
+ void *cpu_addr;
+ dma_addr_t dma_handle;
+
+ cpu_addr = pci_alloc_consistent(dev, len, &dma_handle);
+ if (cpu_addr == NULL)
+ return false;
+
+ loaf->cpu_free = loaf->cpu_base = cpu_addr;
+ loaf->dma_free = loaf->dma_base = dma_handle;
+ loaf->length = len;
+ memset(cpu_addr, 0, len);
+ return true;
+}
+
+static void *slice_dma_loaf(struct dma_loaf *loaf, size_t len,
+ dma_addr_t *dma_handle)
+{
+ void *cpu_end = loaf->cpu_free + len;
+ void *cpu_addr = loaf->cpu_free;
+
+ BUG_ON(cpu_end > loaf->cpu_base + loaf->length);
+ *dma_handle = loaf->dma_free;
+ loaf->cpu_free = cpu_end;
+ loaf->dma_free += len;
+ return cpu_addr;
+}
+
+static void free_dma_loaf(struct pci_dev *dev, struct dma_loaf *loaf_handle)
+{
+ if (loaf_handle->cpu_base != NULL)
+ pci_free_consistent(dev, loaf_handle->length,
+ loaf_handle->cpu_base, loaf_handle->dma_base);
+}
+
+
+/*
+ DAC960_CreateAuxiliaryStructures allocates and initializes the auxiliary
+ data structures for Controller. It returns true on success and false on
+ failure.
+*/
+
+static bool DAC960_CreateAuxiliaryStructures(DAC960_Controller_T *Controller)
+{
+ int CommandAllocationLength, CommandAllocationGroupSize;
+ int CommandsRemaining = 0, CommandIdentifier, CommandGroupByteCount;
+ void *AllocationPointer = NULL;
+ void *ScatterGatherCPU = NULL;
+ dma_addr_t ScatterGatherDMA;
+ struct pci_pool *ScatterGatherPool;
+ void *RequestSenseCPU = NULL;
+ dma_addr_t RequestSenseDMA;
+ struct pci_pool *RequestSensePool = NULL;
+
+ if (Controller->FirmwareType == DAC960_V1_Controller)
+ {
+ CommandAllocationLength = offsetof(DAC960_Command_T, V1.EndMarker);
+ CommandAllocationGroupSize = DAC960_V1_CommandAllocationGroupSize;
+ ScatterGatherPool = pci_pool_create("DAC960_V1_ScatterGather",
+ Controller->PCIDevice,
+ DAC960_V1_ScatterGatherLimit * sizeof(DAC960_V1_ScatterGatherSegment_T),
+ sizeof(DAC960_V1_ScatterGatherSegment_T), 0);
+ if (ScatterGatherPool == NULL)
+ return DAC960_Failure(Controller,
+ "AUXILIARY STRUCTURE CREATION (SG)");
+ Controller->ScatterGatherPool = ScatterGatherPool;
+ }
+ else
+ {
+ CommandAllocationLength = offsetof(DAC960_Command_T, V2.EndMarker);
+ CommandAllocationGroupSize = DAC960_V2_CommandAllocationGroupSize;
+ ScatterGatherPool = pci_pool_create("DAC960_V2_ScatterGather",
+ Controller->PCIDevice,
+ DAC960_V2_ScatterGatherLimit * sizeof(DAC960_V2_ScatterGatherSegment_T),
+ sizeof(DAC960_V2_ScatterGatherSegment_T), 0);
+ if (ScatterGatherPool == NULL)
+ return DAC960_Failure(Controller,
+ "AUXILIARY STRUCTURE CREATION (SG)");
+ RequestSensePool = pci_pool_create("DAC960_V2_RequestSense",
+ Controller->PCIDevice, sizeof(DAC960_SCSI_RequestSense_T),
+ sizeof(int), 0);
+ if (RequestSensePool == NULL) {
+ pci_pool_destroy(ScatterGatherPool);
+ return DAC960_Failure(Controller,
+ "AUXILIARY STRUCTURE CREATION (SG)");
+ }
+ Controller->ScatterGatherPool = ScatterGatherPool;
+ Controller->V2.RequestSensePool = RequestSensePool;
+ }
+ Controller->CommandAllocationGroupSize = CommandAllocationGroupSize;
+ Controller->FreeCommands = NULL;
+ for (CommandIdentifier = 1;
+ CommandIdentifier <= Controller->DriverQueueDepth;
+ CommandIdentifier++)
+ {
+ DAC960_Command_T *Command;
+ if (--CommandsRemaining <= 0)
+ {
+ CommandsRemaining =
+ Controller->DriverQueueDepth - CommandIdentifier + 1;
+ if (CommandsRemaining > CommandAllocationGroupSize)
+ CommandsRemaining = CommandAllocationGroupSize;
+ CommandGroupByteCount =
+ CommandsRemaining * CommandAllocationLength;
+ AllocationPointer = kzalloc(CommandGroupByteCount, GFP_ATOMIC);
+ if (AllocationPointer == NULL)
+ return DAC960_Failure(Controller,
+ "AUXILIARY STRUCTURE CREATION");
+ }
+ Command = (DAC960_Command_T *) AllocationPointer;
+ AllocationPointer += CommandAllocationLength;
+ Command->CommandIdentifier = CommandIdentifier;
+ Command->Controller = Controller;
+ Command->Next = Controller->FreeCommands;
+ Controller->FreeCommands = Command;
+ Controller->Commands[CommandIdentifier-1] = Command;
+ ScatterGatherCPU = pci_pool_alloc(ScatterGatherPool, GFP_ATOMIC,
+ &ScatterGatherDMA);
+ if (ScatterGatherCPU == NULL)
+ return DAC960_Failure(Controller, "AUXILIARY STRUCTURE CREATION");
+
+ if (RequestSensePool != NULL) {
+ RequestSenseCPU = pci_pool_alloc(RequestSensePool, GFP_ATOMIC,
+ &RequestSenseDMA);
+ if (RequestSenseCPU == NULL) {
+ pci_pool_free(ScatterGatherPool, ScatterGatherCPU,
+ ScatterGatherDMA);
+ return DAC960_Failure(Controller,
+ "AUXILIARY STRUCTURE CREATION");
+ }
+ }
+ if (Controller->FirmwareType == DAC960_V1_Controller) {
+ Command->cmd_sglist = Command->V1.ScatterList;
+ Command->V1.ScatterGatherList =
+ (DAC960_V1_ScatterGatherSegment_T *)ScatterGatherCPU;
+ Command->V1.ScatterGatherListDMA = ScatterGatherDMA;
+ sg_init_table(Command->cmd_sglist, DAC960_V1_ScatterGatherLimit);
+ } else {
+ Command->cmd_sglist = Command->V2.ScatterList;
+ Command->V2.ScatterGatherList =
+ (DAC960_V2_ScatterGatherSegment_T *)ScatterGatherCPU;
+ Command->V2.ScatterGatherListDMA = ScatterGatherDMA;
+ Command->V2.RequestSense =
+ (DAC960_SCSI_RequestSense_T *)RequestSenseCPU;
+ Command->V2.RequestSenseDMA = RequestSenseDMA;
+ sg_init_table(Command->cmd_sglist, DAC960_V2_ScatterGatherLimit);
+ }
+ }
+ return true;
+}
+
+
+/*
+ DAC960_DestroyAuxiliaryStructures deallocates the auxiliary data
+ structures for Controller.
+*/
+
+static void DAC960_DestroyAuxiliaryStructures(DAC960_Controller_T *Controller)
+{
+ int i;
+ struct pci_pool *ScatterGatherPool = Controller->ScatterGatherPool;
+ struct pci_pool *RequestSensePool = NULL;
+ void *ScatterGatherCPU;
+ dma_addr_t ScatterGatherDMA;
+ void *RequestSenseCPU;
+ dma_addr_t RequestSenseDMA;
+ DAC960_Command_T *CommandGroup = NULL;
+
+
+ if (Controller->FirmwareType == DAC960_V2_Controller)
+ RequestSensePool = Controller->V2.RequestSensePool;
+
+ Controller->FreeCommands = NULL;
+ for (i = 0; i < Controller->DriverQueueDepth; i++)
+ {
+ DAC960_Command_T *Command = Controller->Commands[i];
+
+ if (Command == NULL)
+ continue;
+
+ if (Controller->FirmwareType == DAC960_V1_Controller) {
+ ScatterGatherCPU = (void *)Command->V1.ScatterGatherList;
+ ScatterGatherDMA = Command->V1.ScatterGatherListDMA;
+ RequestSenseCPU = NULL;
+ RequestSenseDMA = (dma_addr_t)0;
+ } else {
+ ScatterGatherCPU = (void *)Command->V2.ScatterGatherList;
+ ScatterGatherDMA = Command->V2.ScatterGatherListDMA;
+ RequestSenseCPU = (void *)Command->V2.RequestSense;
+ RequestSenseDMA = Command->V2.RequestSenseDMA;
+ }
+ if (ScatterGatherCPU != NULL)
+ pci_pool_free(ScatterGatherPool, ScatterGatherCPU, ScatterGatherDMA);
+ if (RequestSenseCPU != NULL)
+ pci_pool_free(RequestSensePool, RequestSenseCPU, RequestSenseDMA);
+
+ if ((Command->CommandIdentifier
+ % Controller->CommandAllocationGroupSize) == 1) {
+ /*
+ * We can't free the group of commands until all of the
+ * request sense and scatter gather dma structures are free.
+ * Remember the beginning of the group, but don't free it
+ * until we've reached the beginning of the next group.
+ */
+ kfree(CommandGroup);
+ CommandGroup = Command;
+ }
+ Controller->Commands[i] = NULL;
+ }
+ kfree(CommandGroup);
+
+ if (Controller->CombinedStatusBuffer != NULL)
+ {
+ kfree(Controller->CombinedStatusBuffer);
+ Controller->CombinedStatusBuffer = NULL;
+ Controller->CurrentStatusBuffer = NULL;
+ }
+
+ if (ScatterGatherPool != NULL)
+ pci_pool_destroy(ScatterGatherPool);
+ if (Controller->FirmwareType == DAC960_V1_Controller)
+ return;
+
+ if (RequestSensePool != NULL)
+ pci_pool_destroy(RequestSensePool);
+
+ for (i = 0; i < DAC960_MaxLogicalDrives; i++) {
+ kfree(Controller->V2.LogicalDeviceInformation[i]);
+ Controller->V2.LogicalDeviceInformation[i] = NULL;
+ }
+
+ for (i = 0; i < DAC960_V2_MaxPhysicalDevices; i++)
+ {
+ kfree(Controller->V2.PhysicalDeviceInformation[i]);
+ Controller->V2.PhysicalDeviceInformation[i] = NULL;
+ kfree(Controller->V2.InquiryUnitSerialNumber[i]);
+ Controller->V2.InquiryUnitSerialNumber[i] = NULL;
+ }
+}
+
+
+/*
+ DAC960_V1_ClearCommand clears critical fields of Command for DAC960 V1
+ Firmware Controllers.
+*/
+
+static inline void DAC960_V1_ClearCommand(DAC960_Command_T *Command)
+{
+ DAC960_V1_CommandMailbox_T *CommandMailbox = &Command->V1.CommandMailbox;
+ memset(CommandMailbox, 0, sizeof(DAC960_V1_CommandMailbox_T));
+ Command->V1.CommandStatus = 0;
+}
+
+
+/*
+ DAC960_V2_ClearCommand clears critical fields of Command for DAC960 V2
+ Firmware Controllers.
+*/
+
+static inline void DAC960_V2_ClearCommand(DAC960_Command_T *Command)
+{
+ DAC960_V2_CommandMailbox_T *CommandMailbox = &Command->V2.CommandMailbox;
+ memset(CommandMailbox, 0, sizeof(DAC960_V2_CommandMailbox_T));
+ Command->V2.CommandStatus = 0;
+}
+
+
+/*
+ DAC960_AllocateCommand allocates a Command structure from Controller's
+ free list. During driver initialization, a special initialization command
+ has been placed on the free list to guarantee that command allocation can
+ never fail.
+*/
+
+static inline DAC960_Command_T *DAC960_AllocateCommand(DAC960_Controller_T
+ *Controller)
+{
+ DAC960_Command_T *Command = Controller->FreeCommands;
+ if (Command == NULL) return NULL;
+ Controller->FreeCommands = Command->Next;
+ Command->Next = NULL;
+ return Command;
+}
+
+
+/*
+ DAC960_DeallocateCommand deallocates Command, returning it to Controller's
+ free list.
+*/
+
+static inline void DAC960_DeallocateCommand(DAC960_Command_T *Command)
+{
+ DAC960_Controller_T *Controller = Command->Controller;
+
+ Command->Request = NULL;
+ Command->Next = Controller->FreeCommands;
+ Controller->FreeCommands = Command;
+}
+
+
+/*
+ DAC960_WaitForCommand waits for a wake_up on Controller's Command Wait Queue.
+*/
+
+static void DAC960_WaitForCommand(DAC960_Controller_T *Controller)
+{
+ spin_unlock_irq(&Controller->queue_lock);
+ __wait_event(Controller->CommandWaitQueue, Controller->FreeCommands);
+ spin_lock_irq(&Controller->queue_lock);
+}
+
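+/*
+  The memory mailbox QueueCommand routines below all follow the same pattern:
+  copy the command into the next slot of the in-memory command mailbox ring,
+  ring the controller's "new command" doorbell only if one of the two most
+  recently posted slots has already been cleared (Words[0] == 0); if both are
+  still non-zero the controller has not yet caught up and will reach the new
+  slot without being prodded. Finally, advance the ring pointer, wrapping
+  from the last mailbox back to the first.
+*/
+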
+/*
+ DAC960_GEM_QueueCommand queues Command for DAC960 GEM Series Controllers.
+*/
+
+static void DAC960_GEM_QueueCommand(DAC960_Command_T *Command)
+{
+ DAC960_Controller_T *Controller = Command->Controller;
+ void __iomem *ControllerBaseAddress = Controller->BaseAddress;
+ DAC960_V2_CommandMailbox_T *CommandMailbox = &Command->V2.CommandMailbox;
+ DAC960_V2_CommandMailbox_T *NextCommandMailbox =
+ Controller->V2.NextCommandMailbox;
+
+ CommandMailbox->Common.CommandIdentifier = Command->CommandIdentifier;
+ DAC960_GEM_WriteCommandMailbox(NextCommandMailbox, CommandMailbox);
+
+ if (Controller->V2.PreviousCommandMailbox1->Words[0] == 0 ||
+ Controller->V2.PreviousCommandMailbox2->Words[0] == 0)
+ DAC960_GEM_MemoryMailboxNewCommand(ControllerBaseAddress);
+
+ Controller->V2.PreviousCommandMailbox2 =
+ Controller->V2.PreviousCommandMailbox1;
+ Controller->V2.PreviousCommandMailbox1 = NextCommandMailbox;
+
+ if (++NextCommandMailbox > Controller->V2.LastCommandMailbox)
+ NextCommandMailbox = Controller->V2.FirstCommandMailbox;
+
+ Controller->V2.NextCommandMailbox = NextCommandMailbox;
+}
+
+/*
+ DAC960_BA_QueueCommand queues Command for DAC960 BA Series Controllers.
+*/
+
+static void DAC960_BA_QueueCommand(DAC960_Command_T *Command)
+{
+ DAC960_Controller_T *Controller = Command->Controller;
+ void __iomem *ControllerBaseAddress = Controller->BaseAddress;
+ DAC960_V2_CommandMailbox_T *CommandMailbox = &Command->V2.CommandMailbox;
+ DAC960_V2_CommandMailbox_T *NextCommandMailbox =
+ Controller->V2.NextCommandMailbox;
+ CommandMailbox->Common.CommandIdentifier = Command->CommandIdentifier;
+ DAC960_BA_WriteCommandMailbox(NextCommandMailbox, CommandMailbox);
+ if (Controller->V2.PreviousCommandMailbox1->Words[0] == 0 ||
+ Controller->V2.PreviousCommandMailbox2->Words[0] == 0)
+ DAC960_BA_MemoryMailboxNewCommand(ControllerBaseAddress);
+ Controller->V2.PreviousCommandMailbox2 =
+ Controller->V2.PreviousCommandMailbox1;
+ Controller->V2.PreviousCommandMailbox1 = NextCommandMailbox;
+ if (++NextCommandMailbox > Controller->V2.LastCommandMailbox)
+ NextCommandMailbox = Controller->V2.FirstCommandMailbox;
+ Controller->V2.NextCommandMailbox = NextCommandMailbox;
+}
+
+
+/*
+ DAC960_LP_QueueCommand queues Command for DAC960 LP Series Controllers.
+*/
+
+static void DAC960_LP_QueueCommand(DAC960_Command_T *Command)
+{
+ DAC960_Controller_T *Controller = Command->Controller;
+ void __iomem *ControllerBaseAddress = Controller->BaseAddress;
+ DAC960_V2_CommandMailbox_T *CommandMailbox = &Command->V2.CommandMailbox;
+ DAC960_V2_CommandMailbox_T *NextCommandMailbox =
+ Controller->V2.NextCommandMailbox;
+ CommandMailbox->Common.CommandIdentifier = Command->CommandIdentifier;
+ DAC960_LP_WriteCommandMailbox(NextCommandMailbox, CommandMailbox);
+ if (Controller->V2.PreviousCommandMailbox1->Words[0] == 0 ||
+ Controller->V2.PreviousCommandMailbox2->Words[0] == 0)
+ DAC960_LP_MemoryMailboxNewCommand(ControllerBaseAddress);
+ Controller->V2.PreviousCommandMailbox2 =
+ Controller->V2.PreviousCommandMailbox1;
+ Controller->V2.PreviousCommandMailbox1 = NextCommandMailbox;
+ if (++NextCommandMailbox > Controller->V2.LastCommandMailbox)
+ NextCommandMailbox = Controller->V2.FirstCommandMailbox;
+ Controller->V2.NextCommandMailbox = NextCommandMailbox;
+}
+
+
+/*
+ DAC960_LA_QueueCommandDualMode queues Command for DAC960 LA Series
+ Controllers with Dual Mode Firmware.
+*/
+
+static void DAC960_LA_QueueCommandDualMode(DAC960_Command_T *Command)
+{
+ DAC960_Controller_T *Controller = Command->Controller;
+ void __iomem *ControllerBaseAddress = Controller->BaseAddress;
+ DAC960_V1_CommandMailbox_T *CommandMailbox = &Command->V1.CommandMailbox;
+ DAC960_V1_CommandMailbox_T *NextCommandMailbox =
+ Controller->V1.NextCommandMailbox;
+ CommandMailbox->Common.CommandIdentifier = Command->CommandIdentifier;
+ DAC960_LA_WriteCommandMailbox(NextCommandMailbox, CommandMailbox);
+ if (Controller->V1.PreviousCommandMailbox1->Words[0] == 0 ||
+ Controller->V1.PreviousCommandMailbox2->Words[0] == 0)
+ DAC960_LA_MemoryMailboxNewCommand(ControllerBaseAddress);
+ Controller->V1.PreviousCommandMailbox2 =
+ Controller->V1.PreviousCommandMailbox1;
+ Controller->V1.PreviousCommandMailbox1 = NextCommandMailbox;
+ if (++NextCommandMailbox > Controller->V1.LastCommandMailbox)
+ NextCommandMailbox = Controller->V1.FirstCommandMailbox;
+ Controller->V1.NextCommandMailbox = NextCommandMailbox;
+}
+
+
+/*
+ DAC960_LA_QueueCommandSingleMode queues Command for DAC960 LA Series
+ Controllers with Single Mode Firmware.
+*/
+
+static void DAC960_LA_QueueCommandSingleMode(DAC960_Command_T *Command)
+{
+ DAC960_Controller_T *Controller = Command->Controller;
+ void __iomem *ControllerBaseAddress = Controller->BaseAddress;
+ DAC960_V1_CommandMailbox_T *CommandMailbox = &Command->V1.CommandMailbox;
+ DAC960_V1_CommandMailbox_T *NextCommandMailbox =
+ Controller->V1.NextCommandMailbox;
+ CommandMailbox->Common.CommandIdentifier = Command->CommandIdentifier;
+ DAC960_LA_WriteCommandMailbox(NextCommandMailbox, CommandMailbox);
+ if (Controller->V1.PreviousCommandMailbox1->Words[0] == 0 ||
+ Controller->V1.PreviousCommandMailbox2->Words[0] == 0)
+ DAC960_LA_HardwareMailboxNewCommand(ControllerBaseAddress);
+ Controller->V1.PreviousCommandMailbox2 =
+ Controller->V1.PreviousCommandMailbox1;
+ Controller->V1.PreviousCommandMailbox1 = NextCommandMailbox;
+ if (++NextCommandMailbox > Controller->V1.LastCommandMailbox)
+ NextCommandMailbox = Controller->V1.FirstCommandMailbox;
+ Controller->V1.NextCommandMailbox = NextCommandMailbox;
+}
+
+
+/*
+ DAC960_PG_QueueCommandDualMode queues Command for DAC960 PG Series
+ Controllers with Dual Mode Firmware.
+*/
+
+static void DAC960_PG_QueueCommandDualMode(DAC960_Command_T *Command)
+{
+ DAC960_Controller_T *Controller = Command->Controller;
+ void __iomem *ControllerBaseAddress = Controller->BaseAddress;
+ DAC960_V1_CommandMailbox_T *CommandMailbox = &Command->V1.CommandMailbox;
+ DAC960_V1_CommandMailbox_T *NextCommandMailbox =
+ Controller->V1.NextCommandMailbox;
+ CommandMailbox->Common.CommandIdentifier = Command->CommandIdentifier;
+ DAC960_PG_WriteCommandMailbox(NextCommandMailbox, CommandMailbox);
+ if (Controller->V1.PreviousCommandMailbox1->Words[0] == 0 ||
+ Controller->V1.PreviousCommandMailbox2->Words[0] == 0)
+ DAC960_PG_MemoryMailboxNewCommand(ControllerBaseAddress);
+ Controller->V1.PreviousCommandMailbox2 =
+ Controller->V1.PreviousCommandMailbox1;
+ Controller->V1.PreviousCommandMailbox1 = NextCommandMailbox;
+ if (++NextCommandMailbox > Controller->V1.LastCommandMailbox)
+ NextCommandMailbox = Controller->V1.FirstCommandMailbox;
+ Controller->V1.NextCommandMailbox = NextCommandMailbox;
+}
+
+
+/*
+ DAC960_PG_QueueCommandSingleMode queues Command for DAC960 PG Series
+ Controllers with Single Mode Firmware.
+*/
+
+static void DAC960_PG_QueueCommandSingleMode(DAC960_Command_T *Command)
+{
+ DAC960_Controller_T *Controller = Command->Controller;
+ void __iomem *ControllerBaseAddress = Controller->BaseAddress;
+ DAC960_V1_CommandMailbox_T *CommandMailbox = &Command->V1.CommandMailbox;
+ DAC960_V1_CommandMailbox_T *NextCommandMailbox =
+ Controller->V1.NextCommandMailbox;
+ CommandMailbox->Common.CommandIdentifier = Command->CommandIdentifier;
+ DAC960_PG_WriteCommandMailbox(NextCommandMailbox, CommandMailbox);
+ if (Controller->V1.PreviousCommandMailbox1->Words[0] == 0 ||
+ Controller->V1.PreviousCommandMailbox2->Words[0] == 0)
+ DAC960_PG_HardwareMailboxNewCommand(ControllerBaseAddress);
+ Controller->V1.PreviousCommandMailbox2 =
+ Controller->V1.PreviousCommandMailbox1;
+ Controller->V1.PreviousCommandMailbox1 = NextCommandMailbox;
+ if (++NextCommandMailbox > Controller->V1.LastCommandMailbox)
+ NextCommandMailbox = Controller->V1.FirstCommandMailbox;
+ Controller->V1.NextCommandMailbox = NextCommandMailbox;
+}
+
+
+/*
+ DAC960_PD_QueueCommand queues Command for DAC960 PD Series Controllers.
+*/
+
+static void DAC960_PD_QueueCommand(DAC960_Command_T *Command)
+{
+ DAC960_Controller_T *Controller = Command->Controller;
+ void __iomem *ControllerBaseAddress = Controller->BaseAddress;
+ DAC960_V1_CommandMailbox_T *CommandMailbox = &Command->V1.CommandMailbox;
+ CommandMailbox->Common.CommandIdentifier = Command->CommandIdentifier;
+ while (DAC960_PD_MailboxFullP(ControllerBaseAddress))
+ udelay(1);
+ DAC960_PD_WriteCommandMailbox(ControllerBaseAddress, CommandMailbox);
+ DAC960_PD_NewCommand(ControllerBaseAddress);
+}
+
+
+/*
+ DAC960_P_QueueCommand queues Command for DAC960 P Series Controllers.
+*/
+
+static void DAC960_P_QueueCommand(DAC960_Command_T *Command)
+{
+ DAC960_Controller_T *Controller = Command->Controller;
+ void __iomem *ControllerBaseAddress = Controller->BaseAddress;
+ DAC960_V1_CommandMailbox_T *CommandMailbox = &Command->V1.CommandMailbox;
+ CommandMailbox->Common.CommandIdentifier = Command->CommandIdentifier;
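+  /* P Series firmware uses the older command opcode encoding, so translate
+     the opcode and, for read/write commands, the mailbox layout before
+     posting the command. */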
+ switch (CommandMailbox->Common.CommandOpcode)
+ {
+ case DAC960_V1_Enquiry:
+ CommandMailbox->Common.CommandOpcode = DAC960_V1_Enquiry_Old;
+ break;
+ case DAC960_V1_GetDeviceState:
+ CommandMailbox->Common.CommandOpcode = DAC960_V1_GetDeviceState_Old;
+ break;
+ case DAC960_V1_Read:
+ CommandMailbox->Common.CommandOpcode = DAC960_V1_Read_Old;
+ DAC960_PD_To_P_TranslateReadWriteCommand(CommandMailbox);
+ break;
+ case DAC960_V1_Write:
+ CommandMailbox->Common.CommandOpcode = DAC960_V1_Write_Old;
+ DAC960_PD_To_P_TranslateReadWriteCommand(CommandMailbox);
+ break;
+ case DAC960_V1_ReadWithScatterGather:
+ CommandMailbox->Common.CommandOpcode =
+ DAC960_V1_ReadWithScatterGather_Old;
+ DAC960_PD_To_P_TranslateReadWriteCommand(CommandMailbox);
+ break;
+ case DAC960_V1_WriteWithScatterGather:
+ CommandMailbox->Common.CommandOpcode =
+ DAC960_V1_WriteWithScatterGather_Old;
+ DAC960_PD_To_P_TranslateReadWriteCommand(CommandMailbox);
+ break;
+ default:
+ break;
+ }
+ while (DAC960_PD_MailboxFullP(ControllerBaseAddress))
+ udelay(1);
+ DAC960_PD_WriteCommandMailbox(ControllerBaseAddress, CommandMailbox);
+ DAC960_PD_NewCommand(ControllerBaseAddress);
+}
+
+
+/*
+ DAC960_ExecuteCommand executes Command and waits for completion.
+*/
+
+static void DAC960_ExecuteCommand(DAC960_Command_T *Command)
+{
+ DAC960_Controller_T *Controller = Command->Controller;
+ DECLARE_COMPLETION_ONSTACK(Completion);
+ unsigned long flags;
+ Command->Completion = &Completion;
+
+ spin_lock_irqsave(&Controller->queue_lock, flags);
+ DAC960_QueueCommand(Command);
+ spin_unlock_irqrestore(&Controller->queue_lock, flags);
+
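+  /* Callers in interrupt context cannot sleep, so do not wait here. */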
+ if (in_interrupt())
+ return;
+ wait_for_completion(&Completion);
+}
+
+
+/*
+ DAC960_V1_ExecuteType3 executes a DAC960 V1 Firmware Controller Type 3
+ Command and waits for completion. It returns true on success and false
+ on failure.
+*/
+
+static bool DAC960_V1_ExecuteType3(DAC960_Controller_T *Controller,
+ DAC960_V1_CommandOpcode_T CommandOpcode,
+ dma_addr_t DataDMA)
+{
+ DAC960_Command_T *Command = DAC960_AllocateCommand(Controller);
+ DAC960_V1_CommandMailbox_T *CommandMailbox = &Command->V1.CommandMailbox;
+ DAC960_V1_CommandStatus_T CommandStatus;
+ DAC960_V1_ClearCommand(Command);
+ Command->CommandType = DAC960_ImmediateCommand;
+ CommandMailbox->Type3.CommandOpcode = CommandOpcode;
+ CommandMailbox->Type3.BusAddress = DataDMA;
+ DAC960_ExecuteCommand(Command);
+ CommandStatus = Command->V1.CommandStatus;
+ DAC960_DeallocateCommand(Command);
+ return (CommandStatus == DAC960_V1_NormalCompletion);
+}
+
+
+/*
+ DAC960_V1_ExecuteType3B executes a DAC960 V1 Firmware Controller Type 3B
+ Command and waits for completion. It returns true on success and false
+ on failure.
+*/
+
+static bool DAC960_V1_ExecuteType3B(DAC960_Controller_T *Controller,
+ DAC960_V1_CommandOpcode_T CommandOpcode,
+ unsigned char CommandOpcode2,
+ dma_addr_t DataDMA)
+{
+ DAC960_Command_T *Command = DAC960_AllocateCommand(Controller);
+ DAC960_V1_CommandMailbox_T *CommandMailbox = &Command->V1.CommandMailbox;
+ DAC960_V1_CommandStatus_T CommandStatus;
+ DAC960_V1_ClearCommand(Command);
+ Command->CommandType = DAC960_ImmediateCommand;
+ CommandMailbox->Type3B.CommandOpcode = CommandOpcode;
+ CommandMailbox->Type3B.CommandOpcode2 = CommandOpcode2;
+ CommandMailbox->Type3B.BusAddress = DataDMA;
+ DAC960_ExecuteCommand(Command);
+ CommandStatus = Command->V1.CommandStatus;
+ DAC960_DeallocateCommand(Command);
+ return (CommandStatus == DAC960_V1_NormalCompletion);
+}
+
+
+/*
+ DAC960_V1_ExecuteType3D executes a DAC960 V1 Firmware Controller Type 3D
+ Command and waits for completion. It returns true on success and false
+ on failure.
+*/
+
+static bool DAC960_V1_ExecuteType3D(DAC960_Controller_T *Controller,
+ DAC960_V1_CommandOpcode_T CommandOpcode,
+ unsigned char Channel,
+ unsigned char TargetID,
+ dma_addr_t DataDMA)
+{
+ DAC960_Command_T *Command = DAC960_AllocateCommand(Controller);
+ DAC960_V1_CommandMailbox_T *CommandMailbox = &Command->V1.CommandMailbox;
+ DAC960_V1_CommandStatus_T CommandStatus;
+ DAC960_V1_ClearCommand(Command);
+ Command->CommandType = DAC960_ImmediateCommand;
+ CommandMailbox->Type3D.CommandOpcode = CommandOpcode;
+ CommandMailbox->Type3D.Channel = Channel;
+ CommandMailbox->Type3D.TargetID = TargetID;
+ CommandMailbox->Type3D.BusAddress = DataDMA;
+ DAC960_ExecuteCommand(Command);
+ CommandStatus = Command->V1.CommandStatus;
+ DAC960_DeallocateCommand(Command);
+ return (CommandStatus == DAC960_V1_NormalCompletion);
+}
+
+
+/*
+ DAC960_V2_GeneralInfo executes a DAC960 V2 Firmware General Information
+ Reading IOCTL Command and waits for completion. It returns true on success
+ and false on failure.
+
+ Data is returned in the controller's HealthStatusBuffer, which is dma-able memory.
+*/
+
+static bool DAC960_V2_GeneralInfo(DAC960_Controller_T *Controller)
+{
+ DAC960_Command_T *Command = DAC960_AllocateCommand(Controller);
+ DAC960_V2_CommandMailbox_T *CommandMailbox = &Command->V2.CommandMailbox;
+ DAC960_V2_CommandStatus_T CommandStatus;
+ DAC960_V2_ClearCommand(Command);
+ Command->CommandType = DAC960_ImmediateCommand;
+ CommandMailbox->Common.CommandOpcode = DAC960_V2_IOCTL;
+ CommandMailbox->Common.CommandControlBits
+ .DataTransferControllerToHost = true;
+ CommandMailbox->Common.CommandControlBits
+ .NoAutoRequestSense = true;
+ CommandMailbox->Common.DataTransferSize = sizeof(DAC960_V2_HealthStatusBuffer_T);
+ CommandMailbox->Common.IOCTL_Opcode = DAC960_V2_GetHealthStatus;
+ CommandMailbox->Common.DataTransferMemoryAddress
+ .ScatterGatherSegments[0]
+ .SegmentDataPointer =
+ Controller->V2.HealthStatusBufferDMA;
+ CommandMailbox->Common.DataTransferMemoryAddress
+ .ScatterGatherSegments[0]
+ .SegmentByteCount =
+ CommandMailbox->Common.DataTransferSize;
+ DAC960_ExecuteCommand(Command);
+ CommandStatus = Command->V2.CommandStatus;
+ DAC960_DeallocateCommand(Command);
+ return (CommandStatus == DAC960_V2_NormalCompletion);
+}
+
+
+/*
+ DAC960_V2_ControllerInfo executes a DAC960 V2 Firmware Controller
+ Information Reading IOCTL Command and waits for completion. It returns
+ true on success and false on failure.
+
+ Data is returned in the controller's V2.NewControllerInformation dma-able
+ memory buffer.
+*/
+
+static bool DAC960_V2_NewControllerInfo(DAC960_Controller_T *Controller)
+{
+ DAC960_Command_T *Command = DAC960_AllocateCommand(Controller);
+ DAC960_V2_CommandMailbox_T *CommandMailbox = &Command->V2.CommandMailbox;
+ DAC960_V2_CommandStatus_T CommandStatus;
+ DAC960_V2_ClearCommand(Command);
+ Command->CommandType = DAC960_ImmediateCommand;
+ CommandMailbox->ControllerInfo.CommandOpcode = DAC960_V2_IOCTL;
+ CommandMailbox->ControllerInfo.CommandControlBits
+ .DataTransferControllerToHost = true;
+ CommandMailbox->ControllerInfo.CommandControlBits
+ .NoAutoRequestSense = true;
+ CommandMailbox->ControllerInfo.DataTransferSize = sizeof(DAC960_V2_ControllerInfo_T);
+ CommandMailbox->ControllerInfo.ControllerNumber = 0;
+ CommandMailbox->ControllerInfo.IOCTL_Opcode = DAC960_V2_GetControllerInfo;
+ CommandMailbox->ControllerInfo.DataTransferMemoryAddress
+ .ScatterGatherSegments[0]
+ .SegmentDataPointer =
+ Controller->V2.NewControllerInformationDMA;
+ CommandMailbox->ControllerInfo.DataTransferMemoryAddress
+ .ScatterGatherSegments[0]
+ .SegmentByteCount =
+ CommandMailbox->ControllerInfo.DataTransferSize;
+ DAC960_ExecuteCommand(Command);
+ CommandStatus = Command->V2.CommandStatus;
+ DAC960_DeallocateCommand(Command);
+ return (CommandStatus == DAC960_V2_NormalCompletion);
+}
+
+
+/*
+ DAC960_V2_LogicalDeviceInfo executes a DAC960 V2 Firmware Controller Logical
+ Device Information Reading IOCTL Command and waits for completion. It
+ returns true on success and false on failure.
+
+ Data is returned in the controller's V2.NewLogicalDeviceInformation
+ dma-able memory buffer.
+*/
+
+static bool DAC960_V2_NewLogicalDeviceInfo(DAC960_Controller_T *Controller,
+ unsigned short LogicalDeviceNumber)
+{
+ DAC960_Command_T *Command = DAC960_AllocateCommand(Controller);
+ DAC960_V2_CommandMailbox_T *CommandMailbox = &Command->V2.CommandMailbox;
+ DAC960_V2_CommandStatus_T CommandStatus;
+
+ DAC960_V2_ClearCommand(Command);
+ Command->CommandType = DAC960_ImmediateCommand;
+ CommandMailbox->LogicalDeviceInfo.CommandOpcode =
+ DAC960_V2_IOCTL;
+ CommandMailbox->LogicalDeviceInfo.CommandControlBits
+ .DataTransferControllerToHost = true;
+ CommandMailbox->LogicalDeviceInfo.CommandControlBits
+ .NoAutoRequestSense = true;
+ CommandMailbox->LogicalDeviceInfo.DataTransferSize =
+ sizeof(DAC960_V2_LogicalDeviceInfo_T);
+ CommandMailbox->LogicalDeviceInfo.LogicalDevice.LogicalDeviceNumber =
+ LogicalDeviceNumber;
+ CommandMailbox->LogicalDeviceInfo.IOCTL_Opcode = DAC960_V2_GetLogicalDeviceInfoValid;
+ CommandMailbox->LogicalDeviceInfo.DataTransferMemoryAddress
+ .ScatterGatherSegments[0]
+ .SegmentDataPointer =
+ Controller->V2.NewLogicalDeviceInformationDMA;
+ CommandMailbox->LogicalDeviceInfo.DataTransferMemoryAddress
+ .ScatterGatherSegments[0]
+ .SegmentByteCount =
+ CommandMailbox->LogicalDeviceInfo.DataTransferSize;
+ DAC960_ExecuteCommand(Command);
+ CommandStatus = Command->V2.CommandStatus;
+ DAC960_DeallocateCommand(Command);
+ return (CommandStatus == DAC960_V2_NormalCompletion);
+}
+
+
+/*
+ DAC960_V2_PhysicalDeviceInfo executes a DAC960 V2 Firmware Controller "Read
+ Physical Device Information" IOCTL Command and waits for completion. It
+ returns true on success and false on failure.
+
+ The Channel, TargetID, LogicalUnit arguments should be 0 the first time
+ this function is called for a given controller. This will return data
+ for the "first" device on that controller. The returned data includes a
+ Channel, TargetID, LogicalUnit that can be passed in to this routine to
+ get data for the NEXT device on that controller.
+
+ Data is stored in the controller's V2.NewPhysicalDeviceInfo dma-able
+ memory buffer.
+
+*/
+
+static bool DAC960_V2_NewPhysicalDeviceInfo(DAC960_Controller_T *Controller,
+ unsigned char Channel,
+ unsigned char TargetID,
+ unsigned char LogicalUnit)
+{
+ DAC960_Command_T *Command = DAC960_AllocateCommand(Controller);
+ DAC960_V2_CommandMailbox_T *CommandMailbox = &Command->V2.CommandMailbox;
+ DAC960_V2_CommandStatus_T CommandStatus;
+
+ DAC960_V2_ClearCommand(Command);
+ Command->CommandType = DAC960_ImmediateCommand;
+ CommandMailbox->PhysicalDeviceInfo.CommandOpcode = DAC960_V2_IOCTL;
+ CommandMailbox->PhysicalDeviceInfo.CommandControlBits
+ .DataTransferControllerToHost = true;
+ CommandMailbox->PhysicalDeviceInfo.CommandControlBits
+ .NoAutoRequestSense = true;
+ CommandMailbox->PhysicalDeviceInfo.DataTransferSize =
+ sizeof(DAC960_V2_PhysicalDeviceInfo_T);
+ CommandMailbox->PhysicalDeviceInfo.PhysicalDevice.LogicalUnit = LogicalUnit;
+ CommandMailbox->PhysicalDeviceInfo.PhysicalDevice.TargetID = TargetID;
+ CommandMailbox->PhysicalDeviceInfo.PhysicalDevice.Channel = Channel;
+ CommandMailbox->PhysicalDeviceInfo.IOCTL_Opcode =
+ DAC960_V2_GetPhysicalDeviceInfoValid;
+ CommandMailbox->PhysicalDeviceInfo.DataTransferMemoryAddress
+ .ScatterGatherSegments[0]
+ .SegmentDataPointer =
+ Controller->V2.NewPhysicalDeviceInformationDMA;
+ CommandMailbox->PhysicalDeviceInfo.DataTransferMemoryAddress
+ .ScatterGatherSegments[0]
+ .SegmentByteCount =
+ CommandMailbox->PhysicalDeviceInfo.DataTransferSize;
+ DAC960_ExecuteCommand(Command);
+ CommandStatus = Command->V2.CommandStatus;
+ DAC960_DeallocateCommand(Command);
+ return (CommandStatus == DAC960_V2_NormalCompletion);
+}
+
+
+static void DAC960_V2_ConstructNewUnitSerialNumber(
+ DAC960_Controller_T *Controller,
+ DAC960_V2_CommandMailbox_T *CommandMailbox, int Channel, int TargetID,
+ int LogicalUnit)
+{
+ CommandMailbox->SCSI_10.CommandOpcode = DAC960_V2_SCSI_10_Passthru;
+ CommandMailbox->SCSI_10.CommandControlBits
+ .DataTransferControllerToHost = true;
+ CommandMailbox->SCSI_10.CommandControlBits
+ .NoAutoRequestSense = true;
+ CommandMailbox->SCSI_10.DataTransferSize =
+ sizeof(DAC960_SCSI_Inquiry_UnitSerialNumber_T);
+ CommandMailbox->SCSI_10.PhysicalDevice.LogicalUnit = LogicalUnit;
+ CommandMailbox->SCSI_10.PhysicalDevice.TargetID = TargetID;
+ CommandMailbox->SCSI_10.PhysicalDevice.Channel = Channel;
+ CommandMailbox->SCSI_10.CDBLength = 6;
+ CommandMailbox->SCSI_10.SCSI_CDB[0] = 0x12; /* INQUIRY */
+ CommandMailbox->SCSI_10.SCSI_CDB[1] = 1; /* EVPD = 1 */
+ CommandMailbox->SCSI_10.SCSI_CDB[2] = 0x80; /* Page Code */
+ CommandMailbox->SCSI_10.SCSI_CDB[3] = 0; /* Reserved */
+ CommandMailbox->SCSI_10.SCSI_CDB[4] =
+ sizeof(DAC960_SCSI_Inquiry_UnitSerialNumber_T);
+ CommandMailbox->SCSI_10.SCSI_CDB[5] = 0; /* Control */
+ CommandMailbox->SCSI_10.DataTransferMemoryAddress
+ .ScatterGatherSegments[0]
+ .SegmentDataPointer =
+ Controller->V2.NewInquiryUnitSerialNumberDMA;
+ CommandMailbox->SCSI_10.DataTransferMemoryAddress
+ .ScatterGatherSegments[0]
+ .SegmentByteCount =
+ CommandMailbox->SCSI_10.DataTransferSize;
+}
+
+
+/*
+ DAC960_V2_NewInquiryUnitSerialNumber executes a SCSI pass-through
+ Inquiry command to a SCSI device identified by Channel number,
+ Target ID, and Logical Unit Number. This function waits for completion
+ of the command.
+
+ The return data includes Unit Serial Number information for the
+ specified device.
+
+ Data is stored in the controller's V2.NewInquiryUnitSerialNumber dma-able
+ memory buffer.
+*/
+
+static bool DAC960_V2_NewInquiryUnitSerialNumber(DAC960_Controller_T *Controller,
+ int Channel, int TargetID, int LogicalUnit)
+{
+ DAC960_Command_T *Command;
+ DAC960_V2_CommandMailbox_T *CommandMailbox;
+ DAC960_V2_CommandStatus_T CommandStatus;
+
+ Command = DAC960_AllocateCommand(Controller);
+ CommandMailbox = &Command->V2.CommandMailbox;
+ DAC960_V2_ClearCommand(Command);
+ Command->CommandType = DAC960_ImmediateCommand;
+
+ DAC960_V2_ConstructNewUnitSerialNumber(Controller, CommandMailbox,
+ Channel, TargetID, LogicalUnit);
+
+ DAC960_ExecuteCommand(Command);
+ CommandStatus = Command->V2.CommandStatus;
+ DAC960_DeallocateCommand(Command);
+ return (CommandStatus == DAC960_V2_NormalCompletion);
+}
+
+
+/*
+ DAC960_V2_DeviceOperation executes a DAC960 V2 Firmware Controller Device
+ Operation IOCTL Command and waits for completion. It returns true on
+ success and false on failure.
+*/
+
+static bool DAC960_V2_DeviceOperation(DAC960_Controller_T *Controller,
+ DAC960_V2_IOCTL_Opcode_T IOCTL_Opcode,
+ DAC960_V2_OperationDevice_T
+ OperationDevice)
+{
+ DAC960_Command_T *Command = DAC960_AllocateCommand(Controller);
+ DAC960_V2_CommandMailbox_T *CommandMailbox = &Command->V2.CommandMailbox;
+ DAC960_V2_CommandStatus_T CommandStatus;
+ DAC960_V2_ClearCommand(Command);
+ Command->CommandType = DAC960_ImmediateCommand;
+ CommandMailbox->DeviceOperation.CommandOpcode = DAC960_V2_IOCTL;
+ CommandMailbox->DeviceOperation.CommandControlBits
+ .DataTransferControllerToHost = true;
+ CommandMailbox->DeviceOperation.CommandControlBits
+ .NoAutoRequestSense = true;
+ CommandMailbox->DeviceOperation.IOCTL_Opcode = IOCTL_Opcode;
+ CommandMailbox->DeviceOperation.OperationDevice = OperationDevice;
+ DAC960_ExecuteCommand(Command);
+ CommandStatus = Command->V2.CommandStatus;
+ DAC960_DeallocateCommand(Command);
+ return (CommandStatus == DAC960_V2_NormalCompletion);
+}
+
+
+/*
+ DAC960_V1_EnableMemoryMailboxInterface enables the Memory Mailbox Interface
+ for DAC960 V1 Firmware Controllers.
+
+ PD and P controller types have no memory mailbox, but still need the
+ other dma mapped memory.
+*/
+
+static bool DAC960_V1_EnableMemoryMailboxInterface(DAC960_Controller_T
+ *Controller)
+{
+ void __iomem *ControllerBaseAddress = Controller->BaseAddress;
+ DAC960_HardwareType_T hw_type = Controller->HardwareType;
+ struct pci_dev *PCI_Device = Controller->PCIDevice;
+ struct dma_loaf *DmaPages = &Controller->DmaPages;
+ size_t DmaPagesSize;
+ size_t CommandMailboxesSize;
+ size_t StatusMailboxesSize;
+
+ DAC960_V1_CommandMailbox_T *CommandMailboxesMemory;
+ dma_addr_t CommandMailboxesMemoryDMA;
+
+ DAC960_V1_StatusMailbox_T *StatusMailboxesMemory;
+ dma_addr_t StatusMailboxesMemoryDMA;
+
+ DAC960_V1_CommandMailbox_T CommandMailbox;
+ DAC960_V1_CommandStatus_T CommandStatus;
+ int TimeoutCounter;
+ int i;
+
+
+ if (pci_set_dma_mask(Controller->PCIDevice, DMA_BIT_MASK(32)))
+ return DAC960_Failure(Controller, "DMA mask out of range");
+ Controller->BounceBufferLimit = DMA_BIT_MASK(32);
+
+ if ((hw_type == DAC960_PD_Controller) || (hw_type == DAC960_P_Controller)) {
+ CommandMailboxesSize = 0;
+ StatusMailboxesSize = 0;
+ } else {
+ CommandMailboxesSize = DAC960_V1_CommandMailboxCount * sizeof(DAC960_V1_CommandMailbox_T);
+ StatusMailboxesSize = DAC960_V1_StatusMailboxCount * sizeof(DAC960_V1_StatusMailbox_T);
+ }
+ DmaPagesSize = CommandMailboxesSize + StatusMailboxesSize +
+ sizeof(DAC960_V1_DCDB_T) + sizeof(DAC960_V1_Enquiry_T) +
+ sizeof(DAC960_V1_ErrorTable_T) + sizeof(DAC960_V1_EventLogEntry_T) +
+ sizeof(DAC960_V1_RebuildProgress_T) +
+ sizeof(DAC960_V1_LogicalDriveInformationArray_T) +
+ sizeof(DAC960_V1_BackgroundInitializationStatus_T) +
+ sizeof(DAC960_V1_DeviceState_T) + sizeof(DAC960_SCSI_Inquiry_T) +
+ sizeof(DAC960_SCSI_Inquiry_UnitSerialNumber_T);
+
+ if (!init_dma_loaf(PCI_Device, DmaPages, DmaPagesSize))
+ return false;
+
+
+ if ((hw_type == DAC960_PD_Controller) || (hw_type == DAC960_P_Controller))
+ goto skip_mailboxes;
+
+ CommandMailboxesMemory = slice_dma_loaf(DmaPages,
+ CommandMailboxesSize, &CommandMailboxesMemoryDMA);
+
+ /* These are the base addresses for the command memory mailbox array */
+ Controller->V1.FirstCommandMailbox = CommandMailboxesMemory;
+ Controller->V1.FirstCommandMailboxDMA = CommandMailboxesMemoryDMA;
+
+ CommandMailboxesMemory += DAC960_V1_CommandMailboxCount - 1;
+ Controller->V1.LastCommandMailbox = CommandMailboxesMemory;
+ Controller->V1.NextCommandMailbox = Controller->V1.FirstCommandMailbox;
+ Controller->V1.PreviousCommandMailbox1 = Controller->V1.LastCommandMailbox;
+ Controller->V1.PreviousCommandMailbox2 =
+ Controller->V1.LastCommandMailbox - 1;
+
+ /* These are the base addresses for the status memory mailbox array */
+ StatusMailboxesMemory = slice_dma_loaf(DmaPages,
+ StatusMailboxesSize, &StatusMailboxesMemoryDMA);
+
+ Controller->V1.FirstStatusMailbox = StatusMailboxesMemory;
+ Controller->V1.FirstStatusMailboxDMA = StatusMailboxesMemoryDMA;
+ StatusMailboxesMemory += DAC960_V1_StatusMailboxCount - 1;
+ Controller->V1.LastStatusMailbox = StatusMailboxesMemory;
+ Controller->V1.NextStatusMailbox = Controller->V1.FirstStatusMailbox;
+
+skip_mailboxes:
+ Controller->V1.MonitoringDCDB = slice_dma_loaf(DmaPages,
+ sizeof(DAC960_V1_DCDB_T),
+ &Controller->V1.MonitoringDCDB_DMA);
+
+ Controller->V1.NewEnquiry = slice_dma_loaf(DmaPages,
+ sizeof(DAC960_V1_Enquiry_T),
+ &Controller->V1.NewEnquiryDMA);
+
+ Controller->V1.NewErrorTable = slice_dma_loaf(DmaPages,
+ sizeof(DAC960_V1_ErrorTable_T),
+ &Controller->V1.NewErrorTableDMA);
+
+ Controller->V1.EventLogEntry = slice_dma_loaf(DmaPages,
+ sizeof(DAC960_V1_EventLogEntry_T),
+ &Controller->V1.EventLogEntryDMA);
+
+ Controller->V1.RebuildProgress = slice_dma_loaf(DmaPages,
+ sizeof(DAC960_V1_RebuildProgress_T),
+ &Controller->V1.RebuildProgressDMA);
+
+ Controller->V1.NewLogicalDriveInformation = slice_dma_loaf(DmaPages,
+ sizeof(DAC960_V1_LogicalDriveInformationArray_T),
+ &Controller->V1.NewLogicalDriveInformationDMA);
+
+ Controller->V1.BackgroundInitializationStatus = slice_dma_loaf(DmaPages,
+ sizeof(DAC960_V1_BackgroundInitializationStatus_T),
+ &Controller->V1.BackgroundInitializationStatusDMA);
+
+ Controller->V1.NewDeviceState = slice_dma_loaf(DmaPages,
+ sizeof(DAC960_V1_DeviceState_T),
+ &Controller->V1.NewDeviceStateDMA);
+
+ Controller->V1.NewInquiryStandardData = slice_dma_loaf(DmaPages,
+ sizeof(DAC960_SCSI_Inquiry_T),
+ &Controller->V1.NewInquiryStandardDataDMA);
+
+ Controller->V1.NewInquiryUnitSerialNumber = slice_dma_loaf(DmaPages,
+ sizeof(DAC960_SCSI_Inquiry_UnitSerialNumber_T),
+ &Controller->V1.NewInquiryUnitSerialNumberDMA);
+
+ if ((hw_type == DAC960_PD_Controller) || (hw_type == DAC960_P_Controller))
+ return true;
+
+ /* Enable the Memory Mailbox Interface. */
+ Controller->V1.DualModeMemoryMailboxInterface = true;
+ CommandMailbox.TypeX.CommandOpcode = 0x2B;
+ CommandMailbox.TypeX.CommandIdentifier = 0;
+ CommandMailbox.TypeX.CommandOpcode2 = 0x14;
+ CommandMailbox.TypeX.CommandMailboxesBusAddress =
+ Controller->V1.FirstCommandMailboxDMA;
+ CommandMailbox.TypeX.StatusMailboxesBusAddress =
+ Controller->V1.FirstStatusMailboxDMA;
+#define TIMEOUT_COUNT 1000000
+
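+  /* The first pass attempts to enable the dual-mode mailbox interface
+     (CommandOpcode2 0x14); if that fails, the second pass falls back to
+     single mode (CommandOpcode2 0x10). */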
+ for (i = 0; i < 2; i++)
+ switch (Controller->HardwareType)
+ {
+ case DAC960_LA_Controller:
+ TimeoutCounter = TIMEOUT_COUNT;
+ while (--TimeoutCounter >= 0)
+ {
+ if (!DAC960_LA_HardwareMailboxFullP(ControllerBaseAddress))
+ break;
+ udelay(10);
+ }
+ if (TimeoutCounter < 0) return false;
+ DAC960_LA_WriteHardwareMailbox(ControllerBaseAddress, &CommandMailbox);
+ DAC960_LA_HardwareMailboxNewCommand(ControllerBaseAddress);
+ TimeoutCounter = TIMEOUT_COUNT;
+ while (--TimeoutCounter >= 0)
+ {
+ if (DAC960_LA_HardwareMailboxStatusAvailableP(
+ ControllerBaseAddress))
+ break;
+ udelay(10);
+ }
+ if (TimeoutCounter < 0) return false;
+ CommandStatus = DAC960_LA_ReadStatusRegister(ControllerBaseAddress);
+ DAC960_LA_AcknowledgeHardwareMailboxInterrupt(ControllerBaseAddress);
+ DAC960_LA_AcknowledgeHardwareMailboxStatus(ControllerBaseAddress);
+ if (CommandStatus == DAC960_V1_NormalCompletion) return true;
+ Controller->V1.DualModeMemoryMailboxInterface = false;
+ CommandMailbox.TypeX.CommandOpcode2 = 0x10;
+ break;
+ case DAC960_PG_Controller:
+ TimeoutCounter = TIMEOUT_COUNT;
+ while (--TimeoutCounter >= 0)
+ {
+ if (!DAC960_PG_HardwareMailboxFullP(ControllerBaseAddress))
+ break;
+ udelay(10);
+ }
+ if (TimeoutCounter < 0) return false;
+ DAC960_PG_WriteHardwareMailbox(ControllerBaseAddress, &CommandMailbox);
+ DAC960_PG_HardwareMailboxNewCommand(ControllerBaseAddress);
+
+ TimeoutCounter = TIMEOUT_COUNT;
+ while (--TimeoutCounter >= 0)
+ {
+ if (DAC960_PG_HardwareMailboxStatusAvailableP(
+ ControllerBaseAddress))
+ break;
+ udelay(10);
+ }
+ if (TimeoutCounter < 0) return false;
+ CommandStatus = DAC960_PG_ReadStatusRegister(ControllerBaseAddress);
+ DAC960_PG_AcknowledgeHardwareMailboxInterrupt(ControllerBaseAddress);
+ DAC960_PG_AcknowledgeHardwareMailboxStatus(ControllerBaseAddress);
+ if (CommandStatus == DAC960_V1_NormalCompletion) return true;
+ Controller->V1.DualModeMemoryMailboxInterface = false;
+ CommandMailbox.TypeX.CommandOpcode2 = 0x10;
+ break;
+ default:
+ DAC960_Failure(Controller, "Unknown Controller Type\n");
+ break;
+ }
+ return false;
+}
+
+
+/*
+ DAC960_V2_EnableMemoryMailboxInterface enables the Memory Mailbox Interface
+ for DAC960 V2 Firmware Controllers.
+
+ Aggregate the space needed for the controller's memory mailbox and
+ the other data structures that will be targets of dma transfers with
+ the controller. Allocate a dma-mapped region of memory to hold these
+ structures. Then, save CPU pointers and dma_addr_t values to reference
+ the structures that are contained in that region.
+*/
+
+static bool DAC960_V2_EnableMemoryMailboxInterface(DAC960_Controller_T
+ *Controller)
+{
+ void __iomem *ControllerBaseAddress = Controller->BaseAddress;
+ struct pci_dev *PCI_Device = Controller->PCIDevice;
+ struct dma_loaf *DmaPages = &Controller->DmaPages;
+ size_t DmaPagesSize;
+ size_t CommandMailboxesSize;
+ size_t StatusMailboxesSize;
+
+ DAC960_V2_CommandMailbox_T *CommandMailboxesMemory;
+ dma_addr_t CommandMailboxesMemoryDMA;
+
+ DAC960_V2_StatusMailbox_T *StatusMailboxesMemory;
+ dma_addr_t StatusMailboxesMemoryDMA;
+
+ DAC960_V2_CommandMailbox_T *CommandMailbox;
+ dma_addr_t CommandMailboxDMA;
+ DAC960_V2_CommandStatus_T CommandStatus;
+
+ if (!pci_set_dma_mask(Controller->PCIDevice, DMA_BIT_MASK(64)))
+ Controller->BounceBufferLimit = DMA_BIT_MASK(64);
+ else if (!pci_set_dma_mask(Controller->PCIDevice, DMA_BIT_MASK(32)))
+ Controller->BounceBufferLimit = DMA_BIT_MASK(32);
+ else
+ return DAC960_Failure(Controller, "DMA mask out of range");
+
+ /* This is a temporary dma mapping, used only in the scope of this function */
+ CommandMailbox = pci_alloc_consistent(PCI_Device,
+ sizeof(DAC960_V2_CommandMailbox_T), &CommandMailboxDMA);
+ if (CommandMailbox == NULL)
+ return false;
+
+ CommandMailboxesSize = DAC960_V2_CommandMailboxCount * sizeof(DAC960_V2_CommandMailbox_T);
+ StatusMailboxesSize = DAC960_V2_StatusMailboxCount * sizeof(DAC960_V2_StatusMailbox_T);
+ DmaPagesSize =
+ CommandMailboxesSize + StatusMailboxesSize +
+ sizeof(DAC960_V2_HealthStatusBuffer_T) +
+ sizeof(DAC960_V2_ControllerInfo_T) +
+ sizeof(DAC960_V2_LogicalDeviceInfo_T) +
+ sizeof(DAC960_V2_PhysicalDeviceInfo_T) +
+ sizeof(DAC960_SCSI_Inquiry_UnitSerialNumber_T) +
+ sizeof(DAC960_V2_Event_T) +
+ sizeof(DAC960_V2_PhysicalToLogicalDevice_T);
+
+ if (!init_dma_loaf(PCI_Device, DmaPages, DmaPagesSize)) {
+ pci_free_consistent(PCI_Device, sizeof(DAC960_V2_CommandMailbox_T),
+ CommandMailbox, CommandMailboxDMA);
+ return false;
+ }
+
+ CommandMailboxesMemory = slice_dma_loaf(DmaPages,
+ CommandMailboxesSize, &CommandMailboxesMemoryDMA);
+
+ /* These are the base addresses for the command memory mailbox array */
+ Controller->V2.FirstCommandMailbox = CommandMailboxesMemory;
+ Controller->V2.FirstCommandMailboxDMA = CommandMailboxesMemoryDMA;
+
+ CommandMailboxesMemory += DAC960_V2_CommandMailboxCount - 1;
+ Controller->V2.LastCommandMailbox = CommandMailboxesMemory;
+ Controller->V2.NextCommandMailbox = Controller->V2.FirstCommandMailbox;
+ Controller->V2.PreviousCommandMailbox1 = Controller->V2.LastCommandMailbox;
+ Controller->V2.PreviousCommandMailbox2 =
+ Controller->V2.LastCommandMailbox - 1;
+
+ /* These are the base addresses for the status memory mailbox array */
+ StatusMailboxesMemory = slice_dma_loaf(DmaPages,
+ StatusMailboxesSize, &StatusMailboxesMemoryDMA);
+
+ Controller->V2.FirstStatusMailbox = StatusMailboxesMemory;
+ Controller->V2.FirstStatusMailboxDMA = StatusMailboxesMemoryDMA;
+ StatusMailboxesMemory += DAC960_V2_StatusMailboxCount - 1;
+ Controller->V2.LastStatusMailbox = StatusMailboxesMemory;
+ Controller->V2.NextStatusMailbox = Controller->V2.FirstStatusMailbox;
+
+ Controller->V2.HealthStatusBuffer = slice_dma_loaf(DmaPages,
+ sizeof(DAC960_V2_HealthStatusBuffer_T),
+ &Controller->V2.HealthStatusBufferDMA);
+
+ Controller->V2.NewControllerInformation = slice_dma_loaf(DmaPages,
+ sizeof(DAC960_V2_ControllerInfo_T),
+ &Controller->V2.NewControllerInformationDMA);
+
+ Controller->V2.NewLogicalDeviceInformation = slice_dma_loaf(DmaPages,
+ sizeof(DAC960_V2_LogicalDeviceInfo_T),
+ &Controller->V2.NewLogicalDeviceInformationDMA);
+
+ Controller->V2.NewPhysicalDeviceInformation = slice_dma_loaf(DmaPages,
+ sizeof(DAC960_V2_PhysicalDeviceInfo_T),
+ &Controller->V2.NewPhysicalDeviceInformationDMA);
+
+ Controller->V2.NewInquiryUnitSerialNumber = slice_dma_loaf(DmaPages,
+ sizeof(DAC960_SCSI_Inquiry_UnitSerialNumber_T),
+ &Controller->V2.NewInquiryUnitSerialNumberDMA);
+
+ Controller->V2.Event = slice_dma_loaf(DmaPages,
+ sizeof(DAC960_V2_Event_T),
+ &Controller->V2.EventDMA);
+
+ Controller->V2.PhysicalToLogicalDevice = slice_dma_loaf(DmaPages,
+ sizeof(DAC960_V2_PhysicalToLogicalDevice_T),
+ &Controller->V2.PhysicalToLogicalDeviceDMA);
+
+ /*
+ Enable the Memory Mailbox Interface.
+
+ I don't know why we can't just use one of the memory mailboxes
+ we just allocated to do this, instead of using this temporary one.
+ Try this change later.
+ */
+ memset(CommandMailbox, 0, sizeof(DAC960_V2_CommandMailbox_T));
+ CommandMailbox->SetMemoryMailbox.CommandIdentifier = 1;
+ CommandMailbox->SetMemoryMailbox.CommandOpcode = DAC960_V2_IOCTL;
+ CommandMailbox->SetMemoryMailbox.CommandControlBits.NoAutoRequestSense = true;
+ CommandMailbox->SetMemoryMailbox.FirstCommandMailboxSizeKB =
+ (DAC960_V2_CommandMailboxCount * sizeof(DAC960_V2_CommandMailbox_T)) >> 10;
+ CommandMailbox->SetMemoryMailbox.FirstStatusMailboxSizeKB =
+ (DAC960_V2_StatusMailboxCount * sizeof(DAC960_V2_StatusMailbox_T)) >> 10;
+ CommandMailbox->SetMemoryMailbox.SecondCommandMailboxSizeKB = 0;
+ CommandMailbox->SetMemoryMailbox.SecondStatusMailboxSizeKB = 0;
+ CommandMailbox->SetMemoryMailbox.RequestSenseSize = 0;
+ CommandMailbox->SetMemoryMailbox.IOCTL_Opcode = DAC960_V2_SetMemoryMailbox;
+ CommandMailbox->SetMemoryMailbox.HealthStatusBufferSizeKB = 1;
+ CommandMailbox->SetMemoryMailbox.HealthStatusBufferBusAddress =
+ Controller->V2.HealthStatusBufferDMA;
+ CommandMailbox->SetMemoryMailbox.FirstCommandMailboxBusAddress =
+ Controller->V2.FirstCommandMailboxDMA;
+ CommandMailbox->SetMemoryMailbox.FirstStatusMailboxBusAddress =
+ Controller->V2.FirstStatusMailboxDMA;
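+ /*
+ Hand the SetMemoryMailbox command to the controller through its hardware
+ mailbox registers: wait for the mailbox to drain, post the command's DMA
+ address, then poll for and acknowledge the completion status.
+ */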
+ switch (Controller->HardwareType)
+ {
+ case DAC960_GEM_Controller:
+ while (DAC960_GEM_HardwareMailboxFullP(ControllerBaseAddress))
+ udelay(1);
+ DAC960_GEM_WriteHardwareMailbox(ControllerBaseAddress, CommandMailboxDMA);
+ DAC960_GEM_HardwareMailboxNewCommand(ControllerBaseAddress);
+ while (!DAC960_GEM_HardwareMailboxStatusAvailableP(ControllerBaseAddress))
+ udelay(1);
+ CommandStatus = DAC960_GEM_ReadCommandStatus(ControllerBaseAddress);
+ DAC960_GEM_AcknowledgeHardwareMailboxInterrupt(ControllerBaseAddress);
+ DAC960_GEM_AcknowledgeHardwareMailboxStatus(ControllerBaseAddress);
+ break;
+ case DAC960_BA_Controller:
+ while (DAC960_BA_HardwareMailboxFullP(ControllerBaseAddress))
+ udelay(1);
+ DAC960_BA_WriteHardwareMailbox(ControllerBaseAddress, CommandMailboxDMA);
+ DAC960_BA_HardwareMailboxNewCommand(ControllerBaseAddress);
+ while (!DAC960_BA_HardwareMailboxStatusAvailableP(ControllerBaseAddress))
+ udelay(1);
+ CommandStatus = DAC960_BA_ReadCommandStatus(ControllerBaseAddress);
+ DAC960_BA_AcknowledgeHardwareMailboxInterrupt(ControllerBaseAddress);
+ DAC960_BA_AcknowledgeHardwareMailboxStatus(ControllerBaseAddress);
+ break;
+ case DAC960_LP_Controller:
+ while (DAC960_LP_HardwareMailboxFullP(ControllerBaseAddress))
+ udelay(1);
+ DAC960_LP_WriteHardwareMailbox(ControllerBaseAddress, CommandMailboxDMA);
+ DAC960_LP_HardwareMailboxNewCommand(ControllerBaseAddress);
+ while (!DAC960_LP_HardwareMailboxStatusAvailableP(ControllerBaseAddress))
+ udelay(1);
+ CommandStatus = DAC960_LP_ReadCommandStatus(ControllerBaseAddress);
+ DAC960_LP_AcknowledgeHardwareMailboxInterrupt(ControllerBaseAddress);
+ DAC960_LP_AcknowledgeHardwareMailboxStatus(ControllerBaseAddress);
+ break;
+ default:
+ DAC960_Failure(Controller, "Unknown Controller Type\n");
+ CommandStatus = DAC960_V2_AbormalCompletion;
+ break;
+ }
+ pci_free_consistent(PCI_Device, sizeof(DAC960_V2_CommandMailbox_T),
+ CommandMailbox, CommandMailboxDMA);
+ return (CommandStatus == DAC960_V2_NormalCompletion);
+}
+
+
+/*
+ DAC960_V1_ReadControllerConfiguration reads the Configuration Information
+ from DAC960 V1 Firmware Controllers and initializes the Controller structure.
+*/
+
+static bool DAC960_V1_ReadControllerConfiguration(DAC960_Controller_T
+ *Controller)
+{
+ DAC960_V1_Enquiry2_T *Enquiry2;
+ dma_addr_t Enquiry2DMA;
+ DAC960_V1_Config2_T *Config2;
+ dma_addr_t Config2DMA;
+ int LogicalDriveNumber, Channel, TargetID;
+ struct dma_loaf local_dma;
+
+ if (!init_dma_loaf(Controller->PCIDevice, &local_dma,
+ sizeof(DAC960_V1_Enquiry2_T) + sizeof(DAC960_V1_Config2_T)))
+ return DAC960_Failure(Controller, "LOGICAL DEVICE ALLOCATION");
+
+ Enquiry2 = slice_dma_loaf(&local_dma, sizeof(DAC960_V1_Enquiry2_T), &Enquiry2DMA);
+ Config2 = slice_dma_loaf(&local_dma, sizeof(DAC960_V1_Config2_T), &Config2DMA);
+
+ if (!DAC960_V1_ExecuteType3(Controller, DAC960_V1_Enquiry,
+ Controller->V1.NewEnquiryDMA)) {
+ free_dma_loaf(Controller->PCIDevice, &local_dma);
+ return DAC960_Failure(Controller, "ENQUIRY");
+ }
+ memcpy(&Controller->V1.Enquiry, Controller->V1.NewEnquiry,
+ sizeof(DAC960_V1_Enquiry_T));
+
+ if (!DAC960_V1_ExecuteType3(Controller, DAC960_V1_Enquiry2, Enquiry2DMA)) {
+ free_dma_loaf(Controller->PCIDevice, &local_dma);
+ return DAC960_Failure(Controller, "ENQUIRY2");
+ }
+
+ if (!DAC960_V1_ExecuteType3(Controller, DAC960_V1_ReadConfig2, Config2DMA)) {
+ free_dma_loaf(Controller->PCIDevice, &local_dma);
+ return DAC960_Failure(Controller, "READ CONFIG2");
+ }
+
+ if (!DAC960_V1_ExecuteType3(Controller, DAC960_V1_GetLogicalDriveInformation,
+ Controller->V1.NewLogicalDriveInformationDMA)) {
+ free_dma_loaf(Controller->PCIDevice, &local_dma);
+ return DAC960_Failure(Controller, "GET LOGICAL DRIVE INFORMATION");
+ }
+ memcpy(&Controller->V1.LogicalDriveInformation,
+ Controller->V1.NewLogicalDriveInformation,
+ sizeof(DAC960_V1_LogicalDriveInformationArray_T));
+
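+ /*
+ Read the initial Device State for every Channel and Target combination
+ reported by the controller.
+ */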
+ for (Channel = 0; Channel < Enquiry2->ActualChannels; Channel++)
+ for (TargetID = 0; TargetID < Enquiry2->MaxTargets; TargetID++) {
+ if (!DAC960_V1_ExecuteType3D(Controller, DAC960_V1_GetDeviceState,
+ Channel, TargetID,
+ Controller->V1.NewDeviceStateDMA)) {
+ free_dma_loaf(Controller->PCIDevice, &local_dma);
+ return DAC960_Failure(Controller, "GET DEVICE STATE");
+ }
+ memcpy(&Controller->V1.DeviceState[Channel][TargetID],
+ Controller->V1.NewDeviceState, sizeof(DAC960_V1_DeviceState_T));
+ }
+ /*
+ Initialize the Controller Model Name and Full Model Name fields.
+ */
+ switch (Enquiry2->HardwareID.SubModel)
+ {
+ case DAC960_V1_P_PD_PU:
+ if (Enquiry2->SCSICapability.BusSpeed == DAC960_V1_Ultra)
+ strcpy(Controller->ModelName, "DAC960PU");
+ else strcpy(Controller->ModelName, "DAC960PD");
+ break;
+ case DAC960_V1_PL:
+ strcpy(Controller->ModelName, "DAC960PL");
+ break;
+ case DAC960_V1_PG:
+ strcpy(Controller->ModelName, "DAC960PG");
+ break;
+ case DAC960_V1_PJ:
+ strcpy(Controller->ModelName, "DAC960PJ");
+ break;
+ case DAC960_V1_PR:
+ strcpy(Controller->ModelName, "DAC960PR");
+ break;
+ case DAC960_V1_PT:
+ strcpy(Controller->ModelName, "DAC960PT");
+ break;
+ case DAC960_V1_PTL0:
+ strcpy(Controller->ModelName, "DAC960PTL0");
+ break;
+ case DAC960_V1_PRL:
+ strcpy(Controller->ModelName, "DAC960PRL");
+ break;
+ case DAC960_V1_PTL1:
+ strcpy(Controller->ModelName, "DAC960PTL1");
+ break;
+ case DAC960_V1_1164P:
+ strcpy(Controller->ModelName, "DAC1164P");
+ break;
+ default:
+ free_dma_loaf(Controller->PCIDevice, &local_dma);
+ return DAC960_Failure(Controller, "MODEL VERIFICATION");
+ }
+ strcpy(Controller->FullModelName, "Mylex ");
+ strcat(Controller->FullModelName, Controller->ModelName);
+ /*
+ Initialize the Controller Firmware Version field and verify that it
+ is a supported firmware version. The supported firmware versions are:
+
+ DAC1164P 5.06 and above
+ DAC960PTL/PRL/PJ/PG 4.06 and above
+ DAC960PU/PD/PL 3.51 and above
+ DAC960PU/PD/PL/P 2.73 and above
+ */
+#if defined(CONFIG_ALPHA)
+ /*
+ DEC Alpha machines were often equipped with DAC960 cards that were
+ OEMed from Mylex, and had their own custom firmware. Version 2.70,
+ the last custom FW revision to be released by DEC for these older
+ controllers, appears to work quite well with this driver.
+
+ Cards tested successfully were several versions each of the PD and
+ PU, called by DEC the KZPSC and KZPAC, respectively, and having
+ the Manufacturer Numbers (from Mylex), usually on a sticker on the
+ back of the board, of:
+
+ KZPSC: D040347 (1-channel) or D040348 (2-channel) or D040349 (3-channel)
+ KZPAC: D040395 (1-channel) or D040396 (2-channel) or D040397 (3-channel)
+ */
+# define FIRMWARE_27X "2.70"
+#else
+# define FIRMWARE_27X "2.73"
+#endif
+
+ if (Enquiry2->FirmwareID.MajorVersion == 0)
+ {
+ Enquiry2->FirmwareID.MajorVersion =
+ Controller->V1.Enquiry.MajorFirmwareVersion;
+ Enquiry2->FirmwareID.MinorVersion =
+ Controller->V1.Enquiry.MinorFirmwareVersion;
+ Enquiry2->FirmwareID.FirmwareType = '0';
+ Enquiry2->FirmwareID.TurnID = 0;
+ }
+ sprintf(Controller->FirmwareVersion, "%d.%02d-%c-%02d",
+ Enquiry2->FirmwareID.MajorVersion, Enquiry2->FirmwareID.MinorVersion,
+ Enquiry2->FirmwareID.FirmwareType, Enquiry2->FirmwareID.TurnID);
+ if (!((Controller->FirmwareVersion[0] == '5' &&
+ strcmp(Controller->FirmwareVersion, "5.06") >= 0) ||
+ (Controller->FirmwareVersion[0] == '4' &&
+ strcmp(Controller->FirmwareVersion, "4.06") >= 0) ||
+ (Controller->FirmwareVersion[0] == '3' &&
+ strcmp(Controller->FirmwareVersion, "3.51") >= 0) ||
+ (Controller->FirmwareVersion[0] == '2' &&
+ strcmp(Controller->FirmwareVersion, FIRMWARE_27X) >= 0)))
+ {
+ DAC960_Failure(Controller, "FIRMWARE VERSION VERIFICATION");
+ DAC960_Error("Firmware Version = '%s'\n", Controller,
+ Controller->FirmwareVersion);
+ free_dma_loaf(Controller->PCIDevice, &local_dma);
+ return false;
+ }
+ /*
+ Initialize the Controller Channels, Targets, Memory Size, and SAF-TE
+ Enclosure Management Enabled fields.
+ */
+ Controller->Channels = Enquiry2->ActualChannels;
+ Controller->Targets = Enquiry2->MaxTargets;
+ Controller->MemorySize = Enquiry2->MemorySize >> 20;
+ Controller->V1.SAFTE_EnclosureManagementEnabled =
+ (Enquiry2->FaultManagementType == DAC960_V1_SAFTE);
+ /*
+ Initialize the Controller Queue Depth, Driver Queue Depth, Logical Drive
+ Count, Maximum Blocks per Command, Controller Scatter/Gather Limit, and
+ Driver Scatter/Gather Limit. The Driver Queue Depth must be at most one
+ less than the Controller Queue Depth to allow for an automatic drive
+ rebuild operation.
+ */
+ Controller->ControllerQueueDepth = Controller->V1.Enquiry.MaxCommands;
+ Controller->DriverQueueDepth = Controller->ControllerQueueDepth - 1;
+ if (Controller->DriverQueueDepth > DAC960_MaxDriverQueueDepth)
+ Controller->DriverQueueDepth = DAC960_MaxDriverQueueDepth;
+ Controller->LogicalDriveCount =
+ Controller->V1.Enquiry.NumberOfLogicalDrives;
+ Controller->MaxBlocksPerCommand = Enquiry2->MaxBlocksPerCommand;
+ Controller->ControllerScatterGatherLimit = Enquiry2->MaxScatterGatherEntries;
+ Controller->DriverScatterGatherLimit =
+ Controller->ControllerScatterGatherLimit;
+ if (Controller->DriverScatterGatherLimit > DAC960_V1_ScatterGatherLimit)
+ Controller->DriverScatterGatherLimit = DAC960_V1_ScatterGatherLimit;
+ /*
+ Initialize the Stripe Size, Segment Size, and Geometry Translation.
+ */
+ Controller->V1.StripeSize = Config2->BlocksPerStripe * Config2->BlockFactor
+ >> (10 - DAC960_BlockSizeBits);
+ Controller->V1.SegmentSize = Config2->BlocksPerCacheLine * Config2->BlockFactor
+ >> (10 - DAC960_BlockSizeBits);
+ switch (Config2->DriveGeometry)
+ {
+ case DAC960_V1_Geometry_128_32:
+ Controller->V1.GeometryTranslationHeads = 128;
+ Controller->V1.GeometryTranslationSectors = 32;
+ break;
+ case DAC960_V1_Geometry_255_63:
+ Controller->V1.GeometryTranslationHeads = 255;
+ Controller->V1.GeometryTranslationSectors = 63;
+ break;
+ default:
+ free_dma_loaf(Controller->PCIDevice, &local_dma);
+ return DAC960_Failure(Controller, "CONFIG2 DRIVE GEOMETRY");
+ }
+ /*
+ Initialize the Background Initialization Status.
+ */
+ if ((Controller->FirmwareVersion[0] == '4' &&
+ strcmp(Controller->FirmwareVersion, "4.08") >= 0) ||
+ (Controller->FirmwareVersion[0] == '5' &&
+ strcmp(Controller->FirmwareVersion, "5.08") >= 0))
+ {
+ Controller->V1.BackgroundInitializationStatusSupported = true;
+ DAC960_V1_ExecuteType3B(Controller,
+ DAC960_V1_BackgroundInitializationControl, 0x20,
+ Controller->
+ V1.BackgroundInitializationStatusDMA);
+ memcpy(&Controller->V1.LastBackgroundInitializationStatus,
+ Controller->V1.BackgroundInitializationStatus,
+ sizeof(DAC960_V1_BackgroundInitializationStatus_T));
+ }
+ /*
+ Initialize the Logical Drive Initially Accessible flag.
+ */
+ for (LogicalDriveNumber = 0;
+ LogicalDriveNumber < Controller->LogicalDriveCount;
+ LogicalDriveNumber++)
+ if (Controller->V1.LogicalDriveInformation
+ [LogicalDriveNumber].LogicalDriveState !=
+ DAC960_V1_LogicalDrive_Offline)
+ Controller->LogicalDriveInitiallyAccessible[LogicalDriveNumber] = true;
+ Controller->V1.LastRebuildStatus = DAC960_V1_NoRebuildOrCheckInProgress;
+ free_dma_loaf(Controller->PCIDevice, &local_dma);
+ return true;
+}
+
+
+/*
+ DAC960_V2_ReadControllerConfiguration reads the Configuration Information
+ from DAC960 V2 Firmware Controllers and initializes the Controller structure.
+*/
+
+static bool DAC960_V2_ReadControllerConfiguration(DAC960_Controller_T
+ *Controller)
+{
+ DAC960_V2_ControllerInfo_T *ControllerInfo =
+ &Controller->V2.ControllerInformation;
+ unsigned short LogicalDeviceNumber = 0;
+ int ModelNameLength;
+
+ /* Get data into dma-able area, then copy into permanent location */
+ if (!DAC960_V2_NewControllerInfo(Controller))
+ return DAC960_Failure(Controller, "GET CONTROLLER INFO");
+ memcpy(ControllerInfo, Controller->V2.NewControllerInformation,
+ sizeof(DAC960_V2_ControllerInfo_T));
+
+
+ if (!DAC960_V2_GeneralInfo(Controller))
+ return DAC960_Failure(Controller, "GET HEALTH STATUS");
+
+ /*
+ Initialize the Controller Model Name and Full Model Name fields.
+ */
+ ModelNameLength = sizeof(ControllerInfo->ControllerName);
+ if (ModelNameLength > sizeof(Controller->ModelName)-1)
+ ModelNameLength = sizeof(Controller->ModelName)-1;
+ memcpy(Controller->ModelName, ControllerInfo->ControllerName,
+ ModelNameLength);
+ ModelNameLength--;
+ while (Controller->ModelName[ModelNameLength] == ' ' ||
+ Controller->ModelName[ModelNameLength] == '\0')
+ ModelNameLength--;
+ Controller->ModelName[++ModelNameLength] = '\0';
+ strcpy(Controller->FullModelName, "Mylex ");
+ strcat(Controller->FullModelName, Controller->ModelName);
+ /*
+ Initialize the Controller Firmware Version field.
+ */
+ sprintf(Controller->FirmwareVersion, "%d.%02d-%02d",
+ ControllerInfo->FirmwareMajorVersion,
+ ControllerInfo->FirmwareMinorVersion,
+ ControllerInfo->FirmwareTurnNumber);
+ if (ControllerInfo->FirmwareMajorVersion == 6 &&
+ ControllerInfo->FirmwareMinorVersion == 0 &&
+ ControllerInfo->FirmwareTurnNumber < 1)
+ {
+ DAC960_Info("FIRMWARE VERSION %s DOES NOT PROVIDE THE CONTROLLER\n",
+ Controller, Controller->FirmwareVersion);
+ DAC960_Info("STATUS MONITORING FUNCTIONALITY NEEDED BY THIS DRIVER.\n",
+ Controller);
+ DAC960_Info("PLEASE UPGRADE TO VERSION 6.00-01 OR ABOVE.\n",
+ Controller);
+ }
+ /*
+ Initialize the Controller Channels, Targets, and Memory Size.
+ */
+ Controller->Channels = ControllerInfo->NumberOfPhysicalChannelsPresent;
+ Controller->Targets =
+ ControllerInfo->MaximumTargetsPerChannel
+ [ControllerInfo->NumberOfPhysicalChannelsPresent-1];
+ Controller->MemorySize = ControllerInfo->MemorySizeMB;
+ /*
+ Initialize the Controller Queue Depth, Driver Queue Depth, Logical Drive
+ Count, Maximum Blocks per Command, Controller Scatter/Gather Limit, and
+ Driver Scatter/Gather Limit. The Driver Queue Depth must be at most one
+ less than the Controller Queue Depth to allow for an automatic drive
+ rebuild operation.
+ */
+ Controller->ControllerQueueDepth = ControllerInfo->MaximumParallelCommands;
+ Controller->DriverQueueDepth = Controller->ControllerQueueDepth - 1;
+ if (Controller->DriverQueueDepth > DAC960_MaxDriverQueueDepth)
+ Controller->DriverQueueDepth = DAC960_MaxDriverQueueDepth;
+ Controller->LogicalDriveCount = ControllerInfo->LogicalDevicesPresent;
+ Controller->MaxBlocksPerCommand =
+ ControllerInfo->MaximumDataTransferSizeInBlocks;
+ Controller->ControllerScatterGatherLimit =
+ ControllerInfo->MaximumScatterGatherEntries;
+ Controller->DriverScatterGatherLimit =
+ Controller->ControllerScatterGatherLimit;
+ if (Controller->DriverScatterGatherLimit > DAC960_V2_ScatterGatherLimit)
+ Controller->DriverScatterGatherLimit = DAC960_V2_ScatterGatherLimit;
+ /*
+ Initialize the Logical Device Information.
+ */
+ while (true)
+ {
+ DAC960_V2_LogicalDeviceInfo_T *NewLogicalDeviceInfo =
+ Controller->V2.NewLogicalDeviceInformation;
+ DAC960_V2_LogicalDeviceInfo_T *LogicalDeviceInfo;
+ DAC960_V2_PhysicalDevice_T PhysicalDevice;
+
+ if (!DAC960_V2_NewLogicalDeviceInfo(Controller, LogicalDeviceNumber))
+ break;
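+ /*
+ Use the logical device number the controller actually reported; it may
+ differ from the number that was requested.
+ */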
+ LogicalDeviceNumber = NewLogicalDeviceInfo->LogicalDeviceNumber;
+ if (LogicalDeviceNumber >= DAC960_MaxLogicalDrives) {
+ DAC960_Error("DAC960: Logical Drive Number %d not supported\n",
+ Controller, LogicalDeviceNumber);
+ break;
+ }
+ if (NewLogicalDeviceInfo->DeviceBlockSizeInBytes != DAC960_BlockSize) {
+ DAC960_Error("DAC960: Logical Drive Block Size %d not supported\n",
+ Controller, NewLogicalDeviceInfo->DeviceBlockSizeInBytes);
+ LogicalDeviceNumber++;
+ continue;
+ }
+ PhysicalDevice.Controller = 0;
+ PhysicalDevice.Channel = NewLogicalDeviceInfo->Channel;
+ PhysicalDevice.TargetID = NewLogicalDeviceInfo->TargetID;
+ PhysicalDevice.LogicalUnit = NewLogicalDeviceInfo->LogicalUnit;
+ Controller->V2.LogicalDriveToVirtualDevice[LogicalDeviceNumber] =
+ PhysicalDevice;
+ if (NewLogicalDeviceInfo->LogicalDeviceState !=
+ DAC960_V2_LogicalDevice_Offline)
+ Controller->LogicalDriveInitiallyAccessible[LogicalDeviceNumber] = true;
+ LogicalDeviceInfo = kmalloc(sizeof(DAC960_V2_LogicalDeviceInfo_T),
+ GFP_ATOMIC);
+ if (LogicalDeviceInfo == NULL)
+ return DAC960_Failure(Controller, "LOGICAL DEVICE ALLOCATION");
+ Controller->V2.LogicalDeviceInformation[LogicalDeviceNumber] =
+ LogicalDeviceInfo;
+ memcpy(LogicalDeviceInfo, NewLogicalDeviceInfo,
+ sizeof(DAC960_V2_LogicalDeviceInfo_T));
+ LogicalDeviceNumber++;
+ }
+ return true;
+}
+
+
+/*
+ DAC960_ReportControllerConfiguration reports the Configuration Information
+ for Controller.
+*/
+
+static bool DAC960_ReportControllerConfiguration(DAC960_Controller_T
+ *Controller)
+{
+ DAC960_Info("Configuring Mylex %s PCI RAID Controller\n",
+ Controller, Controller->ModelName);
+ DAC960_Info(" Firmware Version: %s, Channels: %d, Memory Size: %dMB\n",
+ Controller, Controller->FirmwareVersion,
+ Controller->Channels, Controller->MemorySize);
+ DAC960_Info(" PCI Bus: %d, Device: %d, Function: %d, I/O Address: ",
+ Controller, Controller->Bus,
+ Controller->Device, Controller->Function);
+ if (Controller->IO_Address == 0)
+ DAC960_Info("Unassigned\n", Controller);
+ else DAC960_Info("0x%X\n", Controller, Controller->IO_Address);
+ DAC960_Info(" PCI Address: 0x%X mapped at 0x%lX, IRQ Channel: %d\n",
+ Controller, Controller->PCI_Address,
+ (unsigned long) Controller->BaseAddress,
+ Controller->IRQ_Channel);
+ DAC960_Info(" Controller Queue Depth: %d, "
+ "Maximum Blocks per Command: %d\n",
+ Controller, Controller->ControllerQueueDepth,
+ Controller->MaxBlocksPerCommand);
+ DAC960_Info(" Driver Queue Depth: %d, "
+ "Scatter/Gather Limit: %d of %d Segments\n",
+ Controller, Controller->DriverQueueDepth,
+ Controller->DriverScatterGatherLimit,
+ Controller->ControllerScatterGatherLimit);
+ if (Controller->FirmwareType == DAC960_V1_Controller)
+ {
+ DAC960_Info(" Stripe Size: %dKB, Segment Size: %dKB, "
+ "BIOS Geometry: %d/%d\n", Controller,
+ Controller->V1.StripeSize,
+ Controller->V1.SegmentSize,
+ Controller->V1.GeometryTranslationHeads,
+ Controller->V1.GeometryTranslationSectors);
+ if (Controller->V1.SAFTE_EnclosureManagementEnabled)
+ DAC960_Info(" SAF-TE Enclosure Management Enabled\n", Controller);
+ }
+ return true;
+}
+
+
+/*
+ DAC960_V1_ReadDeviceConfiguration reads the Device Configuration Information
+ for DAC960 V1 Firmware Controllers by requesting the SCSI Inquiry and SCSI
+ Inquiry Unit Serial Number information for each device connected to
+ Controller.
+*/
+
+static bool DAC960_V1_ReadDeviceConfiguration(DAC960_Controller_T
+ *Controller)
+{
+ struct dma_loaf local_dma;
+
+ dma_addr_t DCDBs_dma[DAC960_V1_MaxChannels];
+ DAC960_V1_DCDB_T *DCDBs_cpu[DAC960_V1_MaxChannels];
+
+ dma_addr_t SCSI_Inquiry_dma[DAC960_V1_MaxChannels];
+ DAC960_SCSI_Inquiry_T *SCSI_Inquiry_cpu[DAC960_V1_MaxChannels];
+
+ dma_addr_t SCSI_NewInquiryUnitSerialNumberDMA[DAC960_V1_MaxChannels];
+ DAC960_SCSI_Inquiry_UnitSerialNumber_T *SCSI_NewInquiryUnitSerialNumberCPU[DAC960_V1_MaxChannels];
+
+ struct completion Completions[DAC960_V1_MaxChannels];
+ unsigned long flags;
+ int Channel, TargetID;
+
+ if (!init_dma_loaf(Controller->PCIDevice, &local_dma,
+ DAC960_V1_MaxChannels*(sizeof(DAC960_V1_DCDB_T) +
+ sizeof(DAC960_SCSI_Inquiry_T) +
+ sizeof(DAC960_SCSI_Inquiry_UnitSerialNumber_T))))
+ return DAC960_Failure(Controller,
+ "DMA ALLOCATION FAILED IN ReadDeviceConfiguration");
+
+ for (Channel = 0; Channel < Controller->Channels; Channel++) {
+ DCDBs_cpu[Channel] = slice_dma_loaf(&local_dma,
+ sizeof(DAC960_V1_DCDB_T), DCDBs_dma + Channel);
+ SCSI_Inquiry_cpu[Channel] = slice_dma_loaf(&local_dma,
+ sizeof(DAC960_SCSI_Inquiry_T),
+ SCSI_Inquiry_dma + Channel);
+ SCSI_NewInquiryUnitSerialNumberCPU[Channel] = slice_dma_loaf(&local_dma,
+ sizeof(DAC960_SCSI_Inquiry_UnitSerialNumber_T),
+ SCSI_NewInquiryUnitSerialNumberDMA + Channel);
+ }
+
+ for (TargetID = 0; TargetID < Controller->Targets; TargetID++)
+ {
+ /*
+ * For each channel, submit a probe for a device on that channel.
+ * The timeout interval for a device that is present is 10 seconds.
+ * With this approach, the timeout periods can elapse in parallel
+ * on each channel.
+ */
+ for (Channel = 0; Channel < Controller->Channels; Channel++)
+ {
+ dma_addr_t NewInquiryStandardDataDMA = SCSI_Inquiry_dma[Channel];
+ DAC960_V1_DCDB_T *DCDB = DCDBs_cpu[Channel];
+ dma_addr_t DCDB_dma = DCDBs_dma[Channel];
+ DAC960_Command_T *Command = Controller->Commands[Channel];
+ struct completion *Completion = &Completions[Channel];
+
+ init_completion(Completion);
+ DAC960_V1_ClearCommand(Command);
+ Command->CommandType = DAC960_ImmediateCommand;
+ Command->Completion = Completion;
+ Command->V1.CommandMailbox.Type3.CommandOpcode = DAC960_V1_DCDB;
+ Command->V1.CommandMailbox.Type3.BusAddress = DCDB_dma;
+ DCDB->Channel = Channel;
+ DCDB->TargetID = TargetID;
+ DCDB->Direction = DAC960_V1_DCDB_DataTransferDeviceToSystem;
+ DCDB->EarlyStatus = false;
+ DCDB->Timeout = DAC960_V1_DCDB_Timeout_10_seconds;
+ DCDB->NoAutomaticRequestSense = false;
+ DCDB->DisconnectPermitted = true;
+ DCDB->TransferLength = sizeof(DAC960_SCSI_Inquiry_T);
+ DCDB->BusAddress = NewInquiryStandardDataDMA;
+ DCDB->CDBLength = 6;
+ DCDB->TransferLengthHigh4 = 0;
+ DCDB->SenseLength = sizeof(DCDB->SenseData);
+ DCDB->CDB[0] = 0x12; /* INQUIRY */
+ DCDB->CDB[1] = 0; /* EVPD = 0 */
+ DCDB->CDB[2] = 0; /* Page Code */
+ DCDB->CDB[3] = 0; /* Reserved */
+ DCDB->CDB[4] = sizeof(DAC960_SCSI_Inquiry_T);
+ DCDB->CDB[5] = 0; /* Control */
+
+ spin_lock_irqsave(&Controller->queue_lock, flags);
+ DAC960_QueueCommand(Command);
+ spin_unlock_irqrestore(&Controller->queue_lock, flags);
+ }
+ /*
+ * Wait for the probes submitted in the previous loop
+ * to complete. On the probes that are successful,
+ * get the serial number of the device that was found.
+ */
+ for (Channel = 0; Channel < Controller->Channels; Channel++)
+ {
+ DAC960_SCSI_Inquiry_T *InquiryStandardData =
+ &Controller->V1.InquiryStandardData[Channel][TargetID];
+ DAC960_SCSI_Inquiry_T *NewInquiryStandardData = SCSI_Inquiry_cpu[Channel];
+ dma_addr_t NewInquiryUnitSerialNumberDMA =
+ SCSI_NewInquiryUnitSerialNumberDMA[Channel];
+ DAC960_SCSI_Inquiry_UnitSerialNumber_T *NewInquiryUnitSerialNumber =
+ SCSI_NewInquiryUnitSerialNumberCPU[Channel];
+ DAC960_SCSI_Inquiry_UnitSerialNumber_T *InquiryUnitSerialNumber =
+ &Controller->V1.InquiryUnitSerialNumber[Channel][TargetID];
+ DAC960_Command_T *Command = Controller->Commands[Channel];
+ DAC960_V1_DCDB_T *DCDB = DCDBs_cpu[Channel];
+ struct completion *Completion = &Completions[Channel];
+
+ wait_for_completion(Completion);
+
+ if (Command->V1.CommandStatus != DAC960_V1_NormalCompletion) {
+ memset(InquiryStandardData, 0, sizeof(DAC960_SCSI_Inquiry_T));
+ InquiryStandardData->PeripheralDeviceType = 0x1F;
+ continue;
+ } else
+ memcpy(InquiryStandardData, NewInquiryStandardData, sizeof(DAC960_SCSI_Inquiry_T));
+
+ /* Preserve Channel and TargetID values from the previous loop */
+ Command->Completion = Completion;
+ DCDB->TransferLength = sizeof(DAC960_SCSI_Inquiry_UnitSerialNumber_T);
+ DCDB->BusAddress = NewInquiryUnitSerialNumberDMA;
+ DCDB->SenseLength = sizeof(DCDB->SenseData);
+ DCDB->CDB[0] = 0x12; /* INQUIRY */
+ DCDB->CDB[1] = 1; /* EVPD = 1 */
+ DCDB->CDB[2] = 0x80; /* Page Code */
+ DCDB->CDB[3] = 0; /* Reserved */
+ DCDB->CDB[4] = sizeof(DAC960_SCSI_Inquiry_UnitSerialNumber_T);
+ DCDB->CDB[5] = 0; /* Control */
+
+ spin_lock_irqsave(&Controller->queue_lock, flags);
+ DAC960_QueueCommand(Command);
+ spin_unlock_irqrestore(&Controller->queue_lock, flags);
+ wait_for_completion(Completion);
+
+ if (Command->V1.CommandStatus != DAC960_V1_NormalCompletion) {
+ memset(InquiryUnitSerialNumber, 0,
+ sizeof(DAC960_SCSI_Inquiry_UnitSerialNumber_T));
+ InquiryUnitSerialNumber->PeripheralDeviceType = 0x1F;
+ } else
+ memcpy(InquiryUnitSerialNumber, NewInquiryUnitSerialNumber,
+ sizeof(DAC960_SCSI_Inquiry_UnitSerialNumber_T));
+ }
+ }
+ free_dma_loaf(Controller->PCIDevice, &local_dma);
+ return true;
+}
+
+
+/*
+ DAC960_V2_ReadDeviceConfiguration reads the Device Configuration Information
+ for DAC960 V2 Firmware Controllers by requesting the Physical Device
+ Information and SCSI Inquiry Unit Serial Number information for each
+ device connected to Controller.
+*/
+
+static bool DAC960_V2_ReadDeviceConfiguration(DAC960_Controller_T
+ *Controller)
+{
+ unsigned char Channel = 0, TargetID = 0, LogicalUnit = 0;
+ unsigned short PhysicalDeviceIndex = 0;
+
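+ /*
+ Walk the controller's physical devices one at a time; each query fills
+ NewPhysicalDeviceInformation, and the loop ends when the controller
+ reports no further devices.
+ */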
+ while (true)
+ {
+ DAC960_V2_PhysicalDeviceInfo_T *NewPhysicalDeviceInfo =
+ Controller->V2.NewPhysicalDeviceInformation;
+ DAC960_V2_PhysicalDeviceInfo_T *PhysicalDeviceInfo;
+ DAC960_SCSI_Inquiry_UnitSerialNumber_T *NewInquiryUnitSerialNumber =
+ Controller->V2.NewInquiryUnitSerialNumber;
+ DAC960_SCSI_Inquiry_UnitSerialNumber_T *InquiryUnitSerialNumber;
+
+ if (!DAC960_V2_NewPhysicalDeviceInfo(Controller, Channel, TargetID, LogicalUnit))
+ break;
+
+ PhysicalDeviceInfo = kmalloc(sizeof(DAC960_V2_PhysicalDeviceInfo_T),
+ GFP_ATOMIC);
+ if (PhysicalDeviceInfo == NULL)
+ return DAC960_Failure(Controller, "PHYSICAL DEVICE ALLOCATION");
+ Controller->V2.PhysicalDeviceInformation[PhysicalDeviceIndex] =
+ PhysicalDeviceInfo;
+ memcpy(PhysicalDeviceInfo, NewPhysicalDeviceInfo,
+ sizeof(DAC960_V2_PhysicalDeviceInfo_T));
+
+ InquiryUnitSerialNumber = kmalloc(
+ sizeof(DAC960_SCSI_Inquiry_UnitSerialNumber_T), GFP_ATOMIC);
+ if (InquiryUnitSerialNumber == NULL) {
+ kfree(PhysicalDeviceInfo);
+ return DAC960_Failure(Controller, "SERIAL NUMBER ALLOCATION");
+ }
+ Controller->V2.InquiryUnitSerialNumber[PhysicalDeviceIndex] =
+ InquiryUnitSerialNumber;
+
+ Channel = NewPhysicalDeviceInfo->Channel;
+ TargetID = NewPhysicalDeviceInfo->TargetID;
+ LogicalUnit = NewPhysicalDeviceInfo->LogicalUnit;
+
+ /*
+ Some devices do NOT have Unit Serial Numbers, so this
+ command fails for them. We still want to remember that
+ those devices are present, so construct a
+ UnitSerialNumber structure for the failure case.
+ */
+ if (!DAC960_V2_NewInquiryUnitSerialNumber(Controller, Channel, TargetID, LogicalUnit)) {
+ memset(InquiryUnitSerialNumber, 0,
+ sizeof(DAC960_SCSI_Inquiry_UnitSerialNumber_T));
+ InquiryUnitSerialNumber->PeripheralDeviceType = 0x1F;
+ } else
+ memcpy(InquiryUnitSerialNumber, NewInquiryUnitSerialNumber,
+ sizeof(DAC960_SCSI_Inquiry_UnitSerialNumber_T));
+
+ PhysicalDeviceIndex++;
+ LogicalUnit++;
+ }
+ return true;
+}
+
+
+/*
+ DAC960_SanitizeInquiryData sanitizes the Vendor, Model, Revision, and
+ Product Serial Number fields of the Inquiry Standard Data and Inquiry
+ Unit Serial Number structures.
+*/
+
+static void DAC960_SanitizeInquiryData(DAC960_SCSI_Inquiry_T
+ *InquiryStandardData,
+ DAC960_SCSI_Inquiry_UnitSerialNumber_T
+ *InquiryUnitSerialNumber,
+ unsigned char *Vendor,
+ unsigned char *Model,
+ unsigned char *Revision,
+ unsigned char *SerialNumber)
+{
+ int SerialNumberLength, i;
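+ /* A Peripheral Device Type of 0x1F marks a device that was not found. */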
+ if (InquiryStandardData->PeripheralDeviceType == 0x1F) return;
+ for (i = 0; i < sizeof(InquiryStandardData->VendorIdentification); i++)
+ {
+ unsigned char VendorCharacter =
+ InquiryStandardData->VendorIdentification[i];
+ Vendor[i] = (VendorCharacter >= ' ' && VendorCharacter <= '~'
+ ? VendorCharacter : ' ');
+ }
+ Vendor[sizeof(InquiryStandardData->VendorIdentification)] = '\0';
+ for (i = 0; i < sizeof(InquiryStandardData->ProductIdentification); i++)
+ {
+ unsigned char ModelCharacter =
+ InquiryStandardData->ProductIdentification[i];
+ Model[i] = (ModelCharacter >= ' ' && ModelCharacter <= '~'
+ ? ModelCharacter : ' ');
+ }
+ Model[sizeof(InquiryStandardData->ProductIdentification)] = '\0';
+ for (i = 0; i < sizeof(InquiryStandardData->ProductRevisionLevel); i++)
+ {
+ unsigned char RevisionCharacter =
+ InquiryStandardData->ProductRevisionLevel[i];
+ Revision[i] = (RevisionCharacter >= ' ' && RevisionCharacter <= '~'
+ ? RevisionCharacter : ' ');
+ }
+ Revision[sizeof(InquiryStandardData->ProductRevisionLevel)] = '\0';
+ if (InquiryUnitSerialNumber->PeripheralDeviceType == 0x1F) return;
+ SerialNumberLength = InquiryUnitSerialNumber->PageLength;
+ if (SerialNumberLength >
+ sizeof(InquiryUnitSerialNumber->ProductSerialNumber))
+ SerialNumberLength = sizeof(InquiryUnitSerialNumber->ProductSerialNumber);
+ for (i = 0; i < SerialNumberLength; i++)
+ {
+ unsigned char SerialNumberCharacter =
+ InquiryUnitSerialNumber->ProductSerialNumber[i];
+ SerialNumber[i] =
+ (SerialNumberCharacter >= ' ' && SerialNumberCharacter <= '~'
+ ? SerialNumberCharacter : ' ');
+ }
+ SerialNumber[SerialNumberLength] = '\0';
+}
+
+
+/*
+ DAC960_V1_ReportDeviceConfiguration reports the Device Configuration
+ Information for DAC960 V1 Firmware Controllers.
+*/
+
+static bool DAC960_V1_ReportDeviceConfiguration(DAC960_Controller_T
+ *Controller)
+{
+ int LogicalDriveNumber, Channel, TargetID;
+ DAC960_Info(" Physical Devices:\n", Controller);
+ for (Channel = 0; Channel < Controller->Channels; Channel++)
+ for (TargetID = 0; TargetID < Controller->Targets; TargetID++)
+ {
+ DAC960_SCSI_Inquiry_T *InquiryStandardData =
+ &Controller->V1.InquiryStandardData[Channel][TargetID];
+ DAC960_SCSI_Inquiry_UnitSerialNumber_T *InquiryUnitSerialNumber =
+ &Controller->V1.InquiryUnitSerialNumber[Channel][TargetID];
+ DAC960_V1_DeviceState_T *DeviceState =
+ &Controller->V1.DeviceState[Channel][TargetID];
+ DAC960_V1_ErrorTableEntry_T *ErrorEntry =
+ &Controller->V1.ErrorTable.ErrorTableEntries[Channel][TargetID];
+ char Vendor[1+sizeof(InquiryStandardData->VendorIdentification)];
+ char Model[1+sizeof(InquiryStandardData->ProductIdentification)];
+ char Revision[1+sizeof(InquiryStandardData->ProductRevisionLevel)];
+ char SerialNumber[1+sizeof(InquiryUnitSerialNumber
+ ->ProductSerialNumber)];
+ if (InquiryStandardData->PeripheralDeviceType == 0x1F) continue;
+ DAC960_SanitizeInquiryData(InquiryStandardData, InquiryUnitSerialNumber,
+ Vendor, Model, Revision, SerialNumber);
+ DAC960_Info(" %d:%d%s Vendor: %s Model: %s Revision: %s\n",
+ Controller, Channel, TargetID, (TargetID < 10 ? " " : ""),
+ Vendor, Model, Revision);
+ if (InquiryUnitSerialNumber->PeripheralDeviceType != 0x1F)
+ DAC960_Info(" Serial Number: %s\n", Controller, SerialNumber);
+ if (DeviceState->Present &&
+ DeviceState->DeviceType == DAC960_V1_DiskType)
+ {
+ if (Controller->V1.DeviceResetCount[Channel][TargetID] > 0)
+ DAC960_Info(" Disk Status: %s, %u blocks, %d resets\n",
+ Controller,
+ (DeviceState->DeviceState == DAC960_V1_Device_Dead
+ ? "Dead"
+ : DeviceState->DeviceState
+ == DAC960_V1_Device_WriteOnly
+ ? "Write-Only"
+ : DeviceState->DeviceState
+ == DAC960_V1_Device_Online
+ ? "Online" : "Standby"),
+ DeviceState->DiskSize,
+ Controller->V1.DeviceResetCount[Channel][TargetID]);
+ else
+ DAC960_Info(" Disk Status: %s, %u blocks\n", Controller,
+ (DeviceState->DeviceState == DAC960_V1_Device_Dead
+ ? "Dead"
+ : DeviceState->DeviceState
+ == DAC960_V1_Device_WriteOnly
+ ? "Write-Only"
+ : DeviceState->DeviceState
+ == DAC960_V1_Device_Online
+ ? "Online" : "Standby"),
+ DeviceState->DiskSize);
+ }
+ if (ErrorEntry->ParityErrorCount > 0 ||
+ ErrorEntry->SoftErrorCount > 0 ||
+ ErrorEntry->HardErrorCount > 0 ||
+ ErrorEntry->MiscErrorCount > 0)
+ DAC960_Info(" Errors - Parity: %d, Soft: %d, "
+ "Hard: %d, Misc: %d\n", Controller,
+ ErrorEntry->ParityErrorCount,
+ ErrorEntry->SoftErrorCount,
+ ErrorEntry->HardErrorCount,
+ ErrorEntry->MiscErrorCount);
+ }
+ DAC960_Info(" Logical Drives:\n", Controller);
+ for (LogicalDriveNumber = 0;
+ LogicalDriveNumber < Controller->LogicalDriveCount;
+ LogicalDriveNumber++)
+ {
+ DAC960_V1_LogicalDriveInformation_T *LogicalDriveInformation =
+ &Controller->V1.LogicalDriveInformation[LogicalDriveNumber];
+ DAC960_Info(" /dev/rd/c%dd%d: RAID-%d, %s, %u blocks, %s\n",
+ Controller, Controller->ControllerNumber, LogicalDriveNumber,
+ LogicalDriveInformation->RAIDLevel,
+ (LogicalDriveInformation->LogicalDriveState
+ == DAC960_V1_LogicalDrive_Online
+ ? "Online"
+ : LogicalDriveInformation->LogicalDriveState
+ == DAC960_V1_LogicalDrive_Critical
+ ? "Critical" : "Offline"),
+ LogicalDriveInformation->LogicalDriveSize,
+ (LogicalDriveInformation->WriteBack
+ ? "Write Back" : "Write Thru"));
+ }
+ return true;
+}
+
+
+/*
+ DAC960_V2_ReportDeviceConfiguration reports the Device Configuration
+ Information for DAC960 V2 Firmware Controllers.
+*/
+
+static bool DAC960_V2_ReportDeviceConfiguration(DAC960_Controller_T
+ *Controller)
+{
+ int PhysicalDeviceIndex, LogicalDriveNumber;
+ DAC960_Info(" Physical Devices:\n", Controller);
+ for (PhysicalDeviceIndex = 0;
+ PhysicalDeviceIndex < DAC960_V2_MaxPhysicalDevices;
+ PhysicalDeviceIndex++)
+ {
+ DAC960_V2_PhysicalDeviceInfo_T *PhysicalDeviceInfo =
+ Controller->V2.PhysicalDeviceInformation[PhysicalDeviceIndex];
+ DAC960_SCSI_Inquiry_T *InquiryStandardData =
+ (DAC960_SCSI_Inquiry_T *) &PhysicalDeviceInfo->SCSI_InquiryData;
+ DAC960_SCSI_Inquiry_UnitSerialNumber_T *InquiryUnitSerialNumber =
+ Controller->V2.InquiryUnitSerialNumber[PhysicalDeviceIndex];
+ char Vendor[1+sizeof(InquiryStandardData->VendorIdentification)];
+ char Model[1+sizeof(InquiryStandardData->ProductIdentification)];
+ char Revision[1+sizeof(InquiryStandardData->ProductRevisionLevel)];
+ char SerialNumber[1+sizeof(InquiryUnitSerialNumber->ProductSerialNumber)];
+ if (PhysicalDeviceInfo == NULL) break;
+ DAC960_SanitizeInquiryData(InquiryStandardData, InquiryUnitSerialNumber,
+ Vendor, Model, Revision, SerialNumber);
+ DAC960_Info(" %d:%d%s Vendor: %s Model: %s Revision: %s\n",
+ Controller,
+ PhysicalDeviceInfo->Channel,
+ PhysicalDeviceInfo->TargetID,
+ (PhysicalDeviceInfo->TargetID < 10 ? " " : ""),
+ Vendor, Model, Revision);
+ if (PhysicalDeviceInfo->NegotiatedSynchronousMegaTransfers == 0)
+ DAC960_Info(" %sAsynchronous\n", Controller,
+ (PhysicalDeviceInfo->NegotiatedDataWidthBits == 16
+ ? "Wide " :""));
+ else
+ DAC960_Info(" %sSynchronous at %d MB/sec\n", Controller,
+ (PhysicalDeviceInfo->NegotiatedDataWidthBits == 16
+ ? "Wide " :""),
+ (PhysicalDeviceInfo->NegotiatedSynchronousMegaTransfers
+ * PhysicalDeviceInfo->NegotiatedDataWidthBits/8));
+ if (InquiryUnitSerialNumber->PeripheralDeviceType != 0x1F)
+ DAC960_Info(" Serial Number: %s\n", Controller, SerialNumber);
+ if (PhysicalDeviceInfo->PhysicalDeviceState ==
+ DAC960_V2_Device_Unconfigured)
+ continue;
+ DAC960_Info(" Disk Status: %s, %u blocks\n", Controller,
+ (PhysicalDeviceInfo->PhysicalDeviceState
+ == DAC960_V2_Device_Online
+ ? "Online"
+ : PhysicalDeviceInfo->PhysicalDeviceState
+ == DAC960_V2_Device_Rebuild
+ ? "Rebuild"
+ : PhysicalDeviceInfo->PhysicalDeviceState
+ == DAC960_V2_Device_Missing
+ ? "Missing"
+ : PhysicalDeviceInfo->PhysicalDeviceState
+ == DAC960_V2_Device_Critical
+ ? "Critical"
+ : PhysicalDeviceInfo->PhysicalDeviceState
+ == DAC960_V2_Device_Dead
+ ? "Dead"
+ : PhysicalDeviceInfo->PhysicalDeviceState
+ == DAC960_V2_Device_SuspectedDead
+ ? "Suspected-Dead"
+ : PhysicalDeviceInfo->PhysicalDeviceState
+ == DAC960_V2_Device_CommandedOffline
+ ? "Commanded-Offline"
+ : PhysicalDeviceInfo->PhysicalDeviceState
+ == DAC960_V2_Device_Standby
+ ? "Standby" : "Unknown"),
+ PhysicalDeviceInfo->ConfigurableDeviceSize);
+ if (PhysicalDeviceInfo->ParityErrors == 0 &&
+ PhysicalDeviceInfo->SoftErrors == 0 &&
+ PhysicalDeviceInfo->HardErrors == 0 &&
+ PhysicalDeviceInfo->MiscellaneousErrors == 0 &&
+ PhysicalDeviceInfo->CommandTimeouts == 0 &&
+ PhysicalDeviceInfo->Retries == 0 &&
+ PhysicalDeviceInfo->Aborts == 0 &&
+ PhysicalDeviceInfo->PredictedFailuresDetected == 0)
+ continue;
+ DAC960_Info(" Errors - Parity: %d, Soft: %d, "
+ "Hard: %d, Misc: %d\n", Controller,
+ PhysicalDeviceInfo->ParityErrors,
+ PhysicalDeviceInfo->SoftErrors,
+ PhysicalDeviceInfo->HardErrors,
+ PhysicalDeviceInfo->MiscellaneousErrors);
+ DAC960_Info(" Timeouts: %d, Retries: %d, "
+ "Aborts: %d, Predicted: %d\n", Controller,
+ PhysicalDeviceInfo->CommandTimeouts,
+ PhysicalDeviceInfo->Retries,
+ PhysicalDeviceInfo->Aborts,
+ PhysicalDeviceInfo->PredictedFailuresDetected);
+ }
+ DAC960_Info(" Logical Drives:\n", Controller);
+ for (LogicalDriveNumber = 0;
+ LogicalDriveNumber < DAC960_MaxLogicalDrives;
+ LogicalDriveNumber++)
+ {
+ DAC960_V2_LogicalDeviceInfo_T *LogicalDeviceInfo =
+ Controller->V2.LogicalDeviceInformation[LogicalDriveNumber];
+ unsigned char *ReadCacheStatus[] = { "Read Cache Disabled",
+ "Read Cache Enabled",
+ "Read Ahead Enabled",
+ "Intelligent Read Ahead Enabled",
+ "-", "-", "-", "-" };
+ unsigned char *WriteCacheStatus[] = { "Write Cache Disabled",
+ "Logical Device Read Only",
+ "Write Cache Enabled",
+ "Intelligent Write Cache Enabled",
+ "-", "-", "-", "-" };
+ unsigned char *GeometryTranslation;
+ if (LogicalDeviceInfo == NULL) continue;
+ switch (LogicalDeviceInfo->DriveGeometry)
+ {
+ case DAC960_V2_Geometry_128_32:
+ GeometryTranslation = "128/32";
+ break;
+ case DAC960_V2_Geometry_255_63:
+ GeometryTranslation = "255/63";
+ break;
+ default:
+ GeometryTranslation = "Invalid";
+ DAC960_Error("Illegal Logical Device Geometry %d\n",
+ Controller, LogicalDeviceInfo->DriveGeometry);
+ break;
+ }
+ DAC960_Info(" /dev/rd/c%dd%d: RAID-%d, %s, %u blocks\n",
+ Controller, Controller->ControllerNumber, LogicalDriveNumber,
+ LogicalDeviceInfo->RAIDLevel,
+ (LogicalDeviceInfo->LogicalDeviceState
+ == DAC960_V2_LogicalDevice_Online
+ ? "Online"
+ : LogicalDeviceInfo->LogicalDeviceState
+ == DAC960_V2_LogicalDevice_Critical
+ ? "Critical" : "Offline"),
+ LogicalDeviceInfo->ConfigurableDeviceSize);
+ DAC960_Info(" Logical Device %s, BIOS Geometry: %s\n",
+ Controller,
+ (LogicalDeviceInfo->LogicalDeviceControl
+ .LogicalDeviceInitialized
+ ? "Initialized" : "Uninitialized"),
+ GeometryTranslation);
+ if (LogicalDeviceInfo->StripeSize == 0)
+ {
+ if (LogicalDeviceInfo->CacheLineSize == 0)
+ DAC960_Info(" Stripe Size: N/A, "
+ "Segment Size: N/A\n", Controller);
+ else
+ DAC960_Info(" Stripe Size: N/A, "
+ "Segment Size: %dKB\n", Controller,
+ 1 << (LogicalDeviceInfo->CacheLineSize - 2));
+ }
+ else
+ {
+ if (LogicalDeviceInfo->CacheLineSize == 0)
+ DAC960_Info(" Stripe Size: %dKB, "
+ "Segment Size: N/A\n", Controller,
+ 1 << (LogicalDeviceInfo->StripeSize - 2));
+ else
+ DAC960_Info(" Stripe Size: %dKB, "
+ "Segment Size: %dKB\n", Controller,
+ 1 << (LogicalDeviceInfo->StripeSize - 2),
+ 1 << (LogicalDeviceInfo->CacheLineSize - 2));
+ }
+ DAC960_Info(" %s, %s\n", Controller,
+ ReadCacheStatus[
+ LogicalDeviceInfo->LogicalDeviceControl.ReadCache],
+ WriteCacheStatus[
+ LogicalDeviceInfo->LogicalDeviceControl.WriteCache]);
+ if (LogicalDeviceInfo->SoftErrors > 0 ||
+ LogicalDeviceInfo->CommandsFailed > 0 ||
+ LogicalDeviceInfo->DeferredWriteErrors)
+ DAC960_Info(" Errors - Soft: %d, Failed: %d, "
+ "Deferred Write: %d\n", Controller,
+ LogicalDeviceInfo->SoftErrors,
+ LogicalDeviceInfo->CommandsFailed,
+ LogicalDeviceInfo->DeferredWriteErrors);
+
+ }
+ return true;
+}
+
+/*
+ DAC960_RegisterBlockDevice registers the Block Device structures
+ associated with Controller.
+*/
+
+static bool DAC960_RegisterBlockDevice(DAC960_Controller_T *Controller)
+{
+ int MajorNumber = DAC960_MAJOR + Controller->ControllerNumber;
+ int n;
+
+ /*
+ Register the Block Device Major Number for this DAC960 Controller.
+ */
+ if (register_blkdev(MajorNumber, "dac960") < 0)
+ return false;
+
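+ /*
+ Set up a gendisk and request queue for each possible logical drive;
+ capacities are assigned later by DAC960_ComputeGenericDiskInfo.
+ */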
+ for (n = 0; n < DAC960_MaxLogicalDrives; n++) {
+ struct gendisk *disk = Controller->disks[n];
+ struct request_queue *RequestQueue;
+
+ /* For now, let all request queues share the controller's lock. */
+ RequestQueue = blk_init_queue(DAC960_RequestFunction,&Controller->queue_lock);
+ if (!RequestQueue) {
+ printk("DAC960: failure to allocate request queue\n");
+ continue;
+ }
+ Controller->RequestQueue[n] = RequestQueue;
+ blk_queue_bounce_limit(RequestQueue, Controller->BounceBufferLimit);
+ RequestQueue->queuedata = Controller;
+ blk_queue_max_segments(RequestQueue, Controller->DriverScatterGatherLimit);
+ blk_queue_max_hw_sectors(RequestQueue, Controller->MaxBlocksPerCommand);
+ disk->queue = RequestQueue;
+ sprintf(disk->disk_name, "rd/c%dd%d", Controller->ControllerNumber, n);
+ disk->major = MajorNumber;
+ disk->first_minor = n << DAC960_MaxPartitionsBits;
+ disk->fops = &DAC960_BlockDeviceOperations;
+ }
+ /*
+ Indicate the Block Device Registration completed successfully.
+ */
+ return true;
+}
+
+
+/*
+ DAC960_UnregisterBlockDevice unregisters the Block Device structures
+ associated with Controller.
+*/
+
+static void DAC960_UnregisterBlockDevice(DAC960_Controller_T *Controller)
+{
+ int MajorNumber = DAC960_MAJOR + Controller->ControllerNumber;
+ int disk;
+
+ /* Does order matter when deleting the gendisk and cleaning up its request queue? */
+ for (disk = 0; disk < DAC960_MaxLogicalDrives; disk++) {
+ del_gendisk(Controller->disks[disk]);
+ blk_cleanup_queue(Controller->RequestQueue[disk]);
+ Controller->RequestQueue[disk] = NULL;
+ }
+
+ /*
+ Unregister the Block Device Major Number for this DAC960 Controller.
+ */
+ unregister_blkdev(MajorNumber, "dac960");
+}
+
+/*
+ DAC960_ComputeGenericDiskInfo computes the values for the Generic Disk
+ Information Partition Sector Counts and Block Sizes.
+*/
+
+static void DAC960_ComputeGenericDiskInfo(DAC960_Controller_T *Controller)
+{
+ int disk;
+ for (disk = 0; disk < DAC960_MaxLogicalDrives; disk++)
+ set_capacity(Controller->disks[disk], disk_size(Controller, disk));
+}
+
+/*
+ DAC960_ReportErrorStatus reports Controller BIOS Messages passed through
+ the Error Status Register when the driver performs the BIOS handshaking.
+ It returns true for fatal errors and false otherwise.
+*/
+
+static bool DAC960_ReportErrorStatus(DAC960_Controller_T *Controller,
+ unsigned char ErrorStatus,
+ unsigned char Parameter0,
+ unsigned char Parameter1)
+{
+ switch (ErrorStatus)
+ {
+ case 0x00:
+ DAC960_Notice("Physical Device %d:%d Not Responding\n",
+ Controller, Parameter1, Parameter0);
+ break;
+ case 0x08:
+ if (Controller->DriveSpinUpMessageDisplayed) break;
+ DAC960_Notice("Spinning Up Drives\n", Controller);
+ Controller->DriveSpinUpMessageDisplayed = true;
+ break;
+ case 0x30:
+ DAC960_Notice("Configuration Checksum Error\n", Controller);
+ break;
+ case 0x60:
+ DAC960_Notice("Mirror Race Recovery Failed\n", Controller);
+ break;
+ case 0x70:
+ DAC960_Notice("Mirror Race Recovery In Progress\n", Controller);
+ break;
+ case 0x90:
+ DAC960_Notice("Physical Device %d:%d COD Mismatch\n",
+ Controller, Parameter1, Parameter0);
+ break;
+ case 0xA0:
+ DAC960_Notice("Logical Drive Installation Aborted\n", Controller);
+ break;
+ case 0xB0:
+ DAC960_Notice("Mirror Race On A Critical Logical Drive\n", Controller);
+ break;
+ case 0xD0:
+ DAC960_Notice("New Controller Configuration Found\n", Controller);
+ break;
+ case 0xF0:
+ DAC960_Error("Fatal Memory Parity Error for Controller at\n", Controller);
+ return true;
+ default:
+ DAC960_Error("Unknown Initialization Error %02X for Controller at\n",
+ Controller, ErrorStatus);
+ return true;
+ }
+ return false;
+}
+
+
+/*
+ * DAC960_DetectCleanup releases the resources that were allocated
+ * during DAC960_DetectController(). DAC960_DetectController has
+ * several internal failure points, so not ALL resources may
+ * have been allocated. It's important to free only
+ * resources that HAVE been allocated. The code below always
+ * tests that the resource has been allocated before attempting to
+ * free it.
+ */
+static void DAC960_DetectCleanup(DAC960_Controller_T *Controller)
+{
+ int i;
+
+ /* Free the memory mailbox, status, and related structures */
+ free_dma_loaf(Controller->PCIDevice, &Controller->DmaPages);
+ if (Controller->MemoryMappedAddress) {
+ switch(Controller->HardwareType)
+ {
+ case DAC960_GEM_Controller:
+ DAC960_GEM_DisableInterrupts(Controller->BaseAddress);
+ break;
+ case DAC960_BA_Controller:
+ DAC960_BA_DisableInterrupts(Controller->BaseAddress);
+ break;
+ case DAC960_LP_Controller:
+ DAC960_LP_DisableInterrupts(Controller->BaseAddress);
+ break;
+ case DAC960_LA_Controller:
+ DAC960_LA_DisableInterrupts(Controller->BaseAddress);
+ break;
+ case DAC960_PG_Controller:
+ DAC960_PG_DisableInterrupts(Controller->BaseAddress);
+ break;
+ case DAC960_PD_Controller:
+ DAC960_PD_DisableInterrupts(Controller->BaseAddress);
+ break;
+ case DAC960_P_Controller:
+ DAC960_PD_DisableInterrupts(Controller->BaseAddress);
+ break;
+ }
+ iounmap(Controller->MemoryMappedAddress);
+ }
+ if (Controller->IRQ_Channel)
+ free_irq(Controller->IRQ_Channel, Controller);
+ if (Controller->IO_Address)
+ release_region(Controller->IO_Address, 0x80);
+ pci_disable_device(Controller->PCIDevice);
+ for (i = 0; (i < DAC960_MaxLogicalDrives) && Controller->disks[i]; i++)
+ put_disk(Controller->disks[i]);
+ DAC960_Controllers[Controller->ControllerNumber] = NULL;
+ kfree(Controller);
+}
+
+
+/*
+ DAC960_DetectController detects Mylex DAC960/AcceleRAID/eXtremeRAID
+ PCI RAID Controllers by interrogating the PCI Configuration Space for
+ Controller Type.
+*/
+
+static DAC960_Controller_T *
+DAC960_DetectController(struct pci_dev *PCI_Device,
+ const struct pci_device_id *entry)
+{
+ struct DAC960_privdata *privdata =
+ (struct DAC960_privdata *)entry->driver_data;
+ irq_handler_t InterruptHandler = privdata->InterruptHandler;
+ unsigned int MemoryWindowSize = privdata->MemoryWindowSize;
+ DAC960_Controller_T *Controller = NULL;
+ unsigned char DeviceFunction = PCI_Device->devfn;
+ unsigned char ErrorStatus, Parameter0, Parameter1;
+ unsigned int IRQ_Channel;
+ void __iomem *BaseAddress;
+ int i;
+
+ Controller = kzalloc(sizeof(DAC960_Controller_T), GFP_ATOMIC);
+ if (Controller == NULL) {
+ DAC960_Error("Unable to allocate Controller structure for "
+ "Controller at\n", NULL);
+ return NULL;
+ }
+ Controller->ControllerNumber = DAC960_ControllerCount;
+ DAC960_Controllers[DAC960_ControllerCount++] = Controller;
+ Controller->Bus = PCI_Device->bus->number;
+ Controller->FirmwareType = privdata->FirmwareType;
+ Controller->HardwareType = privdata->HardwareType;
+ Controller->Device = DeviceFunction >> 3;
+ Controller->Function = DeviceFunction & 0x7;
+ Controller->PCIDevice = PCI_Device;
+ strcpy(Controller->FullModelName, "DAC960");
+
+ if (pci_enable_device(PCI_Device))
+ goto Failure;
+
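+ /*
+ The memory-mapped controller types expose their register window through
+ PCI BAR 0; the older PD and P controllers decode an I/O port range in
+ BAR 0 and place the register window in BAR 1.
+ */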
+ switch (Controller->HardwareType)
+ {
+ case DAC960_GEM_Controller:
+ Controller->PCI_Address = pci_resource_start(PCI_Device, 0);
+ break;
+ case DAC960_BA_Controller:
+ Controller->PCI_Address = pci_resource_start(PCI_Device, 0);
+ break;
+ case DAC960_LP_Controller:
+ Controller->PCI_Address = pci_resource_start(PCI_Device, 0);
+ break;
+ case DAC960_LA_Controller:
+ Controller->PCI_Address = pci_resource_start(PCI_Device, 0);
+ break;
+ case DAC960_PG_Controller:
+ Controller->PCI_Address = pci_resource_start(PCI_Device, 0);
+ break;
+ case DAC960_PD_Controller:
+ Controller->IO_Address = pci_resource_start(PCI_Device, 0);
+ Controller->PCI_Address = pci_resource_start(PCI_Device, 1);
+ break;
+ case DAC960_P_Controller:
+ Controller->IO_Address = pci_resource_start(PCI_Device, 0);
+ Controller->PCI_Address = pci_resource_start(PCI_Device, 1);
+ break;
+ }
+
+ pci_set_drvdata(PCI_Device, (void *)((long)Controller->ControllerNumber));
+ for (i = 0; i < DAC960_MaxLogicalDrives; i++) {
+ Controller->disks[i] = alloc_disk(1<<DAC960_MaxPartitionsBits);
+ if (!Controller->disks[i])
+ goto Failure;
+ Controller->disks[i]->private_data = (void *)((long)i);
+ }
+ init_waitqueue_head(&Controller->CommandWaitQueue);
+ init_waitqueue_head(&Controller->HealthStatusWaitQueue);
+ spin_lock_init(&Controller->queue_lock);
+ DAC960_AnnounceDriver(Controller);
+ /*
+ Map the Controller Register Window.
+ */
+ if (MemoryWindowSize < PAGE_SIZE)
+ MemoryWindowSize = PAGE_SIZE;
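+ /*
+ Map a page-aligned window and point BaseAddress at the controller's
+ registers within it.
+ */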
+ Controller->MemoryMappedAddress =
+ ioremap_nocache(Controller->PCI_Address & PAGE_MASK, MemoryWindowSize);
+ Controller->BaseAddress =
+ Controller->MemoryMappedAddress + (Controller->PCI_Address & ~PAGE_MASK);
+ if (Controller->MemoryMappedAddress == NULL)
+ {
+ DAC960_Error("Unable to map Controller Register Window for "
+ "Controller at\n", Controller);
+ goto Failure;
+ }
+ BaseAddress = Controller->BaseAddress;
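+ /*
+ For each controller type: wait for any firmware initialization to finish
+ (reporting BIOS handshake errors along the way), enable the memory
+ mailbox interface, enable interrupts, and install the firmware-specific
+ queueing and configuration methods.
+ */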
+ switch (Controller->HardwareType)
+ {
+ case DAC960_GEM_Controller:
+ DAC960_GEM_DisableInterrupts(BaseAddress);
+ DAC960_GEM_AcknowledgeHardwareMailboxStatus(BaseAddress);
+ udelay(1000);
+ while (DAC960_GEM_InitializationInProgressP(BaseAddress))
+ {
+ if (DAC960_GEM_ReadErrorStatus(BaseAddress, &ErrorStatus,
+ &Parameter0, &Parameter1) &&
+ DAC960_ReportErrorStatus(Controller, ErrorStatus,
+ Parameter0, Parameter1))
+ goto Failure;
+ udelay(10);
+ }
+ if (!DAC960_V2_EnableMemoryMailboxInterface(Controller))
+ {
+ DAC960_Error("Unable to Enable Memory Mailbox Interface "
+ "for Controller at\n", Controller);
+ goto Failure;
+ }
+ DAC960_GEM_EnableInterrupts(BaseAddress);
+ Controller->QueueCommand = DAC960_GEM_QueueCommand;
+ Controller->ReadControllerConfiguration =
+ DAC960_V2_ReadControllerConfiguration;
+ Controller->ReadDeviceConfiguration =
+ DAC960_V2_ReadDeviceConfiguration;
+ Controller->ReportDeviceConfiguration =
+ DAC960_V2_ReportDeviceConfiguration;
+ Controller->QueueReadWriteCommand =
+ DAC960_V2_QueueReadWriteCommand;
+ break;
+ case DAC960_BA_Controller:
+ DAC960_BA_DisableInterrupts(BaseAddress);
+ DAC960_BA_AcknowledgeHardwareMailboxStatus(BaseAddress);
+ udelay(1000);
+ while (DAC960_BA_InitializationInProgressP(BaseAddress))
+ {
+ if (DAC960_BA_ReadErrorStatus(BaseAddress, &ErrorStatus,
+ &Parameter0, &Parameter1) &&
+ DAC960_ReportErrorStatus(Controller, ErrorStatus,
+ Parameter0, Parameter1))
+ goto Failure;
+ udelay(10);
+ }
+ if (!DAC960_V2_EnableMemoryMailboxInterface(Controller))
+ {
+ DAC960_Error("Unable to Enable Memory Mailbox Interface "
+ "for Controller at\n", Controller);
+ goto Failure;
+ }
+ DAC960_BA_EnableInterrupts(BaseAddress);
+ Controller->QueueCommand = DAC960_BA_QueueCommand;
+ Controller->ReadControllerConfiguration =
+ DAC960_V2_ReadControllerConfiguration;
+ Controller->ReadDeviceConfiguration =
+ DAC960_V2_ReadDeviceConfiguration;
+ Controller->ReportDeviceConfiguration =
+ DAC960_V2_ReportDeviceConfiguration;
+ Controller->QueueReadWriteCommand =
+ DAC960_V2_QueueReadWriteCommand;
+ break;
+ case DAC960_LP_Controller:
+ DAC960_LP_DisableInterrupts(BaseAddress);
+ DAC960_LP_AcknowledgeHardwareMailboxStatus(BaseAddress);
+ udelay(1000);
+ while (DAC960_LP_InitializationInProgressP(BaseAddress))
+ {
+ if (DAC960_LP_ReadErrorStatus(BaseAddress, &ErrorStatus,
+ &Parameter0, &Parameter1) &&
+ DAC960_ReportErrorStatus(Controller, ErrorStatus,
+ Parameter0, Parameter1))
+ goto Failure;
+ udelay(10);
+ }
+ if (!DAC960_V2_EnableMemoryMailboxInterface(Controller))
+ {
+ DAC960_Error("Unable to Enable Memory Mailbox Interface "
+ "for Controller at\n", Controller);
+ goto Failure;
+ }
+ DAC960_LP_EnableInterrupts(BaseAddress);
+ Controller->QueueCommand = DAC960_LP_QueueCommand;
+ Controller->ReadControllerConfiguration =
+ DAC960_V2_ReadControllerConfiguration;
+ Controller->ReadDeviceConfiguration =
+ DAC960_V2_ReadDeviceConfiguration;
+ Controller->ReportDeviceConfiguration =
+ DAC960_V2_ReportDeviceConfiguration;
+ Controller->QueueReadWriteCommand =
+ DAC960_V2_QueueReadWriteCommand;
+ break;
+ case DAC960_LA_Controller:
+ DAC960_LA_DisableInterrupts(BaseAddress);
+ DAC960_LA_AcknowledgeHardwareMailboxStatus(BaseAddress);
+ udelay(1000);
+ while (DAC960_LA_InitializationInProgressP(BaseAddress))
+ {
+ if (DAC960_LA_ReadErrorStatus(BaseAddress, &ErrorStatus,
+ &Parameter0, &Parameter1) &&
+ DAC960_ReportErrorStatus(Controller, ErrorStatus,
+ Parameter0, Parameter1))
+ goto Failure;
+ udelay(10);
+ }
+ if (!DAC960_V1_EnableMemoryMailboxInterface(Controller))
+ {
+ DAC960_Error("Unable to Enable Memory Mailbox Interface "
+ "for Controller at\n", Controller);
+ goto Failure;
+ }
+ DAC960_LA_EnableInterrupts(BaseAddress);
+ if (Controller->V1.DualModeMemoryMailboxInterface)
+ Controller->QueueCommand = DAC960_LA_QueueCommandDualMode;
+ else Controller->QueueCommand = DAC960_LA_QueueCommandSingleMode;
+ Controller->ReadControllerConfiguration =
+ DAC960_V1_ReadControllerConfiguration;
+ Controller->ReadDeviceConfiguration =
+ DAC960_V1_ReadDeviceConfiguration;
+ Controller->ReportDeviceConfiguration =
+ DAC960_V1_ReportDeviceConfiguration;
+ Controller->QueueReadWriteCommand =
+ DAC960_V1_QueueReadWriteCommand;
+ break;
+ case DAC960_PG_Controller:
+ DAC960_PG_DisableInterrupts(BaseAddress);
+ DAC960_PG_AcknowledgeHardwareMailboxStatus(BaseAddress);
+ udelay(1000);
+ while (DAC960_PG_InitializationInProgressP(BaseAddress))
+ {
+ if (DAC960_PG_ReadErrorStatus(BaseAddress, &ErrorStatus,
+ &Parameter0, &Parameter1) &&
+ DAC960_ReportErrorStatus(Controller, ErrorStatus,
+ Parameter0, Parameter1))
+ goto Failure;
+ udelay(10);
+ }
+ if (!DAC960_V1_EnableMemoryMailboxInterface(Controller))
+ {
+ DAC960_Error("Unable to Enable Memory Mailbox Interface "
+ "for Controller at\n", Controller);
+ goto Failure;
+ }
+ DAC960_PG_EnableInterrupts(BaseAddress);
+ if (Controller->V1.DualModeMemoryMailboxInterface)
+ Controller->QueueCommand = DAC960_PG_QueueCommandDualMode;
+ else Controller->QueueCommand = DAC960_PG_QueueCommandSingleMode;
+ Controller->ReadControllerConfiguration =
+ DAC960_V1_ReadControllerConfiguration;
+ Controller->ReadDeviceConfiguration =
+ DAC960_V1_ReadDeviceConfiguration;
+ Controller->ReportDeviceConfiguration =
+ DAC960_V1_ReportDeviceConfiguration;
+ Controller->QueueReadWriteCommand =
+ DAC960_V1_QueueReadWriteCommand;
+ break;
+ case DAC960_PD_Controller:
+ if (!request_region(Controller->IO_Address, 0x80,
+ Controller->FullModelName)) {
+ DAC960_Error("IO port 0x%d busy for Controller at\n",
+ Controller, Controller->IO_Address);
+ goto Failure;
+ }
+ DAC960_PD_DisableInterrupts(BaseAddress);
+ DAC960_PD_AcknowledgeStatus(BaseAddress);
+ udelay(1000);
+ while (DAC960_PD_InitializationInProgressP(BaseAddress))
+ {
+ if (DAC960_PD_ReadErrorStatus(BaseAddress, &ErrorStatus,
+ &Parameter0, &Parameter1) &&
+ DAC960_ReportErrorStatus(Controller, ErrorStatus,
+ Parameter0, Parameter1))
+ goto Failure;
+ udelay(10);
+ }
+ if (!DAC960_V1_EnableMemoryMailboxInterface(Controller))
+ {
+ DAC960_Error("Unable to allocate DMA mapped memory "
+ "for Controller at\n", Controller);
+ goto Failure;
+ }
+ DAC960_PD_EnableInterrupts(BaseAddress);
+ Controller->QueueCommand = DAC960_PD_QueueCommand;
+ Controller->ReadControllerConfiguration =
+ DAC960_V1_ReadControllerConfiguration;
+ Controller->ReadDeviceConfiguration =
+ DAC960_V1_ReadDeviceConfiguration;
+ Controller->ReportDeviceConfiguration =
+ DAC960_V1_ReportDeviceConfiguration;
+ Controller->QueueReadWriteCommand =
+ DAC960_V1_QueueReadWriteCommand;
+ break;
+ case DAC960_P_Controller:
+ if (!request_region(Controller->IO_Address, 0x80,
+ Controller->FullModelName)){
+ DAC960_Error("IO port 0x%d busy for Controller at\n",
+ Controller, Controller->IO_Address);
+ goto Failure;
+ }
+ DAC960_PD_DisableInterrupts(BaseAddress);
+ DAC960_PD_AcknowledgeStatus(BaseAddress);
+ udelay(1000);
+ while (DAC960_PD_InitializationInProgressP(BaseAddress))
+ {
+ if (DAC960_PD_ReadErrorStatus(BaseAddress, &ErrorStatus,
+ &Parameter0, &Parameter1) &&
+ DAC960_ReportErrorStatus(Controller, ErrorStatus,
+ Parameter0, Parameter1))
+ goto Failure;
+ udelay(10);
+ }
+ if (!DAC960_V1_EnableMemoryMailboxInterface(Controller))
+ {
+ DAC960_Error("Unable to allocate DMA mapped memory"
+ "for Controller at\n", Controller);
+ goto Failure;
+ }
+ DAC960_PD_EnableInterrupts(BaseAddress);
+ Controller->QueueCommand = DAC960_P_QueueCommand;
+ Controller->ReadControllerConfiguration =
+ DAC960_V1_ReadControllerConfiguration;
+ Controller->ReadDeviceConfiguration =
+ DAC960_V1_ReadDeviceConfiguration;
+ Controller->ReportDeviceConfiguration =
+ DAC960_V1_ReportDeviceConfiguration;
+ Controller->QueueReadWriteCommand =
+ DAC960_V1_QueueReadWriteCommand;
+ break;
+ }
+ /*
+ Acquire shared access to the IRQ Channel.
+ */
+ IRQ_Channel = PCI_Device->irq;
+ if (request_irq(IRQ_Channel, InterruptHandler, IRQF_SHARED,
+ Controller->FullModelName, Controller) < 0)
+ {
+ DAC960_Error("Unable to acquire IRQ Channel %d for Controller at\n",
+ Controller, IRQ_Channel);
+ goto Failure;
+ }
+ Controller->IRQ_Channel = IRQ_Channel;
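+ /*
+ Until DAC960_CreateAuxiliaryStructures allocates the full Command set, the
+ embedded InitialCommand is the only entry on the free list; it is what the
+ configuration reading code uses to talk to the controller during
+ initialization.
+ */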
+ Controller->InitialCommand.CommandIdentifier = 1;
+ Controller->InitialCommand.Controller = Controller;
+ Controller->Commands[0] = &Controller->InitialCommand;
+ Controller->FreeCommands = &Controller->InitialCommand;
+ return Controller;
+
+Failure:
+ if (Controller->IO_Address == 0)
+ DAC960_Error("PCI Bus %d Device %d Function %d I/O Address N/A "
+ "PCI Address 0x%X\n", Controller,
+ Controller->Bus, Controller->Device,
+ Controller->Function, Controller->PCI_Address);
+ else
+ DAC960_Error("PCI Bus %d Device %d Function %d I/O Address "
+ "0x%X PCI Address 0x%X\n", Controller,
+ Controller->Bus, Controller->Device,
+ Controller->Function, Controller->IO_Address,
+ Controller->PCI_Address);
+ DAC960_DetectCleanup(Controller);
+ DAC960_ControllerCount--;
+ return NULL;
+}
+
+/*
+ DAC960_InitializeController initializes Controller.
+*/
+
+static bool
+DAC960_InitializeController(DAC960_Controller_T *Controller)
+{
+ if (DAC960_ReadControllerConfiguration(Controller) &&
+ DAC960_ReportControllerConfiguration(Controller) &&
+ DAC960_CreateAuxiliaryStructures(Controller) &&
+ DAC960_ReadDeviceConfiguration(Controller) &&
+ DAC960_ReportDeviceConfiguration(Controller) &&
+ DAC960_RegisterBlockDevice(Controller))
+ {
+ /*
+ Initialize the Monitoring Timer.
+ */
+ init_timer(&Controller->MonitoringTimer);
+ Controller->MonitoringTimer.expires =
+ jiffies + DAC960_MonitoringTimerInterval;
+ Controller->MonitoringTimer.data = (unsigned long) Controller;
+ Controller->MonitoringTimer.function = DAC960_MonitoringTimerFunction;
+ add_timer(&Controller->MonitoringTimer);
+ Controller->ControllerInitialized = true;
+ return true;
+ }
+ return false;
+}
+
+
+/*
+ DAC960_FinalizeController finalizes Controller.
+*/
+
+static void DAC960_FinalizeController(DAC960_Controller_T *Controller)
+{
+ if (Controller->ControllerInitialized)
+ {
+ unsigned long flags;
+
+ /*
+ * Acquiring and releasing lock here eliminates
+ * a very low probability race.
+ *
+ * The code below allocates controller command structures
+ * from the free list without holding the controller lock.
+ * This is safe assuming there is no other activity on
+ * the controller at the time.
+ *
+ * But, there might be a monitoring command still
+ * in progress. Setting the Shutdown flag while holding
+ * the lock ensures that there is no monitoring command
+ * in the interrupt handler currently, and any monitoring
+ * commands that complete from this time on will NOT return
+ * their command structure to the free list.
+ */
+
+ spin_lock_irqsave(&Controller->queue_lock, flags);
+ Controller->ShutdownMonitoringTimer = 1;
+ spin_unlock_irqrestore(&Controller->queue_lock, flags);
+
+ del_timer_sync(&Controller->MonitoringTimer);
+ if (Controller->FirmwareType == DAC960_V1_Controller)
+ {
+ DAC960_Notice("Flushing Cache...", Controller);
+ DAC960_V1_ExecuteType3(Controller, DAC960_V1_Flush, 0);
+ DAC960_Notice("done\n", Controller);
+
+ if (Controller->HardwareType == DAC960_PD_Controller)
+ release_region(Controller->IO_Address, 0x80);
+ }
+ else
+ {
+ DAC960_Notice("Flushing Cache...", Controller);
+ DAC960_V2_DeviceOperation(Controller, DAC960_V2_PauseDevice,
+ DAC960_V2_RAID_Controller);
+ DAC960_Notice("done\n", Controller);
+ }
+ }
+ DAC960_UnregisterBlockDevice(Controller);
+ DAC960_DestroyAuxiliaryStructures(Controller);
+ DAC960_DestroyProcEntries(Controller);
+ DAC960_DetectCleanup(Controller);
+}
+
+
+/*
+ DAC960_Probe verifies the controller's existence and
+ initializes the DAC960 Driver for that controller.
+*/
+
+static int
+DAC960_Probe(struct pci_dev *dev, const struct pci_device_id *entry)
+{
+ int disk;
+ DAC960_Controller_T *Controller;
+
+ if (DAC960_ControllerCount == DAC960_MaxControllers)
+ {
+ DAC960_Error("More than %d DAC960 Controllers detected - "
+ "ignoring from Controller at\n",
+ NULL, DAC960_MaxControllers);
+ return -ENODEV;
+ }
+
+ Controller = DAC960_DetectController(dev, entry);
+ if (!Controller)
+ return -ENODEV;
+
+ if (!DAC960_InitializeController(Controller)) {
+ DAC960_FinalizeController(Controller);
+ return -ENODEV;
+ }
+
+ for (disk = 0; disk < DAC960_MaxLogicalDrives; disk++) {
+ set_capacity(Controller->disks[disk], disk_size(Controller, disk));
+ add_disk(Controller->disks[disk]);
+ }
+ DAC960_CreateProcEntries(Controller);
+ return 0;
+}
+
+
+/*
+ DAC960_Remove finalizes the DAC960 Driver for the Controller associated
+ with PCI_Device.
+*/
+
+static void DAC960_Remove(struct pci_dev *PCI_Device)
+{
+ int Controller_Number = (long)pci_get_drvdata(PCI_Device);
+ DAC960_Controller_T *Controller = DAC960_Controllers[Controller_Number];
+ if (Controller != NULL)
+ DAC960_FinalizeController(Controller);
+}
+
+
+/*
+ DAC960_V1_QueueReadWriteCommand prepares and queues a Read/Write Command for
+ DAC960 V1 Firmware Controllers.
+*/
+
+static void DAC960_V1_QueueReadWriteCommand(DAC960_Command_T *Command)
+{
+ DAC960_Controller_T *Controller = Command->Controller;
+ DAC960_V1_CommandMailbox_T *CommandMailbox = &Command->V1.CommandMailbox;
+ DAC960_V1_ScatterGatherSegment_T *ScatterGatherList =
+ Command->V1.ScatterGatherList;
+ struct scatterlist *ScatterList = Command->V1.ScatterList;
+
+ DAC960_V1_ClearCommand(Command);
+
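+ /*
+ A single-segment transfer uses the plain Read/Write opcodes with the
+ segment's bus address placed directly in the mailbox; multi-segment
+ transfers use the Scatter/Gather opcode variants and point the mailbox at
+ the Command's Scatter/Gather List instead.
+ */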
+ if (Command->SegmentCount == 1)
+ {
+ if (Command->DmaDirection == PCI_DMA_FROMDEVICE)
+ CommandMailbox->Type5.CommandOpcode = DAC960_V1_Read;
+ else
+ CommandMailbox->Type5.CommandOpcode = DAC960_V1_Write;
+
+ CommandMailbox->Type5.LD.TransferLength = Command->BlockCount;
+ CommandMailbox->Type5.LD.LogicalDriveNumber = Command->LogicalDriveNumber;
+ CommandMailbox->Type5.LogicalBlockAddress = Command->BlockNumber;
+ CommandMailbox->Type5.BusAddress =
+ (DAC960_BusAddress32_T)sg_dma_address(ScatterList);
+ }
+ else
+ {
+ int i;
+
+ if (Command->DmaDirection == PCI_DMA_FROMDEVICE)
+ CommandMailbox->Type5.CommandOpcode = DAC960_V1_ReadWithScatterGather;
+ else
+ CommandMailbox->Type5.CommandOpcode = DAC960_V1_WriteWithScatterGather;
+
+ CommandMailbox->Type5.LD.TransferLength = Command->BlockCount;
+ CommandMailbox->Type5.LD.LogicalDriveNumber = Command->LogicalDriveNumber;
+ CommandMailbox->Type5.LogicalBlockAddress = Command->BlockNumber;
+ CommandMailbox->Type5.BusAddress = Command->V1.ScatterGatherListDMA;
+
+ CommandMailbox->Type5.ScatterGatherCount = Command->SegmentCount;
+
+ for (i = 0; i < Command->SegmentCount; i++, ScatterList++, ScatterGatherList++) {
+ ScatterGatherList->SegmentDataPointer =
+ (DAC960_BusAddress32_T)sg_dma_address(ScatterList);
+ ScatterGatherList->SegmentByteCount =
+ (DAC960_ByteCount32_T)sg_dma_len(ScatterList);
+ }
+ }
+ DAC960_QueueCommand(Command);
+}
+
+
+/*
+ DAC960_V2_QueueReadWriteCommand prepares and queues a Read/Write Command for
+ DAC960 V2 Firmware Controllers.
+*/
+
+static void DAC960_V2_QueueReadWriteCommand(DAC960_Command_T *Command)
+{
+ DAC960_Controller_T *Controller = Command->Controller;
+ DAC960_V2_CommandMailbox_T *CommandMailbox = &Command->V2.CommandMailbox;
+ struct scatterlist *ScatterList = Command->V2.ScatterList;
+
+ DAC960_V2_ClearCommand(Command);
+
+ CommandMailbox->SCSI_10.CommandOpcode = DAC960_V2_SCSI_10;
+ CommandMailbox->SCSI_10.CommandControlBits.DataTransferControllerToHost =
+ (Command->DmaDirection == PCI_DMA_FROMDEVICE);
+ CommandMailbox->SCSI_10.DataTransferSize =
+ Command->BlockCount << DAC960_BlockSizeBits;
+ CommandMailbox->SCSI_10.RequestSenseBusAddress = Command->V2.RequestSenseDMA;
+ CommandMailbox->SCSI_10.PhysicalDevice =
+ Controller->V2.LogicalDriveToVirtualDevice[Command->LogicalDriveNumber];
+ CommandMailbox->SCSI_10.RequestSenseSize = sizeof(DAC960_SCSI_RequestSense_T);
+ CommandMailbox->SCSI_10.CDBLength = 10;
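+ /*
+ Build a SCSI READ(10) (0x28) or WRITE(10) (0x2A) CDB: bytes 2-5 hold the
+ Logical Block Address most significant byte first, and bytes 7-8 hold the
+ Transfer Length in blocks.
+ */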
+ CommandMailbox->SCSI_10.SCSI_CDB[0] =
+ (Command->DmaDirection == PCI_DMA_FROMDEVICE ? 0x28 : 0x2A);
+ CommandMailbox->SCSI_10.SCSI_CDB[2] = Command->BlockNumber >> 24;
+ CommandMailbox->SCSI_10.SCSI_CDB[3] = Command->BlockNumber >> 16;
+ CommandMailbox->SCSI_10.SCSI_CDB[4] = Command->BlockNumber >> 8;
+ CommandMailbox->SCSI_10.SCSI_CDB[5] = Command->BlockNumber;
+ CommandMailbox->SCSI_10.SCSI_CDB[7] = Command->BlockCount >> 8;
+ CommandMailbox->SCSI_10.SCSI_CDB[8] = Command->BlockCount;
+
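+ /*
+ Up to two segments fit in the mailbox's inline Scatter/Gather slots; larger
+ transfers switch to the external Scatter/Gather List and record its length
+ and DMA address in the Extended Scatter/Gather fields instead.
+ */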
+ if (Command->SegmentCount == 1)
+ {
+ CommandMailbox->SCSI_10.DataTransferMemoryAddress
+ .ScatterGatherSegments[0]
+ .SegmentDataPointer =
+ (DAC960_BusAddress64_T)sg_dma_address(ScatterList);
+ CommandMailbox->SCSI_10.DataTransferMemoryAddress
+ .ScatterGatherSegments[0]
+ .SegmentByteCount =
+ CommandMailbox->SCSI_10.DataTransferSize;
+ }
+ else
+ {
+ DAC960_V2_ScatterGatherSegment_T *ScatterGatherList;
+ int i;
+
+ if (Command->SegmentCount > 2)
+ {
+ ScatterGatherList = Command->V2.ScatterGatherList;
+ CommandMailbox->SCSI_10.CommandControlBits
+ .AdditionalScatterGatherListMemory = true;
+ CommandMailbox->SCSI_10.DataTransferMemoryAddress
+ .ExtendedScatterGather.ScatterGatherList0Length = Command->SegmentCount;
+ CommandMailbox->SCSI_10.DataTransferMemoryAddress
+ .ExtendedScatterGather.ScatterGatherList0Address =
+ Command->V2.ScatterGatherListDMA;
+ }
+ else
+ ScatterGatherList = CommandMailbox->SCSI_10.DataTransferMemoryAddress
+ .ScatterGatherSegments;
+
+ for (i = 0; i < Command->SegmentCount; i++, ScatterList++, ScatterGatherList++) {
+ ScatterGatherList->SegmentDataPointer =
+ (DAC960_BusAddress64_T)sg_dma_address(ScatterList);
+ ScatterGatherList->SegmentByteCount =
+ (DAC960_ByteCount64_T)sg_dma_len(ScatterList);
+ }
+ }
+ DAC960_QueueCommand(Command);
+}
+
+
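+/*
+ DAC960_process_queue pulls requests off one Request Queue and queues them to
+ Controller, returning 1 once the queue is empty or 0 if it had to stop
+ because no free Command was available, so that the caller can resume with
+ this queue later.
+*/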
+static int DAC960_process_queue(DAC960_Controller_T *Controller, struct request_queue *req_q)
+{
+ struct request *Request;
+ DAC960_Command_T *Command;
+
+ while(1) {
+ Request = blk_peek_request(req_q);
+ if (!Request)
+ return 1;
+
+ Command = DAC960_AllocateCommand(Controller);
+ if (Command == NULL)
+ return 0;
+
+ if (rq_data_dir(Request) == READ) {
+ Command->DmaDirection = PCI_DMA_FROMDEVICE;
+ Command->CommandType = DAC960_ReadCommand;
+ } else {
+ Command->DmaDirection = PCI_DMA_TODEVICE;
+ Command->CommandType = DAC960_WriteCommand;
+ }
+ Command->Completion = Request->end_io_data;
+ Command->LogicalDriveNumber = (long)Request->rq_disk->private_data;
+ Command->BlockNumber = blk_rq_pos(Request);
+ Command->BlockCount = blk_rq_sectors(Request);
+ Command->Request = Request;
+ blk_start_request(Request);
+ Command->SegmentCount = blk_rq_map_sg(req_q,
+ Command->Request, Command->cmd_sglist);
+ /* pci_map_sg MAY change the value of SegCount */
+ Command->SegmentCount = pci_map_sg(Controller->PCIDevice, Command->cmd_sglist,
+ Command->SegmentCount, Command->DmaDirection);
+
+ DAC960_QueueReadWriteCommand(Command);
+ }
+}
+
+/*
+ DAC960_ProcessRequest removes I/O Requests from Controller's per-Logical
+ Drive Request Queues and queues them to the Controller. The queues are
+ scanned round-robin, starting at req_q_index, and scanning stops as soon as
+ no free Command is available; req_q_index records where to resume on the
+ next call.
+*/
+static void DAC960_ProcessRequest(DAC960_Controller_T *controller)
+{
+ int i;
+
+ if (!controller->ControllerInitialized)
+ return;
+
+ /* Do this better later! */
+ for (i = controller->req_q_index; i < DAC960_MaxLogicalDrives; i++) {
+ struct request_queue *req_q = controller->RequestQueue[i];
+
+ if (req_q == NULL)
+ continue;
+
+ if (!DAC960_process_queue(controller, req_q)) {
+ controller->req_q_index = i;
+ return;
+ }
+ }
+
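+ /*
+ If the scan above started partway through the queues, wrap around and give
+ the earlier queues a chance as well.
+ */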
+ if (controller->req_q_index == 0)
+ return;
+
+ for (i = 0; i < controller->req_q_index; i++) {
+ struct request_queue *req_q = controller->RequestQueue[i];
+
+ if (req_q == NULL)
+ continue;
+
+ if (!DAC960_process_queue(controller, req_q)) {
+ controller->req_q_index = i;
+ return;
+ }
+ }
+}
+
+
+/*
+ DAC960_queue_partial_rw extracts one bio from the request already
+ associated with the argument Command, constructs a new command block to
+ retry I/O only on that bio, and queues that command to the controller.
+
+ This function re-uses a previously-allocated Command, so there is no
+ failure mode from trying to allocate a command.
+*/
+
+static void DAC960_queue_partial_rw(DAC960_Command_T *Command)
+{
+ DAC960_Controller_T *Controller = Command->Controller;
+ struct request *Request = Command->Request;
+ struct request_queue *req_q = Controller->RequestQueue[Command->LogicalDriveNumber];
+
+ if (Command->DmaDirection == PCI_DMA_FROMDEVICE)
+ Command->CommandType = DAC960_ReadRetryCommand;
+ else
+ Command->CommandType = DAC960_WriteRetryCommand;
+
+ /*
+ * We could be more efficient with these mapping requests
+ * and map only the portions that we need. But since this
+ * code should almost never be called, just go with a
+ * simple coding.
+ */
+ (void)blk_rq_map_sg(req_q, Command->Request, Command->cmd_sglist);
+
+ (void)pci_map_sg(Controller->PCIDevice, Command->cmd_sglist, 1, Command->DmaDirection);
+ /*
+ * Resubmitting the request a sector at a time is really tedious.
+ * But, this should almost never happen. So, we're willing to pay
+ * this price so that in the end, as much of the transfer is completed
+ * successfully as possible.
+ */
+ Command->SegmentCount = 1;
+ Command->BlockNumber = blk_rq_pos(Request);
+ Command->BlockCount = 1;
+ DAC960_QueueReadWriteCommand(Command);
+ return;
+}
+
+/*
+ DAC960_RequestFunction is the I/O Request Function for DAC960 Controllers.
+*/
+
+static void DAC960_RequestFunction(struct request_queue *RequestQueue)
+{
+ DAC960_ProcessRequest(RequestQueue->queuedata);
+}
+
+/*
+ DAC960_ProcessCompletedRequest performs completion processing for the
+ Request associated with Command, and returns true once the entire Request
+ has completed.
+*/
+
+static inline bool DAC960_ProcessCompletedRequest(DAC960_Command_T *Command,
+ bool SuccessfulIO)
+{
+ struct request *Request = Command->Request;
+ int Error = SuccessfulIO ? 0 : -EIO;
+
+ pci_unmap_sg(Command->Controller->PCIDevice, Command->cmd_sglist,
+ Command->SegmentCount, Command->DmaDirection);
+
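+ /*
+ __blk_end_request completes BlockCount sectors (512 bytes each) of the
+ Request and returns false once the whole request has finished; in that case
+ any waiter on Command->Completion is signalled and true is returned so the
+ caller knows the Command is done with this request.
+ */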
+ if (!__blk_end_request(Request, Error, Command->BlockCount << 9)) {
+ if (Command->Completion) {
+ complete(Command->Completion);
+ Command->Completion = NULL;
+ }
+ return true;
+ }
+ return false;
+}
+
+/*
+ DAC960_V1_ReadWriteError prints an appropriate error message for Command
+ when an error occurs on a Read or Write operation.
+*/
+
+static void DAC960_V1_ReadWriteError(DAC960_Command_T *Command)
+{
+ DAC960_Controller_T *Controller = Command->Controller;
+ unsigned char *CommandName = "UNKNOWN";
+ switch (Command->CommandType)
+ {
+ case DAC960_ReadCommand:
+ case DAC960_ReadRetryCommand:
+ CommandName = "READ";
+ break;
+ case DAC960_WriteCommand:
+ case DAC960_WriteRetryCommand:
+ CommandName = "WRITE";
+ break;
+ case DAC960_MonitoringCommand:
+ case DAC960_ImmediateCommand:
+ case DAC960_QueuedCommand:
+ break;
+ }
+ switch (Command->V1.CommandStatus)
+ {
+ case DAC960_V1_IrrecoverableDataError:
+ DAC960_Error("Irrecoverable Data Error on %s:\n",
+ Controller, CommandName);
+ break;
+ case DAC960_V1_LogicalDriveNonexistentOrOffline:
+ DAC960_Error("Logical Drive Nonexistent or Offline on %s:\n",
+ Controller, CommandName);
+ break;
+ case DAC960_V1_AccessBeyondEndOfLogicalDrive:
+ DAC960_Error("Attempt to Access Beyond End of Logical Drive "
+ "on %s:\n", Controller, CommandName);
+ break;
+ case DAC960_V1_BadDataEncountered:
+ DAC960_Error("Bad Data Encountered on %s:\n", Controller, CommandName);
+ break;
+ default:
+ DAC960_Error("Unexpected Error Status %04X on %s:\n",
+ Controller, Command->V1.CommandStatus, CommandName);
+ break;
+ }
+ DAC960_Error(" /dev/rd/c%dd%d: absolute blocks %u..%u\n",
+ Controller, Controller->ControllerNumber,
+ Command->LogicalDriveNumber, Command->BlockNumber,
+ Command->BlockNumber + Command->BlockCount - 1);
+}
+
+
+/*
+ DAC960_V1_ProcessCompletedCommand performs completion processing for Command
+ for DAC960 V1 Firmware Controllers.
+*/
+
+static void DAC960_V1_ProcessCompletedCommand(DAC960_Command_T *Command)
+{
+ DAC960_Controller_T *Controller = Command->Controller;
+ DAC960_CommandType_T CommandType = Command->CommandType;
+ DAC960_V1_CommandOpcode_T CommandOpcode =
+ Command->V1.CommandMailbox.Common.CommandOpcode;
+ DAC960_V1_CommandStatus_T CommandStatus = Command->V1.CommandStatus;
+
+ if (CommandType == DAC960_ReadCommand ||
+ CommandType == DAC960_WriteCommand)
+ {
+
+#ifdef FORCE_RETRY_DEBUG
+ CommandStatus = DAC960_V1_IrrecoverableDataError;
+#endif
+
+ if (CommandStatus == DAC960_V1_NormalCompletion) {
+
+ if (!DAC960_ProcessCompletedRequest(Command, true))
+ BUG();
+
+ } else if (CommandStatus == DAC960_V1_IrrecoverableDataError ||
+ CommandStatus == DAC960_V1_BadDataEncountered)
+ {
+ /*
+ * break the command down into pieces and resubmit each
+ * piece, hoping that some of them will succeed.
+ */
+ DAC960_queue_partial_rw(Command);
+ return;
+ }
+ else
+ {
+ if (CommandStatus != DAC960_V1_LogicalDriveNonexistentOrOffline)
+ DAC960_V1_ReadWriteError(Command);
+
+ if (!DAC960_ProcessCompletedRequest(Command, false))
+ BUG();
+ }
+ }
+ else if (CommandType == DAC960_ReadRetryCommand ||
+ CommandType == DAC960_WriteRetryCommand)
+ {
+ bool normal_completion;
+#ifdef FORCE_RETRY_FAILURE_DEBUG
+ static int retry_count = 1;
+#endif
+ /*
+ Perform completion processing for the portion that was
+ retried, and submit the next portion, if any.
+ */
+ normal_completion = true;
+ if (CommandStatus != DAC960_V1_NormalCompletion) {
+ normal_completion = false;
+ if (CommandStatus != DAC960_V1_LogicalDriveNonexistentOrOffline)
+ DAC960_V1_ReadWriteError(Command);
+ }
+
+#ifdef FORCE_RETRY_FAILURE_DEBUG
+ if (!(++retry_count % 10000)) {
+ printk("V1 error retry failure test\n");
+ normal_completion = false;
+ DAC960_V1_ReadWriteError(Command);
+ }
+#endif
+
+ if (!DAC960_ProcessCompletedRequest(Command, normal_completion)) {
+ DAC960_queue_partial_rw(Command);
+ return;
+ }
+ }
+
+ else if (CommandType == DAC960_MonitoringCommand)
+ {
+ if (Controller->ShutdownMonitoringTimer)
+ return;
+ if (CommandOpcode == DAC960_V1_Enquiry)
+ {
+ DAC960_V1_Enquiry_T *OldEnquiry = &Controller->V1.Enquiry;
+ DAC960_V1_Enquiry_T *NewEnquiry = Controller->V1.NewEnquiry;
+ unsigned int OldCriticalLogicalDriveCount =
+ OldEnquiry->CriticalLogicalDriveCount;
+ unsigned int NewCriticalLogicalDriveCount =
+ NewEnquiry->CriticalLogicalDriveCount;
+ if (NewEnquiry->NumberOfLogicalDrives > Controller->LogicalDriveCount)
+ {
+ int LogicalDriveNumber = Controller->LogicalDriveCount - 1;
+ while (++LogicalDriveNumber < NewEnquiry->NumberOfLogicalDrives)
+ DAC960_Critical("Logical Drive %d (/dev/rd/c%dd%d) "
+ "Now Exists\n", Controller,
+ LogicalDriveNumber,
+ Controller->ControllerNumber,
+ LogicalDriveNumber);
+ Controller->LogicalDriveCount = NewEnquiry->NumberOfLogicalDrives;
+ DAC960_ComputeGenericDiskInfo(Controller);
+ }
+ if (NewEnquiry->NumberOfLogicalDrives < Controller->LogicalDriveCount)
+ {
+ int LogicalDriveNumber = NewEnquiry->NumberOfLogicalDrives - 1;
+ while (++LogicalDriveNumber < Controller->LogicalDriveCount)
+ DAC960_Critical("Logical Drive %d (/dev/rd/c%dd%d) "
+ "No Longer Exists\n", Controller,
+ LogicalDriveNumber,
+ Controller->ControllerNumber,
+ LogicalDriveNumber);
+ Controller->LogicalDriveCount = NewEnquiry->NumberOfLogicalDrives;
+ DAC960_ComputeGenericDiskInfo(Controller);
+ }
+ if (NewEnquiry->StatusFlags.DeferredWriteError !=
+ OldEnquiry->StatusFlags.DeferredWriteError)
+ DAC960_Critical("Deferred Write Error Flag is now %s\n", Controller,
+ (NewEnquiry->StatusFlags.DeferredWriteError
+ ? "TRUE" : "FALSE"));
+ if ((NewCriticalLogicalDriveCount > 0 ||
+ NewCriticalLogicalDriveCount != OldCriticalLogicalDriveCount) ||
+ (NewEnquiry->OfflineLogicalDriveCount > 0 ||
+ NewEnquiry->OfflineLogicalDriveCount !=
+ OldEnquiry->OfflineLogicalDriveCount) ||
+ (NewEnquiry->DeadDriveCount > 0 ||
+ NewEnquiry->DeadDriveCount !=
+ OldEnquiry->DeadDriveCount) ||
+ (NewEnquiry->EventLogSequenceNumber !=
+ OldEnquiry->EventLogSequenceNumber) ||
+ Controller->MonitoringTimerCount == 0 ||
+ time_after_eq(jiffies, Controller->SecondaryMonitoringTime
+ + DAC960_SecondaryMonitoringInterval))
+ {
+ Controller->V1.NeedLogicalDriveInformation = true;
+ Controller->V1.NewEventLogSequenceNumber =
+ NewEnquiry->EventLogSequenceNumber;
+ Controller->V1.NeedErrorTableInformation = true;
+ Controller->V1.NeedDeviceStateInformation = true;
+ Controller->V1.StartDeviceStateScan = true;
+ Controller->V1.NeedBackgroundInitializationStatus =
+ Controller->V1.BackgroundInitializationStatusSupported;
+ Controller->SecondaryMonitoringTime = jiffies;
+ }
+ if (NewEnquiry->RebuildFlag == DAC960_V1_StandbyRebuildInProgress ||
+ NewEnquiry->RebuildFlag
+ == DAC960_V1_BackgroundRebuildInProgress ||
+ OldEnquiry->RebuildFlag == DAC960_V1_StandbyRebuildInProgress ||
+ OldEnquiry->RebuildFlag == DAC960_V1_BackgroundRebuildInProgress)
+ {
+ Controller->V1.NeedRebuildProgress = true;
+ Controller->V1.RebuildProgressFirst =
+ (NewEnquiry->CriticalLogicalDriveCount <
+ OldEnquiry->CriticalLogicalDriveCount);
+ }
+ if (OldEnquiry->RebuildFlag == DAC960_V1_BackgroundCheckInProgress)
+ switch (NewEnquiry->RebuildFlag)
+ {
+ case DAC960_V1_NoStandbyRebuildOrCheckInProgress:
+ DAC960_Progress("Consistency Check Completed Successfully\n",
+ Controller);
+ break;
+ case DAC960_V1_StandbyRebuildInProgress:
+ case DAC960_V1_BackgroundRebuildInProgress:
+ break;
+ case DAC960_V1_BackgroundCheckInProgress:
+ Controller->V1.NeedConsistencyCheckProgress = true;
+ break;
+ case DAC960_V1_StandbyRebuildCompletedWithError:
+ DAC960_Progress("Consistency Check Completed with Error\n",
+ Controller);
+ break;
+ case DAC960_V1_BackgroundRebuildOrCheckFailed_DriveFailed:
+ DAC960_Progress("Consistency Check Failed - "
+ "Physical Device Failed\n", Controller);
+ break;
+ case DAC960_V1_BackgroundRebuildOrCheckFailed_LogicalDriveFailed:
+ DAC960_Progress("Consistency Check Failed - "
+ "Logical Drive Failed\n", Controller);
+ break;
+ case DAC960_V1_BackgroundRebuildOrCheckFailed_OtherCauses:
+ DAC960_Progress("Consistency Check Failed - Other Causes\n",
+ Controller);
+ break;
+ case DAC960_V1_BackgroundRebuildOrCheckSuccessfullyTerminated:
+ DAC960_Progress("Consistency Check Successfully Terminated\n",
+ Controller);
+ break;
+ }
+ else if (NewEnquiry->RebuildFlag
+ == DAC960_V1_BackgroundCheckInProgress)
+ Controller->V1.NeedConsistencyCheckProgress = true;
+ Controller->MonitoringAlertMode =
+ (NewEnquiry->CriticalLogicalDriveCount > 0 ||
+ NewEnquiry->OfflineLogicalDriveCount > 0 ||
+ NewEnquiry->DeadDriveCount > 0);
+ if (NewEnquiry->RebuildFlag > DAC960_V1_BackgroundCheckInProgress)
+ {
+ Controller->V1.PendingRebuildFlag = NewEnquiry->RebuildFlag;
+ Controller->V1.RebuildFlagPending = true;
+ }
+ memcpy(&Controller->V1.Enquiry, &Controller->V1.NewEnquiry,
+ sizeof(DAC960_V1_Enquiry_T));
+ }
+ else if (CommandOpcode == DAC960_V1_PerformEventLogOperation)
+ {
+ static char
+ *DAC960_EventMessages[] =
+ { "killed because write recovery failed",
+ "killed because of SCSI bus reset failure",
+ "killed because of double check condition",
+ "killed because it was removed",
+ "killed because of gross error on SCSI chip",
+ "killed because of bad tag returned from drive",
+ "killed because of timeout on SCSI command",
+ "killed because of reset SCSI command issued from system",
+ "killed because busy or parity error count exceeded limit",
+ "killed because of 'kill drive' command from system",
+ "killed because of selection timeout",
+ "killed due to SCSI phase sequence error",
+ "killed due to unknown status" };
+ DAC960_V1_EventLogEntry_T *EventLogEntry =
+ Controller->V1.EventLogEntry;
+ if (EventLogEntry->SequenceNumber ==
+ Controller->V1.OldEventLogSequenceNumber)
+ {
+ unsigned char SenseKey = EventLogEntry->SenseKey;
+ unsigned char AdditionalSenseCode =
+ EventLogEntry->AdditionalSenseCode;
+ unsigned char AdditionalSenseCodeQualifier =
+ EventLogEntry->AdditionalSenseCodeQualifier;
+ if (SenseKey == DAC960_SenseKey_VendorSpecific &&
+ AdditionalSenseCode == 0x80 &&
+ AdditionalSenseCodeQualifier <
+ ARRAY_SIZE(DAC960_EventMessages))
+ DAC960_Critical("Physical Device %d:%d %s\n", Controller,
+ EventLogEntry->Channel,
+ EventLogEntry->TargetID,
+ DAC960_EventMessages[
+ AdditionalSenseCodeQualifier]);
+ else if (SenseKey == DAC960_SenseKey_UnitAttention &&
+ AdditionalSenseCode == 0x29)
+ {
+ if (Controller->MonitoringTimerCount > 0)
+ Controller->V1.DeviceResetCount[EventLogEntry->Channel]
+ [EventLogEntry->TargetID]++;
+ }
+ else if (!(SenseKey == DAC960_SenseKey_NoSense ||
+ (SenseKey == DAC960_SenseKey_NotReady &&
+ AdditionalSenseCode == 0x04 &&
+ (AdditionalSenseCodeQualifier == 0x01 ||
+ AdditionalSenseCodeQualifier == 0x02))))
+ {
+ DAC960_Critical("Physical Device %d:%d Error Log: "
+ "Sense Key = %X, ASC = %02X, ASCQ = %02X\n",
+ Controller,
+ EventLogEntry->Channel,
+ EventLogEntry->TargetID,
+ SenseKey,
+ AdditionalSenseCode,
+ AdditionalSenseCodeQualifier);
+ DAC960_Critical("Physical Device %d:%d Error Log: "
+ "Information = %02X%02X%02X%02X "
+ "%02X%02X%02X%02X\n",
+ Controller,
+ EventLogEntry->Channel,
+ EventLogEntry->TargetID,
+ EventLogEntry->Information[0],
+ EventLogEntry->Information[1],
+ EventLogEntry->Information[2],
+ EventLogEntry->Information[3],
+ EventLogEntry->CommandSpecificInformation[0],
+ EventLogEntry->CommandSpecificInformation[1],
+ EventLogEntry->CommandSpecificInformation[2],
+ EventLogEntry->CommandSpecificInformation[3]);
+ }
+ }
+ Controller->V1.OldEventLogSequenceNumber++;
+ }
+ else if (CommandOpcode == DAC960_V1_GetErrorTable)
+ {
+ DAC960_V1_ErrorTable_T *OldErrorTable = &Controller->V1.ErrorTable;
+ DAC960_V1_ErrorTable_T *NewErrorTable = Controller->V1.NewErrorTable;
+ int Channel, TargetID;
+ for (Channel = 0; Channel < Controller->Channels; Channel++)
+ for (TargetID = 0; TargetID < Controller->Targets; TargetID++)
+ {
+ DAC960_V1_ErrorTableEntry_T *NewErrorEntry =
+ &NewErrorTable->ErrorTableEntries[Channel][TargetID];
+ DAC960_V1_ErrorTableEntry_T *OldErrorEntry =
+ &OldErrorTable->ErrorTableEntries[Channel][TargetID];
+ if ((NewErrorEntry->ParityErrorCount !=
+ OldErrorEntry->ParityErrorCount) ||
+ (NewErrorEntry->SoftErrorCount !=
+ OldErrorEntry->SoftErrorCount) ||
+ (NewErrorEntry->HardErrorCount !=
+ OldErrorEntry->HardErrorCount) ||
+ (NewErrorEntry->MiscErrorCount !=
+ OldErrorEntry->MiscErrorCount))
+ DAC960_Critical("Physical Device %d:%d Errors: "
+ "Parity = %d, Soft = %d, "
+ "Hard = %d, Misc = %d\n",
+ Controller, Channel, TargetID,
+ NewErrorEntry->ParityErrorCount,
+ NewErrorEntry->SoftErrorCount,
+ NewErrorEntry->HardErrorCount,
+ NewErrorEntry->MiscErrorCount);
+ }
+ memcpy(&Controller->V1.ErrorTable, Controller->V1.NewErrorTable,
+ sizeof(DAC960_V1_ErrorTable_T));
+ }
+ else if (CommandOpcode == DAC960_V1_GetDeviceState)
+ {
+ DAC960_V1_DeviceState_T *OldDeviceState =
+ &Controller->V1.DeviceState[Controller->V1.DeviceStateChannel]
+ [Controller->V1.DeviceStateTargetID];
+ DAC960_V1_DeviceState_T *NewDeviceState =
+ Controller->V1.NewDeviceState;
+ if (NewDeviceState->DeviceState != OldDeviceState->DeviceState)
+ DAC960_Critical("Physical Device %d:%d is now %s\n", Controller,
+ Controller->V1.DeviceStateChannel,
+ Controller->V1.DeviceStateTargetID,
+ (NewDeviceState->DeviceState
+ == DAC960_V1_Device_Dead
+ ? "DEAD"
+ : NewDeviceState->DeviceState
+ == DAC960_V1_Device_WriteOnly
+ ? "WRITE-ONLY"
+ : NewDeviceState->DeviceState
+ == DAC960_V1_Device_Online
+ ? "ONLINE" : "STANDBY"));
+ if (OldDeviceState->DeviceState == DAC960_V1_Device_Dead &&
+ NewDeviceState->DeviceState != DAC960_V1_Device_Dead)
+ {
+ Controller->V1.NeedDeviceInquiryInformation = true;
+ Controller->V1.NeedDeviceSerialNumberInformation = true;
+ Controller->V1.DeviceResetCount
+ [Controller->V1.DeviceStateChannel]
+ [Controller->V1.DeviceStateTargetID] = 0;
+ }
+ memcpy(OldDeviceState, NewDeviceState,
+ sizeof(DAC960_V1_DeviceState_T));
+ }
+ else if (CommandOpcode == DAC960_V1_GetLogicalDriveInformation)
+ {
+ int LogicalDriveNumber;
+ for (LogicalDriveNumber = 0;
+ LogicalDriveNumber < Controller->LogicalDriveCount;
+ LogicalDriveNumber++)
+ {
+ DAC960_V1_LogicalDriveInformation_T *OldLogicalDriveInformation =
+ &Controller->V1.LogicalDriveInformation[LogicalDriveNumber];
+ DAC960_V1_LogicalDriveInformation_T *NewLogicalDriveInformation =
+ &(*Controller->V1.NewLogicalDriveInformation)[LogicalDriveNumber];
+ if (NewLogicalDriveInformation->LogicalDriveState !=
+ OldLogicalDriveInformation->LogicalDriveState)
+ DAC960_Critical("Logical Drive %d (/dev/rd/c%dd%d) "
+ "is now %s\n", Controller,
+ LogicalDriveNumber,
+ Controller->ControllerNumber,
+ LogicalDriveNumber,
+ (NewLogicalDriveInformation->LogicalDriveState
+ == DAC960_V1_LogicalDrive_Online
+ ? "ONLINE"
+ : NewLogicalDriveInformation->LogicalDriveState
+ == DAC960_V1_LogicalDrive_Critical
+ ? "CRITICAL" : "OFFLINE"));
+ if (NewLogicalDriveInformation->WriteBack !=
+ OldLogicalDriveInformation->WriteBack)
+ DAC960_Critical("Logical Drive %d (/dev/rd/c%dd%d) "
+ "is now %s\n", Controller,
+ LogicalDriveNumber,
+ Controller->ControllerNumber,
+ LogicalDriveNumber,
+ (NewLogicalDriveInformation->WriteBack
+ ? "WRITE BACK" : "WRITE THRU"));
+ }
+ memcpy(&Controller->V1.LogicalDriveInformation,
+ Controller->V1.NewLogicalDriveInformation,
+ sizeof(DAC960_V1_LogicalDriveInformationArray_T));
+ }
+ else if (CommandOpcode == DAC960_V1_GetRebuildProgress)
+ {
+ unsigned int LogicalDriveNumber =
+ Controller->V1.RebuildProgress->LogicalDriveNumber;
+ unsigned int LogicalDriveSize =
+ Controller->V1.RebuildProgress->LogicalDriveSize;
+ unsigned int BlocksCompleted =
+ LogicalDriveSize - Controller->V1.RebuildProgress->RemainingBlocks;
+ if (CommandStatus == DAC960_V1_NoRebuildOrCheckInProgress &&
+ Controller->V1.LastRebuildStatus == DAC960_V1_NormalCompletion)
+ CommandStatus = DAC960_V1_RebuildSuccessful;
+ switch (CommandStatus)
+ {
+ case DAC960_V1_NormalCompletion:
+ Controller->EphemeralProgressMessage = true;
+ DAC960_Progress("Rebuild in Progress: "
+ "Logical Drive %d (/dev/rd/c%dd%d) "
+ "%d%% completed\n",
+ Controller, LogicalDriveNumber,
+ Controller->ControllerNumber,
+ LogicalDriveNumber,
+ (100 * (BlocksCompleted >> 7))
+ / (LogicalDriveSize >> 7));
+ Controller->EphemeralProgressMessage = false;
+ break;
+ case DAC960_V1_RebuildFailed_LogicalDriveFailure:
+ DAC960_Progress("Rebuild Failed due to "
+ "Logical Drive Failure\n", Controller);
+ break;
+ case DAC960_V1_RebuildFailed_BadBlocksOnOther:
+ DAC960_Progress("Rebuild Failed due to "
+ "Bad Blocks on Other Drives\n", Controller);
+ break;
+ case DAC960_V1_RebuildFailed_NewDriveFailed:
+ DAC960_Progress("Rebuild Failed due to "
+ "Failure of Drive Being Rebuilt\n", Controller);
+ break;
+ case DAC960_V1_NoRebuildOrCheckInProgress:
+ break;
+ case DAC960_V1_RebuildSuccessful:
+ DAC960_Progress("Rebuild Completed Successfully\n", Controller);
+ break;
+ case DAC960_V1_RebuildSuccessfullyTerminated:
+ DAC960_Progress("Rebuild Successfully Terminated\n", Controller);
+ break;
+ }
+ Controller->V1.LastRebuildStatus = CommandStatus;
+ if (CommandType != DAC960_MonitoringCommand &&
+ Controller->V1.RebuildStatusPending)
+ {
+ Command->V1.CommandStatus = Controller->V1.PendingRebuildStatus;
+ Controller->V1.RebuildStatusPending = false;
+ }
+ else if (CommandType == DAC960_MonitoringCommand &&
+ CommandStatus != DAC960_V1_NormalCompletion &&
+ CommandStatus != DAC960_V1_NoRebuildOrCheckInProgress)
+ {
+ Controller->V1.PendingRebuildStatus = CommandStatus;
+ Controller->V1.RebuildStatusPending = true;
+ }
+ }
+ else if (CommandOpcode == DAC960_V1_RebuildStat)
+ {
+ unsigned int LogicalDriveNumber =
+ Controller->V1.RebuildProgress->LogicalDriveNumber;
+ unsigned int LogicalDriveSize =
+ Controller->V1.RebuildProgress->LogicalDriveSize;
+ unsigned int BlocksCompleted =
+ LogicalDriveSize - Controller->V1.RebuildProgress->RemainingBlocks;
+ if (CommandStatus == DAC960_V1_NormalCompletion)
+ {
+ Controller->EphemeralProgressMessage = true;
+ DAC960_Progress("Consistency Check in Progress: "
+ "Logical Drive %d (/dev/rd/c%dd%d) "
+ "%d%% completed\n",
+ Controller, LogicalDriveNumber,
+ Controller->ControllerNumber,
+ LogicalDriveNumber,
+ (100 * (BlocksCompleted >> 7))
+ / (LogicalDriveSize >> 7));
+ Controller->EphemeralProgressMessage = false;
+ }
+ }
+ else if (CommandOpcode == DAC960_V1_BackgroundInitializationControl)
+ {
+ unsigned int LogicalDriveNumber =
+ Controller->V1.BackgroundInitializationStatus->LogicalDriveNumber;
+ unsigned int LogicalDriveSize =
+ Controller->V1.BackgroundInitializationStatus->LogicalDriveSize;
+ unsigned int BlocksCompleted =
+ Controller->V1.BackgroundInitializationStatus->BlocksCompleted;
+ switch (CommandStatus)
+ {
+ case DAC960_V1_NormalCompletion:
+ switch (Controller->V1.BackgroundInitializationStatus->Status)
+ {
+ case DAC960_V1_BackgroundInitializationInvalid:
+ break;
+ case DAC960_V1_BackgroundInitializationStarted:
+ DAC960_Progress("Background Initialization Started\n",
+ Controller);
+ break;
+ case DAC960_V1_BackgroundInitializationInProgress:
+ if (BlocksCompleted ==
+ Controller->V1.LastBackgroundInitializationStatus.
+ BlocksCompleted &&
+ LogicalDriveNumber ==
+ Controller->V1.LastBackgroundInitializationStatus.
+ LogicalDriveNumber)
+ break;
+ Controller->EphemeralProgressMessage = true;
+ DAC960_Progress("Background Initialization in Progress: "
+ "Logical Drive %d (/dev/rd/c%dd%d) "
+ "%d%% completed\n",
+ Controller, LogicalDriveNumber,
+ Controller->ControllerNumber,
+ LogicalDriveNumber,
+ (100 * (BlocksCompleted >> 7))
+ / (LogicalDriveSize >> 7));
+ Controller->EphemeralProgressMessage = false;
+ break;
+ case DAC960_V1_BackgroundInitializationSuspended:
+ DAC960_Progress("Background Initialization Suspended\n",
+ Controller);
+ break;
+ case DAC960_V1_BackgroundInitializationCancelled:
+ DAC960_Progress("Background Initialization Cancelled\n",
+ Controller);
+ break;
+ }
+ memcpy(&Controller->V1.LastBackgroundInitializationStatus,
+ Controller->V1.BackgroundInitializationStatus,
+ sizeof(DAC960_V1_BackgroundInitializationStatus_T));
+ break;
+ case DAC960_V1_BackgroundInitSuccessful:
+ if (Controller->V1.BackgroundInitializationStatus->Status ==
+ DAC960_V1_BackgroundInitializationInProgress)
+ DAC960_Progress("Background Initialization "
+ "Completed Successfully\n", Controller);
+ Controller->V1.BackgroundInitializationStatus->Status =
+ DAC960_V1_BackgroundInitializationInvalid;
+ break;
+ case DAC960_V1_BackgroundInitAborted:
+ if (Controller->V1.BackgroundInitializationStatus->Status ==
+ DAC960_V1_BackgroundInitializationInProgress)
+ DAC960_Progress("Background Initialization Aborted\n",
+ Controller);
+ Controller->V1.BackgroundInitializationStatus->Status =
+ DAC960_V1_BackgroundInitializationInvalid;
+ break;
+ case DAC960_V1_NoBackgroundInitInProgress:
+ break;
+ }
+ }
+ else if (CommandOpcode == DAC960_V1_DCDB)
+ {
+ /*
+ This is a bit ugly.
+
+ The InquiryStandardData and
+ the InquiryUnitSerialNumber information
+ retrieval operations BOTH use DAC960_V1_DCDB
+ commands, so the test above can't distinguish between
+ these two cases.
+
+ Instead, we rely on the order of code later in this
+ function to ensure that DeviceInquiryInformation commands
+ are submitted before DeviceSerialNumber commands.
+ */
+ if (Controller->V1.NeedDeviceInquiryInformation)
+ {
+ DAC960_SCSI_Inquiry_T *InquiryStandardData =
+ &Controller->V1.InquiryStandardData
+ [Controller->V1.DeviceStateChannel]
+ [Controller->V1.DeviceStateTargetID];
+ if (CommandStatus != DAC960_V1_NormalCompletion)
+ {
+ memset(InquiryStandardData, 0,
+ sizeof(DAC960_SCSI_Inquiry_T));
+ InquiryStandardData->PeripheralDeviceType = 0x1F;
+ }
+ else
+ memcpy(InquiryStandardData,
+ Controller->V1.NewInquiryStandardData,
+ sizeof(DAC960_SCSI_Inquiry_T));
+ Controller->V1.NeedDeviceInquiryInformation = false;
+ }
+ else if (Controller->V1.NeedDeviceSerialNumberInformation)
+ {
+ DAC960_SCSI_Inquiry_UnitSerialNumber_T *InquiryUnitSerialNumber =
+ &Controller->V1.InquiryUnitSerialNumber
+ [Controller->V1.DeviceStateChannel]
+ [Controller->V1.DeviceStateTargetID];
+ if (CommandStatus != DAC960_V1_NormalCompletion)
+ {
+ memset(InquiryUnitSerialNumber, 0,
+ sizeof(DAC960_SCSI_Inquiry_UnitSerialNumber_T));
+ InquiryUnitSerialNumber->PeripheralDeviceType = 0x1F;
+ }
+ else
+ memcpy(InquiryUnitSerialNumber,
+ Controller->V1.NewInquiryUnitSerialNumber,
+ sizeof(DAC960_SCSI_Inquiry_UnitSerialNumber_T));
+ Controller->V1.NeedDeviceSerialNumberInformation = false;
+ }
+ }
+ /*
+ Begin submitting new monitoring commands.
+ */
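+ /*
+ Each pending flag below re-uses the just-completed monitoring Command to
+ submit exactly one follow-up command and then returns; the remaining flags
+ are serviced as each of those commands completes in turn. Only when nothing
+ is left to query is the Monitoring Timer re-armed.
+ */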
+ if (Controller->V1.NewEventLogSequenceNumber
+ - Controller->V1.OldEventLogSequenceNumber > 0)
+ {
+ Command->V1.CommandMailbox.Type3E.CommandOpcode =
+ DAC960_V1_PerformEventLogOperation;
+ Command->V1.CommandMailbox.Type3E.OperationType =
+ DAC960_V1_GetEventLogEntry;
+ Command->V1.CommandMailbox.Type3E.OperationQualifier = 1;
+ Command->V1.CommandMailbox.Type3E.SequenceNumber =
+ Controller->V1.OldEventLogSequenceNumber;
+ Command->V1.CommandMailbox.Type3E.BusAddress =
+ Controller->V1.EventLogEntryDMA;
+ DAC960_QueueCommand(Command);
+ return;
+ }
+ if (Controller->V1.NeedErrorTableInformation)
+ {
+ Controller->V1.NeedErrorTableInformation = false;
+ Command->V1.CommandMailbox.Type3.CommandOpcode =
+ DAC960_V1_GetErrorTable;
+ Command->V1.CommandMailbox.Type3.BusAddress =
+ Controller->V1.NewErrorTableDMA;
+ DAC960_QueueCommand(Command);
+ return;
+ }
+ if (Controller->V1.NeedRebuildProgress &&
+ Controller->V1.RebuildProgressFirst)
+ {
+ Controller->V1.NeedRebuildProgress = false;
+ Command->V1.CommandMailbox.Type3.CommandOpcode =
+ DAC960_V1_GetRebuildProgress;
+ Command->V1.CommandMailbox.Type3.BusAddress =
+ Controller->V1.RebuildProgressDMA;
+ DAC960_QueueCommand(Command);
+ return;
+ }
+ if (Controller->V1.NeedDeviceStateInformation)
+ {
+ if (Controller->V1.NeedDeviceInquiryInformation)
+ {
+ DAC960_V1_DCDB_T *DCDB = Controller->V1.MonitoringDCDB;
+ dma_addr_t DCDB_DMA = Controller->V1.MonitoringDCDB_DMA;
+
+ dma_addr_t NewInquiryStandardDataDMA =
+ Controller->V1.NewInquiryStandardDataDMA;
+
+ Command->V1.CommandMailbox.Type3.CommandOpcode = DAC960_V1_DCDB;
+ Command->V1.CommandMailbox.Type3.BusAddress = DCDB_DMA;
+ DCDB->Channel = Controller->V1.DeviceStateChannel;
+ DCDB->TargetID = Controller->V1.DeviceStateTargetID;
+ DCDB->Direction = DAC960_V1_DCDB_DataTransferDeviceToSystem;
+ DCDB->EarlyStatus = false;
+ DCDB->Timeout = DAC960_V1_DCDB_Timeout_10_seconds;
+ DCDB->NoAutomaticRequestSense = false;
+ DCDB->DisconnectPermitted = true;
+ DCDB->TransferLength = sizeof(DAC960_SCSI_Inquiry_T);
+ DCDB->BusAddress = NewInquiryStandardDataDMA;
+ DCDB->CDBLength = 6;
+ DCDB->TransferLengthHigh4 = 0;
+ DCDB->SenseLength = sizeof(DCDB->SenseData);
+ DCDB->CDB[0] = 0x12; /* INQUIRY */
+ DCDB->CDB[1] = 0; /* EVPD = 0 */
+ DCDB->CDB[2] = 0; /* Page Code */
+ DCDB->CDB[3] = 0; /* Reserved */
+ DCDB->CDB[4] = sizeof(DAC960_SCSI_Inquiry_T);
+ DCDB->CDB[5] = 0; /* Control */
+ DAC960_QueueCommand(Command);
+ return;
+ }
+ if (Controller->V1.NeedDeviceSerialNumberInformation)
+ {
+ DAC960_V1_DCDB_T *DCDB = Controller->V1.MonitoringDCDB;
+ dma_addr_t DCDB_DMA = Controller->V1.MonitoringDCDB_DMA;
+ dma_addr_t NewInquiryUnitSerialNumberDMA =
+ Controller->V1.NewInquiryUnitSerialNumberDMA;
+
+ Command->V1.CommandMailbox.Type3.CommandOpcode = DAC960_V1_DCDB;
+ Command->V1.CommandMailbox.Type3.BusAddress = DCDB_DMA;
+ DCDB->Channel = Controller->V1.DeviceStateChannel;
+ DCDB->TargetID = Controller->V1.DeviceStateTargetID;
+ DCDB->Direction = DAC960_V1_DCDB_DataTransferDeviceToSystem;
+ DCDB->EarlyStatus = false;
+ DCDB->Timeout = DAC960_V1_DCDB_Timeout_10_seconds;
+ DCDB->NoAutomaticRequestSense = false;
+ DCDB->DisconnectPermitted = true;
+ DCDB->TransferLength =
+ sizeof(DAC960_SCSI_Inquiry_UnitSerialNumber_T);
+ DCDB->BusAddress = NewInquiryUnitSerialNumberDMA;
+ DCDB->CDBLength = 6;
+ DCDB->TransferLengthHigh4 = 0;
+ DCDB->SenseLength = sizeof(DCDB->SenseData);
+ DCDB->CDB[0] = 0x12; /* INQUIRY */
+ DCDB->CDB[1] = 1; /* EVPD = 1 */
+ DCDB->CDB[2] = 0x80; /* Page Code */
+ DCDB->CDB[3] = 0; /* Reserved */
+ DCDB->CDB[4] = sizeof(DAC960_SCSI_Inquiry_UnitSerialNumber_T);
+ DCDB->CDB[5] = 0; /* Control */
+ DAC960_QueueCommand(Command);
+ return;
+ }
+ if (Controller->V1.StartDeviceStateScan)
+ {
+ Controller->V1.DeviceStateChannel = 0;
+ Controller->V1.DeviceStateTargetID = 0;
+ Controller->V1.StartDeviceStateScan = false;
+ }
+ else if (++Controller->V1.DeviceStateTargetID == Controller->Targets)
+ {
+ Controller->V1.DeviceStateChannel++;
+ Controller->V1.DeviceStateTargetID = 0;
+ }
+ if (Controller->V1.DeviceStateChannel < Controller->Channels)
+ {
+ Controller->V1.NewDeviceState->DeviceState =
+ DAC960_V1_Device_Dead;
+ Command->V1.CommandMailbox.Type3D.CommandOpcode =
+ DAC960_V1_GetDeviceState;
+ Command->V1.CommandMailbox.Type3D.Channel =
+ Controller->V1.DeviceStateChannel;
+ Command->V1.CommandMailbox.Type3D.TargetID =
+ Controller->V1.DeviceStateTargetID;
+ Command->V1.CommandMailbox.Type3D.BusAddress =
+ Controller->V1.NewDeviceStateDMA;
+ DAC960_QueueCommand(Command);
+ return;
+ }
+ Controller->V1.NeedDeviceStateInformation = false;
+ }
+ if (Controller->V1.NeedLogicalDriveInformation)
+ {
+ Controller->V1.NeedLogicalDriveInformation = false;
+ Command->V1.CommandMailbox.Type3.CommandOpcode =
+ DAC960_V1_GetLogicalDriveInformation;
+ Command->V1.CommandMailbox.Type3.BusAddress =
+ Controller->V1.NewLogicalDriveInformationDMA;
+ DAC960_QueueCommand(Command);
+ return;
+ }
+ if (Controller->V1.NeedRebuildProgress)
+ {
+ Controller->V1.NeedRebuildProgress = false;
+ Command->V1.CommandMailbox.Type3.CommandOpcode =
+ DAC960_V1_GetRebuildProgress;
+ Command->V1.CommandMailbox.Type3.BusAddress =
+ Controller->V1.RebuildProgressDMA;
+ DAC960_QueueCommand(Command);
+ return;
+ }
+ if (Controller->V1.NeedConsistencyCheckProgress)
+ {
+ Controller->V1.NeedConsistencyCheckProgress = false;
+ Command->V1.CommandMailbox.Type3.CommandOpcode =
+ DAC960_V1_RebuildStat;
+ Command->V1.CommandMailbox.Type3.BusAddress =
+ Controller->V1.RebuildProgressDMA;
+ DAC960_QueueCommand(Command);
+ return;
+ }
+ if (Controller->V1.NeedBackgroundInitializationStatus)
+ {
+ Controller->V1.NeedBackgroundInitializationStatus = false;
+ Command->V1.CommandMailbox.Type3B.CommandOpcode =
+ DAC960_V1_BackgroundInitializationControl;
+ Command->V1.CommandMailbox.Type3B.CommandOpcode2 = 0x20;
+ Command->V1.CommandMailbox.Type3B.BusAddress =
+ Controller->V1.BackgroundInitializationStatusDMA;
+ DAC960_QueueCommand(Command);
+ return;
+ }
+ Controller->MonitoringTimerCount++;
+ Controller->MonitoringTimer.expires =
+ jiffies + DAC960_MonitoringTimerInterval;
+ add_timer(&Controller->MonitoringTimer);
+ }
+ if (CommandType == DAC960_ImmediateCommand)
+ {
+ complete(Command->Completion);
+ Command->Completion = NULL;
+ return;
+ }
+ if (CommandType == DAC960_QueuedCommand)
+ {
+ DAC960_V1_KernelCommand_T *KernelCommand = Command->V1.KernelCommand;
+ KernelCommand->CommandStatus = Command->V1.CommandStatus;
+ Command->V1.KernelCommand = NULL;
+ if (CommandOpcode == DAC960_V1_DCDB)
+ Controller->V1.DirectCommandActive[KernelCommand->DCDB->Channel]
+ [KernelCommand->DCDB->TargetID] =
+ false;
+ DAC960_DeallocateCommand(Command);
+ KernelCommand->CompletionFunction(KernelCommand);
+ return;
+ }
+ /*
+ Queue a Status Monitoring Command to the Controller using the just
+ completed Command if one was deferred previously due to lack of a
+ free Command when the Monitoring Timer Function was called.
+ */
+ if (Controller->MonitoringCommandDeferred)
+ {
+ Controller->MonitoringCommandDeferred = false;
+ DAC960_V1_QueueMonitoringCommand(Command);
+ return;
+ }
+ /*
+ Deallocate the Command.
+ */
+ DAC960_DeallocateCommand(Command);
+ /*
+ Wake up any processes waiting on a free Command.
+ */
+ wake_up(&Controller->CommandWaitQueue);
+}
+
+
+/*
+ DAC960_V2_ReadWriteError prints an appropriate error message for Command
+ when an error occurs on a Read or Write operation.
+*/
+
+static void DAC960_V2_ReadWriteError(DAC960_Command_T *Command)
+{
+ DAC960_Controller_T *Controller = Command->Controller;
+ unsigned char *SenseErrors[] = { "NO SENSE", "RECOVERED ERROR",
+ "NOT READY", "MEDIUM ERROR",
+ "HARDWARE ERROR", "ILLEGAL REQUEST",
+ "UNIT ATTENTION", "DATA PROTECT",
+ "BLANK CHECK", "VENDOR-SPECIFIC",
+ "COPY ABORTED", "ABORTED COMMAND",
+ "EQUAL", "VOLUME OVERFLOW",
+ "MISCOMPARE", "RESERVED" };
+ unsigned char *CommandName = "UNKNOWN";
+ switch (Command->CommandType)
+ {
+ case DAC960_ReadCommand:
+ case DAC960_ReadRetryCommand:
+ CommandName = "READ";
+ break;
+ case DAC960_WriteCommand:
+ case DAC960_WriteRetryCommand:
+ CommandName = "WRITE";
+ break;
+ case DAC960_MonitoringCommand:
+ case DAC960_ImmediateCommand:
+ case DAC960_QueuedCommand:
+ break;
+ }
+ DAC960_Error("Error Condition %s on %s:\n", Controller,
+ SenseErrors[Command->V2.RequestSense->SenseKey], CommandName);
+ DAC960_Error(" /dev/rd/c%dd%d: absolute blocks %u..%u\n",
+ Controller, Controller->ControllerNumber,
+ Command->LogicalDriveNumber, Command->BlockNumber,
+ Command->BlockNumber + Command->BlockCount - 1);
+}
+
+
+/*
+ DAC960_V2_ReportEvent prints an appropriate message when a Controller Event
+ occurs.
+*/
+
+static void DAC960_V2_ReportEvent(DAC960_Controller_T *Controller,
+ DAC960_V2_Event_T *Event)
+{
+ DAC960_SCSI_RequestSense_T *RequestSense =
+ (DAC960_SCSI_RequestSense_T *) &Event->RequestSenseData;
+ unsigned char MessageBuffer[DAC960_LineBufferSize];
+ static struct { int EventCode; unsigned char *EventMessage; } EventList[] =
+ { /* Physical Device Events (0x0000 - 0x007F) */
+ { 0x0001, "P Online" },
+ { 0x0002, "P Standby" },
+ { 0x0005, "P Automatic Rebuild Started" },
+ { 0x0006, "P Manual Rebuild Started" },
+ { 0x0007, "P Rebuild Completed" },
+ { 0x0008, "P Rebuild Cancelled" },
+ { 0x0009, "P Rebuild Failed for Unknown Reasons" },
+ { 0x000A, "P Rebuild Failed due to New Physical Device" },
+ { 0x000B, "P Rebuild Failed due to Logical Drive Failure" },
+ { 0x000C, "S Offline" },
+ { 0x000D, "P Found" },
+ { 0x000E, "P Removed" },
+ { 0x000F, "P Unconfigured" },
+ { 0x0010, "P Expand Capacity Started" },
+ { 0x0011, "P Expand Capacity Completed" },
+ { 0x0012, "P Expand Capacity Failed" },
+ { 0x0013, "P Command Timed Out" },
+ { 0x0014, "P Command Aborted" },
+ { 0x0015, "P Command Retried" },
+ { 0x0016, "P Parity Error" },
+ { 0x0017, "P Soft Error" },
+ { 0x0018, "P Miscellaneous Error" },
+ { 0x0019, "P Reset" },
+ { 0x001A, "P Active Spare Found" },
+ { 0x001B, "P Warm Spare Found" },
+ { 0x001C, "S Sense Data Received" },
+ { 0x001D, "P Initialization Started" },
+ { 0x001E, "P Initialization Completed" },
+ { 0x001F, "P Initialization Failed" },
+ { 0x0020, "P Initialization Cancelled" },
+ { 0x0021, "P Failed because Write Recovery Failed" },
+ { 0x0022, "P Failed because SCSI Bus Reset Failed" },
+ { 0x0023, "P Failed because of Double Check Condition" },
+ { 0x0024, "P Failed because Device Cannot Be Accessed" },
+ { 0x0025, "P Failed because of Gross Error on SCSI Processor" },
+ { 0x0026, "P Failed because of Bad Tag from Device" },
+ { 0x0027, "P Failed because of Command Timeout" },
+ { 0x0028, "P Failed because of System Reset" },
+ { 0x0029, "P Failed because of Busy Status or Parity Error" },
+ { 0x002A, "P Failed because Host Set Device to Failed State" },
+ { 0x002B, "P Failed because of Selection Timeout" },
+ { 0x002C, "P Failed because of SCSI Bus Phase Error" },
+ { 0x002D, "P Failed because Device Returned Unknown Status" },
+ { 0x002E, "P Failed because Device Not Ready" },
+ { 0x002F, "P Failed because Device Not Found at Startup" },
+ { 0x0030, "P Failed because COD Write Operation Failed" },
+ { 0x0031, "P Failed because BDT Write Operation Failed" },
+ { 0x0039, "P Missing at Startup" },
+ { 0x003A, "P Start Rebuild Failed due to Physical Drive Too Small" },
+ { 0x003C, "P Temporarily Offline Device Automatically Made Online" },
+ { 0x003D, "P Standby Rebuild Started" },
+ /* Logical Device Events (0x0080 - 0x00FF) */
+ { 0x0080, "M Consistency Check Started" },
+ { 0x0081, "M Consistency Check Completed" },
+ { 0x0082, "M Consistency Check Cancelled" },
+ { 0x0083, "M Consistency Check Completed With Errors" },
+ { 0x0084, "M Consistency Check Failed due to Logical Drive Failure" },
+ { 0x0085, "M Consistency Check Failed due to Physical Device Failure" },
+ { 0x0086, "L Offline" },
+ { 0x0087, "L Critical" },
+ { 0x0088, "L Online" },
+ { 0x0089, "M Automatic Rebuild Started" },
+ { 0x008A, "M Manual Rebuild Started" },
+ { 0x008B, "M Rebuild Completed" },
+ { 0x008C, "M Rebuild Cancelled" },
+ { 0x008D, "M Rebuild Failed for Unknown Reasons" },
+ { 0x008E, "M Rebuild Failed due to New Physical Device" },
+ { 0x008F, "M Rebuild Failed due to Logical Drive Failure" },
+ { 0x0090, "M Initialization Started" },
+ { 0x0091, "M Initialization Completed" },
+ { 0x0092, "M Initialization Cancelled" },
+ { 0x0093, "M Initialization Failed" },
+ { 0x0094, "L Found" },
+ { 0x0095, "L Deleted" },
+ { 0x0096, "M Expand Capacity Started" },
+ { 0x0097, "M Expand Capacity Completed" },
+ { 0x0098, "M Expand Capacity Failed" },
+ { 0x0099, "L Bad Block Found" },
+ { 0x009A, "L Size Changed" },
+ { 0x009B, "L Type Changed" },
+ { 0x009C, "L Bad Data Block Found" },
+ { 0x009E, "L Read of Data Block in BDT" },
+ { 0x009F, "L Write Back Data for Disk Block Lost" },
+ { 0x00A0, "L Temporarily Offline RAID-5/3 Drive Made Online" },
+ { 0x00A1, "L Temporarily Offline RAID-6/1/0/7 Drive Made Online" },
+ { 0x00A2, "L Standby Rebuild Started" },
+ /* Fault Management Events (0x0100 - 0x017F) */
+ { 0x0140, "E Fan %d Failed" },
+ { 0x0141, "E Fan %d OK" },
+ { 0x0142, "E Fan %d Not Present" },
+ { 0x0143, "E Power Supply %d Failed" },
+ { 0x0144, "E Power Supply %d OK" },
+ { 0x0145, "E Power Supply %d Not Present" },
+ { 0x0146, "E Temperature Sensor %d Temperature Exceeds Safe Limit" },
+ { 0x0147, "E Temperature Sensor %d Temperature Exceeds Working Limit" },
+ { 0x0148, "E Temperature Sensor %d Temperature Normal" },
+ { 0x0149, "E Temperature Sensor %d Not Present" },
+ { 0x014A, "E Enclosure Management Unit %d Access Critical" },
+ { 0x014B, "E Enclosure Management Unit %d Access OK" },
+ { 0x014C, "E Enclosure Management Unit %d Access Offline" },
+ /* Controller Events (0x0180 - 0x01FF) */
+ { 0x0181, "C Cache Write Back Error" },
+ { 0x0188, "C Battery Backup Unit Found" },
+ { 0x0189, "C Battery Backup Unit Charge Level Low" },
+ { 0x018A, "C Battery Backup Unit Charge Level OK" },
+ { 0x0193, "C Installation Aborted" },
+ { 0x0195, "C Battery Backup Unit Physically Removed" },
+ { 0x0196, "C Memory Error During Warm Boot" },
+ { 0x019E, "C Memory Soft ECC Error Corrected" },
+ { 0x019F, "C Memory Hard ECC Error Corrected" },
+ { 0x01A2, "C Battery Backup Unit Failed" },
+ { 0x01AB, "C Mirror Race Recovery Failed" },
+ { 0x01AC, "C Mirror Race on Critical Drive" },
+ /* Controller Internal Processor Events */
+ { 0x0380, "C Internal Controller Hung" },
+ { 0x0381, "C Internal Controller Firmware Breakpoint" },
+ { 0x0390, "C Internal Controller i960 Processor Specific Error" },
+ { 0x03A0, "C Internal Controller StrongARM Processor Specific Error" },
+ { 0, "" } };
+ int EventListIndex = 0, EventCode;
+ unsigned char EventType, *EventMessage;
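+ /*
+ A Sense Data Received event (0x001C) whose vendor-specific sense data has
+ ASC 0x80 or 0x81 actually encodes another event; translate the ASC/ASCQ
+ pair into that event code before searching the table below.
+ */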
+ if (Event->EventCode == 0x1C &&
+ RequestSense->SenseKey == DAC960_SenseKey_VendorSpecific &&
+ (RequestSense->AdditionalSenseCode == 0x80 ||
+ RequestSense->AdditionalSenseCode == 0x81))
+ Event->EventCode = ((RequestSense->AdditionalSenseCode - 0x80) << 8) |
+ RequestSense->AdditionalSenseCodeQualifier;
+ while (true)
+ {
+ EventCode = EventList[EventListIndex].EventCode;
+ if (EventCode == Event->EventCode || EventCode == 0) break;
+ EventListIndex++;
+ }
+ EventType = EventList[EventListIndex].EventMessage[0];
+ EventMessage = &EventList[EventListIndex].EventMessage[2];
+ if (EventCode == 0)
+ {
+ DAC960_Critical("Unknown Controller Event Code %04X\n",
+ Controller, Event->EventCode);
+ return;
+ }
+ switch (EventType)
+ {
+ case 'P':
+ DAC960_Critical("Physical Device %d:%d %s\n", Controller,
+ Event->Channel, Event->TargetID, EventMessage);
+ break;
+ case 'L':
+ DAC960_Critical("Logical Drive %d (/dev/rd/c%dd%d) %s\n", Controller,
+ Event->LogicalUnit, Controller->ControllerNumber,
+ Event->LogicalUnit, EventMessage);
+ break;
+ case 'M':
+ DAC960_Progress("Logical Drive %d (/dev/rd/c%dd%d) %s\n", Controller,
+ Event->LogicalUnit, Controller->ControllerNumber,
+ Event->LogicalUnit, EventMessage);
+ break;
+ case 'S':
+ if (RequestSense->SenseKey == DAC960_SenseKey_NoSense ||
+ (RequestSense->SenseKey == DAC960_SenseKey_NotReady &&
+ RequestSense->AdditionalSenseCode == 0x04 &&
+ (RequestSense->AdditionalSenseCodeQualifier == 0x01 ||
+ RequestSense->AdditionalSenseCodeQualifier == 0x02)))
+ break;
+ DAC960_Critical("Physical Device %d:%d %s\n", Controller,
+ Event->Channel, Event->TargetID, EventMessage);
+ DAC960_Critical("Physical Device %d:%d Request Sense: "
+ "Sense Key = %X, ASC = %02X, ASCQ = %02X\n",
+ Controller,
+ Event->Channel,
+ Event->TargetID,
+ RequestSense->SenseKey,
+ RequestSense->AdditionalSenseCode,
+ RequestSense->AdditionalSenseCodeQualifier);
+ DAC960_Critical("Physical Device %d:%d Request Sense: "
+ "Information = %02X%02X%02X%02X "
+ "%02X%02X%02X%02X\n",
+ Controller,
+ Event->Channel,
+ Event->TargetID,
+ RequestSense->Information[0],
+ RequestSense->Information[1],
+ RequestSense->Information[2],
+ RequestSense->Information[3],
+ RequestSense->CommandSpecificInformation[0],
+ RequestSense->CommandSpecificInformation[1],
+ RequestSense->CommandSpecificInformation[2],
+ RequestSense->CommandSpecificInformation[3]);
+ break;
+ case 'E':
+ if (Controller->SuppressEnclosureMessages) break;
+ sprintf(MessageBuffer, EventMessage, Event->LogicalUnit);
+ DAC960_Critical("Enclosure %d %s\n", Controller,
+ Event->TargetID, MessageBuffer);
+ break;
+ case 'C':
+ DAC960_Critical("Controller %s\n", Controller, EventMessage);
+ break;
+ default:
+ DAC960_Critical("Unknown Controller Event Code %04X\n",
+ Controller, Event->EventCode);
+ break;
+ }
+}
+
+
+/*
+ DAC960_V2_ReportProgress prints an appropriate progress message for
+ Logical Device Long Operations.
+*/
+
+static void DAC960_V2_ReportProgress(DAC960_Controller_T *Controller,
+ unsigned char *MessageString,
+ unsigned int LogicalDeviceNumber,
+ unsigned long BlocksCompleted,
+ unsigned long LogicalDeviceSize)
+{
+ Controller->EphemeralProgressMessage = true;
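+ /*
+ Both block counts are scaled down by 128 before the multiply by 100,
+ presumably so the percentage calculation cannot overflow on very large
+ logical drives.
+ */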
+ DAC960_Progress("%s in Progress: Logical Drive %d (/dev/rd/c%dd%d) "
+ "%d%% completed\n", Controller,
+ MessageString,
+ LogicalDeviceNumber,
+ Controller->ControllerNumber,
+ LogicalDeviceNumber,
+ (100 * (BlocksCompleted >> 7)) / (LogicalDeviceSize >> 7));
+ Controller->EphemeralProgressMessage = false;
+}
+
+
+/*
+ DAC960_V2_ProcessCompletedCommand performs completion processing for Command
+ for DAC960 V2 Firmware Controllers.
+*/
+
+static void DAC960_V2_ProcessCompletedCommand(DAC960_Command_T *Command)
+{
+ DAC960_Controller_T *Controller = Command->Controller;
+ DAC960_CommandType_T CommandType = Command->CommandType;
+ DAC960_V2_CommandMailbox_T *CommandMailbox = &Command->V2.CommandMailbox;
+ DAC960_V2_IOCTL_Opcode_T CommandOpcode = CommandMailbox->Common.IOCTL_Opcode;
+ DAC960_V2_CommandStatus_T CommandStatus = Command->V2.CommandStatus;
+
+ if (CommandType == DAC960_ReadCommand ||
+ CommandType == DAC960_WriteCommand)
+ {
+
+#ifdef FORCE_RETRY_DEBUG
+ CommandStatus = DAC960_V2_AbormalCompletion;
+ Command->V2.RequestSense->SenseKey = DAC960_SenseKey_MediumError;
+#endif
+
+ if (CommandStatus == DAC960_V2_NormalCompletion) {
+
+ if (!DAC960_ProcessCompletedRequest(Command, true))
+ BUG();
+
+ } else if (Command->V2.RequestSense->SenseKey == DAC960_SenseKey_MediumError)
+ {
+ /*
+ * break the command down into pieces and resubmit each
+ * piece, hoping that some of them will succeed.
+ */
+ DAC960_queue_partial_rw(Command);
+ return;
+ }
+ else
+ {
+ if (Command->V2.RequestSense->SenseKey != DAC960_SenseKey_NotReady)
+ DAC960_V2_ReadWriteError(Command);
+ /*
+ Perform completion processing for all buffers in this I/O Request.
+ */
+ (void)DAC960_ProcessCompletedRequest(Command, false);
+ }
+ }
+ else if (CommandType == DAC960_ReadRetryCommand ||
+ CommandType == DAC960_WriteRetryCommand)
+ {
+ bool normal_completion;
+
+#ifdef FORCE_RETRY_FAILURE_DEBUG
+ static int retry_count = 1;
+#endif
+ /*
+ Perform completion processing for the portion that was
+ retried, and submit the next portion, if any.
+ */
+ normal_completion = true;
+ if (CommandStatus != DAC960_V2_NormalCompletion) {
+ normal_completion = false;
+ if (Command->V2.RequestSense->SenseKey != DAC960_SenseKey_NotReady)
+ DAC960_V2_ReadWriteError(Command);
+ }
+
+#ifdef FORCE_RETRY_FAILURE_DEBUG
+ if (!(++retry_count % 10000)) {
+ printk("V2 error retry failure test\n");
+ normal_completion = false;
+ DAC960_V2_ReadWriteError(Command);
+ }
+#endif
+
+ if (!DAC960_ProcessCompletedRequest(Command, normal_completion)) {
+ DAC960_queue_partial_rw(Command);
+ return;
+ }
+ }
+ else if (CommandType == DAC960_MonitoringCommand)
+ {
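+      /*
+        Monitoring Commands step through the Controller's status information:
+        Controller Information first, then any outstanding Events, then a
+        scan of the Physical Devices (fetching Unit Serial Numbers as needed),
+        then a scan of the Logical Devices, and finally the Monitoring Timer
+        is re-armed.
+      */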
+ if (Controller->ShutdownMonitoringTimer)
+ return;
+ if (CommandOpcode == DAC960_V2_GetControllerInfo)
+ {
+ DAC960_V2_ControllerInfo_T *NewControllerInfo =
+ Controller->V2.NewControllerInformation;
+ DAC960_V2_ControllerInfo_T *ControllerInfo =
+ &Controller->V2.ControllerInformation;
+ Controller->LogicalDriveCount =
+ NewControllerInfo->LogicalDevicesPresent;
+ Controller->V2.NeedLogicalDeviceInformation = true;
+ Controller->V2.NeedPhysicalDeviceInformation = true;
+ Controller->V2.StartLogicalDeviceInformationScan = true;
+ Controller->V2.StartPhysicalDeviceInformationScan = true;
+ Controller->MonitoringAlertMode =
+ (NewControllerInfo->LogicalDevicesCritical > 0 ||
+ NewControllerInfo->LogicalDevicesOffline > 0 ||
+ NewControllerInfo->PhysicalDisksCritical > 0 ||
+ NewControllerInfo->PhysicalDisksOffline > 0);
+ memcpy(ControllerInfo, NewControllerInfo,
+ sizeof(DAC960_V2_ControllerInfo_T));
+ }
+ else if (CommandOpcode == DAC960_V2_GetEvent)
+ {
+ if (CommandStatus == DAC960_V2_NormalCompletion) {
+ DAC960_V2_ReportEvent(Controller, Controller->V2.Event);
+ }
+ Controller->V2.NextEventSequenceNumber++;
+ }
+ else if (CommandOpcode == DAC960_V2_GetPhysicalDeviceInfoValid &&
+ CommandStatus == DAC960_V2_NormalCompletion)
+ {
+ DAC960_V2_PhysicalDeviceInfo_T *NewPhysicalDeviceInfo =
+ Controller->V2.NewPhysicalDeviceInformation;
+ unsigned int PhysicalDeviceIndex = Controller->V2.PhysicalDeviceIndex;
+ DAC960_V2_PhysicalDeviceInfo_T *PhysicalDeviceInfo =
+ Controller->V2.PhysicalDeviceInformation[PhysicalDeviceIndex];
+ DAC960_SCSI_Inquiry_UnitSerialNumber_T *InquiryUnitSerialNumber =
+ Controller->V2.InquiryUnitSerialNumber[PhysicalDeviceIndex];
+ unsigned int DeviceIndex;
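+	  /*
+	    The Physical Device Information array is kept sorted by
+	    Channel:TargetID:LogicalUnit.  Existing entries that sort before
+	    the newly reported device no longer exist and are removed; if the
+	    reported device is not already present, a new entry is inserted
+	    in place.
+	  */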
+ while (PhysicalDeviceInfo != NULL &&
+ (NewPhysicalDeviceInfo->Channel >
+ PhysicalDeviceInfo->Channel ||
+ (NewPhysicalDeviceInfo->Channel ==
+ PhysicalDeviceInfo->Channel &&
+ (NewPhysicalDeviceInfo->TargetID >
+ PhysicalDeviceInfo->TargetID ||
+ (NewPhysicalDeviceInfo->TargetID ==
+ PhysicalDeviceInfo->TargetID &&
+ NewPhysicalDeviceInfo->LogicalUnit >
+ PhysicalDeviceInfo->LogicalUnit)))))
+ {
+ DAC960_Critical("Physical Device %d:%d No Longer Exists\n",
+ Controller,
+ PhysicalDeviceInfo->Channel,
+ PhysicalDeviceInfo->TargetID);
+ Controller->V2.PhysicalDeviceInformation
+ [PhysicalDeviceIndex] = NULL;
+ Controller->V2.InquiryUnitSerialNumber
+ [PhysicalDeviceIndex] = NULL;
+ kfree(PhysicalDeviceInfo);
+ kfree(InquiryUnitSerialNumber);
+ for (DeviceIndex = PhysicalDeviceIndex;
+ DeviceIndex < DAC960_V2_MaxPhysicalDevices - 1;
+ DeviceIndex++)
+ {
+ Controller->V2.PhysicalDeviceInformation[DeviceIndex] =
+ Controller->V2.PhysicalDeviceInformation[DeviceIndex+1];
+ Controller->V2.InquiryUnitSerialNumber[DeviceIndex] =
+ Controller->V2.InquiryUnitSerialNumber[DeviceIndex+1];
+ }
+ Controller->V2.PhysicalDeviceInformation
+ [DAC960_V2_MaxPhysicalDevices-1] = NULL;
+ Controller->V2.InquiryUnitSerialNumber
+ [DAC960_V2_MaxPhysicalDevices-1] = NULL;
+ PhysicalDeviceInfo =
+ Controller->V2.PhysicalDeviceInformation[PhysicalDeviceIndex];
+ InquiryUnitSerialNumber =
+ Controller->V2.InquiryUnitSerialNumber[PhysicalDeviceIndex];
+ }
+ if (PhysicalDeviceInfo == NULL ||
+ (NewPhysicalDeviceInfo->Channel !=
+ PhysicalDeviceInfo->Channel) ||
+ (NewPhysicalDeviceInfo->TargetID !=
+ PhysicalDeviceInfo->TargetID) ||
+ (NewPhysicalDeviceInfo->LogicalUnit !=
+ PhysicalDeviceInfo->LogicalUnit))
+ {
+ PhysicalDeviceInfo =
+ kmalloc(sizeof(DAC960_V2_PhysicalDeviceInfo_T), GFP_ATOMIC);
+ InquiryUnitSerialNumber =
+ kmalloc(sizeof(DAC960_SCSI_Inquiry_UnitSerialNumber_T),
+ GFP_ATOMIC);
+ if (InquiryUnitSerialNumber == NULL ||
+ PhysicalDeviceInfo == NULL)
+ {
+ kfree(InquiryUnitSerialNumber);
+ InquiryUnitSerialNumber = NULL;
+ kfree(PhysicalDeviceInfo);
+ PhysicalDeviceInfo = NULL;
+ }
+ DAC960_Critical("Physical Device %d:%d Now Exists%s\n",
+ Controller,
+ NewPhysicalDeviceInfo->Channel,
+ NewPhysicalDeviceInfo->TargetID,
+ (PhysicalDeviceInfo != NULL
+ ? "" : " - Allocation Failed"));
+ if (PhysicalDeviceInfo != NULL)
+ {
+ memset(PhysicalDeviceInfo, 0,
+ sizeof(DAC960_V2_PhysicalDeviceInfo_T));
+ PhysicalDeviceInfo->PhysicalDeviceState =
+ DAC960_V2_Device_InvalidState;
+ memset(InquiryUnitSerialNumber, 0,
+ sizeof(DAC960_SCSI_Inquiry_UnitSerialNumber_T));
+ InquiryUnitSerialNumber->PeripheralDeviceType = 0x1F;
+ for (DeviceIndex = DAC960_V2_MaxPhysicalDevices - 1;
+ DeviceIndex > PhysicalDeviceIndex;
+ DeviceIndex--)
+ {
+ Controller->V2.PhysicalDeviceInformation[DeviceIndex] =
+ Controller->V2.PhysicalDeviceInformation[DeviceIndex-1];
+ Controller->V2.InquiryUnitSerialNumber[DeviceIndex] =
+ Controller->V2.InquiryUnitSerialNumber[DeviceIndex-1];
+ }
+ Controller->V2.PhysicalDeviceInformation
+ [PhysicalDeviceIndex] =
+ PhysicalDeviceInfo;
+ Controller->V2.InquiryUnitSerialNumber
+ [PhysicalDeviceIndex] =
+ InquiryUnitSerialNumber;
+ Controller->V2.NeedDeviceSerialNumberInformation = true;
+ }
+ }
+ if (PhysicalDeviceInfo != NULL)
+ {
+ if (NewPhysicalDeviceInfo->PhysicalDeviceState !=
+ PhysicalDeviceInfo->PhysicalDeviceState)
+ DAC960_Critical(
+ "Physical Device %d:%d is now %s\n", Controller,
+ NewPhysicalDeviceInfo->Channel,
+ NewPhysicalDeviceInfo->TargetID,
+ (NewPhysicalDeviceInfo->PhysicalDeviceState
+ == DAC960_V2_Device_Online
+ ? "ONLINE"
+ : NewPhysicalDeviceInfo->PhysicalDeviceState
+ == DAC960_V2_Device_Rebuild
+ ? "REBUILD"
+ : NewPhysicalDeviceInfo->PhysicalDeviceState
+ == DAC960_V2_Device_Missing
+ ? "MISSING"
+ : NewPhysicalDeviceInfo->PhysicalDeviceState
+ == DAC960_V2_Device_Critical
+ ? "CRITICAL"
+ : NewPhysicalDeviceInfo->PhysicalDeviceState
+ == DAC960_V2_Device_Dead
+ ? "DEAD"
+ : NewPhysicalDeviceInfo->PhysicalDeviceState
+ == DAC960_V2_Device_SuspectedDead
+ ? "SUSPECTED-DEAD"
+ : NewPhysicalDeviceInfo->PhysicalDeviceState
+ == DAC960_V2_Device_CommandedOffline
+ ? "COMMANDED-OFFLINE"
+ : NewPhysicalDeviceInfo->PhysicalDeviceState
+ == DAC960_V2_Device_Standby
+ ? "STANDBY" : "UNKNOWN"));
+ if ((NewPhysicalDeviceInfo->ParityErrors !=
+ PhysicalDeviceInfo->ParityErrors) ||
+ (NewPhysicalDeviceInfo->SoftErrors !=
+ PhysicalDeviceInfo->SoftErrors) ||
+ (NewPhysicalDeviceInfo->HardErrors !=
+ PhysicalDeviceInfo->HardErrors) ||
+ (NewPhysicalDeviceInfo->MiscellaneousErrors !=
+ PhysicalDeviceInfo->MiscellaneousErrors) ||
+ (NewPhysicalDeviceInfo->CommandTimeouts !=
+ PhysicalDeviceInfo->CommandTimeouts) ||
+ (NewPhysicalDeviceInfo->Retries !=
+ PhysicalDeviceInfo->Retries) ||
+ (NewPhysicalDeviceInfo->Aborts !=
+ PhysicalDeviceInfo->Aborts) ||
+ (NewPhysicalDeviceInfo->PredictedFailuresDetected !=
+ PhysicalDeviceInfo->PredictedFailuresDetected))
+ {
+ DAC960_Critical("Physical Device %d:%d Errors: "
+ "Parity = %d, Soft = %d, "
+ "Hard = %d, Misc = %d\n",
+ Controller,
+ NewPhysicalDeviceInfo->Channel,
+ NewPhysicalDeviceInfo->TargetID,
+ NewPhysicalDeviceInfo->ParityErrors,
+ NewPhysicalDeviceInfo->SoftErrors,
+ NewPhysicalDeviceInfo->HardErrors,
+ NewPhysicalDeviceInfo->MiscellaneousErrors);
+ DAC960_Critical("Physical Device %d:%d Errors: "
+ "Timeouts = %d, Retries = %d, "
+ "Aborts = %d, Predicted = %d\n",
+ Controller,
+ NewPhysicalDeviceInfo->Channel,
+ NewPhysicalDeviceInfo->TargetID,
+ NewPhysicalDeviceInfo->CommandTimeouts,
+ NewPhysicalDeviceInfo->Retries,
+ NewPhysicalDeviceInfo->Aborts,
+ NewPhysicalDeviceInfo
+ ->PredictedFailuresDetected);
+ }
+ if ((PhysicalDeviceInfo->PhysicalDeviceState
+ == DAC960_V2_Device_Dead ||
+ PhysicalDeviceInfo->PhysicalDeviceState
+ == DAC960_V2_Device_InvalidState) &&
+ NewPhysicalDeviceInfo->PhysicalDeviceState
+ != DAC960_V2_Device_Dead)
+ Controller->V2.NeedDeviceSerialNumberInformation = true;
+ memcpy(PhysicalDeviceInfo, NewPhysicalDeviceInfo,
+ sizeof(DAC960_V2_PhysicalDeviceInfo_T));
+ }
+ NewPhysicalDeviceInfo->LogicalUnit++;
+ Controller->V2.PhysicalDeviceIndex++;
+ }
+ else if (CommandOpcode == DAC960_V2_GetPhysicalDeviceInfoValid)
+ {
+ unsigned int DeviceIndex;
+ for (DeviceIndex = Controller->V2.PhysicalDeviceIndex;
+ DeviceIndex < DAC960_V2_MaxPhysicalDevices;
+ DeviceIndex++)
+ {
+ DAC960_V2_PhysicalDeviceInfo_T *PhysicalDeviceInfo =
+ Controller->V2.PhysicalDeviceInformation[DeviceIndex];
+ DAC960_SCSI_Inquiry_UnitSerialNumber_T *InquiryUnitSerialNumber =
+ Controller->V2.InquiryUnitSerialNumber[DeviceIndex];
+ if (PhysicalDeviceInfo == NULL) break;
+ DAC960_Critical("Physical Device %d:%d No Longer Exists\n",
+ Controller,
+ PhysicalDeviceInfo->Channel,
+ PhysicalDeviceInfo->TargetID);
+ Controller->V2.PhysicalDeviceInformation[DeviceIndex] = NULL;
+ Controller->V2.InquiryUnitSerialNumber[DeviceIndex] = NULL;
+ kfree(PhysicalDeviceInfo);
+ kfree(InquiryUnitSerialNumber);
+ }
+ Controller->V2.NeedPhysicalDeviceInformation = false;
+ }
+ else if (CommandOpcode == DAC960_V2_GetLogicalDeviceInfoValid &&
+ CommandStatus == DAC960_V2_NormalCompletion)
+ {
+ DAC960_V2_LogicalDeviceInfo_T *NewLogicalDeviceInfo =
+ Controller->V2.NewLogicalDeviceInformation;
+ unsigned short LogicalDeviceNumber =
+ NewLogicalDeviceInfo->LogicalDeviceNumber;
+ DAC960_V2_LogicalDeviceInfo_T *LogicalDeviceInfo =
+ Controller->V2.LogicalDeviceInformation[LogicalDeviceNumber];
+ if (LogicalDeviceInfo == NULL)
+ {
+ DAC960_V2_PhysicalDevice_T PhysicalDevice;
+ PhysicalDevice.Controller = 0;
+ PhysicalDevice.Channel = NewLogicalDeviceInfo->Channel;
+ PhysicalDevice.TargetID = NewLogicalDeviceInfo->TargetID;
+ PhysicalDevice.LogicalUnit = NewLogicalDeviceInfo->LogicalUnit;
+ Controller->V2.LogicalDriveToVirtualDevice[LogicalDeviceNumber] =
+ PhysicalDevice;
+ LogicalDeviceInfo = kmalloc(sizeof(DAC960_V2_LogicalDeviceInfo_T),
+ GFP_ATOMIC);
+ Controller->V2.LogicalDeviceInformation[LogicalDeviceNumber] =
+ LogicalDeviceInfo;
+ DAC960_Critical("Logical Drive %d (/dev/rd/c%dd%d) "
+ "Now Exists%s\n", Controller,
+ LogicalDeviceNumber,
+ Controller->ControllerNumber,
+ LogicalDeviceNumber,
+ (LogicalDeviceInfo != NULL
+ ? "" : " - Allocation Failed"));
+ if (LogicalDeviceInfo != NULL)
+ {
+ memset(LogicalDeviceInfo, 0,
+ sizeof(DAC960_V2_LogicalDeviceInfo_T));
+ DAC960_ComputeGenericDiskInfo(Controller);
+ }
+ }
+ if (LogicalDeviceInfo != NULL)
+ {
+ unsigned long LogicalDeviceSize =
+ NewLogicalDeviceInfo->ConfigurableDeviceSize;
+ if (NewLogicalDeviceInfo->LogicalDeviceState !=
+ LogicalDeviceInfo->LogicalDeviceState)
+ DAC960_Critical("Logical Drive %d (/dev/rd/c%dd%d) "
+ "is now %s\n", Controller,
+ LogicalDeviceNumber,
+ Controller->ControllerNumber,
+ LogicalDeviceNumber,
+ (NewLogicalDeviceInfo->LogicalDeviceState
+ == DAC960_V2_LogicalDevice_Online
+ ? "ONLINE"
+ : NewLogicalDeviceInfo->LogicalDeviceState
+ == DAC960_V2_LogicalDevice_Critical
+ ? "CRITICAL" : "OFFLINE"));
+ if ((NewLogicalDeviceInfo->SoftErrors !=
+ LogicalDeviceInfo->SoftErrors) ||
+ (NewLogicalDeviceInfo->CommandsFailed !=
+ LogicalDeviceInfo->CommandsFailed) ||
+ (NewLogicalDeviceInfo->DeferredWriteErrors !=
+ LogicalDeviceInfo->DeferredWriteErrors))
+ DAC960_Critical("Logical Drive %d (/dev/rd/c%dd%d) Errors: "
+ "Soft = %d, Failed = %d, Deferred Write = %d\n",
+ Controller, LogicalDeviceNumber,
+ Controller->ControllerNumber,
+ LogicalDeviceNumber,
+ NewLogicalDeviceInfo->SoftErrors,
+ NewLogicalDeviceInfo->CommandsFailed,
+ NewLogicalDeviceInfo->DeferredWriteErrors);
+ if (NewLogicalDeviceInfo->ConsistencyCheckInProgress)
+ DAC960_V2_ReportProgress(Controller,
+ "Consistency Check",
+ LogicalDeviceNumber,
+ NewLogicalDeviceInfo
+ ->ConsistencyCheckBlockNumber,
+ LogicalDeviceSize);
+ else if (NewLogicalDeviceInfo->RebuildInProgress)
+ DAC960_V2_ReportProgress(Controller,
+ "Rebuild",
+ LogicalDeviceNumber,
+ NewLogicalDeviceInfo
+ ->RebuildBlockNumber,
+ LogicalDeviceSize);
+ else if (NewLogicalDeviceInfo->BackgroundInitializationInProgress)
+ DAC960_V2_ReportProgress(Controller,
+ "Background Initialization",
+ LogicalDeviceNumber,
+ NewLogicalDeviceInfo
+ ->BackgroundInitializationBlockNumber,
+ LogicalDeviceSize);
+ else if (NewLogicalDeviceInfo->ForegroundInitializationInProgress)
+ DAC960_V2_ReportProgress(Controller,
+ "Foreground Initialization",
+ LogicalDeviceNumber,
+ NewLogicalDeviceInfo
+ ->ForegroundInitializationBlockNumber,
+ LogicalDeviceSize);
+ else if (NewLogicalDeviceInfo->DataMigrationInProgress)
+ DAC960_V2_ReportProgress(Controller,
+ "Data Migration",
+ LogicalDeviceNumber,
+ NewLogicalDeviceInfo
+ ->DataMigrationBlockNumber,
+ LogicalDeviceSize);
+ else if (NewLogicalDeviceInfo->PatrolOperationInProgress)
+ DAC960_V2_ReportProgress(Controller,
+ "Patrol Operation",
+ LogicalDeviceNumber,
+ NewLogicalDeviceInfo
+ ->PatrolOperationBlockNumber,
+ LogicalDeviceSize);
+ if (LogicalDeviceInfo->BackgroundInitializationInProgress &&
+ !NewLogicalDeviceInfo->BackgroundInitializationInProgress)
+ DAC960_Progress("Logical Drive %d (/dev/rd/c%dd%d) "
+ "Background Initialization %s\n",
+ Controller,
+ LogicalDeviceNumber,
+ Controller->ControllerNumber,
+ LogicalDeviceNumber,
+ (NewLogicalDeviceInfo->LogicalDeviceControl
+ .LogicalDeviceInitialized
+ ? "Completed" : "Failed"));
+ memcpy(LogicalDeviceInfo, NewLogicalDeviceInfo,
+ sizeof(DAC960_V2_LogicalDeviceInfo_T));
+ }
+ Controller->V2.LogicalDriveFoundDuringScan
+ [LogicalDeviceNumber] = true;
+ NewLogicalDeviceInfo->LogicalDeviceNumber++;
+ }
+ else if (CommandOpcode == DAC960_V2_GetLogicalDeviceInfoValid)
+ {
+ int LogicalDriveNumber;
+ for (LogicalDriveNumber = 0;
+ LogicalDriveNumber < DAC960_MaxLogicalDrives;
+ LogicalDriveNumber++)
+ {
+ DAC960_V2_LogicalDeviceInfo_T *LogicalDeviceInfo =
+ Controller->V2.LogicalDeviceInformation[LogicalDriveNumber];
+ if (LogicalDeviceInfo == NULL ||
+ Controller->V2.LogicalDriveFoundDuringScan
+ [LogicalDriveNumber])
+ continue;
+ DAC960_Critical("Logical Drive %d (/dev/rd/c%dd%d) "
+ "No Longer Exists\n", Controller,
+ LogicalDriveNumber,
+ Controller->ControllerNumber,
+ LogicalDriveNumber);
+ Controller->V2.LogicalDeviceInformation
+ [LogicalDriveNumber] = NULL;
+ kfree(LogicalDeviceInfo);
+ Controller->LogicalDriveInitiallyAccessible
+ [LogicalDriveNumber] = false;
+ DAC960_ComputeGenericDiskInfo(Controller);
+ }
+ Controller->V2.NeedLogicalDeviceInformation = false;
+ }
+ else if (CommandOpcode == DAC960_V2_SCSI_10_Passthru)
+ {
+ DAC960_SCSI_Inquiry_UnitSerialNumber_T *InquiryUnitSerialNumber =
+ Controller->V2.InquiryUnitSerialNumber[Controller->V2.PhysicalDeviceIndex - 1];
+
+ if (CommandStatus != DAC960_V2_NormalCompletion) {
+ memset(InquiryUnitSerialNumber,
+ 0, sizeof(DAC960_SCSI_Inquiry_UnitSerialNumber_T));
+ InquiryUnitSerialNumber->PeripheralDeviceType = 0x1F;
+ } else
+ memcpy(InquiryUnitSerialNumber,
+ Controller->V2.NewInquiryUnitSerialNumber,
+ sizeof(DAC960_SCSI_Inquiry_UnitSerialNumber_T));
+
+ Controller->V2.NeedDeviceSerialNumberInformation = false;
+ }
+
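+      /*
+        Fetch the next Event from the Controller if the Health Status Buffer
+        indicates that Events beyond NextEventSequenceNumber are available;
+        Events are retrieved one per Monitoring Command completion until the
+        sequence numbers match.
+      */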
+ if (Controller->V2.HealthStatusBuffer->NextEventSequenceNumber
+ - Controller->V2.NextEventSequenceNumber > 0)
+ {
+ CommandMailbox->GetEvent.CommandOpcode = DAC960_V2_IOCTL;
+ CommandMailbox->GetEvent.DataTransferSize = sizeof(DAC960_V2_Event_T);
+ CommandMailbox->GetEvent.EventSequenceNumberHigh16 =
+ Controller->V2.NextEventSequenceNumber >> 16;
+ CommandMailbox->GetEvent.ControllerNumber = 0;
+ CommandMailbox->GetEvent.IOCTL_Opcode =
+ DAC960_V2_GetEvent;
+ CommandMailbox->GetEvent.EventSequenceNumberLow16 =
+ Controller->V2.NextEventSequenceNumber & 0xFFFF;
+ CommandMailbox->GetEvent.DataTransferMemoryAddress
+ .ScatterGatherSegments[0]
+ .SegmentDataPointer =
+ Controller->V2.EventDMA;
+ CommandMailbox->GetEvent.DataTransferMemoryAddress
+ .ScatterGatherSegments[0]
+ .SegmentByteCount =
+ CommandMailbox->GetEvent.DataTransferSize;
+ DAC960_QueueCommand(Command);
+ return;
+ }
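+      /*
+        Continue the Physical Device scan: fetch the Unit Serial Number for a
+        newly discovered device first if one is outstanding, otherwise request
+        information for the next Physical Device.
+      */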
+ if (Controller->V2.NeedPhysicalDeviceInformation)
+ {
+ if (Controller->V2.NeedDeviceSerialNumberInformation)
+ {
+ DAC960_SCSI_Inquiry_UnitSerialNumber_T *InquiryUnitSerialNumber =
+ Controller->V2.NewInquiryUnitSerialNumber;
+ InquiryUnitSerialNumber->PeripheralDeviceType = 0x1F;
+
+ DAC960_V2_ConstructNewUnitSerialNumber(Controller, CommandMailbox,
+ Controller->V2.NewPhysicalDeviceInformation->Channel,
+ Controller->V2.NewPhysicalDeviceInformation->TargetID,
+ Controller->V2.NewPhysicalDeviceInformation->LogicalUnit - 1);
+
+
+ DAC960_QueueCommand(Command);
+ return;
+ }
+ if (Controller->V2.StartPhysicalDeviceInformationScan)
+ {
+ Controller->V2.PhysicalDeviceIndex = 0;
+ Controller->V2.NewPhysicalDeviceInformation->Channel = 0;
+ Controller->V2.NewPhysicalDeviceInformation->TargetID = 0;
+ Controller->V2.NewPhysicalDeviceInformation->LogicalUnit = 0;
+ Controller->V2.StartPhysicalDeviceInformationScan = false;
+ }
+ CommandMailbox->PhysicalDeviceInfo.CommandOpcode = DAC960_V2_IOCTL;
+ CommandMailbox->PhysicalDeviceInfo.DataTransferSize =
+ sizeof(DAC960_V2_PhysicalDeviceInfo_T);
+ CommandMailbox->PhysicalDeviceInfo.PhysicalDevice.LogicalUnit =
+ Controller->V2.NewPhysicalDeviceInformation->LogicalUnit;
+ CommandMailbox->PhysicalDeviceInfo.PhysicalDevice.TargetID =
+ Controller->V2.NewPhysicalDeviceInformation->TargetID;
+ CommandMailbox->PhysicalDeviceInfo.PhysicalDevice.Channel =
+ Controller->V2.NewPhysicalDeviceInformation->Channel;
+ CommandMailbox->PhysicalDeviceInfo.IOCTL_Opcode =
+ DAC960_V2_GetPhysicalDeviceInfoValid;
+ CommandMailbox->PhysicalDeviceInfo.DataTransferMemoryAddress
+ .ScatterGatherSegments[0]
+ .SegmentDataPointer =
+ Controller->V2.NewPhysicalDeviceInformationDMA;
+ CommandMailbox->PhysicalDeviceInfo.DataTransferMemoryAddress
+ .ScatterGatherSegments[0]
+ .SegmentByteCount =
+ CommandMailbox->PhysicalDeviceInfo.DataTransferSize;
+ DAC960_QueueCommand(Command);
+ return;
+ }
+ if (Controller->V2.NeedLogicalDeviceInformation)
+ {
+ if (Controller->V2.StartLogicalDeviceInformationScan)
+ {
+ int LogicalDriveNumber;
+ for (LogicalDriveNumber = 0;
+ LogicalDriveNumber < DAC960_MaxLogicalDrives;
+ LogicalDriveNumber++)
+ Controller->V2.LogicalDriveFoundDuringScan
+ [LogicalDriveNumber] = false;
+ Controller->V2.NewLogicalDeviceInformation->LogicalDeviceNumber = 0;
+ Controller->V2.StartLogicalDeviceInformationScan = false;
+ }
+ CommandMailbox->LogicalDeviceInfo.CommandOpcode = DAC960_V2_IOCTL;
+ CommandMailbox->LogicalDeviceInfo.DataTransferSize =
+ sizeof(DAC960_V2_LogicalDeviceInfo_T);
+ CommandMailbox->LogicalDeviceInfo.LogicalDevice.LogicalDeviceNumber =
+ Controller->V2.NewLogicalDeviceInformation->LogicalDeviceNumber;
+ CommandMailbox->LogicalDeviceInfo.IOCTL_Opcode =
+ DAC960_V2_GetLogicalDeviceInfoValid;
+ CommandMailbox->LogicalDeviceInfo.DataTransferMemoryAddress
+ .ScatterGatherSegments[0]
+ .SegmentDataPointer =
+ Controller->V2.NewLogicalDeviceInformationDMA;
+ CommandMailbox->LogicalDeviceInfo.DataTransferMemoryAddress
+ .ScatterGatherSegments[0]
+ .SegmentByteCount =
+ CommandMailbox->LogicalDeviceInfo.DataTransferSize;
+ DAC960_QueueCommand(Command);
+ return;
+ }
+ Controller->MonitoringTimerCount++;
+ Controller->MonitoringTimer.expires =
+ jiffies + DAC960_HealthStatusMonitoringInterval;
+ add_timer(&Controller->MonitoringTimer);
+ }
+ if (CommandType == DAC960_ImmediateCommand)
+ {
+ complete(Command->Completion);
+ Command->Completion = NULL;
+ return;
+ }
+ if (CommandType == DAC960_QueuedCommand)
+ {
+ DAC960_V2_KernelCommand_T *KernelCommand = Command->V2.KernelCommand;
+ KernelCommand->CommandStatus = CommandStatus;
+ KernelCommand->RequestSenseLength = Command->V2.RequestSenseLength;
+ KernelCommand->DataTransferLength = Command->V2.DataTransferResidue;
+ Command->V2.KernelCommand = NULL;
+ DAC960_DeallocateCommand(Command);
+ KernelCommand->CompletionFunction(KernelCommand);
+ return;
+ }
+ /*
+ Queue a Status Monitoring Command to the Controller using the just
+ completed Command if one was deferred previously due to lack of a
+ free Command when the Monitoring Timer Function was called.
+ */
+ if (Controller->MonitoringCommandDeferred)
+ {
+ Controller->MonitoringCommandDeferred = false;
+ DAC960_V2_QueueMonitoringCommand(Command);
+ return;
+ }
+ /*
+ Deallocate the Command.
+ */
+ DAC960_DeallocateCommand(Command);
+ /*
+ Wake up any processes waiting on a free Command.
+ */
+ wake_up(&Controller->CommandWaitQueue);
+}
+
+/*
+ DAC960_GEM_InterruptHandler handles hardware interrupts from DAC960 GEM Series
+ Controllers.
+*/
+
+static irqreturn_t DAC960_GEM_InterruptHandler(int IRQ_Channel,
+ void *DeviceIdentifier)
+{
+ DAC960_Controller_T *Controller = DeviceIdentifier;
+ void __iomem *ControllerBaseAddress = Controller->BaseAddress;
+ DAC960_V2_StatusMailbox_T *NextStatusMailbox;
+ unsigned long flags;
+
+ spin_lock_irqsave(&Controller->queue_lock, flags);
+ DAC960_GEM_AcknowledgeInterrupt(ControllerBaseAddress);
+ NextStatusMailbox = Controller->V2.NextStatusMailbox;
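+  /*
+    Consume each valid Status Mailbox entry (marked by a non-zero Command
+    Identifier), clearing it and wrapping the ring pointer from the Last back
+    to the First Status Mailbox.
+  */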
+ while (NextStatusMailbox->Fields.CommandIdentifier > 0)
+ {
+ DAC960_V2_CommandIdentifier_T CommandIdentifier =
+ NextStatusMailbox->Fields.CommandIdentifier;
+ DAC960_Command_T *Command = Controller->Commands[CommandIdentifier-1];
+ Command->V2.CommandStatus = NextStatusMailbox->Fields.CommandStatus;
+ Command->V2.RequestSenseLength =
+ NextStatusMailbox->Fields.RequestSenseLength;
+ Command->V2.DataTransferResidue =
+ NextStatusMailbox->Fields.DataTransferResidue;
+ NextStatusMailbox->Words[0] = 0;
+ if (++NextStatusMailbox > Controller->V2.LastStatusMailbox)
+ NextStatusMailbox = Controller->V2.FirstStatusMailbox;
+ DAC960_V2_ProcessCompletedCommand(Command);
+ }
+ Controller->V2.NextStatusMailbox = NextStatusMailbox;
+ /*
+ Attempt to remove additional I/O Requests from the Controller's
+ I/O Request Queue and queue them to the Controller.
+ */
+ DAC960_ProcessRequest(Controller);
+ spin_unlock_irqrestore(&Controller->queue_lock, flags);
+ return IRQ_HANDLED;
+}
+
+/*
+ DAC960_BA_InterruptHandler handles hardware interrupts from DAC960 BA Series
+ Controllers.
+*/
+
+static irqreturn_t DAC960_BA_InterruptHandler(int IRQ_Channel,
+ void *DeviceIdentifier)
+{
+ DAC960_Controller_T *Controller = DeviceIdentifier;
+ void __iomem *ControllerBaseAddress = Controller->BaseAddress;
+ DAC960_V2_StatusMailbox_T *NextStatusMailbox;
+ unsigned long flags;
+
+ spin_lock_irqsave(&Controller->queue_lock, flags);
+ DAC960_BA_AcknowledgeInterrupt(ControllerBaseAddress);
+ NextStatusMailbox = Controller->V2.NextStatusMailbox;
+ while (NextStatusMailbox->Fields.CommandIdentifier > 0)
+ {
+ DAC960_V2_CommandIdentifier_T CommandIdentifier =
+ NextStatusMailbox->Fields.CommandIdentifier;
+ DAC960_Command_T *Command = Controller->Commands[CommandIdentifier-1];
+ Command->V2.CommandStatus = NextStatusMailbox->Fields.CommandStatus;
+ Command->V2.RequestSenseLength =
+ NextStatusMailbox->Fields.RequestSenseLength;
+ Command->V2.DataTransferResidue =
+ NextStatusMailbox->Fields.DataTransferResidue;
+ NextStatusMailbox->Words[0] = 0;
+ if (++NextStatusMailbox > Controller->V2.LastStatusMailbox)
+ NextStatusMailbox = Controller->V2.FirstStatusMailbox;
+ DAC960_V2_ProcessCompletedCommand(Command);
+ }
+ Controller->V2.NextStatusMailbox = NextStatusMailbox;
+ /*
+ Attempt to remove additional I/O Requests from the Controller's
+ I/O Request Queue and queue them to the Controller.
+ */
+ DAC960_ProcessRequest(Controller);
+ spin_unlock_irqrestore(&Controller->queue_lock, flags);
+ return IRQ_HANDLED;
+}
+
+
+/*
+ DAC960_LP_InterruptHandler handles hardware interrupts from DAC960 LP Series
+ Controllers.
+*/
+
+static irqreturn_t DAC960_LP_InterruptHandler(int IRQ_Channel,
+ void *DeviceIdentifier)
+{
+ DAC960_Controller_T *Controller = DeviceIdentifier;
+ void __iomem *ControllerBaseAddress = Controller->BaseAddress;
+ DAC960_V2_StatusMailbox_T *NextStatusMailbox;
+ unsigned long flags;
+
+ spin_lock_irqsave(&Controller->queue_lock, flags);
+ DAC960_LP_AcknowledgeInterrupt(ControllerBaseAddress);
+ NextStatusMailbox = Controller->V2.NextStatusMailbox;
+ while (NextStatusMailbox->Fields.CommandIdentifier > 0)
+ {
+ DAC960_V2_CommandIdentifier_T CommandIdentifier =
+ NextStatusMailbox->Fields.CommandIdentifier;
+ DAC960_Command_T *Command = Controller->Commands[CommandIdentifier-1];
+ Command->V2.CommandStatus = NextStatusMailbox->Fields.CommandStatus;
+ Command->V2.RequestSenseLength =
+ NextStatusMailbox->Fields.RequestSenseLength;
+ Command->V2.DataTransferResidue =
+ NextStatusMailbox->Fields.DataTransferResidue;
+ NextStatusMailbox->Words[0] = 0;
+ if (++NextStatusMailbox > Controller->V2.LastStatusMailbox)
+ NextStatusMailbox = Controller->V2.FirstStatusMailbox;
+ DAC960_V2_ProcessCompletedCommand(Command);
+ }
+ Controller->V2.NextStatusMailbox = NextStatusMailbox;
+ /*
+ Attempt to remove additional I/O Requests from the Controller's
+ I/O Request Queue and queue them to the Controller.
+ */
+ DAC960_ProcessRequest(Controller);
+ spin_unlock_irqrestore(&Controller->queue_lock, flags);
+ return IRQ_HANDLED;
+}
+
+
+/*
+ DAC960_LA_InterruptHandler handles hardware interrupts from DAC960 LA Series
+ Controllers.
+*/
+
+static irqreturn_t DAC960_LA_InterruptHandler(int IRQ_Channel,
+ void *DeviceIdentifier)
+{
+ DAC960_Controller_T *Controller = DeviceIdentifier;
+ void __iomem *ControllerBaseAddress = Controller->BaseAddress;
+ DAC960_V1_StatusMailbox_T *NextStatusMailbox;
+ unsigned long flags;
+
+ spin_lock_irqsave(&Controller->queue_lock, flags);
+ DAC960_LA_AcknowledgeInterrupt(ControllerBaseAddress);
+ NextStatusMailbox = Controller->V1.NextStatusMailbox;
+ while (NextStatusMailbox->Fields.Valid)
+ {
+ DAC960_V1_CommandIdentifier_T CommandIdentifier =
+ NextStatusMailbox->Fields.CommandIdentifier;
+ DAC960_Command_T *Command = Controller->Commands[CommandIdentifier-1];
+ Command->V1.CommandStatus = NextStatusMailbox->Fields.CommandStatus;
+ NextStatusMailbox->Word = 0;
+ if (++NextStatusMailbox > Controller->V1.LastStatusMailbox)
+ NextStatusMailbox = Controller->V1.FirstStatusMailbox;
+ DAC960_V1_ProcessCompletedCommand(Command);
+ }
+ Controller->V1.NextStatusMailbox = NextStatusMailbox;
+ /*
+ Attempt to remove additional I/O Requests from the Controller's
+ I/O Request Queue and queue them to the Controller.
+ */
+ DAC960_ProcessRequest(Controller);
+ spin_unlock_irqrestore(&Controller->queue_lock, flags);
+ return IRQ_HANDLED;
+}
+
+
+/*
+ DAC960_PG_InterruptHandler handles hardware interrupts from DAC960 PG Series
+ Controllers.
+*/
+
+static irqreturn_t DAC960_PG_InterruptHandler(int IRQ_Channel,
+ void *DeviceIdentifier)
+{
+ DAC960_Controller_T *Controller = DeviceIdentifier;
+ void __iomem *ControllerBaseAddress = Controller->BaseAddress;
+ DAC960_V1_StatusMailbox_T *NextStatusMailbox;
+ unsigned long flags;
+
+ spin_lock_irqsave(&Controller->queue_lock, flags);
+ DAC960_PG_AcknowledgeInterrupt(ControllerBaseAddress);
+ NextStatusMailbox = Controller->V1.NextStatusMailbox;
+ while (NextStatusMailbox->Fields.Valid)
+ {
+ DAC960_V1_CommandIdentifier_T CommandIdentifier =
+ NextStatusMailbox->Fields.CommandIdentifier;
+ DAC960_Command_T *Command = Controller->Commands[CommandIdentifier-1];
+ Command->V1.CommandStatus = NextStatusMailbox->Fields.CommandStatus;
+ NextStatusMailbox->Word = 0;
+ if (++NextStatusMailbox > Controller->V1.LastStatusMailbox)
+ NextStatusMailbox = Controller->V1.FirstStatusMailbox;
+ DAC960_V1_ProcessCompletedCommand(Command);
+ }
+ Controller->V1.NextStatusMailbox = NextStatusMailbox;
+ /*
+ Attempt to remove additional I/O Requests from the Controller's
+ I/O Request Queue and queue them to the Controller.
+ */
+ DAC960_ProcessRequest(Controller);
+ spin_unlock_irqrestore(&Controller->queue_lock, flags);
+ return IRQ_HANDLED;
+}
+
+
+/*
+ DAC960_PD_InterruptHandler handles hardware interrupts from DAC960 PD Series
+ Controllers.
+*/
+
+static irqreturn_t DAC960_PD_InterruptHandler(int IRQ_Channel,
+ void *DeviceIdentifier)
+{
+ DAC960_Controller_T *Controller = DeviceIdentifier;
+ void __iomem *ControllerBaseAddress = Controller->BaseAddress;
+ unsigned long flags;
+
+ spin_lock_irqsave(&Controller->queue_lock, flags);
+ while (DAC960_PD_StatusAvailableP(ControllerBaseAddress))
+ {
+ DAC960_V1_CommandIdentifier_T CommandIdentifier =
+ DAC960_PD_ReadStatusCommandIdentifier(ControllerBaseAddress);
+ DAC960_Command_T *Command = Controller->Commands[CommandIdentifier-1];
+ Command->V1.CommandStatus =
+ DAC960_PD_ReadStatusRegister(ControllerBaseAddress);
+ DAC960_PD_AcknowledgeInterrupt(ControllerBaseAddress);
+ DAC960_PD_AcknowledgeStatus(ControllerBaseAddress);
+ DAC960_V1_ProcessCompletedCommand(Command);
+ }
+ /*
+ Attempt to remove additional I/O Requests from the Controller's
+ I/O Request Queue and queue them to the Controller.
+ */
+ DAC960_ProcessRequest(Controller);
+ spin_unlock_irqrestore(&Controller->queue_lock, flags);
+ return IRQ_HANDLED;
+}
+
+
+/*
+ DAC960_P_InterruptHandler handles hardware interrupts from DAC960 P Series
+ Controllers.
+
+ Translations of DAC960_V1_Enquiry and DAC960_V1_GetDeviceState rely
+ on the data having been placed into DAC960_Controller_T, rather than
+ an arbitrary buffer.
+*/
+
+static irqreturn_t DAC960_P_InterruptHandler(int IRQ_Channel,
+ void *DeviceIdentifier)
+{
+ DAC960_Controller_T *Controller = DeviceIdentifier;
+ void __iomem *ControllerBaseAddress = Controller->BaseAddress;
+ unsigned long flags;
+
+ spin_lock_irqsave(&Controller->queue_lock, flags);
+ while (DAC960_PD_StatusAvailableP(ControllerBaseAddress))
+ {
+ DAC960_V1_CommandIdentifier_T CommandIdentifier =
+ DAC960_PD_ReadStatusCommandIdentifier(ControllerBaseAddress);
+ DAC960_Command_T *Command = Controller->Commands[CommandIdentifier-1];
+ DAC960_V1_CommandMailbox_T *CommandMailbox = &Command->V1.CommandMailbox;
+ DAC960_V1_CommandOpcode_T CommandOpcode =
+ CommandMailbox->Common.CommandOpcode;
+ Command->V1.CommandStatus =
+ DAC960_PD_ReadStatusRegister(ControllerBaseAddress);
+ DAC960_PD_AcknowledgeInterrupt(ControllerBaseAddress);
+ DAC960_PD_AcknowledgeStatus(ControllerBaseAddress);
+ switch (CommandOpcode)
+ {
+ case DAC960_V1_Enquiry_Old:
+ Command->V1.CommandMailbox.Common.CommandOpcode = DAC960_V1_Enquiry;
+ DAC960_P_To_PD_TranslateEnquiry(Controller->V1.NewEnquiry);
+ break;
+ case DAC960_V1_GetDeviceState_Old:
+ Command->V1.CommandMailbox.Common.CommandOpcode =
+ DAC960_V1_GetDeviceState;
+ DAC960_P_To_PD_TranslateDeviceState(Controller->V1.NewDeviceState);
+ break;
+ case DAC960_V1_Read_Old:
+ Command->V1.CommandMailbox.Common.CommandOpcode = DAC960_V1_Read;
+ DAC960_P_To_PD_TranslateReadWriteCommand(CommandMailbox);
+ break;
+ case DAC960_V1_Write_Old:
+ Command->V1.CommandMailbox.Common.CommandOpcode = DAC960_V1_Write;
+ DAC960_P_To_PD_TranslateReadWriteCommand(CommandMailbox);
+ break;
+ case DAC960_V1_ReadWithScatterGather_Old:
+ Command->V1.CommandMailbox.Common.CommandOpcode =
+ DAC960_V1_ReadWithScatterGather;
+ DAC960_P_To_PD_TranslateReadWriteCommand(CommandMailbox);
+ break;
+ case DAC960_V1_WriteWithScatterGather_Old:
+ Command->V1.CommandMailbox.Common.CommandOpcode =
+ DAC960_V1_WriteWithScatterGather;
+ DAC960_P_To_PD_TranslateReadWriteCommand(CommandMailbox);
+ break;
+ default:
+ break;
+ }
+ DAC960_V1_ProcessCompletedCommand(Command);
+ }
+ /*
+ Attempt to remove additional I/O Requests from the Controller's
+ I/O Request Queue and queue them to the Controller.
+ */
+ DAC960_ProcessRequest(Controller);
+ spin_unlock_irqrestore(&Controller->queue_lock, flags);
+ return IRQ_HANDLED;
+}
+
+
+/*
+ DAC960_V1_QueueMonitoringCommand queues a Monitoring Command to DAC960 V1
+ Firmware Controllers.
+*/
+
+static void DAC960_V1_QueueMonitoringCommand(DAC960_Command_T *Command)
+{
+ DAC960_Controller_T *Controller = Command->Controller;
+ DAC960_V1_CommandMailbox_T *CommandMailbox = &Command->V1.CommandMailbox;
+ DAC960_V1_ClearCommand(Command);
+ Command->CommandType = DAC960_MonitoringCommand;
+ CommandMailbox->Type3.CommandOpcode = DAC960_V1_Enquiry;
+ CommandMailbox->Type3.BusAddress = Controller->V1.NewEnquiryDMA;
+ DAC960_QueueCommand(Command);
+}
+
+
+/*
+ DAC960_V2_QueueMonitoringCommand queues a Monitoring Command to DAC960 V2
+ Firmware Controllers.
+*/
+
+static void DAC960_V2_QueueMonitoringCommand(DAC960_Command_T *Command)
+{
+ DAC960_Controller_T *Controller = Command->Controller;
+ DAC960_V2_CommandMailbox_T *CommandMailbox = &Command->V2.CommandMailbox;
+ DAC960_V2_ClearCommand(Command);
+ Command->CommandType = DAC960_MonitoringCommand;
+ CommandMailbox->ControllerInfo.CommandOpcode = DAC960_V2_IOCTL;
+ CommandMailbox->ControllerInfo.CommandControlBits
+ .DataTransferControllerToHost = true;
+ CommandMailbox->ControllerInfo.CommandControlBits
+ .NoAutoRequestSense = true;
+ CommandMailbox->ControllerInfo.DataTransferSize =
+ sizeof(DAC960_V2_ControllerInfo_T);
+ CommandMailbox->ControllerInfo.ControllerNumber = 0;
+ CommandMailbox->ControllerInfo.IOCTL_Opcode = DAC960_V2_GetControllerInfo;
+ CommandMailbox->ControllerInfo.DataTransferMemoryAddress
+ .ScatterGatherSegments[0]
+ .SegmentDataPointer =
+ Controller->V2.NewControllerInformationDMA;
+ CommandMailbox->ControllerInfo.DataTransferMemoryAddress
+ .ScatterGatherSegments[0]
+ .SegmentByteCount =
+ CommandMailbox->ControllerInfo.DataTransferSize;
+ DAC960_QueueCommand(Command);
+}
+
+
+/*
+ DAC960_MonitoringTimerFunction is the timer function for monitoring
+ the status of DAC960 Controllers.
+*/
+
+static void DAC960_MonitoringTimerFunction(unsigned long TimerData)
+{
+ DAC960_Controller_T *Controller = (DAC960_Controller_T *) TimerData;
+ DAC960_Command_T *Command;
+ unsigned long flags;
+
+ if (Controller->FirmwareType == DAC960_V1_Controller)
+ {
+ spin_lock_irqsave(&Controller->queue_lock, flags);
+ /*
+ Queue a Status Monitoring Command to Controller.
+ */
+ Command = DAC960_AllocateCommand(Controller);
+ if (Command != NULL)
+ DAC960_V1_QueueMonitoringCommand(Command);
+ else Controller->MonitoringCommandDeferred = true;
+ spin_unlock_irqrestore(&Controller->queue_lock, flags);
+ }
+ else
+ {
+ DAC960_V2_ControllerInfo_T *ControllerInfo =
+ &Controller->V2.ControllerInformation;
+ unsigned int StatusChangeCounter =
+ Controller->V2.HealthStatusBuffer->StatusChangeCounter;
+ bool ForceMonitoringCommand = false;
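+      /*
+        Once per Secondary Monitoring Interval, force a Monitoring Command if
+        any Logical Drive has not yet completed initialization, so that its
+        completion is reported even while the Health Status Buffer is
+        otherwise quiet.
+      */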
+ if (time_after(jiffies, Controller->SecondaryMonitoringTime
+ + DAC960_SecondaryMonitoringInterval))
+ {
+ int LogicalDriveNumber;
+ for (LogicalDriveNumber = 0;
+ LogicalDriveNumber < DAC960_MaxLogicalDrives;
+ LogicalDriveNumber++)
+ {
+ DAC960_V2_LogicalDeviceInfo_T *LogicalDeviceInfo =
+ Controller->V2.LogicalDeviceInformation[LogicalDriveNumber];
+ if (LogicalDeviceInfo == NULL) continue;
+ if (!LogicalDeviceInfo->LogicalDeviceControl
+ .LogicalDeviceInitialized)
+ {
+ ForceMonitoringCommand = true;
+ break;
+ }
+ }
+ Controller->SecondaryMonitoringTime = jiffies;
+ }
+ if (StatusChangeCounter == Controller->V2.StatusChangeCounter &&
+ Controller->V2.HealthStatusBuffer->NextEventSequenceNumber
+ == Controller->V2.NextEventSequenceNumber &&
+ (ControllerInfo->BackgroundInitializationsActive +
+ ControllerInfo->LogicalDeviceInitializationsActive +
+ ControllerInfo->PhysicalDeviceInitializationsActive +
+ ControllerInfo->ConsistencyChecksActive +
+ ControllerInfo->RebuildsActive +
+ ControllerInfo->OnlineExpansionsActive == 0 ||
+ time_before(jiffies, Controller->PrimaryMonitoringTime
+ + DAC960_MonitoringTimerInterval)) &&
+ !ForceMonitoringCommand)
+ {
+ Controller->MonitoringTimer.expires =
+ jiffies + DAC960_HealthStatusMonitoringInterval;
+ add_timer(&Controller->MonitoringTimer);
+ return;
+ }
+ Controller->V2.StatusChangeCounter = StatusChangeCounter;
+ Controller->PrimaryMonitoringTime = jiffies;
+
+ spin_lock_irqsave(&Controller->queue_lock, flags);
+ /*
+ Queue a Status Monitoring Command to Controller.
+ */
+ Command = DAC960_AllocateCommand(Controller);
+ if (Command != NULL)
+ DAC960_V2_QueueMonitoringCommand(Command);
+ else Controller->MonitoringCommandDeferred = true;
+ spin_unlock_irqrestore(&Controller->queue_lock, flags);
+ /*
+ Wake up any processes waiting on a Health Status Buffer change.
+ */
+ wake_up(&Controller->HealthStatusWaitQueue);
+ }
+}
+
+/*
+ DAC960_CheckStatusBuffer verifies that there is room to hold ByteCount
+ additional bytes in the Combined Status Buffer and grows the buffer if
+ necessary. It returns true if there is enough room and false otherwise.
+*/
+
+static bool DAC960_CheckStatusBuffer(DAC960_Controller_T *Controller,
+ unsigned int ByteCount)
+{
+ unsigned char *NewStatusBuffer;
+ if (Controller->InitialStatusLength + 1 +
+ Controller->CurrentStatusLength + ByteCount + 1 <=
+ Controller->CombinedStatusBufferLength)
+ return true;
+ if (Controller->CombinedStatusBufferLength == 0)
+ {
+ unsigned int NewStatusBufferLength = DAC960_InitialStatusBufferSize;
+ while (NewStatusBufferLength < ByteCount)
+ NewStatusBufferLength *= 2;
+ Controller->CombinedStatusBuffer = kmalloc(NewStatusBufferLength,
+ GFP_ATOMIC);
+ if (Controller->CombinedStatusBuffer == NULL) return false;
+ Controller->CombinedStatusBufferLength = NewStatusBufferLength;
+ return true;
+ }
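+  /*
+    Grow the Combined Status Buffer by doubling its size, preserving the
+    existing contents and re-deriving the Current Status Buffer pointer.
+  */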
+ NewStatusBuffer = kmalloc(2 * Controller->CombinedStatusBufferLength,
+ GFP_ATOMIC);
+ if (NewStatusBuffer == NULL)
+ {
+ DAC960_Warning("Unable to expand Combined Status Buffer - Truncating\n",
+ Controller);
+ return false;
+ }
+ memcpy(NewStatusBuffer, Controller->CombinedStatusBuffer,
+ Controller->CombinedStatusBufferLength);
+ kfree(Controller->CombinedStatusBuffer);
+ Controller->CombinedStatusBuffer = NewStatusBuffer;
+ Controller->CombinedStatusBufferLength *= 2;
+ Controller->CurrentStatusBuffer =
+ &NewStatusBuffer[Controller->InitialStatusLength + 1];
+ return true;
+}
+
+
+/*
+ DAC960_Message prints Driver Messages.
+*/
+
+static void DAC960_Message(DAC960_MessageLevel_T MessageLevel,
+ unsigned char *Format,
+ DAC960_Controller_T *Controller,
+ ...)
+{
+ static unsigned char Buffer[DAC960_LineBufferSize];
+ static bool BeginningOfLine = true;
+ va_list Arguments;
+ int Length = 0;
+ va_start(Arguments, Controller);
+ Length = vsprintf(Buffer, Format, Arguments);
+ va_end(Arguments);
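+  /*
+    Route the message by level: Announce and Info messages are accumulated in
+    the Combined Status Buffer and selectively echoed to the console until the
+    Controller is initialized, Progress messages are kept in the Progress
+    Buffer with ephemeral messages rate-limited on the console, UserCritical
+    messages are appended to the User Status Buffer, and all other levels are
+    printed directly.
+  */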
+ if (Controller == NULL)
+ printk("%sDAC960#%d: %s", DAC960_MessageLevelMap[MessageLevel],
+ DAC960_ControllerCount, Buffer);
+ else if (MessageLevel == DAC960_AnnounceLevel ||
+ MessageLevel == DAC960_InfoLevel)
+ {
+ if (!Controller->ControllerInitialized)
+ {
+ if (DAC960_CheckStatusBuffer(Controller, Length))
+ {
+ strcpy(&Controller->CombinedStatusBuffer
+ [Controller->InitialStatusLength],
+ Buffer);
+ Controller->InitialStatusLength += Length;
+ Controller->CurrentStatusBuffer =
+ &Controller->CombinedStatusBuffer
+ [Controller->InitialStatusLength + 1];
+ }
+ if (MessageLevel == DAC960_AnnounceLevel)
+ {
+ static int AnnouncementLines = 0;
+ if (++AnnouncementLines <= 2)
+ printk("%sDAC960: %s", DAC960_MessageLevelMap[MessageLevel],
+ Buffer);
+ }
+ else
+ {
+ if (BeginningOfLine)
+ {
+ if (Buffer[0] != '\n' || Length > 1)
+ printk("%sDAC960#%d: %s",
+ DAC960_MessageLevelMap[MessageLevel],
+ Controller->ControllerNumber, Buffer);
+ }
+ else printk("%s", Buffer);
+ }
+ }
+ else if (DAC960_CheckStatusBuffer(Controller, Length))
+ {
+ strcpy(&Controller->CurrentStatusBuffer[
+ Controller->CurrentStatusLength], Buffer);
+ Controller->CurrentStatusLength += Length;
+ }
+ }
+ else if (MessageLevel == DAC960_ProgressLevel)
+ {
+ strcpy(Controller->ProgressBuffer, Buffer);
+ Controller->ProgressBufferLength = Length;
+ if (Controller->EphemeralProgressMessage)
+ {
+ if (time_after_eq(jiffies, Controller->LastProgressReportTime
+ + DAC960_ProgressReportingInterval))
+ {
+ printk("%sDAC960#%d: %s", DAC960_MessageLevelMap[MessageLevel],
+ Controller->ControllerNumber, Buffer);
+ Controller->LastProgressReportTime = jiffies;
+ }
+ }
+ else printk("%sDAC960#%d: %s", DAC960_MessageLevelMap[MessageLevel],
+ Controller->ControllerNumber, Buffer);
+ }
+ else if (MessageLevel == DAC960_UserCriticalLevel)
+ {
+ strcpy(&Controller->UserStatusBuffer[Controller->UserStatusLength],
+ Buffer);
+ Controller->UserStatusLength += Length;
+ if (Buffer[0] != '\n' || Length > 1)
+ printk("%sDAC960#%d: %s", DAC960_MessageLevelMap[MessageLevel],
+ Controller->ControllerNumber, Buffer);
+ }
+ else
+ {
+ if (BeginningOfLine)
+ printk("%sDAC960#%d: %s", DAC960_MessageLevelMap[MessageLevel],
+ Controller->ControllerNumber, Buffer);
+ else printk("%s", Buffer);
+ }
+ BeginningOfLine = (Buffer[Length-1] == '\n');
+}
+
+
+/*
+ DAC960_ParsePhysicalDevice parses spaces followed by a Physical Device
+ Channel:TargetID specification from a User Command string. It updates
+ Channel and TargetID and returns true on success and false on failure.
+*/
+
+static bool DAC960_ParsePhysicalDevice(DAC960_Controller_T *Controller,
+ char *UserCommandString,
+ unsigned char *Channel,
+ unsigned char *TargetID)
+{
+ char *NewUserCommandString = UserCommandString;
+ unsigned long XChannel, XTargetID;
+ while (*UserCommandString == ' ') UserCommandString++;
+ if (UserCommandString == NewUserCommandString)
+ return false;
+ XChannel = simple_strtoul(UserCommandString, &NewUserCommandString, 10);
+ if (NewUserCommandString == UserCommandString ||
+ *NewUserCommandString != ':' ||
+ XChannel >= Controller->Channels)
+ return false;
+ UserCommandString = ++NewUserCommandString;
+ XTargetID = simple_strtoul(UserCommandString, &NewUserCommandString, 10);
+ if (NewUserCommandString == UserCommandString ||
+ *NewUserCommandString != '\0' ||
+ XTargetID >= Controller->Targets)
+ return false;
+ *Channel = XChannel;
+ *TargetID = XTargetID;
+ return true;
+}
+
+
+/*
+ DAC960_ParseLogicalDrive parses spaces followed by a Logical Drive Number
+ specification from a User Command string. It updates LogicalDriveNumber and
+ returns true on success and false on failure.
+*/
+
+static bool DAC960_ParseLogicalDrive(DAC960_Controller_T *Controller,
+ char *UserCommandString,
+ unsigned char *LogicalDriveNumber)
+{
+ char *NewUserCommandString = UserCommandString;
+ unsigned long XLogicalDriveNumber;
+ while (*UserCommandString == ' ') UserCommandString++;
+ if (UserCommandString == NewUserCommandString)
+ return false;
+ XLogicalDriveNumber =
+ simple_strtoul(UserCommandString, &NewUserCommandString, 10);
+ if (NewUserCommandString == UserCommandString ||
+ *NewUserCommandString != '\0' ||
+ XLogicalDriveNumber > DAC960_MaxLogicalDrives - 1)
+ return false;
+ *LogicalDriveNumber = XLogicalDriveNumber;
+ return true;
+}
+
+
+/*
+ DAC960_V1_SetDeviceState sets the Device State for a Physical Device for
+ DAC960 V1 Firmware Controllers.
+*/
+
+static void DAC960_V1_SetDeviceState(DAC960_Controller_T *Controller,
+ DAC960_Command_T *Command,
+ unsigned char Channel,
+ unsigned char TargetID,
+ DAC960_V1_PhysicalDeviceState_T
+ DeviceState,
+ const unsigned char *DeviceStateString)
+{
+ DAC960_V1_CommandMailbox_T *CommandMailbox = &Command->V1.CommandMailbox;
+ CommandMailbox->Type3D.CommandOpcode = DAC960_V1_StartDevice;
+ CommandMailbox->Type3D.Channel = Channel;
+ CommandMailbox->Type3D.TargetID = TargetID;
+ CommandMailbox->Type3D.DeviceState = DeviceState;
+ CommandMailbox->Type3D.Modifier = 0;
+ DAC960_ExecuteCommand(Command);
+ switch (Command->V1.CommandStatus)
+ {
+ case DAC960_V1_NormalCompletion:
+ DAC960_UserCritical("%s of Physical Device %d:%d Succeeded\n", Controller,
+ DeviceStateString, Channel, TargetID);
+ break;
+ case DAC960_V1_UnableToStartDevice:
+ DAC960_UserCritical("%s of Physical Device %d:%d Failed - "
+ "Unable to Start Device\n", Controller,
+ DeviceStateString, Channel, TargetID);
+ break;
+ case DAC960_V1_NoDeviceAtAddress:
+ DAC960_UserCritical("%s of Physical Device %d:%d Failed - "
+ "No Device at Address\n", Controller,
+ DeviceStateString, Channel, TargetID);
+ break;
+ case DAC960_V1_InvalidChannelOrTargetOrModifier:
+ DAC960_UserCritical("%s of Physical Device %d:%d Failed - "
+ "Invalid Channel or Target or Modifier\n",
+ Controller, DeviceStateString, Channel, TargetID);
+ break;
+ case DAC960_V1_ChannelBusy:
+ DAC960_UserCritical("%s of Physical Device %d:%d Failed - "
+ "Channel Busy\n", Controller,
+ DeviceStateString, Channel, TargetID);
+ break;
+ default:
+ DAC960_UserCritical("%s of Physical Device %d:%d Failed - "
+ "Unexpected Status %04X\n", Controller,
+ DeviceStateString, Channel, TargetID,
+ Command->V1.CommandStatus);
+ break;
+ }
+}
+
+
+/*
+ DAC960_V1_ExecuteUserCommand executes a User Command for DAC960 V1 Firmware
+ Controllers.
+*/
+
+static bool DAC960_V1_ExecuteUserCommand(DAC960_Controller_T *Controller,
+ unsigned char *UserCommand)
+{
+ DAC960_Command_T *Command;
+ DAC960_V1_CommandMailbox_T *CommandMailbox;
+ unsigned long flags;
+ unsigned char Channel, TargetID, LogicalDriveNumber;
+
+ spin_lock_irqsave(&Controller->queue_lock, flags);
+ while ((Command = DAC960_AllocateCommand(Controller)) == NULL)
+ DAC960_WaitForCommand(Controller);
+ spin_unlock_irqrestore(&Controller->queue_lock, flags);
+ Controller->UserStatusLength = 0;
+ DAC960_V1_ClearCommand(Command);
+ Command->CommandType = DAC960_ImmediateCommand;
+ CommandMailbox = &Command->V1.CommandMailbox;
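+  /*
+    The User Commands handled here are flush-cache, kill <Channel>:<TargetID>,
+    make-online <Channel>:<TargetID>, make-standby <Channel>:<TargetID>,
+    rebuild <Channel>:<TargetID>, check-consistency <Logical Drive Number>,
+    cancel-rebuild, and cancel-consistency-check.
+  */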
+ if (strcmp(UserCommand, "flush-cache") == 0)
+ {
+ CommandMailbox->Type3.CommandOpcode = DAC960_V1_Flush;
+ DAC960_ExecuteCommand(Command);
+ DAC960_UserCritical("Cache Flush Completed\n", Controller);
+ }
+ else if (strncmp(UserCommand, "kill", 4) == 0 &&
+ DAC960_ParsePhysicalDevice(Controller, &UserCommand[4],
+ &Channel, &TargetID))
+ {
+ DAC960_V1_DeviceState_T *DeviceState =
+ &Controller->V1.DeviceState[Channel][TargetID];
+ if (DeviceState->Present &&
+ DeviceState->DeviceType == DAC960_V1_DiskType &&
+ DeviceState->DeviceState != DAC960_V1_Device_Dead)
+ DAC960_V1_SetDeviceState(Controller, Command, Channel, TargetID,
+ DAC960_V1_Device_Dead, "Kill");
+ else DAC960_UserCritical("Kill of Physical Device %d:%d Illegal\n",
+ Controller, Channel, TargetID);
+ }
+ else if (strncmp(UserCommand, "make-online", 11) == 0 &&
+ DAC960_ParsePhysicalDevice(Controller, &UserCommand[11],
+ &Channel, &TargetID))
+ {
+ DAC960_V1_DeviceState_T *DeviceState =
+ &Controller->V1.DeviceState[Channel][TargetID];
+ if (DeviceState->Present &&
+ DeviceState->DeviceType == DAC960_V1_DiskType &&
+ DeviceState->DeviceState == DAC960_V1_Device_Dead)
+ DAC960_V1_SetDeviceState(Controller, Command, Channel, TargetID,
+ DAC960_V1_Device_Online, "Make Online");
+ else DAC960_UserCritical("Make Online of Physical Device %d:%d Illegal\n",
+ Controller, Channel, TargetID);
+
+ }
+ else if (strncmp(UserCommand, "make-standby", 12) == 0 &&
+ DAC960_ParsePhysicalDevice(Controller, &UserCommand[12],
+ &Channel, &TargetID))
+ {
+ DAC960_V1_DeviceState_T *DeviceState =
+ &Controller->V1.DeviceState[Channel][TargetID];
+ if (DeviceState->Present &&
+ DeviceState->DeviceType == DAC960_V1_DiskType &&
+ DeviceState->DeviceState == DAC960_V1_Device_Dead)
+ DAC960_V1_SetDeviceState(Controller, Command, Channel, TargetID,
+ DAC960_V1_Device_Standby, "Make Standby");
+ else DAC960_UserCritical("Make Standby of Physical "
+ "Device %d:%d Illegal\n",
+ Controller, Channel, TargetID);
+ }
+ else if (strncmp(UserCommand, "rebuild", 7) == 0 &&
+ DAC960_ParsePhysicalDevice(Controller, &UserCommand[7],
+ &Channel, &TargetID))
+ {
+ CommandMailbox->Type3D.CommandOpcode = DAC960_V1_RebuildAsync;
+ CommandMailbox->Type3D.Channel = Channel;
+ CommandMailbox->Type3D.TargetID = TargetID;
+ DAC960_ExecuteCommand(Command);
+ switch (Command->V1.CommandStatus)
+ {
+ case DAC960_V1_NormalCompletion:
+ DAC960_UserCritical("Rebuild of Physical Device %d:%d Initiated\n",
+ Controller, Channel, TargetID);
+ break;
+ case DAC960_V1_AttemptToRebuildOnlineDrive:
+ DAC960_UserCritical("Rebuild of Physical Device %d:%d Failed - "
+ "Attempt to Rebuild Online or "
+ "Unresponsive Drive\n",
+ Controller, Channel, TargetID);
+ break;
+ case DAC960_V1_NewDiskFailedDuringRebuild:
+ DAC960_UserCritical("Rebuild of Physical Device %d:%d Failed - "
+ "New Disk Failed During Rebuild\n",
+ Controller, Channel, TargetID);
+ break;
+ case DAC960_V1_InvalidDeviceAddress:
+ DAC960_UserCritical("Rebuild of Physical Device %d:%d Failed - "
+ "Invalid Device Address\n",
+ Controller, Channel, TargetID);
+ break;
+ case DAC960_V1_RebuildOrCheckAlreadyInProgress:
+ DAC960_UserCritical("Rebuild of Physical Device %d:%d Failed - "
+ "Rebuild or Consistency Check Already "
+ "in Progress\n", Controller, Channel, TargetID);
+ break;
+ default:
+ DAC960_UserCritical("Rebuild of Physical Device %d:%d Failed - "
+ "Unexpected Status %04X\n", Controller,
+ Channel, TargetID, Command->V1.CommandStatus);
+ break;
+ }
+ }
+ else if (strncmp(UserCommand, "check-consistency", 17) == 0 &&
+ DAC960_ParseLogicalDrive(Controller, &UserCommand[17],
+ &LogicalDriveNumber))
+ {
+ CommandMailbox->Type3C.CommandOpcode = DAC960_V1_CheckConsistencyAsync;
+ CommandMailbox->Type3C.LogicalDriveNumber = LogicalDriveNumber;
+ CommandMailbox->Type3C.AutoRestore = true;
+ DAC960_ExecuteCommand(Command);
+ switch (Command->V1.CommandStatus)
+ {
+ case DAC960_V1_NormalCompletion:
+ DAC960_UserCritical("Consistency Check of Logical Drive %d "
+ "(/dev/rd/c%dd%d) Initiated\n",
+ Controller, LogicalDriveNumber,
+ Controller->ControllerNumber,
+ LogicalDriveNumber);
+ break;
+ case DAC960_V1_DependentDiskIsDead:
+ DAC960_UserCritical("Consistency Check of Logical Drive %d "
+ "(/dev/rd/c%dd%d) Failed - "
+ "Dependent Physical Device is DEAD\n",
+ Controller, LogicalDriveNumber,
+ Controller->ControllerNumber,
+ LogicalDriveNumber);
+ break;
+ case DAC960_V1_InvalidOrNonredundantLogicalDrive:
+ DAC960_UserCritical("Consistency Check of Logical Drive %d "
+ "(/dev/rd/c%dd%d) Failed - "
+ "Invalid or Nonredundant Logical Drive\n",
+ Controller, LogicalDriveNumber,
+ Controller->ControllerNumber,
+ LogicalDriveNumber);
+ break;
+ case DAC960_V1_RebuildOrCheckAlreadyInProgress:
+ DAC960_UserCritical("Consistency Check of Logical Drive %d "
+ "(/dev/rd/c%dd%d) Failed - Rebuild or "
+ "Consistency Check Already in Progress\n",
+ Controller, LogicalDriveNumber,
+ Controller->ControllerNumber,
+ LogicalDriveNumber);
+ break;
+ default:
+ DAC960_UserCritical("Consistency Check of Logical Drive %d "
+ "(/dev/rd/c%dd%d) Failed - "
+ "Unexpected Status %04X\n",
+ Controller, LogicalDriveNumber,
+ Controller->ControllerNumber,
+ LogicalDriveNumber, Command->V1.CommandStatus);
+ break;
+ }
+ }
+ else if (strcmp(UserCommand, "cancel-rebuild") == 0 ||
+ strcmp(UserCommand, "cancel-consistency-check") == 0)
+ {
+ /*
+ the OldRebuildRateConstant is never actually used
+ once its value is retrieved from the controller.
+ */
+ unsigned char *OldRebuildRateConstant;
+ dma_addr_t OldRebuildRateConstantDMA;
+
+ OldRebuildRateConstant = pci_alloc_consistent( Controller->PCIDevice,
+ sizeof(char), &OldRebuildRateConstantDMA);
+ if (OldRebuildRateConstant == NULL) {
+ DAC960_UserCritical("Cancellation of Rebuild or "
+ "Consistency Check Failed - "
+ "Out of Memory",
+ Controller);
+ goto failure;
+ }
+ CommandMailbox->Type3R.CommandOpcode = DAC960_V1_RebuildControl;
+ CommandMailbox->Type3R.RebuildRateConstant = 0xFF;
+ CommandMailbox->Type3R.BusAddress = OldRebuildRateConstantDMA;
+ DAC960_ExecuteCommand(Command);
+ switch (Command->V1.CommandStatus)
+ {
+ case DAC960_V1_NormalCompletion:
+ DAC960_UserCritical("Rebuild or Consistency Check Cancelled\n",
+ Controller);
+ break;
+ default:
+ DAC960_UserCritical("Cancellation of Rebuild or "
+ "Consistency Check Failed - "
+ "Unexpected Status %04X\n",
+ Controller, Command->V1.CommandStatus);
+ break;
+ }
+failure:
+  if (OldRebuildRateConstant != NULL)
+	pci_free_consistent(Controller->PCIDevice, sizeof(char),
+		OldRebuildRateConstant, OldRebuildRateConstantDMA);
+ }
+ else DAC960_UserCritical("Illegal User Command: '%s'\n",
+ Controller, UserCommand);
+
+ spin_lock_irqsave(&Controller->queue_lock, flags);
+ DAC960_DeallocateCommand(Command);
+ spin_unlock_irqrestore(&Controller->queue_lock, flags);
+ return true;
+}
+
+
+/*
+ DAC960_V2_TranslatePhysicalDevice translates a Physical Device Channel and
+ TargetID into a Logical Device. It returns true on success and false
+ on failure.
+*/
+
+static bool DAC960_V2_TranslatePhysicalDevice(DAC960_Command_T *Command,
+ unsigned char Channel,
+ unsigned char TargetID,
+ unsigned short
+ *LogicalDeviceNumber)
+{
+ DAC960_V2_CommandMailbox_T SavedCommandMailbox, *CommandMailbox;
+ DAC960_Controller_T *Controller = Command->Controller;
+
+ CommandMailbox = &Command->V2.CommandMailbox;
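+  /*
+    The Command Mailbox contents are saved and restored around the
+    translation so that this helper can be invoked while another command is
+    being built up in the same mailbox.
+  */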
+ memcpy(&SavedCommandMailbox, CommandMailbox,
+ sizeof(DAC960_V2_CommandMailbox_T));
+
+ CommandMailbox->PhysicalDeviceInfo.CommandOpcode = DAC960_V2_IOCTL;
+ CommandMailbox->PhysicalDeviceInfo.CommandControlBits
+ .DataTransferControllerToHost = true;
+ CommandMailbox->PhysicalDeviceInfo.CommandControlBits
+ .NoAutoRequestSense = true;
+ CommandMailbox->PhysicalDeviceInfo.DataTransferSize =
+ sizeof(DAC960_V2_PhysicalToLogicalDevice_T);
+ CommandMailbox->PhysicalDeviceInfo.PhysicalDevice.TargetID = TargetID;
+ CommandMailbox->PhysicalDeviceInfo.PhysicalDevice.Channel = Channel;
+ CommandMailbox->PhysicalDeviceInfo.IOCTL_Opcode =
+ DAC960_V2_TranslatePhysicalToLogicalDevice;
+ CommandMailbox->Common.DataTransferMemoryAddress
+ .ScatterGatherSegments[0]
+ .SegmentDataPointer =
+ Controller->V2.PhysicalToLogicalDeviceDMA;
+ CommandMailbox->Common.DataTransferMemoryAddress
+ .ScatterGatherSegments[0]
+ .SegmentByteCount =
+ CommandMailbox->Common.DataTransferSize;
+
+ DAC960_ExecuteCommand(Command);
+ *LogicalDeviceNumber = Controller->V2.PhysicalToLogicalDevice->LogicalDeviceNumber;
+
+ memcpy(CommandMailbox, &SavedCommandMailbox,
+ sizeof(DAC960_V2_CommandMailbox_T));
+ return (Command->V2.CommandStatus == DAC960_V2_NormalCompletion);
+}
+
+
+/*
+ DAC960_V2_ExecuteUserCommand executes a User Command for DAC960 V2 Firmware
+ Controllers.
+*/
+
+static bool DAC960_V2_ExecuteUserCommand(DAC960_Controller_T *Controller,
+ unsigned char *UserCommand)
+{
+ DAC960_Command_T *Command;
+ DAC960_V2_CommandMailbox_T *CommandMailbox;
+ unsigned long flags;
+ unsigned char Channel, TargetID, LogicalDriveNumber;
+ unsigned short LogicalDeviceNumber;
+
+ spin_lock_irqsave(&Controller->queue_lock, flags);
+ while ((Command = DAC960_AllocateCommand(Controller)) == NULL)
+ DAC960_WaitForCommand(Controller);
+ spin_unlock_irqrestore(&Controller->queue_lock, flags);
+ Controller->UserStatusLength = 0;
+ DAC960_V2_ClearCommand(Command);
+ Command->CommandType = DAC960_ImmediateCommand;
+ CommandMailbox = &Command->V2.CommandMailbox;
+ CommandMailbox->Common.CommandOpcode = DAC960_V2_IOCTL;
+ CommandMailbox->Common.CommandControlBits.DataTransferControllerToHost = true;
+ CommandMailbox->Common.CommandControlBits.NoAutoRequestSense = true;
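+  /*
+    The Data Transfer direction and No Auto Request Sense control bits set
+    above apply to every IOCTL issued below; each User Command then fills in
+    only its opcode-specific fields.
+  */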
+ if (strcmp(UserCommand, "flush-cache") == 0)
+ {
+ CommandMailbox->DeviceOperation.IOCTL_Opcode = DAC960_V2_PauseDevice;
+ CommandMailbox->DeviceOperation.OperationDevice =
+ DAC960_V2_RAID_Controller;
+ DAC960_ExecuteCommand(Command);
+ DAC960_UserCritical("Cache Flush Completed\n", Controller);
+ }
+ else if (strncmp(UserCommand, "kill", 4) == 0 &&
+ DAC960_ParsePhysicalDevice(Controller, &UserCommand[4],
+ &Channel, &TargetID) &&
+ DAC960_V2_TranslatePhysicalDevice(Command, Channel, TargetID,
+ &LogicalDeviceNumber))
+ {
+ CommandMailbox->SetDeviceState.LogicalDevice.LogicalDeviceNumber =
+ LogicalDeviceNumber;
+ CommandMailbox->SetDeviceState.IOCTL_Opcode =
+ DAC960_V2_SetDeviceState;
+ CommandMailbox->SetDeviceState.DeviceState.PhysicalDeviceState =
+ DAC960_V2_Device_Dead;
+ DAC960_ExecuteCommand(Command);
+ DAC960_UserCritical("Kill of Physical Device %d:%d %s\n",
+ Controller, Channel, TargetID,
+ (Command->V2.CommandStatus
+ == DAC960_V2_NormalCompletion
+ ? "Succeeded" : "Failed"));
+ }
+ else if (strncmp(UserCommand, "make-online", 11) == 0 &&
+ DAC960_ParsePhysicalDevice(Controller, &UserCommand[11],
+ &Channel, &TargetID) &&
+ DAC960_V2_TranslatePhysicalDevice(Command, Channel, TargetID,
+ &LogicalDeviceNumber))
+ {
+ CommandMailbox->SetDeviceState.LogicalDevice.LogicalDeviceNumber =
+ LogicalDeviceNumber;
+ CommandMailbox->SetDeviceState.IOCTL_Opcode =
+ DAC960_V2_SetDeviceState;
+ CommandMailbox->SetDeviceState.DeviceState.PhysicalDeviceState =
+ DAC960_V2_Device_Online;
+ DAC960_ExecuteCommand(Command);
+ DAC960_UserCritical("Make Online of Physical Device %d:%d %s\n",
+ Controller, Channel, TargetID,
+ (Command->V2.CommandStatus
+ == DAC960_V2_NormalCompletion
+ ? "Succeeded" : "Failed"));
+ }
+ else if (strncmp(UserCommand, "make-standby", 12) == 0 &&
+ DAC960_ParsePhysicalDevice(Controller, &UserCommand[12],
+ &Channel, &TargetID) &&
+ DAC960_V2_TranslatePhysicalDevice(Command, Channel, TargetID,
+ &LogicalDeviceNumber))
+ {
+ CommandMailbox->SetDeviceState.LogicalDevice.LogicalDeviceNumber =
+ LogicalDeviceNumber;
+ CommandMailbox->SetDeviceState.IOCTL_Opcode =
+ DAC960_V2_SetDeviceState;
+ CommandMailbox->SetDeviceState.DeviceState.PhysicalDeviceState =
+ DAC960_V2_Device_Standby;
+ DAC960_ExecuteCommand(Command);
+ DAC960_UserCritical("Make Standby of Physical Device %d:%d %s\n",
+ Controller, Channel, TargetID,
+ (Command->V2.CommandStatus
+ == DAC960_V2_NormalCompletion
+ ? "Succeeded" : "Failed"));
+ }
+ else if (strncmp(UserCommand, "rebuild", 7) == 0 &&
+ DAC960_ParsePhysicalDevice(Controller, &UserCommand[7],
+ &Channel, &TargetID) &&
+ DAC960_V2_TranslatePhysicalDevice(Command, Channel, TargetID,
+ &LogicalDeviceNumber))
+ {
+ CommandMailbox->LogicalDeviceInfo.LogicalDevice.LogicalDeviceNumber =
+ LogicalDeviceNumber;
+ CommandMailbox->LogicalDeviceInfo.IOCTL_Opcode =
+ DAC960_V2_RebuildDeviceStart;
+ DAC960_ExecuteCommand(Command);
+ DAC960_UserCritical("Rebuild of Physical Device %d:%d %s\n",
+ Controller, Channel, TargetID,
+ (Command->V2.CommandStatus
+ == DAC960_V2_NormalCompletion
+ ? "Initiated" : "Not Initiated"));
+ }
+ else if (strncmp(UserCommand, "cancel-rebuild", 14) == 0 &&
+ DAC960_ParsePhysicalDevice(Controller, &UserCommand[14],
+ &Channel, &TargetID) &&
+ DAC960_V2_TranslatePhysicalDevice(Command, Channel, TargetID,
+ &LogicalDeviceNumber))
+ {
+ CommandMailbox->LogicalDeviceInfo.LogicalDevice.LogicalDeviceNumber =
+ LogicalDeviceNumber;
+ CommandMailbox->LogicalDeviceInfo.IOCTL_Opcode =
+ DAC960_V2_RebuildDeviceStop;
+ DAC960_ExecuteCommand(Command);
+ DAC960_UserCritical("Rebuild of Physical Device %d:%d %s\n",
+ Controller, Channel, TargetID,
+ (Command->V2.CommandStatus
+ == DAC960_V2_NormalCompletion
+ ? "Cancelled" : "Not Cancelled"));
+ }
+ else if (strncmp(UserCommand, "check-consistency", 17) == 0 &&
+ DAC960_ParseLogicalDrive(Controller, &UserCommand[17],
+ &LogicalDriveNumber))
+ {
+ CommandMailbox->ConsistencyCheck.LogicalDevice.LogicalDeviceNumber =
+ LogicalDriveNumber;
+ CommandMailbox->ConsistencyCheck.IOCTL_Opcode =
+ DAC960_V2_ConsistencyCheckStart;
+ CommandMailbox->ConsistencyCheck.RestoreConsistency = true;
+ CommandMailbox->ConsistencyCheck.InitializedAreaOnly = false;
+ DAC960_ExecuteCommand(Command);
+ DAC960_UserCritical("Consistency Check of Logical Drive %d "
+ "(/dev/rd/c%dd%d) %s\n",
+ Controller, LogicalDriveNumber,
+ Controller->ControllerNumber,
+ LogicalDriveNumber,
+ (Command->V2.CommandStatus
+ == DAC960_V2_NormalCompletion
+ ? "Initiated" : "Not Initiated"));
+ }
+ else if (strncmp(UserCommand, "cancel-consistency-check", 24) == 0 &&
+ DAC960_ParseLogicalDrive(Controller, &UserCommand[24],
+ &LogicalDriveNumber))
+ {
+ CommandMailbox->ConsistencyCheck.LogicalDevice.LogicalDeviceNumber =
+ LogicalDriveNumber;
+ CommandMailbox->ConsistencyCheck.IOCTL_Opcode =
+ DAC960_V2_ConsistencyCheckStop;
+ DAC960_ExecuteCommand(Command);
+ DAC960_UserCritical("Consistency Check of Logical Drive %d "
+ "(/dev/rd/c%dd%d) %s\n",
+ Controller, LogicalDriveNumber,
+ Controller->ControllerNumber,
+ LogicalDriveNumber,
+ (Command->V2.CommandStatus
+ == DAC960_V2_NormalCompletion
+ ? "Cancelled" : "Not Cancelled"));
+ }
+ else if (strcmp(UserCommand, "perform-discovery") == 0)
+ {
+ CommandMailbox->Common.IOCTL_Opcode = DAC960_V2_StartDiscovery;
+ DAC960_ExecuteCommand(Command);
+ DAC960_UserCritical("Discovery %s\n", Controller,
+ (Command->V2.CommandStatus
+ == DAC960_V2_NormalCompletion
+ ? "Initiated" : "Not Initiated"));
+ if (Command->V2.CommandStatus == DAC960_V2_NormalCompletion)
+ {
+ CommandMailbox->ControllerInfo.CommandOpcode = DAC960_V2_IOCTL;
+ CommandMailbox->ControllerInfo.CommandControlBits
+ .DataTransferControllerToHost = true;
+ CommandMailbox->ControllerInfo.CommandControlBits
+ .NoAutoRequestSense = true;
+ CommandMailbox->ControllerInfo.DataTransferSize =
+ sizeof(DAC960_V2_ControllerInfo_T);
+ CommandMailbox->ControllerInfo.ControllerNumber = 0;
+ CommandMailbox->ControllerInfo.IOCTL_Opcode =
+ DAC960_V2_GetControllerInfo;
+ /*
+ * How does this NOT race with the queued Monitoring
+ * usage of this structure?
+ */
+ CommandMailbox->ControllerInfo.DataTransferMemoryAddress
+ .ScatterGatherSegments[0]
+ .SegmentDataPointer =
+ Controller->V2.NewControllerInformationDMA;
+ CommandMailbox->ControllerInfo.DataTransferMemoryAddress
+ .ScatterGatherSegments[0]
+ .SegmentByteCount =
+ CommandMailbox->ControllerInfo.DataTransferSize;
+ DAC960_ExecuteCommand(Command);
+ while (Controller->V2.NewControllerInformation->PhysicalScanActive)
+ {
+ DAC960_ExecuteCommand(Command);
+ sleep_on_timeout(&Controller->CommandWaitQueue, HZ);
+ }
+ DAC960_UserCritical("Discovery Completed\n", Controller);
+ }
+ }
+ else if (strcmp(UserCommand, "suppress-enclosure-messages") == 0)
+ Controller->SuppressEnclosureMessages = true;
+ else DAC960_UserCritical("Illegal User Command: '%s'\n",
+ Controller, UserCommand);
+
+ spin_lock_irqsave(&Controller->queue_lock, flags);
+ DAC960_DeallocateCommand(Command);
+ spin_unlock_irqrestore(&Controller->queue_lock, flags);
+ return true;
+}
+
+static int dac960_proc_show(struct seq_file *m, void *v)
+{
+ unsigned char *StatusMessage = "OK\n";
+ int ControllerNumber;
+ for (ControllerNumber = 0;
+ ControllerNumber < DAC960_ControllerCount;
+ ControllerNumber++)
+ {
+ DAC960_Controller_T *Controller = DAC960_Controllers[ControllerNumber];
+ if (Controller == NULL) continue;
+ if (Controller->MonitoringAlertMode)
+ {
+ StatusMessage = "ALERT\n";
+ break;
+ }
+ }
+ seq_puts(m, StatusMessage);
+ return 0;
+}
+
+static int dac960_proc_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, dac960_proc_show, NULL);
+}
+
+static const struct file_operations dac960_proc_fops = {
+ .owner = THIS_MODULE,
+ .open = dac960_proc_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+static int dac960_initial_status_proc_show(struct seq_file *m, void *v)
+{
+ DAC960_Controller_T *Controller = (DAC960_Controller_T *)m->private;
+ seq_printf(m, "%.*s", Controller->InitialStatusLength, Controller->CombinedStatusBuffer);
+ return 0;
+}
+
+static int dac960_initial_status_proc_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, dac960_initial_status_proc_show, PDE(inode)->data);
+}
+
+static const struct file_operations dac960_initial_status_proc_fops = {
+ .owner = THIS_MODULE,
+ .open = dac960_initial_status_proc_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+static int dac960_current_status_proc_show(struct seq_file *m, void *v)
+{
+ DAC960_Controller_T *Controller = (DAC960_Controller_T *) m->private;
+ unsigned char *StatusMessage =
+ "No Rebuild or Consistency Check in Progress\n";
+ int ProgressMessageLength = strlen(StatusMessage);
+ if (jiffies != Controller->LastCurrentStatusTime)
+ {
+ Controller->CurrentStatusLength = 0;
+ DAC960_AnnounceDriver(Controller);
+ DAC960_ReportControllerConfiguration(Controller);
+ DAC960_ReportDeviceConfiguration(Controller);
+ if (Controller->ProgressBufferLength > 0)
+ ProgressMessageLength = Controller->ProgressBufferLength;
+ if (DAC960_CheckStatusBuffer(Controller, 2 + ProgressMessageLength))
+ {
+ unsigned char *CurrentStatusBuffer = Controller->CurrentStatusBuffer;
+ CurrentStatusBuffer[Controller->CurrentStatusLength++] = ' ';
+ CurrentStatusBuffer[Controller->CurrentStatusLength++] = ' ';
+ if (Controller->ProgressBufferLength > 0)
+ strcpy(&CurrentStatusBuffer[Controller->CurrentStatusLength],
+ Controller->ProgressBuffer);
+ else
+ strcpy(&CurrentStatusBuffer[Controller->CurrentStatusLength],
+ StatusMessage);
+ Controller->CurrentStatusLength += ProgressMessageLength;
+ }
+ Controller->LastCurrentStatusTime = jiffies;
+ }
+ seq_printf(m, "%.*s", Controller->CurrentStatusLength, Controller->CurrentStatusBuffer);
+ return 0;
+}
+
+static int dac960_current_status_proc_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, dac960_current_status_proc_show, PDE(inode)->data);
+}
+
+static const struct file_operations dac960_current_status_proc_fops = {
+ .owner = THIS_MODULE,
+ .open = dac960_current_status_proc_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+static int dac960_user_command_proc_show(struct seq_file *m, void *v)
+{
+ DAC960_Controller_T *Controller = (DAC960_Controller_T *)m->private;
+
+ seq_printf(m, "%.*s", Controller->UserStatusLength, Controller->UserStatusBuffer);
+ return 0;
+}
+
+static int dac960_user_command_proc_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, dac960_user_command_proc_show, PDE(inode)->data);
+}
+
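+/*
+  dac960_user_command_proc_write accepts a user command of at most 79
+  characters, strips a trailing newline, and dispatches it to the V1 or
+  V2 user command handler according to the controller's firmware type.
+*/
+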
+static ssize_t dac960_user_command_proc_write(struct file *file,
+ const char __user *Buffer,
+ size_t Count, loff_t *pos)
+{
+ DAC960_Controller_T *Controller = (DAC960_Controller_T *) PDE(file->f_path.dentry->d_inode)->data;
+ unsigned char CommandBuffer[80];
+ int Length;
+ if (Count > sizeof(CommandBuffer)-1) return -EINVAL;
+ if (copy_from_user(CommandBuffer, Buffer, Count)) return -EFAULT;
+ CommandBuffer[Count] = '\0';
+ Length = strlen(CommandBuffer);
+ if (Length > 0 && CommandBuffer[Length-1] == '\n')
+ CommandBuffer[--Length] = '\0';
+ if (Controller->FirmwareType == DAC960_V1_Controller)
+ return (DAC960_V1_ExecuteUserCommand(Controller, CommandBuffer)
+ ? Count : -EBUSY);
+ else
+ return (DAC960_V2_ExecuteUserCommand(Controller, CommandBuffer)
+ ? Count : -EBUSY);
+}
+
+static const struct file_operations dac960_user_command_proc_fops = {
+ .owner = THIS_MODULE,
+ .open = dac960_user_command_proc_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+ .write = dac960_user_command_proc_write,
+};
+
+/*
+ DAC960_CreateProcEntries creates the /proc/rd/... entries for the
+ DAC960 Driver.
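+
+  The resulting layout is /proc/rd/status plus, for each controller,
+  /proc/rd/cN/initial_status, /proc/rd/cN/current_status and
+  /proc/rd/cN/user_command (the last created with mode 0600).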
+*/
+
+static void DAC960_CreateProcEntries(DAC960_Controller_T *Controller)
+{
+ struct proc_dir_entry *StatusProcEntry;
+ struct proc_dir_entry *ControllerProcEntry;
+ struct proc_dir_entry *UserCommandProcEntry;
+
+ if (DAC960_ProcDirectoryEntry == NULL) {
+ DAC960_ProcDirectoryEntry = proc_mkdir("rd", NULL);
+ StatusProcEntry = proc_create("status", 0,
+ DAC960_ProcDirectoryEntry,
+ &dac960_proc_fops);
+ }
+
+ sprintf(Controller->ControllerName, "c%d", Controller->ControllerNumber);
+ ControllerProcEntry = proc_mkdir(Controller->ControllerName,
+ DAC960_ProcDirectoryEntry);
+ proc_create_data("initial_status", 0, ControllerProcEntry, &dac960_initial_status_proc_fops, Controller);
+ proc_create_data("current_status", 0, ControllerProcEntry, &dac960_current_status_proc_fops, Controller);
+ UserCommandProcEntry = proc_create_data("user_command", S_IWUSR | S_IRUSR, ControllerProcEntry, &dac960_user_command_proc_fops, Controller);
+ Controller->ControllerProcEntry = ControllerProcEntry;
+}
+
+
+/*
+ DAC960_DestroyProcEntries destroys the /proc/rd/... entries for the
+ DAC960 Driver.
+*/
+
+static void DAC960_DestroyProcEntries(DAC960_Controller_T *Controller)
+{
+ if (Controller->ControllerProcEntry == NULL)
+ return;
+ remove_proc_entry("initial_status", Controller->ControllerProcEntry);
+ remove_proc_entry("current_status", Controller->ControllerProcEntry);
+ remove_proc_entry("user_command", Controller->ControllerProcEntry);
+ remove_proc_entry(Controller->ControllerName, DAC960_ProcDirectoryEntry);
+ Controller->ControllerProcEntry = NULL;
+}
+
+#ifdef DAC960_GAM_MINOR
+
+/*
+ * DAC960_gam_ioctl is the ioctl function for performing RAID operations.
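+ *
+ * The supported requests are DAC960_IOCTL_GET_CONTROLLER_COUNT,
+ * DAC960_IOCTL_GET_CONTROLLER_INFO, DAC960_IOCTL_V1_EXECUTE_COMMAND,
+ * DAC960_IOCTL_V2_EXECUTE_COMMAND and DAC960_IOCTL_V2_GET_HEALTH_STATUS;
+ * any other request returns -ENOTTY, and CAP_SYS_ADMIN is required.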
+*/
+
+static long DAC960_gam_ioctl(struct file *file, unsigned int Request,
+ unsigned long Argument)
+{
+ long ErrorCode = 0;
+ if (!capable(CAP_SYS_ADMIN)) return -EACCES;
+
+ mutex_lock(&DAC960_mutex);
+ switch (Request)
+ {
+ case DAC960_IOCTL_GET_CONTROLLER_COUNT:
+ ErrorCode = DAC960_ControllerCount;
+ break;
+ case DAC960_IOCTL_GET_CONTROLLER_INFO:
+ {
+ DAC960_ControllerInfo_T __user *UserSpaceControllerInfo =
+ (DAC960_ControllerInfo_T __user *) Argument;
+ DAC960_ControllerInfo_T ControllerInfo;
+ DAC960_Controller_T *Controller;
+ int ControllerNumber;
+ if (UserSpaceControllerInfo == NULL)
+ ErrorCode = -EINVAL;
+ else ErrorCode = get_user(ControllerNumber,
+ &UserSpaceControllerInfo->ControllerNumber);
+ if (ErrorCode != 0)
+ break;
+ ErrorCode = -ENXIO;
+ if (ControllerNumber < 0 ||
+ ControllerNumber > DAC960_ControllerCount - 1) {
+ break;
+ }
+ Controller = DAC960_Controllers[ControllerNumber];
+ if (Controller == NULL)
+ break;
+ memset(&ControllerInfo, 0, sizeof(DAC960_ControllerInfo_T));
+ ControllerInfo.ControllerNumber = ControllerNumber;
+ ControllerInfo.FirmwareType = Controller->FirmwareType;
+ ControllerInfo.Channels = Controller->Channels;
+ ControllerInfo.Targets = Controller->Targets;
+ ControllerInfo.PCI_Bus = Controller->Bus;
+ ControllerInfo.PCI_Device = Controller->Device;
+ ControllerInfo.PCI_Function = Controller->Function;
+ ControllerInfo.IRQ_Channel = Controller->IRQ_Channel;
+ ControllerInfo.PCI_Address = Controller->PCI_Address;
+ strcpy(ControllerInfo.ModelName, Controller->ModelName);
+ strcpy(ControllerInfo.FirmwareVersion, Controller->FirmwareVersion);
+ ErrorCode = (copy_to_user(UserSpaceControllerInfo, &ControllerInfo,
+ sizeof(DAC960_ControllerInfo_T)) ? -EFAULT : 0);
+ break;
+ }
+ case DAC960_IOCTL_V1_EXECUTE_COMMAND:
+ {
+ DAC960_V1_UserCommand_T __user *UserSpaceUserCommand =
+ (DAC960_V1_UserCommand_T __user *) Argument;
+ DAC960_V1_UserCommand_T UserCommand;
+ DAC960_Controller_T *Controller;
+ DAC960_Command_T *Command = NULL;
+ DAC960_V1_CommandOpcode_T CommandOpcode;
+ DAC960_V1_CommandStatus_T CommandStatus;
+ DAC960_V1_DCDB_T DCDB;
+ DAC960_V1_DCDB_T *DCDB_IOBUF = NULL;
+ dma_addr_t DCDB_IOBUFDMA;
+ unsigned long flags;
+ int ControllerNumber, DataTransferLength;
+ unsigned char *DataTransferBuffer = NULL;
+ dma_addr_t DataTransferBufferDMA;
+ if (UserSpaceUserCommand == NULL) {
+ ErrorCode = -EINVAL;
+ break;
+ }
+ if (copy_from_user(&UserCommand, UserSpaceUserCommand,
+ sizeof(DAC960_V1_UserCommand_T))) {
+ ErrorCode = -EFAULT;
+ break;
+ }
+ ControllerNumber = UserCommand.ControllerNumber;
+ ErrorCode = -ENXIO;
+ if (ControllerNumber < 0 ||
+ ControllerNumber > DAC960_ControllerCount - 1)
+ break;
+ Controller = DAC960_Controllers[ControllerNumber];
+ if (Controller == NULL)
+ break;
+ ErrorCode = -EINVAL;
+ if (Controller->FirmwareType != DAC960_V1_Controller)
+ break;
+ CommandOpcode = UserCommand.CommandMailbox.Common.CommandOpcode;
+ DataTransferLength = UserCommand.DataTransferLength;
+ if (CommandOpcode & 0x80)
+ break;
+ if (CommandOpcode == DAC960_V1_DCDB)
+ {
+ if (copy_from_user(&DCDB, UserCommand.DCDB,
+ sizeof(DAC960_V1_DCDB_T))) {
+ ErrorCode = -EFAULT;
+ break;
+ }
+ if (DCDB.Channel >= DAC960_V1_MaxChannels)
+ break;
+ if (!((DataTransferLength == 0 &&
+ DCDB.Direction
+ == DAC960_V1_DCDB_NoDataTransfer) ||
+ (DataTransferLength > 0 &&
+ DCDB.Direction
+ == DAC960_V1_DCDB_DataTransferDeviceToSystem) ||
+ (DataTransferLength < 0 &&
+ DCDB.Direction
+ == DAC960_V1_DCDB_DataTransferSystemToDevice)))
+ break;
+ if (((DCDB.TransferLengthHigh4 << 16) | DCDB.TransferLength)
+ != abs(DataTransferLength))
+ break;
+ DCDB_IOBUF = pci_alloc_consistent(Controller->PCIDevice,
+ sizeof(DAC960_V1_DCDB_T), &DCDB_IOBUFDMA);
+ if (DCDB_IOBUF == NULL) {
+ ErrorCode = -ENOMEM;
+ break;
+ }
+ }
+ ErrorCode = -ENOMEM;
+ if (DataTransferLength > 0)
+ {
+ DataTransferBuffer = pci_alloc_consistent(Controller->PCIDevice,
+ DataTransferLength, &DataTransferBufferDMA);
+ if (DataTransferBuffer == NULL)
+ break;
+ memset(DataTransferBuffer, 0, DataTransferLength);
+ }
+ else if (DataTransferLength < 0)
+ {
+ DataTransferBuffer = pci_alloc_consistent(Controller->PCIDevice,
+ -DataTransferLength, &DataTransferBufferDMA);
+ if (DataTransferBuffer == NULL)
+ break;
+ if (copy_from_user(DataTransferBuffer,
+ UserCommand.DataTransferBuffer,
+ -DataTransferLength)) {
+ ErrorCode = -EFAULT;
+ break;
+ }
+ }
+ if (CommandOpcode == DAC960_V1_DCDB)
+ {
+ spin_lock_irqsave(&Controller->queue_lock, flags);
+ while ((Command = DAC960_AllocateCommand(Controller)) == NULL)
+ DAC960_WaitForCommand(Controller);
+ while (Controller->V1.DirectCommandActive[DCDB.Channel]
+ [DCDB.TargetID])
+ {
+ spin_unlock_irq(&Controller->queue_lock);
+ __wait_event(Controller->CommandWaitQueue,
+ !Controller->V1.DirectCommandActive
+ [DCDB.Channel][DCDB.TargetID]);
+ spin_lock_irq(&Controller->queue_lock);
+ }
+ Controller->V1.DirectCommandActive[DCDB.Channel]
+ [DCDB.TargetID] = true;
+ spin_unlock_irqrestore(&Controller->queue_lock, flags);
+ DAC960_V1_ClearCommand(Command);
+ Command->CommandType = DAC960_ImmediateCommand;
+ memcpy(&Command->V1.CommandMailbox, &UserCommand.CommandMailbox,
+ sizeof(DAC960_V1_CommandMailbox_T));
+ Command->V1.CommandMailbox.Type3.BusAddress = DCDB_IOBUFDMA;
+ DCDB.BusAddress = DataTransferBufferDMA;
+ memcpy(DCDB_IOBUF, &DCDB, sizeof(DAC960_V1_DCDB_T));
+ }
+ else
+ {
+ spin_lock_irqsave(&Controller->queue_lock, flags);
+ while ((Command = DAC960_AllocateCommand(Controller)) == NULL)
+ DAC960_WaitForCommand(Controller);
+ spin_unlock_irqrestore(&Controller->queue_lock, flags);
+ DAC960_V1_ClearCommand(Command);
+ Command->CommandType = DAC960_ImmediateCommand;
+ memcpy(&Command->V1.CommandMailbox, &UserCommand.CommandMailbox,
+ sizeof(DAC960_V1_CommandMailbox_T));
+ if (DataTransferBuffer != NULL)
+ Command->V1.CommandMailbox.Type3.BusAddress =
+ DataTransferBufferDMA;
+ }
+ DAC960_ExecuteCommand(Command);
+ CommandStatus = Command->V1.CommandStatus;
+ spin_lock_irqsave(&Controller->queue_lock, flags);
+ DAC960_DeallocateCommand(Command);
+ spin_unlock_irqrestore(&Controller->queue_lock, flags);
+ if (DataTransferLength > 0)
+ {
+ if (copy_to_user(UserCommand.DataTransferBuffer,
+ DataTransferBuffer, DataTransferLength)) {
+ ErrorCode = -EFAULT;
+ goto Failure1;
+ }
+ }
+ if (CommandOpcode == DAC960_V1_DCDB)
+ {
+ /*
+ I don't believe Target or Channel in the DCDB_IOBUF
+ should be any different from the contents of DCDB.
+ */
+ Controller->V1.DirectCommandActive[DCDB.Channel]
+ [DCDB.TargetID] = false;
+ if (copy_to_user(UserCommand.DCDB, DCDB_IOBUF,
+ sizeof(DAC960_V1_DCDB_T))) {
+ ErrorCode = -EFAULT;
+ goto Failure1;
+ }
+ }
+ ErrorCode = CommandStatus;
+ Failure1:
+ if (DataTransferBuffer != NULL)
+ pci_free_consistent(Controller->PCIDevice, abs(DataTransferLength),
+ DataTransferBuffer, DataTransferBufferDMA);
+ if (DCDB_IOBUF != NULL)
+ pci_free_consistent(Controller->PCIDevice, sizeof(DAC960_V1_DCDB_T),
+ DCDB_IOBUF, DCDB_IOBUFDMA);
+ break;
+ }
+ case DAC960_IOCTL_V2_EXECUTE_COMMAND:
+ {
+ DAC960_V2_UserCommand_T __user *UserSpaceUserCommand =
+ (DAC960_V2_UserCommand_T __user *) Argument;
+ DAC960_V2_UserCommand_T UserCommand;
+ DAC960_Controller_T *Controller;
+ DAC960_Command_T *Command = NULL;
+ DAC960_V2_CommandMailbox_T *CommandMailbox;
+ DAC960_V2_CommandStatus_T CommandStatus;
+ unsigned long flags;
+ int ControllerNumber, DataTransferLength;
+ int DataTransferResidue, RequestSenseLength;
+ unsigned char *DataTransferBuffer = NULL;
+ dma_addr_t DataTransferBufferDMA;
+ unsigned char *RequestSenseBuffer = NULL;
+ dma_addr_t RequestSenseBufferDMA;
+
+ ErrorCode = -EINVAL;
+ if (UserSpaceUserCommand == NULL)
+ break;
+ if (copy_from_user(&UserCommand, UserSpaceUserCommand,
+ sizeof(DAC960_V2_UserCommand_T))) {
+ ErrorCode = -EFAULT;
+ break;
+ }
+ ErrorCode = -ENXIO;
+ ControllerNumber = UserCommand.ControllerNumber;
+ if (ControllerNumber < 0 ||
+ ControllerNumber > DAC960_ControllerCount - 1)
+ break;
+ Controller = DAC960_Controllers[ControllerNumber];
+ if (Controller == NULL)
+ break;
+ if (Controller->FirmwareType != DAC960_V2_Controller){
+ ErrorCode = -EINVAL;
+ break;
+ }
+ DataTransferLength = UserCommand.DataTransferLength;
+ ErrorCode = -ENOMEM;
+ if (DataTransferLength > 0)
+ {
+ DataTransferBuffer = pci_alloc_consistent(Controller->PCIDevice,
+ DataTransferLength, &DataTransferBufferDMA);
+ if (DataTransferBuffer == NULL)
+ break;
+ memset(DataTransferBuffer, 0, DataTransferLength);
+ }
+ else if (DataTransferLength < 0)
+ {
+ DataTransferBuffer = pci_alloc_consistent(Controller->PCIDevice,
+ -DataTransferLength, &DataTransferBufferDMA);
+ if (DataTransferBuffer == NULL)
+ break;
+ if (copy_from_user(DataTransferBuffer,
+ UserCommand.DataTransferBuffer,
+ -DataTransferLength)) {
+ ErrorCode = -EFAULT;
+ goto Failure2;
+ }
+ }
+ RequestSenseLength = UserCommand.RequestSenseLength;
+ if (RequestSenseLength > 0)
+ {
+ RequestSenseBuffer = pci_alloc_consistent(Controller->PCIDevice,
+ RequestSenseLength, &RequestSenseBufferDMA);
+ if (RequestSenseBuffer == NULL)
+ {
+ ErrorCode = -ENOMEM;
+ goto Failure2;
+ }
+ memset(RequestSenseBuffer, 0, RequestSenseLength);
+ }
+ spin_lock_irqsave(&Controller->queue_lock, flags);
+ while ((Command = DAC960_AllocateCommand(Controller)) == NULL)
+ DAC960_WaitForCommand(Controller);
+ spin_unlock_irqrestore(&Controller->queue_lock, flags);
+ DAC960_V2_ClearCommand(Command);
+ Command->CommandType = DAC960_ImmediateCommand;
+ CommandMailbox = &Command->V2.CommandMailbox;
+ memcpy(CommandMailbox, &UserCommand.CommandMailbox,
+ sizeof(DAC960_V2_CommandMailbox_T));
+ CommandMailbox->Common.CommandControlBits
+ .AdditionalScatterGatherListMemory = false;
+ CommandMailbox->Common.CommandControlBits
+ .NoAutoRequestSense = true;
+ CommandMailbox->Common.DataTransferSize = 0;
+ CommandMailbox->Common.DataTransferPageNumber = 0;
+ memset(&CommandMailbox->Common.DataTransferMemoryAddress, 0,
+ sizeof(DAC960_V2_DataTransferMemoryAddress_T));
+ if (DataTransferLength != 0)
+ {
+ if (DataTransferLength > 0)
+ {
+ CommandMailbox->Common.CommandControlBits
+ .DataTransferControllerToHost = true;
+ CommandMailbox->Common.DataTransferSize = DataTransferLength;
+ }
+ else
+ {
+ CommandMailbox->Common.CommandControlBits
+ .DataTransferControllerToHost = false;
+ CommandMailbox->Common.DataTransferSize = -DataTransferLength;
+ }
+ CommandMailbox->Common.DataTransferMemoryAddress
+ .ScatterGatherSegments[0]
+ .SegmentDataPointer = DataTransferBufferDMA;
+ CommandMailbox->Common.DataTransferMemoryAddress
+ .ScatterGatherSegments[0]
+ .SegmentByteCount =
+ CommandMailbox->Common.DataTransferSize;
+ }
+ if (RequestSenseLength > 0)
+ {
+ CommandMailbox->Common.CommandControlBits
+ .NoAutoRequestSense = false;
+ CommandMailbox->Common.RequestSenseSize = RequestSenseLength;
+ CommandMailbox->Common.RequestSenseBusAddress =
+ RequestSenseBufferDMA;
+ }
+ DAC960_ExecuteCommand(Command);
+ CommandStatus = Command->V2.CommandStatus;
+ RequestSenseLength = Command->V2.RequestSenseLength;
+ DataTransferResidue = Command->V2.DataTransferResidue;
+ spin_lock_irqsave(&Controller->queue_lock, flags);
+ DAC960_DeallocateCommand(Command);
+ spin_unlock_irqrestore(&Controller->queue_lock, flags);
+ if (RequestSenseLength > UserCommand.RequestSenseLength)
+ RequestSenseLength = UserCommand.RequestSenseLength;
+ if (copy_to_user(&UserSpaceUserCommand->DataTransferLength,
+ &DataTransferResidue,
+ sizeof(DataTransferResidue))) {
+ ErrorCode = -EFAULT;
+ goto Failure2;
+ }
+ if (copy_to_user(&UserSpaceUserCommand->RequestSenseLength,
+ &RequestSenseLength, sizeof(RequestSenseLength))) {
+ ErrorCode = -EFAULT;
+ goto Failure2;
+ }
+ if (DataTransferLength > 0)
+ {
+ if (copy_to_user(UserCommand.DataTransferBuffer,
+ DataTransferBuffer, DataTransferLength)) {
+ ErrorCode = -EFAULT;
+ goto Failure2;
+ }
+ }
+ if (RequestSenseLength > 0)
+ {
+ if (copy_to_user(UserCommand.RequestSenseBuffer,
+ RequestSenseBuffer, RequestSenseLength)) {
+ ErrorCode = -EFAULT;
+ goto Failure2;
+ }
+ }
+ ErrorCode = CommandStatus;
+ Failure2:
+	if (DataTransferBuffer != NULL)
+	  pci_free_consistent(Controller->PCIDevice, abs(DataTransferLength),
+		DataTransferBuffer, DataTransferBufferDMA);
+ if (RequestSenseBuffer != NULL)
+ pci_free_consistent(Controller->PCIDevice, RequestSenseLength,
+ RequestSenseBuffer, RequestSenseBufferDMA);
+ break;
+ }
+ case DAC960_IOCTL_V2_GET_HEALTH_STATUS:
+ {
+ DAC960_V2_GetHealthStatus_T __user *UserSpaceGetHealthStatus =
+ (DAC960_V2_GetHealthStatus_T __user *) Argument;
+ DAC960_V2_GetHealthStatus_T GetHealthStatus;
+ DAC960_V2_HealthStatusBuffer_T HealthStatusBuffer;
+ DAC960_Controller_T *Controller;
+ int ControllerNumber;
+ if (UserSpaceGetHealthStatus == NULL) {
+ ErrorCode = -EINVAL;
+ break;
+ }
+ if (copy_from_user(&GetHealthStatus, UserSpaceGetHealthStatus,
+ sizeof(DAC960_V2_GetHealthStatus_T))) {
+ ErrorCode = -EFAULT;
+ break;
+ }
+ ErrorCode = -ENXIO;
+ ControllerNumber = GetHealthStatus.ControllerNumber;
+ if (ControllerNumber < 0 ||
+ ControllerNumber > DAC960_ControllerCount - 1)
+ break;
+ Controller = DAC960_Controllers[ControllerNumber];
+ if (Controller == NULL)
+ break;
+ if (Controller->FirmwareType != DAC960_V2_Controller) {
+ ErrorCode = -EINVAL;
+ break;
+ }
+ if (copy_from_user(&HealthStatusBuffer,
+ GetHealthStatus.HealthStatusBuffer,
+ sizeof(DAC960_V2_HealthStatusBuffer_T))) {
+ ErrorCode = -EFAULT;
+ break;
+ }
+ while (Controller->V2.HealthStatusBuffer->StatusChangeCounter
+ == HealthStatusBuffer.StatusChangeCounter &&
+ Controller->V2.HealthStatusBuffer->NextEventSequenceNumber
+ == HealthStatusBuffer.NextEventSequenceNumber)
+ {
+ interruptible_sleep_on_timeout(&Controller->HealthStatusWaitQueue,
+ DAC960_MonitoringTimerInterval);
+ if (signal_pending(current)) {
+ ErrorCode = -EINTR;
+ break;
+ }
+ }
+ if (copy_to_user(GetHealthStatus.HealthStatusBuffer,
+ Controller->V2.HealthStatusBuffer,
+ sizeof(DAC960_V2_HealthStatusBuffer_T)))
+ ErrorCode = -EFAULT;
+ else
+ ErrorCode = 0;
+	break;
+      }
+ default:
+ ErrorCode = -ENOTTY;
+ }
+ mutex_unlock(&DAC960_mutex);
+ return ErrorCode;
+}
+
+static const struct file_operations DAC960_gam_fops = {
+ .owner = THIS_MODULE,
+ .unlocked_ioctl = DAC960_gam_ioctl,
+ .llseek = noop_llseek,
+};
+
+static struct miscdevice DAC960_gam_dev = {
+	.minor	= DAC960_GAM_MINOR,
+	.name	= "dac960_gam",
+	.fops	= &DAC960_gam_fops
+};
+
+static int DAC960_gam_init(void)
+{
+ int ret;
+
+ ret = misc_register(&DAC960_gam_dev);
+ if (ret)
+ printk(KERN_ERR "DAC960_gam: can't misc_register on minor %d\n", DAC960_GAM_MINOR);
+ return ret;
+}
+
+static void DAC960_gam_cleanup(void)
+{
+ misc_deregister(&DAC960_gam_dev);
+}
+
+#endif /* DAC960_GAM_MINOR */
+
+static struct DAC960_privdata DAC960_GEM_privdata = {
+ .HardwareType = DAC960_GEM_Controller,
+ .FirmwareType = DAC960_V2_Controller,
+ .InterruptHandler = DAC960_GEM_InterruptHandler,
+ .MemoryWindowSize = DAC960_GEM_RegisterWindowSize,
+};
+
+
+static struct DAC960_privdata DAC960_BA_privdata = {
+ .HardwareType = DAC960_BA_Controller,
+ .FirmwareType = DAC960_V2_Controller,
+ .InterruptHandler = DAC960_BA_InterruptHandler,
+ .MemoryWindowSize = DAC960_BA_RegisterWindowSize,
+};
+
+static struct DAC960_privdata DAC960_LP_privdata = {
+ .HardwareType = DAC960_LP_Controller,
+ .FirmwareType = DAC960_V2_Controller,
+ .InterruptHandler = DAC960_LP_InterruptHandler,
+ .MemoryWindowSize = DAC960_LP_RegisterWindowSize,
+};
+
+static struct DAC960_privdata DAC960_LA_privdata = {
+ .HardwareType = DAC960_LA_Controller,
+ .FirmwareType = DAC960_V1_Controller,
+ .InterruptHandler = DAC960_LA_InterruptHandler,
+ .MemoryWindowSize = DAC960_LA_RegisterWindowSize,
+};
+
+static struct DAC960_privdata DAC960_PG_privdata = {
+ .HardwareType = DAC960_PG_Controller,
+ .FirmwareType = DAC960_V1_Controller,
+ .InterruptHandler = DAC960_PG_InterruptHandler,
+ .MemoryWindowSize = DAC960_PG_RegisterWindowSize,
+};
+
+static struct DAC960_privdata DAC960_PD_privdata = {
+ .HardwareType = DAC960_PD_Controller,
+ .FirmwareType = DAC960_V1_Controller,
+ .InterruptHandler = DAC960_PD_InterruptHandler,
+ .MemoryWindowSize = DAC960_PD_RegisterWindowSize,
+};
+
+static struct DAC960_privdata DAC960_P_privdata = {
+ .HardwareType = DAC960_P_Controller,
+ .FirmwareType = DAC960_V1_Controller,
+ .InterruptHandler = DAC960_P_InterruptHandler,
+ .MemoryWindowSize = DAC960_PD_RegisterWindowSize,
+};
+
+static const struct pci_device_id DAC960_id_table[] = {
+ {
+ .vendor = PCI_VENDOR_ID_MYLEX,
+ .device = PCI_DEVICE_ID_MYLEX_DAC960_GEM,
+ .subvendor = PCI_VENDOR_ID_MYLEX,
+ .subdevice = PCI_ANY_ID,
+ .driver_data = (unsigned long) &DAC960_GEM_privdata,
+ },
+ {
+ .vendor = PCI_VENDOR_ID_MYLEX,
+ .device = PCI_DEVICE_ID_MYLEX_DAC960_BA,
+ .subvendor = PCI_ANY_ID,
+ .subdevice = PCI_ANY_ID,
+ .driver_data = (unsigned long) &DAC960_BA_privdata,
+ },
+ {
+ .vendor = PCI_VENDOR_ID_MYLEX,
+ .device = PCI_DEVICE_ID_MYLEX_DAC960_LP,
+ .subvendor = PCI_ANY_ID,
+ .subdevice = PCI_ANY_ID,
+ .driver_data = (unsigned long) &DAC960_LP_privdata,
+ },
+ {
+ .vendor = PCI_VENDOR_ID_DEC,
+ .device = PCI_DEVICE_ID_DEC_21285,
+ .subvendor = PCI_VENDOR_ID_MYLEX,
+ .subdevice = PCI_DEVICE_ID_MYLEX_DAC960_LA,
+ .driver_data = (unsigned long) &DAC960_LA_privdata,
+ },
+ {
+ .vendor = PCI_VENDOR_ID_MYLEX,
+ .device = PCI_DEVICE_ID_MYLEX_DAC960_PG,
+ .subvendor = PCI_ANY_ID,
+ .subdevice = PCI_ANY_ID,
+ .driver_data = (unsigned long) &DAC960_PG_privdata,
+ },
+ {
+ .vendor = PCI_VENDOR_ID_MYLEX,
+ .device = PCI_DEVICE_ID_MYLEX_DAC960_PD,
+ .subvendor = PCI_ANY_ID,
+ .subdevice = PCI_ANY_ID,
+ .driver_data = (unsigned long) &DAC960_PD_privdata,
+ },
+ {
+ .vendor = PCI_VENDOR_ID_MYLEX,
+ .device = PCI_DEVICE_ID_MYLEX_DAC960_P,
+ .subvendor = PCI_ANY_ID,
+ .subdevice = PCI_ANY_ID,
+ .driver_data = (unsigned long) &DAC960_P_privdata,
+ },
+ {0, },
+};
+
+MODULE_DEVICE_TABLE(pci, DAC960_id_table);
+
+static struct pci_driver DAC960_pci_driver = {
+ .name = "DAC960",
+ .id_table = DAC960_id_table,
+ .probe = DAC960_Probe,
+ .remove = DAC960_Remove,
+};
+
+static int __init DAC960_init_module(void)
+{
+ int ret;
+
+ ret = pci_register_driver(&DAC960_pci_driver);
+#ifdef DAC960_GAM_MINOR
+ if (!ret)
+ DAC960_gam_init();
+#endif
+ return ret;
+}
+
+static void __exit DAC960_cleanup_module(void)
+{
+ int i;
+
+#ifdef DAC960_GAM_MINOR
+ DAC960_gam_cleanup();
+#endif
+
+ for (i = 0; i < DAC960_ControllerCount; i++) {
+ DAC960_Controller_T *Controller = DAC960_Controllers[i];
+ if (Controller == NULL)
+ continue;
+ DAC960_FinalizeController(Controller);
+ }
+ if (DAC960_ProcDirectoryEntry != NULL) {
+ remove_proc_entry("rd/status", NULL);
+ remove_proc_entry("rd", NULL);
+ }
+ DAC960_ControllerCount = 0;
+ pci_unregister_driver(&DAC960_pci_driver);
+}
+
+module_init(DAC960_init_module);
+module_exit(DAC960_cleanup_module);
+
+MODULE_LICENSE("GPL");
diff --git a/drivers/block/DAC960.h b/drivers/block/DAC960.h
new file mode 100644
index 00000000..85fa9bb6
--- /dev/null
+++ b/drivers/block/DAC960.h
@@ -0,0 +1,4415 @@
+/*
+
+ Linux Driver for Mylex DAC960/AcceleRAID/eXtremeRAID PCI RAID Controllers
+
+ Copyright 1998-2001 by Leonard N. Zubkoff <lnz@dandelion.com>
+
+ This program is free software; you may redistribute and/or modify it under
+ the terms of the GNU General Public License Version 2 as published by the
+ Free Software Foundation.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY, without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ for complete details.
+
+ The author respectfully requests that any modifications to this software be
+ sent directly to him for evaluation and testing.
+
+*/
+
+
+/*
+ Define the maximum number of DAC960 Controllers supported by this driver.
+*/
+
+#define DAC960_MaxControllers 8
+
+
+/*
+ Define the maximum number of Controller Channels supported by DAC960
+ V1 and V2 Firmware Controllers.
+*/
+
+#define DAC960_V1_MaxChannels 3
+#define DAC960_V2_MaxChannels 4
+
+
+/*
+ Define the maximum number of Targets per Channel supported by DAC960
+ V1 and V2 Firmware Controllers.
+*/
+
+#define DAC960_V1_MaxTargets 16
+#define DAC960_V2_MaxTargets 128
+
+
+/*
+ Define the maximum number of Logical Drives supported by DAC960
+ V1 and V2 Firmware Controllers.
+*/
+
+#define DAC960_MaxLogicalDrives 32
+
+
+/*
+ Define the maximum number of Physical Devices supported by DAC960
+ V1 and V2 Firmware Controllers.
+*/
+
+#define DAC960_V1_MaxPhysicalDevices 45
+#define DAC960_V2_MaxPhysicalDevices 272
+
+/*
+ Define a 32/64 bit I/O Address data type.
+*/
+
+typedef unsigned long DAC960_IO_Address_T;
+
+
+/*
+ Define a 32/64 bit PCI Bus Address data type.
+*/
+
+typedef unsigned long DAC960_PCI_Address_T;
+
+
+/*
+ Define a 32 bit Bus Address data type.
+*/
+
+typedef unsigned int DAC960_BusAddress32_T;
+
+
+/*
+ Define a 64 bit Bus Address data type.
+*/
+
+typedef unsigned long long DAC960_BusAddress64_T;
+
+
+/*
+ Define a 32 bit Byte Count data type.
+*/
+
+typedef unsigned int DAC960_ByteCount32_T;
+
+
+/*
+ Define a 64 bit Byte Count data type.
+*/
+
+typedef unsigned long long DAC960_ByteCount64_T;
+
+
+/*
+ dma_loaf is used by helper routines to divide a region of
+ dma mapped memory into smaller pieces, where those pieces
+ are not of uniform size.
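+  cpu_base/dma_base and length describe the whole mapped region, while
+  cpu_free/dma_free track the next unallocated offset as pieces are
+  handed out.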
+ */
+
+struct dma_loaf {
+ void *cpu_base;
+ dma_addr_t dma_base;
+ size_t length;
+ void *cpu_free;
+ dma_addr_t dma_free;
+};
+
+/*
+ Define the SCSI INQUIRY Standard Data structure.
+*/
+
+typedef struct DAC960_SCSI_Inquiry
+{
+ unsigned char PeripheralDeviceType:5; /* Byte 0 Bits 0-4 */
+ unsigned char PeripheralQualifier:3; /* Byte 0 Bits 5-7 */
+ unsigned char DeviceTypeModifier:7; /* Byte 1 Bits 0-6 */
+ bool RMB:1; /* Byte 1 Bit 7 */
+ unsigned char ANSI_ApprovedVersion:3; /* Byte 2 Bits 0-2 */
+ unsigned char ECMA_Version:3; /* Byte 2 Bits 3-5 */
+ unsigned char ISO_Version:2; /* Byte 2 Bits 6-7 */
+ unsigned char ResponseDataFormat:4; /* Byte 3 Bits 0-3 */
+ unsigned char :2; /* Byte 3 Bits 4-5 */
+ bool TrmIOP:1; /* Byte 3 Bit 6 */
+ bool AENC:1; /* Byte 3 Bit 7 */
+ unsigned char AdditionalLength; /* Byte 4 */
+ unsigned char :8; /* Byte 5 */
+ unsigned char :8; /* Byte 6 */
+ bool SftRe:1; /* Byte 7 Bit 0 */
+ bool CmdQue:1; /* Byte 7 Bit 1 */
+ bool :1; /* Byte 7 Bit 2 */
+ bool Linked:1; /* Byte 7 Bit 3 */
+ bool Sync:1; /* Byte 7 Bit 4 */
+ bool WBus16:1; /* Byte 7 Bit 5 */
+ bool WBus32:1; /* Byte 7 Bit 6 */
+ bool RelAdr:1; /* Byte 7 Bit 7 */
+ unsigned char VendorIdentification[8]; /* Bytes 8-15 */
+ unsigned char ProductIdentification[16]; /* Bytes 16-31 */
+ unsigned char ProductRevisionLevel[4]; /* Bytes 32-35 */
+}
+DAC960_SCSI_Inquiry_T;
+
+
+/*
+ Define the SCSI INQUIRY Unit Serial Number structure.
+*/
+
+typedef struct DAC960_SCSI_Inquiry_UnitSerialNumber
+{
+ unsigned char PeripheralDeviceType:5; /* Byte 0 Bits 0-4 */
+ unsigned char PeripheralQualifier:3; /* Byte 0 Bits 5-7 */
+ unsigned char PageCode; /* Byte 1 */
+ unsigned char :8; /* Byte 2 */
+ unsigned char PageLength; /* Byte 3 */
+ unsigned char ProductSerialNumber[28]; /* Bytes 4-31 */
+}
+DAC960_SCSI_Inquiry_UnitSerialNumber_T;
+
+
+/*
+ Define the SCSI REQUEST SENSE Sense Key type.
+*/
+
+typedef enum
+{
+ DAC960_SenseKey_NoSense = 0x0,
+ DAC960_SenseKey_RecoveredError = 0x1,
+ DAC960_SenseKey_NotReady = 0x2,
+ DAC960_SenseKey_MediumError = 0x3,
+ DAC960_SenseKey_HardwareError = 0x4,
+ DAC960_SenseKey_IllegalRequest = 0x5,
+ DAC960_SenseKey_UnitAttention = 0x6,
+ DAC960_SenseKey_DataProtect = 0x7,
+ DAC960_SenseKey_BlankCheck = 0x8,
+ DAC960_SenseKey_VendorSpecific = 0x9,
+ DAC960_SenseKey_CopyAborted = 0xA,
+ DAC960_SenseKey_AbortedCommand = 0xB,
+ DAC960_SenseKey_Equal = 0xC,
+ DAC960_SenseKey_VolumeOverflow = 0xD,
+ DAC960_SenseKey_Miscompare = 0xE,
+ DAC960_SenseKey_Reserved = 0xF
+}
+__attribute__ ((packed))
+DAC960_SCSI_RequestSenseKey_T;
+
+
+/*
+ Define the SCSI REQUEST SENSE structure.
+*/
+
+typedef struct DAC960_SCSI_RequestSense
+{
+ unsigned char ErrorCode:7; /* Byte 0 Bits 0-6 */
+ bool Valid:1; /* Byte 0 Bit 7 */
+ unsigned char SegmentNumber; /* Byte 1 */
+ DAC960_SCSI_RequestSenseKey_T SenseKey:4; /* Byte 2 Bits 0-3 */
+ unsigned char :1; /* Byte 2 Bit 4 */
+ bool ILI:1; /* Byte 2 Bit 5 */
+ bool EOM:1; /* Byte 2 Bit 6 */
+ bool Filemark:1; /* Byte 2 Bit 7 */
+ unsigned char Information[4]; /* Bytes 3-6 */
+ unsigned char AdditionalSenseLength; /* Byte 7 */
+ unsigned char CommandSpecificInformation[4]; /* Bytes 8-11 */
+ unsigned char AdditionalSenseCode; /* Byte 12 */
+ unsigned char AdditionalSenseCodeQualifier; /* Byte 13 */
+}
+DAC960_SCSI_RequestSense_T;
+
+
+/*
+ Define the DAC960 V1 Firmware Command Opcodes.
+*/
+
+typedef enum
+{
+ /* I/O Commands */
+ DAC960_V1_ReadExtended = 0x33,
+ DAC960_V1_WriteExtended = 0x34,
+ DAC960_V1_ReadAheadExtended = 0x35,
+ DAC960_V1_ReadExtendedWithScatterGather = 0xB3,
+ DAC960_V1_WriteExtendedWithScatterGather = 0xB4,
+ DAC960_V1_Read = 0x36,
+ DAC960_V1_ReadWithScatterGather = 0xB6,
+ DAC960_V1_Write = 0x37,
+ DAC960_V1_WriteWithScatterGather = 0xB7,
+ DAC960_V1_DCDB = 0x04,
+ DAC960_V1_DCDBWithScatterGather = 0x84,
+ DAC960_V1_Flush = 0x0A,
+ /* Controller Status Related Commands */
+ DAC960_V1_Enquiry = 0x53,
+ DAC960_V1_Enquiry2 = 0x1C,
+ DAC960_V1_GetLogicalDriveElement = 0x55,
+ DAC960_V1_GetLogicalDriveInformation = 0x19,
+ DAC960_V1_IOPortRead = 0x39,
+ DAC960_V1_IOPortWrite = 0x3A,
+ DAC960_V1_GetSDStats = 0x3E,
+ DAC960_V1_GetPDStats = 0x3F,
+ DAC960_V1_PerformEventLogOperation = 0x72,
+ /* Device Related Commands */
+ DAC960_V1_StartDevice = 0x10,
+ DAC960_V1_GetDeviceState = 0x50,
+ DAC960_V1_StopChannel = 0x13,
+ DAC960_V1_StartChannel = 0x12,
+ DAC960_V1_ResetChannel = 0x1A,
+ /* Commands Associated with Data Consistency and Errors */
+ DAC960_V1_Rebuild = 0x09,
+ DAC960_V1_RebuildAsync = 0x16,
+ DAC960_V1_CheckConsistency = 0x0F,
+ DAC960_V1_CheckConsistencyAsync = 0x1E,
+ DAC960_V1_RebuildStat = 0x0C,
+ DAC960_V1_GetRebuildProgress = 0x27,
+ DAC960_V1_RebuildControl = 0x1F,
+ DAC960_V1_ReadBadBlockTable = 0x0B,
+ DAC960_V1_ReadBadDataTable = 0x25,
+ DAC960_V1_ClearBadDataTable = 0x26,
+ DAC960_V1_GetErrorTable = 0x17,
+ DAC960_V1_AddCapacityAsync = 0x2A,
+ DAC960_V1_BackgroundInitializationControl = 0x2B,
+ /* Configuration Related Commands */
+ DAC960_V1_ReadConfig2 = 0x3D,
+ DAC960_V1_WriteConfig2 = 0x3C,
+ DAC960_V1_ReadConfigurationOnDisk = 0x4A,
+ DAC960_V1_WriteConfigurationOnDisk = 0x4B,
+ DAC960_V1_ReadConfiguration = 0x4E,
+ DAC960_V1_ReadBackupConfiguration = 0x4D,
+ DAC960_V1_WriteConfiguration = 0x4F,
+ DAC960_V1_AddConfiguration = 0x4C,
+ DAC960_V1_ReadConfigurationLabel = 0x48,
+ DAC960_V1_WriteConfigurationLabel = 0x49,
+ /* Firmware Upgrade Related Commands */
+ DAC960_V1_LoadImage = 0x20,
+ DAC960_V1_StoreImage = 0x21,
+ DAC960_V1_ProgramImage = 0x22,
+ /* Diagnostic Commands */
+ DAC960_V1_SetDiagnosticMode = 0x31,
+ DAC960_V1_RunDiagnostic = 0x32,
+ /* Subsystem Service Commands */
+ DAC960_V1_GetSubsystemData = 0x70,
+ DAC960_V1_SetSubsystemParameters = 0x71,
+ /* Version 2.xx Firmware Commands */
+ DAC960_V1_Enquiry_Old = 0x05,
+ DAC960_V1_GetDeviceState_Old = 0x14,
+ DAC960_V1_Read_Old = 0x02,
+ DAC960_V1_Write_Old = 0x03,
+ DAC960_V1_ReadWithScatterGather_Old = 0x82,
+ DAC960_V1_WriteWithScatterGather_Old = 0x83
+}
+__attribute__ ((packed))
+DAC960_V1_CommandOpcode_T;
+
+
+/*
+ Define the DAC960 V1 Firmware Command Identifier type.
+*/
+
+typedef unsigned char DAC960_V1_CommandIdentifier_T;
+
+
+/*
+ Define the DAC960 V1 Firmware Command Status Codes.
+*/
+
+#define DAC960_V1_NormalCompletion 0x0000 /* Common */
+#define DAC960_V1_CheckConditionReceived 0x0002 /* Common */
+#define DAC960_V1_NoDeviceAtAddress 0x0102 /* Common */
+#define DAC960_V1_InvalidDeviceAddress 0x0105 /* Common */
+#define DAC960_V1_InvalidParameter 0x0105 /* Common */
+#define DAC960_V1_IrrecoverableDataError 0x0001 /* I/O */
+#define DAC960_V1_LogicalDriveNonexistentOrOffline 0x0002 /* I/O */
+#define DAC960_V1_AccessBeyondEndOfLogicalDrive 0x0105 /* I/O */
+#define DAC960_V1_BadDataEncountered 0x010C /* I/O */
+#define DAC960_V1_DeviceBusy 0x0008 /* DCDB */
+#define DAC960_V1_DeviceNonresponsive 0x000E /* DCDB */
+#define DAC960_V1_CommandTerminatedAbnormally 0x000F /* DCDB */
+#define DAC960_V1_UnableToStartDevice 0x0002 /* Device */
+#define DAC960_V1_InvalidChannelOrTargetOrModifier 0x0105 /* Device */
+#define DAC960_V1_ChannelBusy 0x0106 /* Device */
+#define DAC960_V1_ChannelNotStopped 0x0002 /* Device */
+#define DAC960_V1_AttemptToRebuildOnlineDrive 0x0002 /* Consistency */
+#define DAC960_V1_RebuildBadBlocksEncountered 0x0003 /* Consistency */
+#define DAC960_V1_NewDiskFailedDuringRebuild 0x0004 /* Consistency */
+#define DAC960_V1_RebuildOrCheckAlreadyInProgress 0x0106 /* Consistency */
+#define DAC960_V1_DependentDiskIsDead 0x0002 /* Consistency */
+#define DAC960_V1_InconsistentBlocksFound 0x0003 /* Consistency */
+#define DAC960_V1_InvalidOrNonredundantLogicalDrive 0x0105 /* Consistency */
+#define DAC960_V1_NoRebuildOrCheckInProgress 0x0105 /* Consistency */
+#define DAC960_V1_RebuildInProgress_DataValid 0x0000 /* Consistency */
+#define DAC960_V1_RebuildFailed_LogicalDriveFailure 0x0002 /* Consistency */
+#define DAC960_V1_RebuildFailed_BadBlocksOnOther 0x0003 /* Consistency */
+#define DAC960_V1_RebuildFailed_NewDriveFailed 0x0004 /* Consistency */
+#define DAC960_V1_RebuildSuccessful 0x0100 /* Consistency */
+#define DAC960_V1_RebuildSuccessfullyTerminated 0x0107 /* Consistency */
+#define DAC960_V1_BackgroundInitSuccessful 0x0100 /* Consistency */
+#define DAC960_V1_BackgroundInitAborted 0x0005 /* Consistency */
+#define DAC960_V1_NoBackgroundInitInProgress 0x0105 /* Consistency */
+#define DAC960_V1_AddCapacityInProgress 0x0004 /* Consistency */
+#define DAC960_V1_AddCapacityFailedOrSuspended 0x00F4 /* Consistency */
+#define DAC960_V1_Config2ChecksumError 0x0002 /* Configuration */
+#define DAC960_V1_ConfigurationSuspended 0x0106 /* Configuration */
+#define DAC960_V1_FailedToConfigureNVRAM 0x0105 /* Configuration */
+#define DAC960_V1_ConfigurationNotSavedStateChange 0x0106 /* Configuration */
+#define DAC960_V1_SubsystemNotInstalled 0x0001 /* Subsystem */
+#define DAC960_V1_SubsystemFailed 0x0002 /* Subsystem */
+#define DAC960_V1_SubsystemBusy 0x0106 /* Subsystem */
+
+typedef unsigned short DAC960_V1_CommandStatus_T;
+
+
+/*
+ Define the DAC960 V1 Firmware Enquiry Command reply structure.
+*/
+
+typedef struct DAC960_V1_Enquiry
+{
+ unsigned char NumberOfLogicalDrives; /* Byte 0 */
+ unsigned int :24; /* Bytes 1-3 */
+ unsigned int LogicalDriveSizes[32]; /* Bytes 4-131 */
+ unsigned short FlashAge; /* Bytes 132-133 */
+ struct {
+ bool DeferredWriteError:1; /* Byte 134 Bit 0 */
+ bool BatteryLow:1; /* Byte 134 Bit 1 */
+ unsigned char :6; /* Byte 134 Bits 2-7 */
+ } StatusFlags;
+ unsigned char :8; /* Byte 135 */
+ unsigned char MinorFirmwareVersion; /* Byte 136 */
+ unsigned char MajorFirmwareVersion; /* Byte 137 */
+ enum {
+ DAC960_V1_NoStandbyRebuildOrCheckInProgress = 0x00,
+ DAC960_V1_StandbyRebuildInProgress = 0x01,
+ DAC960_V1_BackgroundRebuildInProgress = 0x02,
+ DAC960_V1_BackgroundCheckInProgress = 0x03,
+ DAC960_V1_StandbyRebuildCompletedWithError = 0xFF,
+ DAC960_V1_BackgroundRebuildOrCheckFailed_DriveFailed = 0xF0,
+ DAC960_V1_BackgroundRebuildOrCheckFailed_LogicalDriveFailed = 0xF1,
+ DAC960_V1_BackgroundRebuildOrCheckFailed_OtherCauses = 0xF2,
+ DAC960_V1_BackgroundRebuildOrCheckSuccessfullyTerminated = 0xF3
+ } __attribute__ ((packed)) RebuildFlag; /* Byte 138 */
+ unsigned char MaxCommands; /* Byte 139 */
+ unsigned char OfflineLogicalDriveCount; /* Byte 140 */
+ unsigned char :8; /* Byte 141 */
+ unsigned short EventLogSequenceNumber; /* Bytes 142-143 */
+ unsigned char CriticalLogicalDriveCount; /* Byte 144 */
+ unsigned int :24; /* Bytes 145-147 */
+ unsigned char DeadDriveCount; /* Byte 148 */
+ unsigned char :8; /* Byte 149 */
+ unsigned char RebuildCount; /* Byte 150 */
+ struct {
+ unsigned char :3; /* Byte 151 Bits 0-2 */
+ bool BatteryBackupUnitPresent:1; /* Byte 151 Bit 3 */
+ unsigned char :3; /* Byte 151 Bits 4-6 */
+ unsigned char :1; /* Byte 151 Bit 7 */
+ } MiscFlags;
+ struct {
+ unsigned char TargetID;
+ unsigned char Channel;
+  } DeadDrives[21];					/* Bytes 152-193 */
+  unsigned char Reserved[62];				/* Bytes 194-255 */
+}
+__attribute__ ((packed))
+DAC960_V1_Enquiry_T;
+
+
+/*
+ Define the DAC960 V1 Firmware Enquiry2 Command reply structure.
+*/
+
+typedef struct DAC960_V1_Enquiry2
+{
+ struct {
+ enum {
+ DAC960_V1_P_PD_PU = 0x01,
+ DAC960_V1_PL = 0x02,
+ DAC960_V1_PG = 0x10,
+ DAC960_V1_PJ = 0x11,
+ DAC960_V1_PR = 0x12,
+ DAC960_V1_PT = 0x13,
+ DAC960_V1_PTL0 = 0x14,
+ DAC960_V1_PRL = 0x15,
+ DAC960_V1_PTL1 = 0x16,
+ DAC960_V1_1164P = 0x20
+ } __attribute__ ((packed)) SubModel; /* Byte 0 */
+ unsigned char ActualChannels; /* Byte 1 */
+ enum {
+ DAC960_V1_FiveChannelBoard = 0x01,
+ DAC960_V1_ThreeChannelBoard = 0x02,
+ DAC960_V1_TwoChannelBoard = 0x03,
+ DAC960_V1_ThreeChannelASIC_DAC = 0x04
+ } __attribute__ ((packed)) Model; /* Byte 2 */
+ enum {
+ DAC960_V1_EISA_Controller = 0x01,
+ DAC960_V1_MicroChannel_Controller = 0x02,
+ DAC960_V1_PCI_Controller = 0x03,
+ DAC960_V1_SCSItoSCSI_Controller = 0x08
+ } __attribute__ ((packed)) ProductFamily; /* Byte 3 */
+ } HardwareID; /* Bytes 0-3 */
+ /* MajorVersion.MinorVersion-FirmwareType-TurnID */
+ struct {
+ unsigned char MajorVersion; /* Byte 4 */
+ unsigned char MinorVersion; /* Byte 5 */
+ unsigned char TurnID; /* Byte 6 */
+ char FirmwareType; /* Byte 7 */
+ } FirmwareID; /* Bytes 4-7 */
+ unsigned char :8; /* Byte 8 */
+ unsigned int :24; /* Bytes 9-11 */
+ unsigned char ConfiguredChannels; /* Byte 12 */
+ unsigned char ActualChannels; /* Byte 13 */
+ unsigned char MaxTargets; /* Byte 14 */
+ unsigned char MaxTags; /* Byte 15 */
+ unsigned char MaxLogicalDrives; /* Byte 16 */
+ unsigned char MaxArms; /* Byte 17 */
+ unsigned char MaxSpans; /* Byte 18 */
+ unsigned char :8; /* Byte 19 */
+ unsigned int :32; /* Bytes 20-23 */
+ unsigned int MemorySize; /* Bytes 24-27 */
+ unsigned int CacheSize; /* Bytes 28-31 */
+ unsigned int FlashMemorySize; /* Bytes 32-35 */
+ unsigned int NonVolatileMemorySize; /* Bytes 36-39 */
+ struct {
+ enum {
+ DAC960_V1_RamType_DRAM = 0x0,
+ DAC960_V1_RamType_EDO = 0x1,
+ DAC960_V1_RamType_SDRAM = 0x2,
+ DAC960_V1_RamType_Last = 0x7
+ } __attribute__ ((packed)) RamType:3; /* Byte 40 Bits 0-2 */
+ enum {
+ DAC960_V1_ErrorCorrection_None = 0x0,
+ DAC960_V1_ErrorCorrection_Parity = 0x1,
+ DAC960_V1_ErrorCorrection_ECC = 0x2,
+ DAC960_V1_ErrorCorrection_Last = 0x7
+ } __attribute__ ((packed)) ErrorCorrection:3; /* Byte 40 Bits 3-5 */
+ bool FastPageMode:1; /* Byte 40 Bit 6 */
+ bool LowPowerMemory:1; /* Byte 40 Bit 7 */
+    unsigned char :8;					/* Byte 41 */
+ } MemoryType;
+ unsigned short ClockSpeed; /* Bytes 42-43 */
+ unsigned short MemorySpeed; /* Bytes 44-45 */
+ unsigned short HardwareSpeed; /* Bytes 46-47 */
+ unsigned int :32; /* Bytes 48-51 */
+ unsigned int :32; /* Bytes 52-55 */
+ unsigned char :8; /* Byte 56 */
+ unsigned char :8; /* Byte 57 */
+ unsigned short :16; /* Bytes 58-59 */
+ unsigned short MaxCommands; /* Bytes 60-61 */
+ unsigned short MaxScatterGatherEntries; /* Bytes 62-63 */
+ unsigned short MaxDriveCommands; /* Bytes 64-65 */
+ unsigned short MaxIODescriptors; /* Bytes 66-67 */
+ unsigned short MaxCombinedSectors; /* Bytes 68-69 */
+ unsigned char Latency; /* Byte 70 */
+ unsigned char :8; /* Byte 71 */
+ unsigned char SCSITimeout; /* Byte 72 */
+ unsigned char :8; /* Byte 73 */
+ unsigned short MinFreeLines; /* Bytes 74-75 */
+ unsigned int :32; /* Bytes 76-79 */
+ unsigned int :32; /* Bytes 80-83 */
+ unsigned char RebuildRateConstant; /* Byte 84 */
+ unsigned char :8; /* Byte 85 */
+ unsigned char :8; /* Byte 86 */
+ unsigned char :8; /* Byte 87 */
+ unsigned int :32; /* Bytes 88-91 */
+ unsigned int :32; /* Bytes 92-95 */
+ unsigned short PhysicalDriveBlockSize; /* Bytes 96-97 */
+ unsigned short LogicalDriveBlockSize; /* Bytes 98-99 */
+ unsigned short MaxBlocksPerCommand; /* Bytes 100-101 */
+ unsigned short BlockFactor; /* Bytes 102-103 */
+ unsigned short CacheLineSize; /* Bytes 104-105 */
+ struct {
+ enum {
+ DAC960_V1_Narrow_8bit = 0x0,
+ DAC960_V1_Wide_16bit = 0x1,
+ DAC960_V1_Wide_32bit = 0x2
+ } __attribute__ ((packed)) BusWidth:2; /* Byte 106 Bits 0-1 */
+ enum {
+ DAC960_V1_Fast = 0x0,
+ DAC960_V1_Ultra = 0x1,
+ DAC960_V1_Ultra2 = 0x2
+ } __attribute__ ((packed)) BusSpeed:2; /* Byte 106 Bits 2-3 */
+ bool Differential:1; /* Byte 106 Bit 4 */
+ unsigned char :3; /* Byte 106 Bits 5-7 */
+ } SCSICapability;
+ unsigned char :8; /* Byte 107 */
+ unsigned int :32; /* Bytes 108-111 */
+ unsigned short FirmwareBuildNumber; /* Bytes 112-113 */
+ enum {
+ DAC960_V1_AEMI = 0x01,
+ DAC960_V1_OEM1 = 0x02,
+ DAC960_V1_OEM2 = 0x04,
+ DAC960_V1_OEM3 = 0x08,
+ DAC960_V1_Conner = 0x10,
+ DAC960_V1_SAFTE = 0x20
+ } __attribute__ ((packed)) FaultManagementType; /* Byte 114 */
+ unsigned char :8; /* Byte 115 */
+ struct {
+ bool Clustering:1; /* Byte 116 Bit 0 */
+ bool MylexOnlineRAIDExpansion:1; /* Byte 116 Bit 1 */
+ bool ReadAhead:1; /* Byte 116 Bit 2 */
+ bool BackgroundInitialization:1; /* Byte 116 Bit 3 */
+ unsigned int :28; /* Bytes 116-119 */
+ } FirmwareFeatures;
+ unsigned int :32; /* Bytes 120-123 */
+ unsigned int :32; /* Bytes 124-127 */
+}
+DAC960_V1_Enquiry2_T;
+
+
+/*
+ Define the DAC960 V1 Firmware Logical Drive State type.
+*/
+
+typedef enum
+{
+ DAC960_V1_LogicalDrive_Online = 0x03,
+ DAC960_V1_LogicalDrive_Critical = 0x04,
+ DAC960_V1_LogicalDrive_Offline = 0xFF
+}
+__attribute__ ((packed))
+DAC960_V1_LogicalDriveState_T;
+
+
+/*
+ Define the DAC960 V1 Firmware Logical Drive Information structure.
+*/
+
+typedef struct DAC960_V1_LogicalDriveInformation
+{
+ unsigned int LogicalDriveSize; /* Bytes 0-3 */
+ DAC960_V1_LogicalDriveState_T LogicalDriveState; /* Byte 4 */
+ unsigned char RAIDLevel:7; /* Byte 5 Bits 0-6 */
+ bool WriteBack:1; /* Byte 5 Bit 7 */
+ unsigned short :16; /* Bytes 6-7 */
+}
+DAC960_V1_LogicalDriveInformation_T;
+
+
+/*
+ Define the DAC960 V1 Firmware Get Logical Drive Information Command
+ reply structure.
+*/
+
+typedef DAC960_V1_LogicalDriveInformation_T
+ DAC960_V1_LogicalDriveInformationArray_T[DAC960_MaxLogicalDrives];
+
+
+/*
+ Define the DAC960 V1 Firmware Perform Event Log Operation Types.
+*/
+
+typedef enum
+{
+ DAC960_V1_GetEventLogEntry = 0x00
+}
+__attribute__ ((packed))
+DAC960_V1_PerformEventLogOpType_T;
+
+
+/*
+ Define the DAC960 V1 Firmware Get Event Log Entry Command reply structure.
+*/
+
+typedef struct DAC960_V1_EventLogEntry
+{
+ unsigned char MessageType; /* Byte 0 */
+ unsigned char MessageLength; /* Byte 1 */
+ unsigned char TargetID:5; /* Byte 2 Bits 0-4 */
+ unsigned char Channel:3; /* Byte 2 Bits 5-7 */
+ unsigned char LogicalUnit:6; /* Byte 3 Bits 0-5 */
+ unsigned char :2; /* Byte 3 Bits 6-7 */
+ unsigned short SequenceNumber; /* Bytes 4-5 */
+ unsigned char ErrorCode:7; /* Byte 6 Bits 0-6 */
+ bool Valid:1; /* Byte 6 Bit 7 */
+ unsigned char SegmentNumber; /* Byte 7 */
+ DAC960_SCSI_RequestSenseKey_T SenseKey:4; /* Byte 8 Bits 0-3 */
+ unsigned char :1; /* Byte 8 Bit 4 */
+ bool ILI:1; /* Byte 8 Bit 5 */
+ bool EOM:1; /* Byte 8 Bit 6 */
+ bool Filemark:1; /* Byte 8 Bit 7 */
+ unsigned char Information[4]; /* Bytes 9-12 */
+ unsigned char AdditionalSenseLength; /* Byte 13 */
+ unsigned char CommandSpecificInformation[4]; /* Bytes 14-17 */
+ unsigned char AdditionalSenseCode; /* Byte 18 */
+ unsigned char AdditionalSenseCodeQualifier; /* Byte 19 */
+ unsigned char Dummy[12]; /* Bytes 20-31 */
+}
+DAC960_V1_EventLogEntry_T;
+
+
+/*
+ Define the DAC960 V1 Firmware Physical Device State type.
+*/
+
+typedef enum
+{
+ DAC960_V1_Device_Dead = 0x00,
+ DAC960_V1_Device_WriteOnly = 0x02,
+ DAC960_V1_Device_Online = 0x03,
+ DAC960_V1_Device_Standby = 0x10
+}
+__attribute__ ((packed))
+DAC960_V1_PhysicalDeviceState_T;
+
+
+/*
+ Define the DAC960 V1 Firmware Get Device State Command reply structure.
+ The structure is padded by 2 bytes for compatibility with Version 2.xx
+ Firmware.
+*/
+
+typedef struct DAC960_V1_DeviceState
+{
+ bool Present:1; /* Byte 0 Bit 0 */
+ unsigned char :7; /* Byte 0 Bits 1-7 */
+ enum {
+ DAC960_V1_OtherType = 0x0,
+ DAC960_V1_DiskType = 0x1,
+ DAC960_V1_SequentialType = 0x2,
+ DAC960_V1_CDROM_or_WORM_Type = 0x3
+ } __attribute__ ((packed)) DeviceType:2; /* Byte 1 Bits 0-1 */
+ bool :1; /* Byte 1 Bit 2 */
+ bool Fast20:1; /* Byte 1 Bit 3 */
+ bool Sync:1; /* Byte 1 Bit 4 */
+ bool Fast:1; /* Byte 1 Bit 5 */
+ bool Wide:1; /* Byte 1 Bit 6 */
+ bool TaggedQueuingSupported:1; /* Byte 1 Bit 7 */
+ DAC960_V1_PhysicalDeviceState_T DeviceState; /* Byte 2 */
+ unsigned char :8; /* Byte 3 */
+ unsigned char SynchronousMultiplier; /* Byte 4 */
+ unsigned char SynchronousOffset:5; /* Byte 5 Bits 0-4 */
+ unsigned char :3; /* Byte 5 Bits 5-7 */
+ unsigned int DiskSize __attribute__ ((packed)); /* Bytes 6-9 */
+ unsigned short :16; /* Bytes 10-11 */
+}
+DAC960_V1_DeviceState_T;
+
+
+/*
+ Define the DAC960 V1 Firmware Get Rebuild Progress Command reply structure.
+*/
+
+typedef struct DAC960_V1_RebuildProgress
+{
+ unsigned int LogicalDriveNumber; /* Bytes 0-3 */
+ unsigned int LogicalDriveSize; /* Bytes 4-7 */
+ unsigned int RemainingBlocks; /* Bytes 8-11 */
+}
+DAC960_V1_RebuildProgress_T;
+
+
+/*
+ Define the DAC960 V1 Firmware Background Initialization Status Command
+ reply structure.
+*/
+
+typedef struct DAC960_V1_BackgroundInitializationStatus
+{
+ unsigned int LogicalDriveSize; /* Bytes 0-3 */
+ unsigned int BlocksCompleted; /* Bytes 4-7 */
+ unsigned char Reserved1[12]; /* Bytes 8-19 */
+ unsigned int LogicalDriveNumber; /* Bytes 20-23 */
+ unsigned char RAIDLevel; /* Byte 24 */
+ enum {
+ DAC960_V1_BackgroundInitializationInvalid = 0x00,
+ DAC960_V1_BackgroundInitializationStarted = 0x02,
+ DAC960_V1_BackgroundInitializationInProgress = 0x04,
+ DAC960_V1_BackgroundInitializationSuspended = 0x05,
+ DAC960_V1_BackgroundInitializationCancelled = 0x06
+ } __attribute__ ((packed)) Status; /* Byte 25 */
+ unsigned char Reserved2[6]; /* Bytes 26-31 */
+}
+DAC960_V1_BackgroundInitializationStatus_T;
+
+
+/*
+ Define the DAC960 V1 Firmware Error Table Entry structure.
+*/
+
+typedef struct DAC960_V1_ErrorTableEntry
+{
+ unsigned char ParityErrorCount; /* Byte 0 */
+ unsigned char SoftErrorCount; /* Byte 1 */
+ unsigned char HardErrorCount; /* Byte 2 */
+ unsigned char MiscErrorCount; /* Byte 3 */
+}
+DAC960_V1_ErrorTableEntry_T;
+
+
+/*
+ Define the DAC960 V1 Firmware Get Error Table Command reply structure.
+*/
+
+typedef struct DAC960_V1_ErrorTable
+{
+ DAC960_V1_ErrorTableEntry_T
+ ErrorTableEntries[DAC960_V1_MaxChannels][DAC960_V1_MaxTargets];
+}
+DAC960_V1_ErrorTable_T;
+
+
+/*
+ Define the DAC960 V1 Firmware Read Config2 Command reply structure.
+*/
+
+typedef struct DAC960_V1_Config2
+{
+ unsigned char :1; /* Byte 0 Bit 0 */
+ bool ActiveNegationEnabled:1; /* Byte 0 Bit 1 */
+ unsigned char :5; /* Byte 0 Bits 2-6 */
+ bool NoRescanIfResetReceivedDuringScan:1; /* Byte 0 Bit 7 */
+ bool StorageWorksSupportEnabled:1; /* Byte 1 Bit 0 */
+ bool HewlettPackardSupportEnabled:1; /* Byte 1 Bit 1 */
+ bool NoDisconnectOnFirstCommand:1; /* Byte 1 Bit 2 */
+ unsigned char :2; /* Byte 1 Bits 3-4 */
+ bool AEMI_ARM:1; /* Byte 1 Bit 5 */
+ bool AEMI_OFM:1; /* Byte 1 Bit 6 */
+ unsigned char :1; /* Byte 1 Bit 7 */
+ enum {
+ DAC960_V1_OEMID_Mylex = 0x00,
+ DAC960_V1_OEMID_IBM = 0x08,
+ DAC960_V1_OEMID_HP = 0x0A,
+ DAC960_V1_OEMID_DEC = 0x0C,
+ DAC960_V1_OEMID_Siemens = 0x10,
+ DAC960_V1_OEMID_Intel = 0x12
+ } __attribute__ ((packed)) OEMID; /* Byte 2 */
+ unsigned char OEMModelNumber; /* Byte 3 */
+ unsigned char PhysicalSector; /* Byte 4 */
+ unsigned char LogicalSector; /* Byte 5 */
+ unsigned char BlockFactor; /* Byte 6 */
+ bool ReadAheadEnabled:1; /* Byte 7 Bit 0 */
+ bool LowBIOSDelay:1; /* Byte 7 Bit 1 */
+ unsigned char :2; /* Byte 7 Bits 2-3 */
+ bool ReassignRestrictedToOneSector:1; /* Byte 7 Bit 4 */
+ unsigned char :1; /* Byte 7 Bit 5 */
+ bool ForceUnitAccessDuringWriteRecovery:1; /* Byte 7 Bit 6 */
+ bool EnableLeftSymmetricRAID5Algorithm:1; /* Byte 7 Bit 7 */
+ unsigned char DefaultRebuildRate; /* Byte 8 */
+ unsigned char :8; /* Byte 9 */
+ unsigned char BlocksPerCacheLine; /* Byte 10 */
+ unsigned char BlocksPerStripe; /* Byte 11 */
+ struct {
+ enum {
+ DAC960_V1_Async = 0x0,
+ DAC960_V1_Sync_8MHz = 0x1,
+ DAC960_V1_Sync_5MHz = 0x2,
+ DAC960_V1_Sync_10or20MHz = 0x3 /* Byte 11 Bits 0-1 */
+ } __attribute__ ((packed)) Speed:2;
+ bool Force8Bit:1; /* Byte 11 Bit 2 */
+ bool DisableFast20:1; /* Byte 11 Bit 3 */
+ unsigned char :3; /* Byte 11 Bits 4-6 */
+ bool EnableTaggedQueuing:1; /* Byte 11 Bit 7 */
+ } __attribute__ ((packed)) ChannelParameters[6]; /* Bytes 12-17 */
+ unsigned char SCSIInitiatorID; /* Byte 18 */
+ unsigned char :8; /* Byte 19 */
+ enum {
+ DAC960_V1_StartupMode_ControllerSpinUp = 0x00,
+ DAC960_V1_StartupMode_PowerOnSpinUp = 0x01
+ } __attribute__ ((packed)) StartupMode; /* Byte 20 */
+ unsigned char SimultaneousDeviceSpinUpCount; /* Byte 21 */
+ unsigned char SecondsDelayBetweenSpinUps; /* Byte 22 */
+ unsigned char Reserved1[29]; /* Bytes 23-51 */
+ bool BIOSDisabled:1; /* Byte 52 Bit 0 */
+ bool CDROMBootEnabled:1; /* Byte 52 Bit 1 */
+ unsigned char :3; /* Byte 52 Bits 2-4 */
+ enum {
+ DAC960_V1_Geometry_128_32 = 0x0,
+ DAC960_V1_Geometry_255_63 = 0x1,
+ DAC960_V1_Geometry_Reserved1 = 0x2,
+ DAC960_V1_Geometry_Reserved2 = 0x3
+ } __attribute__ ((packed)) DriveGeometry:2; /* Byte 52 Bits 5-6 */
+ unsigned char :1; /* Byte 52 Bit 7 */
+ unsigned char Reserved2[9]; /* Bytes 53-61 */
+ unsigned short Checksum; /* Bytes 62-63 */
+}
+DAC960_V1_Config2_T;
+
+
+/*
+ Define the DAC960 V1 Firmware DCDB request structure.
+*/
+
+typedef struct DAC960_V1_DCDB
+{
+ unsigned char TargetID:4; /* Byte 0 Bits 0-3 */
+ unsigned char Channel:4; /* Byte 0 Bits 4-7 */
+ enum {
+ DAC960_V1_DCDB_NoDataTransfer = 0,
+ DAC960_V1_DCDB_DataTransferDeviceToSystem = 1,
+ DAC960_V1_DCDB_DataTransferSystemToDevice = 2,
+ DAC960_V1_DCDB_IllegalDataTransfer = 3
+ } __attribute__ ((packed)) Direction:2; /* Byte 1 Bits 0-1 */
+ bool EarlyStatus:1; /* Byte 1 Bit 2 */
+ unsigned char :1; /* Byte 1 Bit 3 */
+ enum {
+ DAC960_V1_DCDB_Timeout_24_hours = 0,
+ DAC960_V1_DCDB_Timeout_10_seconds = 1,
+ DAC960_V1_DCDB_Timeout_60_seconds = 2,
+ DAC960_V1_DCDB_Timeout_10_minutes = 3
+ } __attribute__ ((packed)) Timeout:2; /* Byte 1 Bits 4-5 */
+ bool NoAutomaticRequestSense:1; /* Byte 1 Bit 6 */
+ bool DisconnectPermitted:1; /* Byte 1 Bit 7 */
+ unsigned short TransferLength; /* Bytes 2-3 */
+ DAC960_BusAddress32_T BusAddress; /* Bytes 4-7 */
+ unsigned char CDBLength:4; /* Byte 8 Bits 0-3 */
+ unsigned char TransferLengthHigh4:4; /* Byte 8 Bits 4-7 */
+ unsigned char SenseLength; /* Byte 9 */
+ unsigned char CDB[12]; /* Bytes 10-21 */
+ unsigned char SenseData[64]; /* Bytes 22-85 */
+ unsigned char Status; /* Byte 86 */
+ unsigned char :8; /* Byte 87 */
+}
+DAC960_V1_DCDB_T;
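+
+/*
+  Illustrative sketch (not part of the original driver): the DCDB transfer
+  length is a 20 bit quantity split across the 16 bit TransferLength field
+  and the 4 bit TransferLengthHigh4 field, so a caller would split a byte
+  count as follows.
+*/
+
+static inline void DAC960_V1_SketchDCDBTransferLength(DAC960_V1_DCDB_T *DCDB,
+                                                       unsigned int Length)
+{
+  DCDB->TransferLength = Length & 0xFFFF;            /* Low 16 bits */
+  DCDB->TransferLengthHigh4 = (Length >> 16) & 0xF;  /* High 4 bits */
+}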
+
+
+/*
+ Define the DAC960 V1 Firmware Scatter/Gather List Type 1 32 Bit Address
+ 32 Bit Byte Count structure.
+*/
+
+typedef struct DAC960_V1_ScatterGatherSegment
+{
+ DAC960_BusAddress32_T SegmentDataPointer; /* Bytes 0-3 */
+ DAC960_ByteCount32_T SegmentByteCount; /* Bytes 4-7 */
+}
+DAC960_V1_ScatterGatherSegment_T;
+
+
+/*
+ Define the 13 Byte DAC960 V1 Firmware Command Mailbox structure. Bytes 13-15
+ are not used. The Command Mailbox structure is padded to 16 bytes for
+ efficient access.
+*/
+
+typedef union DAC960_V1_CommandMailbox
+{
+ unsigned int Words[4]; /* Words 0-3 */
+ unsigned char Bytes[16]; /* Bytes 0-15 */
+ struct {
+ DAC960_V1_CommandOpcode_T CommandOpcode; /* Byte 0 */
+ DAC960_V1_CommandIdentifier_T CommandIdentifier; /* Byte 1 */
+ unsigned char Dummy[14]; /* Bytes 2-15 */
+ } __attribute__ ((packed)) Common;
+ struct {
+ DAC960_V1_CommandOpcode_T CommandOpcode; /* Byte 0 */
+ DAC960_V1_CommandIdentifier_T CommandIdentifier; /* Byte 1 */
+ unsigned char Dummy1[6]; /* Bytes 2-7 */
+ DAC960_BusAddress32_T BusAddress; /* Bytes 8-11 */
+ unsigned char Dummy2[4]; /* Bytes 12-15 */
+ } __attribute__ ((packed)) Type3;
+ struct {
+ DAC960_V1_CommandOpcode_T CommandOpcode; /* Byte 0 */
+ DAC960_V1_CommandIdentifier_T CommandIdentifier; /* Byte 1 */
+ unsigned char CommandOpcode2; /* Byte 2 */
+ unsigned char Dummy1[5]; /* Bytes 3-7 */
+ DAC960_BusAddress32_T BusAddress; /* Bytes 8-11 */
+ unsigned char Dummy2[4]; /* Bytes 12-15 */
+ } __attribute__ ((packed)) Type3B;
+ struct {
+ DAC960_V1_CommandOpcode_T CommandOpcode; /* Byte 0 */
+ DAC960_V1_CommandIdentifier_T CommandIdentifier; /* Byte 1 */
+ unsigned char Dummy1[5]; /* Bytes 2-6 */
+ unsigned char LogicalDriveNumber:6; /* Byte 7 Bits 0-6 */
+ bool AutoRestore:1; /* Byte 7 Bit 7 */
+ unsigned char Dummy2[8]; /* Bytes 8-15 */
+ } __attribute__ ((packed)) Type3C;
+ struct {
+ DAC960_V1_CommandOpcode_T CommandOpcode; /* Byte 0 */
+ DAC960_V1_CommandIdentifier_T CommandIdentifier; /* Byte 1 */
+ unsigned char Channel; /* Byte 2 */
+ unsigned char TargetID; /* Byte 3 */
+ DAC960_V1_PhysicalDeviceState_T DeviceState:5; /* Byte 4 Bits 0-4 */
+ unsigned char Modifier:3; /* Byte 4 Bits 5-7 */
+ unsigned char Dummy1[3]; /* Bytes 5-7 */
+ DAC960_BusAddress32_T BusAddress; /* Bytes 8-11 */
+ unsigned char Dummy2[4]; /* Bytes 12-15 */
+ } __attribute__ ((packed)) Type3D;
+ struct {
+ DAC960_V1_CommandOpcode_T CommandOpcode; /* Byte 0 */
+ DAC960_V1_CommandIdentifier_T CommandIdentifier; /* Byte 1 */
+ DAC960_V1_PerformEventLogOpType_T OperationType; /* Byte 2 */
+ unsigned char OperationQualifier; /* Byte 3 */
+ unsigned short SequenceNumber; /* Bytes 4-5 */
+ unsigned char Dummy1[2]; /* Bytes 6-7 */
+ DAC960_BusAddress32_T BusAddress; /* Bytes 8-11 */
+ unsigned char Dummy2[4]; /* Bytes 12-15 */
+ } __attribute__ ((packed)) Type3E;
+ struct {
+ DAC960_V1_CommandOpcode_T CommandOpcode; /* Byte 0 */
+ DAC960_V1_CommandIdentifier_T CommandIdentifier; /* Byte 1 */
+ unsigned char Dummy1[2]; /* Bytes 2-3 */
+ unsigned char RebuildRateConstant; /* Byte 4 */
+ unsigned char Dummy2[3]; /* Bytes 5-7 */
+ DAC960_BusAddress32_T BusAddress; /* Bytes 8-11 */
+ unsigned char Dummy3[4]; /* Bytes 12-15 */
+ } __attribute__ ((packed)) Type3R;
+ struct {
+ DAC960_V1_CommandOpcode_T CommandOpcode; /* Byte 0 */
+ DAC960_V1_CommandIdentifier_T CommandIdentifier; /* Byte 1 */
+ unsigned short TransferLength; /* Bytes 2-3 */
+ unsigned int LogicalBlockAddress; /* Bytes 4-7 */
+ DAC960_BusAddress32_T BusAddress; /* Bytes 8-11 */
+ unsigned char LogicalDriveNumber; /* Byte 12 */
+ unsigned char Dummy[3]; /* Bytes 13-15 */
+ } __attribute__ ((packed)) Type4;
+ struct {
+ DAC960_V1_CommandOpcode_T CommandOpcode; /* Byte 0 */
+ DAC960_V1_CommandIdentifier_T CommandIdentifier; /* Byte 1 */
+ struct {
+ unsigned short TransferLength:11; /* Bytes 2-3 */
+ unsigned char LogicalDriveNumber:5; /* Byte 3 Bits 3-7 */
+ } __attribute__ ((packed)) LD;
+ unsigned int LogicalBlockAddress; /* Bytes 4-7 */
+ DAC960_BusAddress32_T BusAddress; /* Bytes 8-11 */
+ unsigned char ScatterGatherCount:6; /* Byte 12 Bits 0-5 */
+ enum {
+ DAC960_V1_ScatterGather_32BitAddress_32BitByteCount = 0x0,
+ DAC960_V1_ScatterGather_32BitAddress_16BitByteCount = 0x1,
+ DAC960_V1_ScatterGather_32BitByteCount_32BitAddress = 0x2,
+ DAC960_V1_ScatterGather_16BitByteCount_32BitAddress = 0x3
+ } __attribute__ ((packed)) ScatterGatherType:2; /* Byte 12 Bits 6-7 */
+ unsigned char Dummy[3]; /* Bytes 13-15 */
+ } __attribute__ ((packed)) Type5;
+ struct {
+ DAC960_V1_CommandOpcode_T CommandOpcode; /* Byte 0 */
+ DAC960_V1_CommandIdentifier_T CommandIdentifier; /* Byte 1 */
+ unsigned char CommandOpcode2; /* Byte 2 */
+ unsigned char :8; /* Byte 3 */
+ DAC960_BusAddress32_T CommandMailboxesBusAddress; /* Bytes 4-7 */
+ DAC960_BusAddress32_T StatusMailboxesBusAddress; /* Bytes 8-11 */
+ unsigned char Dummy[4]; /* Bytes 12-15 */
+ } __attribute__ ((packed)) TypeX;
+}
+DAC960_V1_CommandMailbox_T;
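+
+/*
+  Illustrative sketch (not part of the original driver): how a Type3 Command
+  Mailbox might be prepared for an Enquiry command, assuming the
+  DAC960_V1_Enquiry opcode defined earlier in this file and a reply buffer
+  already mapped for DMA by the caller.
+*/
+
+static inline void DAC960_V1_SketchEnquiryMailbox(DAC960_V1_CommandMailbox_T
+                                                    *CommandMailbox,
+                                                  DAC960_BusAddress32_T
+                                                    EnquiryDMA)
+{
+  /* Clear all 16 bytes so the unused Dummy fields are zero. */
+  memset(CommandMailbox, 0, sizeof(DAC960_V1_CommandMailbox_T));
+  CommandMailbox->Type3.CommandOpcode = DAC960_V1_Enquiry;
+  CommandMailbox->Type3.CommandIdentifier = 1;
+  CommandMailbox->Type3.BusAddress = EnquiryDMA;
+}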
+
+
+/*
+ Define the DAC960 V2 Firmware Command Opcodes.
+*/
+
+typedef enum
+{
+ DAC960_V2_MemCopy = 0x01,
+ DAC960_V2_SCSI_10_Passthru = 0x02,
+ DAC960_V2_SCSI_255_Passthru = 0x03,
+ DAC960_V2_SCSI_10 = 0x04,
+ DAC960_V2_SCSI_256 = 0x05,
+ DAC960_V2_IOCTL = 0x20
+}
+__attribute__ ((packed))
+DAC960_V2_CommandOpcode_T;
+
+
+/*
+ Define the DAC960 V2 Firmware IOCTL Opcodes.
+*/
+
+typedef enum
+{
+ DAC960_V2_GetControllerInfo = 0x01,
+ DAC960_V2_GetLogicalDeviceInfoValid = 0x03,
+ DAC960_V2_GetPhysicalDeviceInfoValid = 0x05,
+ DAC960_V2_GetHealthStatus = 0x11,
+ DAC960_V2_GetEvent = 0x15,
+ DAC960_V2_StartDiscovery = 0x81,
+ DAC960_V2_SetDeviceState = 0x82,
+ DAC960_V2_RebuildDeviceStart = 0x88,
+ DAC960_V2_RebuildDeviceStop = 0x89,
+ DAC960_V2_ConsistencyCheckStart = 0x8C,
+ DAC960_V2_ConsistencyCheckStop = 0x8D,
+ DAC960_V2_SetMemoryMailbox = 0x8E,
+ DAC960_V2_PauseDevice = 0x92,
+ DAC960_V2_TranslatePhysicalToLogicalDevice = 0xC5
+}
+__attribute__ ((packed))
+DAC960_V2_IOCTL_Opcode_T;
+
+
+/*
+ Define the DAC960 V2 Firmware Command Identifier type.
+*/
+
+typedef unsigned short DAC960_V2_CommandIdentifier_T;
+
+
+/*
+ Define the DAC960 V2 Firmware Command Status Codes.
+*/
+
+#define DAC960_V2_NormalCompletion 0x00
+#define DAC960_V2_AbormalCompletion 0x02
+#define DAC960_V2_DeviceBusy 0x08
+#define DAC960_V2_DeviceNonresponsive 0x0E
+#define DAC960_V2_DeviceNonresponsive2 0x0F
+#define DAC960_V2_DeviceRevervationConflict 0x18
+
+typedef unsigned char DAC960_V2_CommandStatus_T;
+
+
+/*
+ Define the DAC960 V2 Firmware Memory Type structure.
+*/
+
+typedef struct DAC960_V2_MemoryType
+{
+ enum {
+ DAC960_V2_MemoryType_Reserved = 0x00,
+ DAC960_V2_MemoryType_DRAM = 0x01,
+ DAC960_V2_MemoryType_EDRAM = 0x02,
+ DAC960_V2_MemoryType_EDO = 0x03,
+ DAC960_V2_MemoryType_SDRAM = 0x04,
+ DAC960_V2_MemoryType_Last = 0x1F
+ } __attribute__ ((packed)) MemoryType:5; /* Byte 0 Bits 0-4 */
+ bool :1; /* Byte 0 Bit 5 */
+ bool MemoryParity:1; /* Byte 0 Bit 6 */
+ bool MemoryECC:1; /* Byte 0 Bit 7 */
+}
+DAC960_V2_MemoryType_T;
+
+
+/*
+ Define the DAC960 V2 Firmware Processor Type structure.
+*/
+
+typedef enum
+{
+ DAC960_V2_ProcessorType_i960CA = 0x01,
+ DAC960_V2_ProcessorType_i960RD = 0x02,
+ DAC960_V2_ProcessorType_i960RN = 0x03,
+ DAC960_V2_ProcessorType_i960RP = 0x04,
+ DAC960_V2_ProcessorType_NorthBay = 0x05,
+ DAC960_V2_ProcessorType_StrongArm = 0x06,
+ DAC960_V2_ProcessorType_i960RM = 0x07
+}
+__attribute__ ((packed))
+DAC960_V2_ProcessorType_T;
+
+
+/*
+ Define the DAC960 V2 Firmware Get Controller Info reply structure.
+*/
+
+typedef struct DAC960_V2_ControllerInfo
+{
+ unsigned char :8; /* Byte 0 */
+ enum {
+ DAC960_V2_SCSI_Bus = 0x00,
+ DAC960_V2_Fibre_Bus = 0x01,
+ DAC960_V2_PCI_Bus = 0x03
+ } __attribute__ ((packed)) BusInterfaceType; /* Byte 1 */
+ enum {
+ DAC960_V2_DAC960E = 0x01,
+ DAC960_V2_DAC960M = 0x08,
+ DAC960_V2_DAC960PD = 0x10,
+ DAC960_V2_DAC960PL = 0x11,
+ DAC960_V2_DAC960PU = 0x12,
+ DAC960_V2_DAC960PE = 0x13,
+ DAC960_V2_DAC960PG = 0x14,
+ DAC960_V2_DAC960PJ = 0x15,
+ DAC960_V2_DAC960PTL0 = 0x16,
+ DAC960_V2_DAC960PR = 0x17,
+ DAC960_V2_DAC960PRL = 0x18,
+ DAC960_V2_DAC960PT = 0x19,
+ DAC960_V2_DAC1164P = 0x1A,
+ DAC960_V2_DAC960PTL1 = 0x1B,
+ DAC960_V2_EXR2000P = 0x1C,
+ DAC960_V2_EXR3000P = 0x1D,
+ DAC960_V2_AcceleRAID352 = 0x1E,
+ DAC960_V2_AcceleRAID170 = 0x1F,
+ DAC960_V2_AcceleRAID160 = 0x20,
+ DAC960_V2_DAC960S = 0x60,
+ DAC960_V2_DAC960SU = 0x61,
+ DAC960_V2_DAC960SX = 0x62,
+ DAC960_V2_DAC960SF = 0x63,
+ DAC960_V2_DAC960SS = 0x64,
+ DAC960_V2_DAC960FL = 0x65,
+ DAC960_V2_DAC960LL = 0x66,
+ DAC960_V2_DAC960FF = 0x67,
+ DAC960_V2_DAC960HP = 0x68,
+ DAC960_V2_RAIDBRICK = 0x69,
+ DAC960_V2_METEOR_FL = 0x6A,
+ DAC960_V2_METEOR_FF = 0x6B
+ } __attribute__ ((packed)) ControllerType; /* Byte 2 */
+ unsigned char :8; /* Byte 3 */
+ unsigned short BusInterfaceSpeedMHz; /* Bytes 4-5 */
+ unsigned char BusWidthBits; /* Byte 6 */
+ unsigned char FlashCodeTypeOrProductID; /* Byte 7 */
+ unsigned char NumberOfHostPortsPresent; /* Byte 8 */
+ unsigned char Reserved1[7]; /* Bytes 9-15 */
+ unsigned char BusInterfaceName[16]; /* Bytes 16-31 */
+ unsigned char ControllerName[16]; /* Bytes 32-47 */
+ unsigned char Reserved2[16]; /* Bytes 48-63 */
+ /* Firmware Release Information */
+ unsigned char FirmwareMajorVersion; /* Byte 64 */
+ unsigned char FirmwareMinorVersion; /* Byte 65 */
+ unsigned char FirmwareTurnNumber; /* Byte 66 */
+ unsigned char FirmwareBuildNumber; /* Byte 67 */
+ unsigned char FirmwareReleaseDay; /* Byte 68 */
+ unsigned char FirmwareReleaseMonth; /* Byte 69 */
+ unsigned char FirmwareReleaseYearHigh2Digits; /* Byte 70 */
+ unsigned char FirmwareReleaseYearLow2Digits; /* Byte 71 */
+ /* Hardware Release Information */
+ unsigned char HardwareRevision; /* Byte 72 */
+ unsigned int :24; /* Bytes 73-75 */
+ unsigned char HardwareReleaseDay; /* Byte 76 */
+ unsigned char HardwareReleaseMonth; /* Byte 77 */
+ unsigned char HardwareReleaseYearHigh2Digits; /* Byte 78 */
+ unsigned char HardwareReleaseYearLow2Digits; /* Byte 79 */
+ /* Hardware Manufacturing Information */
+ unsigned char ManufacturingBatchNumber; /* Byte 80 */
+ unsigned char :8; /* Byte 81 */
+ unsigned char ManufacturingPlantNumber; /* Byte 82 */
+ unsigned char :8; /* Byte 83 */
+ unsigned char HardwareManufacturingDay; /* Byte 84 */
+ unsigned char HardwareManufacturingMonth; /* Byte 85 */
+ unsigned char HardwareManufacturingYearHigh2Digits; /* Byte 86 */
+ unsigned char HardwareManufacturingYearLow2Digits; /* Byte 87 */
+ unsigned char MaximumNumberOfPDDperXLD; /* Byte 88 */
+ unsigned char MaximumNumberOfILDperXLD; /* Byte 89 */
+ unsigned short NonvolatileMemorySizeKB; /* Bytes 90-91 */
+ unsigned char MaximumNumberOfXLD; /* Byte 92 */
+ unsigned int :24; /* Bytes 93-95 */
+ /* Unique Information per Controller */
+ unsigned char ControllerSerialNumber[16]; /* Bytes 96-111 */
+ unsigned char Reserved3[16]; /* Bytes 112-127 */
+ /* Vendor Information */
+ unsigned int :24; /* Bytes 128-130 */
+ unsigned char OEM_Code; /* Byte 131 */
+ unsigned char VendorName[16]; /* Bytes 132-147 */
+ /* Other Physical/Controller/Operation Information */
+ bool BBU_Present:1; /* Byte 148 Bit 0 */
+ bool ActiveActiveClusteringMode:1; /* Byte 148 Bit 1 */
+ unsigned char :6; /* Byte 148 Bits 2-7 */
+ unsigned char :8; /* Byte 149 */
+ unsigned short :16; /* Bytes 150-151 */
+ /* Physical Device Scan Information */
+ bool PhysicalScanActive:1; /* Byte 152 Bit 0 */
+ unsigned char :7; /* Byte 152 Bits 1-7 */
+ unsigned char PhysicalDeviceChannelNumber; /* Byte 153 */
+ unsigned char PhysicalDeviceTargetID; /* Byte 154 */
+ unsigned char PhysicalDeviceLogicalUnit; /* Byte 155 */
+ /* Maximum Command Data Transfer Sizes */
+ unsigned short MaximumDataTransferSizeInBlocks; /* Bytes 156-157 */
+ unsigned short MaximumScatterGatherEntries; /* Bytes 158-159 */
+ /* Logical/Physical Device Counts */
+ unsigned short LogicalDevicesPresent; /* Bytes 160-161 */
+ unsigned short LogicalDevicesCritical; /* Bytes 162-163 */
+ unsigned short LogicalDevicesOffline; /* Bytes 164-165 */
+ unsigned short PhysicalDevicesPresent; /* Bytes 166-167 */
+ unsigned short PhysicalDisksPresent; /* Bytes 168-169 */
+ unsigned short PhysicalDisksCritical; /* Bytes 170-171 */
+ unsigned short PhysicalDisksOffline; /* Bytes 172-173 */
+ unsigned short MaximumParallelCommands; /* Bytes 174-175 */
+ /* Channel and Target ID Information */
+ unsigned char NumberOfPhysicalChannelsPresent; /* Byte 176 */
+ unsigned char NumberOfVirtualChannelsPresent; /* Byte 177 */
+ unsigned char NumberOfPhysicalChannelsPossible; /* Byte 178 */
+ unsigned char NumberOfVirtualChannelsPossible; /* Byte 179 */
+ unsigned char MaximumTargetsPerChannel[16]; /* Bytes 180-195 */
+ unsigned char Reserved4[12]; /* Bytes 196-207 */
+ /* Memory/Cache Information */
+ unsigned short MemorySizeMB; /* Bytes 208-209 */
+ unsigned short CacheSizeMB; /* Bytes 210-211 */
+ unsigned int ValidCacheSizeInBytes; /* Bytes 212-215 */
+ unsigned int DirtyCacheSizeInBytes; /* Bytes 216-219 */
+ unsigned short MemorySpeedMHz; /* Bytes 220-221 */
+ unsigned char MemoryDataWidthBits; /* Byte 222 */
+ DAC960_V2_MemoryType_T MemoryType; /* Byte 223 */
+ unsigned char CacheMemoryTypeName[16]; /* Bytes 224-239 */
+ /* Execution Memory Information */
+ unsigned short ExecutionMemorySizeMB; /* Bytes 240-241 */
+ unsigned short ExecutionL2CacheSizeMB; /* Bytes 242-243 */
+ unsigned char Reserved5[8]; /* Bytes 244-251 */
+ unsigned short ExecutionMemorySpeedMHz; /* Bytes 252-253 */
+ unsigned char ExecutionMemoryDataWidthBits; /* Byte 254 */
+ DAC960_V2_MemoryType_T ExecutionMemoryType; /* Byte 255 */
+ unsigned char ExecutionMemoryTypeName[16]; /* Bytes 256-271 */
+ /* First CPU Type Information */
+ unsigned short FirstProcessorSpeedMHz; /* Bytes 272-273 */
+ DAC960_V2_ProcessorType_T FirstProcessorType; /* Byte 274 */
+ unsigned char FirstProcessorCount; /* Byte 275 */
+ unsigned char Reserved6[12]; /* Bytes 276-287 */
+ unsigned char FirstProcessorName[16]; /* Bytes 288-303 */
+ /* Second CPU Type Information */
+ unsigned short SecondProcessorSpeedMHz; /* Bytes 304-305 */
+ DAC960_V2_ProcessorType_T SecondProcessorType; /* Byte 306 */
+ unsigned char SecondProcessorCount; /* Byte 307 */
+ unsigned char Reserved7[12]; /* Bytes 308-319 */
+ unsigned char SecondProcessorName[16]; /* Bytes 320-335 */
+ /* Debugging/Profiling/Command Time Tracing Information */
+ unsigned short CurrentProfilingDataPageNumber; /* Bytes 336-337 */
+ unsigned short ProgramsAwaitingProfilingData; /* Bytes 338-339 */
+ unsigned short CurrentCommandTimeTraceDataPageNumber; /* Bytes 340-341 */
+ unsigned short ProgramsAwaitingCommandTimeTraceData; /* Bytes 342-343 */
+ unsigned char Reserved8[8]; /* Bytes 344-351 */
+ /* Error Counters on Physical Devices */
+ unsigned short PhysicalDeviceBusResets; /* Bytes 352-353 */
+ unsigned short PhysicalDeviceParityErrors; /* Bytes 354-355 */
+ unsigned short PhysicalDeviceSoftErrors; /* Bytes 356-357 */
+ unsigned short PhysicalDeviceCommandsFailed; /* Bytes 358-359 */
+ unsigned short PhysicalDeviceMiscellaneousErrors; /* Bytes 360-361 */
+ unsigned short PhysicalDeviceCommandTimeouts; /* Bytes 362-363 */
+ unsigned short PhysicalDeviceSelectionTimeouts; /* Bytes 364-365 */
+ unsigned short PhysicalDeviceRetriesDone; /* Bytes 366-367 */
+ unsigned short PhysicalDeviceAbortsDone; /* Bytes 368-369 */
+ unsigned short PhysicalDeviceHostCommandAbortsDone; /* Bytes 370-371 */
+ unsigned short PhysicalDevicePredictedFailuresDetected; /* Bytes 372-373 */
+ unsigned short PhysicalDeviceHostCommandsFailed; /* Bytes 374-375 */
+ unsigned short PhysicalDeviceHardErrors; /* Bytes 376-377 */
+ unsigned char Reserved9[6]; /* Bytes 378-383 */
+ /* Error Counters on Logical Devices */
+ unsigned short LogicalDeviceSoftErrors; /* Bytes 384-385 */
+ unsigned short LogicalDeviceCommandsFailed; /* Bytes 386-387 */
+ unsigned short LogicalDeviceHostCommandAbortsDone; /* Bytes 388-389 */
+ unsigned short :16; /* Bytes 390-391 */
+ /* Error Counters on Controller */
+ unsigned short ControllerMemoryErrors; /* Bytes 392-393 */
+ unsigned short ControllerHostCommandAbortsDone; /* Bytes 394-395 */
+ unsigned int :32; /* Bytes 396-399 */
+ /* Long Duration Activity Information */
+ unsigned short BackgroundInitializationsActive; /* Bytes 400-401 */
+ unsigned short LogicalDeviceInitializationsActive; /* Bytes 402-403 */
+ unsigned short PhysicalDeviceInitializationsActive; /* Bytes 404-405 */
+ unsigned short ConsistencyChecksActive; /* Bytes 406-407 */
+ unsigned short RebuildsActive; /* Bytes 408-409 */
+ unsigned short OnlineExpansionsActive; /* Bytes 410-411 */
+ unsigned short PatrolActivitiesActive; /* Bytes 412-413 */
+ unsigned short :16; /* Bytes 414-415 */
+ /* Flash ROM Information */
+ unsigned char FlashType; /* Byte 416 */
+ unsigned char :8; /* Byte 417 */
+ unsigned short FlashSizeMB; /* Bytes 418-419 */
+ unsigned int FlashLimit; /* Bytes 420-423 */
+ unsigned int FlashCount; /* Bytes 424-427 */
+ unsigned int :32; /* Bytes 428-431 */
+ unsigned char FlashTypeName[16]; /* Bytes 432-447 */
+ /* Firmware Run Time Information */
+ unsigned char RebuildRate; /* Byte 448 */
+ unsigned char BackgroundInitializationRate; /* Byte 449 */
+ unsigned char ForegroundInitializationRate; /* Byte 450 */
+ unsigned char ConsistencyCheckRate; /* Byte 451 */
+ unsigned int :32; /* Bytes 452-455 */
+ unsigned int MaximumDP; /* Bytes 456-459 */
+ unsigned int FreeDP; /* Bytes 460-463 */
+ unsigned int MaximumIOP; /* Bytes 464-467 */
+ unsigned int FreeIOP; /* Bytes 468-471 */
+ unsigned short MaximumCombLengthInBlocks; /* Bytes 472-473 */
+ unsigned short NumberOfConfigurationGroups; /* Bytes 474-475 */
+ bool InstallationAbortStatus:1; /* Byte 476 Bit 0 */
+ bool MaintenanceModeStatus:1; /* Byte 476 Bit 1 */
+ unsigned int :24; /* Bytes 476-479 */
+ unsigned char Reserved10[32]; /* Bytes 480-511 */
+ unsigned char Reserved11[512]; /* Bytes 512-1023 */
+}
+DAC960_V2_ControllerInfo_T;
+
+
+/*
+ Define the DAC960 V2 Firmware Logical Device State type.
+*/
+
+typedef enum
+{
+ DAC960_V2_LogicalDevice_Online = 0x01,
+ DAC960_V2_LogicalDevice_Offline = 0x08,
+ DAC960_V2_LogicalDevice_Critical = 0x09
+}
+__attribute__ ((packed))
+DAC960_V2_LogicalDeviceState_T;
+
+
+/*
+ Define the DAC960 V2 Firmware Get Logical Device Info reply structure.
+*/
+
+typedef struct DAC960_V2_LogicalDeviceInfo
+{
+ unsigned char :8; /* Byte 0 */
+ unsigned char Channel; /* Byte 1 */
+ unsigned char TargetID; /* Byte 2 */
+ unsigned char LogicalUnit; /* Byte 3 */
+ DAC960_V2_LogicalDeviceState_T LogicalDeviceState; /* Byte 4 */
+ unsigned char RAIDLevel; /* Byte 5 */
+ unsigned char StripeSize; /* Byte 6 */
+ unsigned char CacheLineSize; /* Byte 7 */
+ struct {
+ enum {
+ DAC960_V2_ReadCacheDisabled = 0x0,
+ DAC960_V2_ReadCacheEnabled = 0x1,
+ DAC960_V2_ReadAheadEnabled = 0x2,
+ DAC960_V2_IntelligentReadAheadEnabled = 0x3,
+ DAC960_V2_ReadCache_Last = 0x7
+ } __attribute__ ((packed)) ReadCache:3; /* Byte 8 Bits 0-2 */
+ enum {
+ DAC960_V2_WriteCacheDisabled = 0x0,
+ DAC960_V2_LogicalDeviceReadOnly = 0x1,
+ DAC960_V2_WriteCacheEnabled = 0x2,
+ DAC960_V2_IntelligentWriteCacheEnabled = 0x3,
+ DAC960_V2_WriteCache_Last = 0x7
+ } __attribute__ ((packed)) WriteCache:3; /* Byte 8 Bits 3-5 */
+ bool :1; /* Byte 8 Bit 6 */
+ bool LogicalDeviceInitialized:1; /* Byte 8 Bit 7 */
+ } LogicalDeviceControl; /* Byte 8 */
+ /* Logical Device Operations Status */
+ bool ConsistencyCheckInProgress:1; /* Byte 9 Bit 0 */
+ bool RebuildInProgress:1; /* Byte 9 Bit 1 */
+ bool BackgroundInitializationInProgress:1; /* Byte 9 Bit 2 */
+ bool ForegroundInitializationInProgress:1; /* Byte 9 Bit 3 */
+ bool DataMigrationInProgress:1; /* Byte 9 Bit 4 */
+ bool PatrolOperationInProgress:1; /* Byte 9 Bit 5 */
+ unsigned char :2; /* Byte 9 Bits 6-7 */
+ unsigned char RAID5WriteUpdate; /* Byte 10 */
+ unsigned char RAID5Algorithm; /* Byte 11 */
+ unsigned short LogicalDeviceNumber; /* Bytes 12-13 */
+ /* BIOS Info */
+ bool BIOSDisabled:1; /* Byte 14 Bit 0 */
+ bool CDROMBootEnabled:1; /* Byte 14 Bit 1 */
+ bool DriveCoercionEnabled:1; /* Byte 14 Bit 2 */
+ bool WriteSameDisabled:1; /* Byte 14 Bit 3 */
+ bool HBA_ModeEnabled:1; /* Byte 14 Bit 4 */
+ enum {
+ DAC960_V2_Geometry_128_32 = 0x0,
+ DAC960_V2_Geometry_255_63 = 0x1,
+ DAC960_V2_Geometry_Reserved1 = 0x2,
+ DAC960_V2_Geometry_Reserved2 = 0x3
+ } __attribute__ ((packed)) DriveGeometry:2; /* Byte 14 Bits 5-6 */
+ bool SuperReadAheadEnabled:1; /* Byte 14 Bit 7 */
+ unsigned char :8; /* Byte 15 */
+ /* Error Counters */
+ unsigned short SoftErrors; /* Bytes 16-17 */
+ unsigned short CommandsFailed; /* Bytes 18-19 */
+ unsigned short HostCommandAbortsDone; /* Bytes 20-21 */
+ unsigned short DeferredWriteErrors; /* Bytes 22-23 */
+ unsigned int :32; /* Bytes 24-27 */
+ unsigned int :32; /* Bytes 28-31 */
+ /* Device Size Information */
+ unsigned short :16; /* Bytes 32-33 */
+ unsigned short DeviceBlockSizeInBytes; /* Bytes 34-35 */
+ unsigned int OriginalDeviceSize; /* Bytes 36-39 */
+ unsigned int ConfigurableDeviceSize; /* Bytes 40-43 */
+ unsigned int :32; /* Bytes 44-47 */
+ unsigned char LogicalDeviceName[32]; /* Bytes 48-79 */
+ unsigned char SCSI_InquiryData[36]; /* Bytes 80-115 */
+ unsigned char Reserved1[12]; /* Bytes 116-127 */
+ DAC960_ByteCount64_T LastReadBlockNumber; /* Bytes 128-135 */
+ DAC960_ByteCount64_T LastWrittenBlockNumber; /* Bytes 136-143 */
+ DAC960_ByteCount64_T ConsistencyCheckBlockNumber; /* Bytes 144-151 */
+ DAC960_ByteCount64_T RebuildBlockNumber; /* Bytes 152-159 */
+ DAC960_ByteCount64_T BackgroundInitializationBlockNumber; /* Bytes 160-167 */
+ DAC960_ByteCount64_T ForegroundInitializationBlockNumber; /* Bytes 168-175 */
+ DAC960_ByteCount64_T DataMigrationBlockNumber; /* Bytes 176-183 */
+ DAC960_ByteCount64_T PatrolOperationBlockNumber; /* Bytes 184-191 */
+ unsigned char Reserved2[64]; /* Bytes 192-255 */
+}
+DAC960_V2_LogicalDeviceInfo_T;
+
+
+/*
+ Define the DAC960 V2 Firmware Physical Device State type.
+*/
+
+typedef enum
+{
+ DAC960_V2_Device_Unconfigured = 0x00,
+ DAC960_V2_Device_Online = 0x01,
+ DAC960_V2_Device_Rebuild = 0x03,
+ DAC960_V2_Device_Missing = 0x04,
+ DAC960_V2_Device_Critical = 0x05,
+ DAC960_V2_Device_Dead = 0x08,
+ DAC960_V2_Device_SuspectedDead = 0x0C,
+ DAC960_V2_Device_CommandedOffline = 0x10,
+ DAC960_V2_Device_Standby = 0x21,
+ DAC960_V2_Device_InvalidState = 0xFF
+}
+__attribute__ ((packed))
+DAC960_V2_PhysicalDeviceState_T;
+
+
+/*
+ Define the DAC960 V2 Firmware Get Physical Device Info reply structure.
+*/
+
+typedef struct DAC960_V2_PhysicalDeviceInfo
+{
+ unsigned char :8; /* Byte 0 */
+ unsigned char Channel; /* Byte 1 */
+ unsigned char TargetID; /* Byte 2 */
+ unsigned char LogicalUnit; /* Byte 3 */
+ /* Configuration Status Bits */
+ bool PhysicalDeviceFaultTolerant:1; /* Byte 4 Bit 0 */
+ bool PhysicalDeviceConnected:1; /* Byte 4 Bit 1 */
+ bool PhysicalDeviceLocalToController:1; /* Byte 4 Bit 2 */
+ unsigned char :5; /* Byte 4 Bits 3-7 */
+ /* Multiple Host/Controller Status Bits */
+ bool RemoteHostSystemDead:1; /* Byte 5 Bit 0 */
+ bool RemoteControllerDead:1; /* Byte 5 Bit 1 */
+ unsigned char :6; /* Byte 5 Bits 2-7 */
+ DAC960_V2_PhysicalDeviceState_T PhysicalDeviceState; /* Byte 6 */
+ unsigned char NegotiatedDataWidthBits; /* Byte 7 */
+ unsigned short NegotiatedSynchronousMegaTransfers; /* Bytes 8-9 */
+ /* Multiported Physical Device Information */
+ unsigned char NumberOfPortConnections; /* Byte 10 */
+ unsigned char DriveAccessibilityBitmap; /* Byte 11 */
+ unsigned int :32; /* Bytes 12-15 */
+ unsigned char NetworkAddress[16]; /* Bytes 16-31 */
+ unsigned short MaximumTags; /* Bytes 32-33 */
+ /* Physical Device Operations Status */
+ bool ConsistencyCheckInProgress:1; /* Byte 34 Bit 0 */
+ bool RebuildInProgress:1; /* Byte 34 Bit 1 */
+ bool MakingDataConsistentInProgress:1; /* Byte 34 Bit 2 */
+ bool PhysicalDeviceInitializationInProgress:1; /* Byte 34 Bit 3 */
+ bool DataMigrationInProgress:1; /* Byte 34 Bit 4 */
+ bool PatrolOperationInProgress:1; /* Byte 34 Bit 5 */
+ unsigned char :2; /* Byte 34 Bits 6-7 */
+ unsigned char LongOperationStatus; /* Byte 35 */
+ unsigned char ParityErrors; /* Byte 36 */
+ unsigned char SoftErrors; /* Byte 37 */
+ unsigned char HardErrors; /* Byte 38 */
+ unsigned char MiscellaneousErrors; /* Byte 39 */
+ unsigned char CommandTimeouts; /* Byte 40 */
+ unsigned char Retries; /* Byte 41 */
+ unsigned char Aborts; /* Byte 42 */
+ unsigned char PredictedFailuresDetected; /* Byte 43 */
+ unsigned int :32; /* Bytes 44-47 */
+ unsigned short :16; /* Bytes 48-49 */
+ unsigned short DeviceBlockSizeInBytes; /* Bytes 50-51 */
+ unsigned int OriginalDeviceSize; /* Bytes 52-55 */
+ unsigned int ConfigurableDeviceSize; /* Bytes 56-59 */
+ unsigned int :32; /* Bytes 60-63 */
+ unsigned char PhysicalDeviceName[16]; /* Bytes 64-79 */
+ unsigned char Reserved1[16]; /* Bytes 80-95 */
+ unsigned char Reserved2[32]; /* Bytes 96-127 */
+ unsigned char SCSI_InquiryData[36]; /* Bytes 128-163 */
+ unsigned char Reserved3[20]; /* Bytes 164-183 */
+ unsigned char Reserved4[8]; /* Bytes 184-191 */
+ DAC960_ByteCount64_T LastReadBlockNumber; /* Bytes 192-199 */
+ DAC960_ByteCount64_T LastWrittenBlockNumber; /* Bytes 200-207 */
+ DAC960_ByteCount64_T ConsistencyCheckBlockNumber; /* Bytes 208-215 */
+ DAC960_ByteCount64_T RebuildBlockNumber; /* Bytes 216-223 */
+ DAC960_ByteCount64_T MakingDataConsistentBlockNumber; /* Bytes 224-231 */
+ DAC960_ByteCount64_T DeviceInitializationBlockNumber; /* Bytes 232-239 */
+ DAC960_ByteCount64_T DataMigrationBlockNumber; /* Bytes 240-247 */
+ DAC960_ByteCount64_T PatrolOperationBlockNumber; /* Bytes 248-255 */
+ unsigned char Reserved5[256]; /* Bytes 256-511 */
+}
+DAC960_V2_PhysicalDeviceInfo_T;
+
+
+/*
+ Define the DAC960 V2 Firmware Health Status Buffer structure.
+*/
+
+typedef struct DAC960_V2_HealthStatusBuffer
+{
+ unsigned int MicrosecondsFromControllerStartTime; /* Bytes 0-3 */
+ unsigned int MillisecondsFromControllerStartTime; /* Bytes 4-7 */
+ unsigned int SecondsFrom1January1970; /* Bytes 8-11 */
+ unsigned int :32; /* Bytes 12-15 */
+ unsigned int StatusChangeCounter; /* Bytes 16-19 */
+ unsigned int :32; /* Bytes 20-23 */
+ unsigned int DebugOutputMessageBufferIndex; /* Bytes 24-27 */
+ unsigned int CodedMessageBufferIndex; /* Bytes 28-31 */
+ unsigned int CurrentTimeTracePageNumber; /* Bytes 32-35 */
+ unsigned int CurrentProfilerPageNumber; /* Bytes 36-39 */
+ unsigned int NextEventSequenceNumber; /* Bytes 40-43 */
+ unsigned int :32; /* Bytes 44-47 */
+ unsigned char Reserved1[16]; /* Bytes 48-63 */
+ unsigned char Reserved2[64]; /* Bytes 64-127 */
+}
+DAC960_V2_HealthStatusBuffer_T;
+
+
+/*
+ Define the DAC960 V2 Firmware Get Event reply structure.
+*/
+
+typedef struct DAC960_V2_Event
+{
+ unsigned int EventSequenceNumber; /* Bytes 0-3 */
+ unsigned int EventTime; /* Bytes 4-7 */
+ unsigned int EventCode; /* Bytes 8-11 */
+ unsigned char :8; /* Byte 12 */
+ unsigned char Channel; /* Byte 13 */
+ unsigned char TargetID; /* Byte 14 */
+ unsigned char LogicalUnit; /* Byte 15 */
+ unsigned int :32; /* Bytes 16-19 */
+ unsigned int EventSpecificParameter; /* Bytes 20-23 */
+ unsigned char RequestSenseData[40]; /* Bytes 24-63 */
+}
+DAC960_V2_Event_T;
+
+
+/*
+ Define the DAC960 V2 Firmware Command Control Bits structure.
+*/
+
+typedef struct DAC960_V2_CommandControlBits
+{
+ bool ForceUnitAccess:1; /* Byte 0 Bit 0 */
+ bool DisablePageOut:1; /* Byte 0 Bit 1 */
+ bool :1; /* Byte 0 Bit 2 */
+ bool AdditionalScatterGatherListMemory:1; /* Byte 0 Bit 3 */
+ bool DataTransferControllerToHost:1; /* Byte 0 Bit 4 */
+ bool :1; /* Byte 0 Bit 5 */
+ bool NoAutoRequestSense:1; /* Byte 0 Bit 6 */
+ bool DisconnectProhibited:1; /* Byte 0 Bit 7 */
+}
+DAC960_V2_CommandControlBits_T;
+
+
+/*
+ Define the DAC960 V2 Firmware Command Timeout structure.
+*/
+
+typedef struct DAC960_V2_CommandTimeout
+{
+ unsigned char TimeoutValue:6; /* Byte 0 Bits 0-5 */
+ enum {
+ DAC960_V2_TimeoutScale_Seconds = 0,
+ DAC960_V2_TimeoutScale_Minutes = 1,
+ DAC960_V2_TimeoutScale_Hours = 2,
+ DAC960_V2_TimeoutScale_Reserved = 3
+ } __attribute__ ((packed)) TimeoutScale:2; /* Byte 0 Bits 6-7 */
+}
+DAC960_V2_CommandTimeout_T;
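+
+/*
+  Illustrative sketch (not part of the original driver): a 60 second timeout
+  can be encoded either as 60 units on the seconds scale or as 1 unit on the
+  minutes scale; the 6 bit TimeoutValue field limits the former to 63.
+*/
+
+static inline DAC960_V2_CommandTimeout_T
+  DAC960_V2_SketchTimeoutSeconds(unsigned char Seconds)
+{
+  DAC960_V2_CommandTimeout_T CommandTimeout;
+  CommandTimeout.TimeoutValue = Seconds;        /* Must be <= 63 */
+  CommandTimeout.TimeoutScale = DAC960_V2_TimeoutScale_Seconds;
+  return CommandTimeout;
+}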
+
+
+/*
+ Define the DAC960 V2 Firmware Physical Device structure.
+*/
+
+typedef struct DAC960_V2_PhysicalDevice
+{
+ unsigned char LogicalUnit; /* Byte 0 */
+ unsigned char TargetID; /* Byte 1 */
+ unsigned char Channel:3; /* Byte 2 Bits 0-2 */
+ unsigned char Controller:5; /* Byte 2 Bits 3-7 */
+}
+__attribute__ ((packed))
+DAC960_V2_PhysicalDevice_T;
+
+
+/*
+ Define the DAC960 V2 Firmware Logical Device structure.
+*/
+
+typedef struct DAC960_V2_LogicalDevice
+{
+ unsigned short LogicalDeviceNumber; /* Bytes 0-1 */
+ unsigned char :3; /* Byte 2 Bits 0-2 */
+ unsigned char Controller:5; /* Byte 2 Bits 3-7 */
+}
+__attribute__ ((packed))
+DAC960_V2_LogicalDevice_T;
+
+
+/*
+ Define the DAC960 V2 Firmware Operation Device type.
+*/
+
+typedef enum
+{
+ DAC960_V2_Physical_Device = 0x00,
+ DAC960_V2_RAID_Device = 0x01,
+ DAC960_V2_Physical_Channel = 0x02,
+ DAC960_V2_RAID_Channel = 0x03,
+ DAC960_V2_Physical_Controller = 0x04,
+ DAC960_V2_RAID_Controller = 0x05,
+ DAC960_V2_Configuration_Group = 0x10,
+ DAC960_V2_Enclosure = 0x11
+}
+__attribute__ ((packed))
+DAC960_V2_OperationDevice_T;
+
+
+/*
+ Define the DAC960 V2 Firmware Translate Physical To Logical Device structure.
+*/
+
+typedef struct DAC960_V2_PhysicalToLogicalDevice
+{
+ unsigned short LogicalDeviceNumber; /* Bytes 0-1 */
+ unsigned short :16; /* Bytes 2-3 */
+ unsigned char PreviousBootController; /* Byte 4 */
+ unsigned char PreviousBootChannel; /* Byte 5 */
+ unsigned char PreviousBootTargetID; /* Byte 6 */
+ unsigned char PreviousBootLogicalUnit; /* Byte 7 */
+}
+DAC960_V2_PhysicalToLogicalDevice_T;
+
+
+
+/*
+ Define the DAC960 V2 Firmware Scatter/Gather List Entry structure.
+*/
+
+typedef struct DAC960_V2_ScatterGatherSegment
+{
+ DAC960_BusAddress64_T SegmentDataPointer; /* Bytes 0-7 */
+ DAC960_ByteCount64_T SegmentByteCount; /* Bytes 8-15 */
+}
+DAC960_V2_ScatterGatherSegment_T;
+
+
+/*
+ Define the DAC960 V2 Firmware Data Transfer Memory Address structure.
+*/
+
+typedef union DAC960_V2_DataTransferMemoryAddress
+{
+ DAC960_V2_ScatterGatherSegment_T ScatterGatherSegments[2]; /* Bytes 0-31 */
+ struct {
+ unsigned short ScatterGatherList0Length; /* Bytes 0-1 */
+ unsigned short ScatterGatherList1Length; /* Bytes 2-3 */
+ unsigned short ScatterGatherList2Length; /* Bytes 4-5 */
+ unsigned short :16; /* Bytes 6-7 */
+ DAC960_BusAddress64_T ScatterGatherList0Address; /* Bytes 8-15 */
+ DAC960_BusAddress64_T ScatterGatherList1Address; /* Bytes 16-23 */
+ DAC960_BusAddress64_T ScatterGatherList2Address; /* Bytes 24-31 */
+ } ExtendedScatterGather;
+}
+DAC960_V2_DataTransferMemoryAddress_T;
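+
+/*
+  Illustrative sketch (not part of the original driver): a transfer that fits
+  in one or two segments is described directly in ScatterGatherSegments[];
+  larger transfers use the ExtendedScatterGather form to point at external
+  scatter/gather lists.
+*/
+
+static inline void DAC960_V2_SketchSingleSegment(
+  DAC960_V2_DataTransferMemoryAddress_T *DataTransferMemoryAddress,
+  DAC960_BusAddress64_T DataDMA, DAC960_ByteCount64_T ByteCount)
+{
+  DataTransferMemoryAddress->ScatterGatherSegments[0].SegmentDataPointer =
+    DataDMA;
+  DataTransferMemoryAddress->ScatterGatherSegments[0].SegmentByteCount =
+    ByteCount;
+}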
+
+
+/*
+ Define the 64 Byte DAC960 V2 Firmware Command Mailbox structure.
+*/
+
+typedef union DAC960_V2_CommandMailbox
+{
+ unsigned int Words[16]; /* Words 0-15 */
+ struct {
+ DAC960_V2_CommandIdentifier_T CommandIdentifier; /* Bytes 0-1 */
+ DAC960_V2_CommandOpcode_T CommandOpcode; /* Byte 2 */
+ DAC960_V2_CommandControlBits_T CommandControlBits; /* Byte 3 */
+ DAC960_ByteCount32_T DataTransferSize:24; /* Bytes 4-6 */
+ unsigned char DataTransferPageNumber; /* Byte 7 */
+ DAC960_BusAddress64_T RequestSenseBusAddress; /* Bytes 8-15 */
+ unsigned int :24; /* Bytes 16-18 */
+ DAC960_V2_CommandTimeout_T CommandTimeout; /* Byte 19 */
+ unsigned char RequestSenseSize; /* Byte 20 */
+ unsigned char IOCTL_Opcode; /* Byte 21 */
+ unsigned char Reserved[10]; /* Bytes 22-31 */
+ DAC960_V2_DataTransferMemoryAddress_T
+ DataTransferMemoryAddress; /* Bytes 32-63 */
+ } Common;
+ struct {
+ DAC960_V2_CommandIdentifier_T CommandIdentifier; /* Bytes 0-1 */
+ DAC960_V2_CommandOpcode_T CommandOpcode; /* Byte 2 */
+ DAC960_V2_CommandControlBits_T CommandControlBits; /* Byte 3 */
+ DAC960_ByteCount32_T DataTransferSize; /* Bytes 4-7 */
+ DAC960_BusAddress64_T RequestSenseBusAddress; /* Bytes 8-15 */
+ DAC960_V2_PhysicalDevice_T PhysicalDevice; /* Bytes 16-18 */
+ DAC960_V2_CommandTimeout_T CommandTimeout; /* Byte 19 */
+ unsigned char RequestSenseSize; /* Byte 20 */
+ unsigned char CDBLength; /* Byte 21 */
+ unsigned char SCSI_CDB[10]; /* Bytes 22-31 */
+ DAC960_V2_DataTransferMemoryAddress_T
+ DataTransferMemoryAddress; /* Bytes 32-63 */
+ } SCSI_10;
+ struct {
+ DAC960_V2_CommandIdentifier_T CommandIdentifier; /* Bytes 0-1 */
+ DAC960_V2_CommandOpcode_T CommandOpcode; /* Byte 2 */
+ DAC960_V2_CommandControlBits_T CommandControlBits; /* Byte 3 */
+ DAC960_ByteCount32_T DataTransferSize; /* Bytes 4-7 */
+ DAC960_BusAddress64_T RequestSenseBusAddress; /* Bytes 8-15 */
+ DAC960_V2_PhysicalDevice_T PhysicalDevice; /* Bytes 16-18 */
+ DAC960_V2_CommandTimeout_T CommandTimeout; /* Byte 19 */
+ unsigned char RequestSenseSize; /* Byte 20 */
+ unsigned char CDBLength; /* Byte 21 */
+ unsigned short :16; /* Bytes 22-23 */
+ DAC960_BusAddress64_T SCSI_CDB_BusAddress; /* Bytes 24-31 */
+ DAC960_V2_DataTransferMemoryAddress_T
+ DataTransferMemoryAddress; /* Bytes 32-63 */
+ } SCSI_255;
+ struct {
+ DAC960_V2_CommandIdentifier_T CommandIdentifier; /* Bytes 0-1 */
+ DAC960_V2_CommandOpcode_T CommandOpcode; /* Byte 2 */
+ DAC960_V2_CommandControlBits_T CommandControlBits; /* Byte 3 */
+ DAC960_ByteCount32_T DataTransferSize:24; /* Bytes 4-6 */
+ unsigned char DataTransferPageNumber; /* Byte 7 */
+ DAC960_BusAddress64_T RequestSenseBusAddress; /* Bytes 8-15 */
+ unsigned short :16; /* Bytes 16-17 */
+ unsigned char ControllerNumber; /* Byte 18 */
+ DAC960_V2_CommandTimeout_T CommandTimeout; /* Byte 19 */
+ unsigned char RequestSenseSize; /* Byte 20 */
+ unsigned char IOCTL_Opcode; /* Byte 21 */
+ unsigned char Reserved[10]; /* Bytes 22-31 */
+ DAC960_V2_DataTransferMemoryAddress_T
+ DataTransferMemoryAddress; /* Bytes 32-63 */
+ } ControllerInfo;
+ struct {
+ DAC960_V2_CommandIdentifier_T CommandIdentifier; /* Bytes 0-1 */
+ DAC960_V2_CommandOpcode_T CommandOpcode; /* Byte 2 */
+ DAC960_V2_CommandControlBits_T CommandControlBits; /* Byte 3 */
+ DAC960_ByteCount32_T DataTransferSize:24; /* Bytes 4-6 */
+ unsigned char DataTransferPageNumber; /* Byte 7 */
+ DAC960_BusAddress64_T RequestSenseBusAddress; /* Bytes 8-15 */
+ DAC960_V2_LogicalDevice_T LogicalDevice; /* Bytes 16-18 */
+ DAC960_V2_CommandTimeout_T CommandTimeout; /* Byte 19 */
+ unsigned char RequestSenseSize; /* Byte 20 */
+ unsigned char IOCTL_Opcode; /* Byte 21 */
+ unsigned char Reserved[10]; /* Bytes 22-31 */
+ DAC960_V2_DataTransferMemoryAddress_T
+ DataTransferMemoryAddress; /* Bytes 32-63 */
+ } LogicalDeviceInfo;
+ struct {
+ DAC960_V2_CommandIdentifier_T CommandIdentifier; /* Bytes 0-1 */
+ DAC960_V2_CommandOpcode_T CommandOpcode; /* Byte 2 */
+ DAC960_V2_CommandControlBits_T CommandControlBits; /* Byte 3 */
+ DAC960_ByteCount32_T DataTransferSize:24; /* Bytes 4-6 */
+ unsigned char DataTransferPageNumber; /* Byte 7 */
+ DAC960_BusAddress64_T RequestSenseBusAddress; /* Bytes 8-15 */
+ DAC960_V2_PhysicalDevice_T PhysicalDevice; /* Bytes 16-18 */
+ DAC960_V2_CommandTimeout_T CommandTimeout; /* Byte 19 */
+ unsigned char RequestSenseSize; /* Byte 20 */
+ unsigned char IOCTL_Opcode; /* Byte 21 */
+ unsigned char Reserved[10]; /* Bytes 22-31 */
+ DAC960_V2_DataTransferMemoryAddress_T
+ DataTransferMemoryAddress; /* Bytes 32-63 */
+ } PhysicalDeviceInfo;
+ struct {
+ DAC960_V2_CommandIdentifier_T CommandIdentifier; /* Bytes 0-1 */
+ DAC960_V2_CommandOpcode_T CommandOpcode; /* Byte 2 */
+ DAC960_V2_CommandControlBits_T CommandControlBits; /* Byte 3 */
+ DAC960_ByteCount32_T DataTransferSize:24; /* Bytes 4-6 */
+ unsigned char DataTransferPageNumber; /* Byte 7 */
+ DAC960_BusAddress64_T RequestSenseBusAddress; /* Bytes 8-15 */
+ unsigned short EventSequenceNumberHigh16; /* Bytes 16-17 */
+ unsigned char ControllerNumber; /* Byte 18 */
+ DAC960_V2_CommandTimeout_T CommandTimeout; /* Byte 19 */
+ unsigned char RequestSenseSize; /* Byte 20 */
+ unsigned char IOCTL_Opcode; /* Byte 21 */
+ unsigned short EventSequenceNumberLow16; /* Bytes 22-23 */
+ unsigned char Reserved[8]; /* Bytes 24-31 */
+ DAC960_V2_DataTransferMemoryAddress_T
+ DataTransferMemoryAddress; /* Bytes 32-63 */
+ } GetEvent;
+ struct {
+ DAC960_V2_CommandIdentifier_T CommandIdentifier; /* Bytes 0-1 */
+ DAC960_V2_CommandOpcode_T CommandOpcode; /* Byte 2 */
+ DAC960_V2_CommandControlBits_T CommandControlBits; /* Byte 3 */
+ DAC960_ByteCount32_T DataTransferSize:24; /* Bytes 4-6 */
+ unsigned char DataTransferPageNumber; /* Byte 7 */
+ DAC960_BusAddress64_T RequestSenseBusAddress; /* Bytes 8-15 */
+ DAC960_V2_LogicalDevice_T LogicalDevice; /* Bytes 16-18 */
+ DAC960_V2_CommandTimeout_T CommandTimeout; /* Byte 19 */
+ unsigned char RequestSenseSize; /* Byte 20 */
+ unsigned char IOCTL_Opcode; /* Byte 21 */
+ union {
+ DAC960_V2_LogicalDeviceState_T LogicalDeviceState;
+ DAC960_V2_PhysicalDeviceState_T PhysicalDeviceState;
+ } DeviceState; /* Byte 22 */
+ unsigned char Reserved[9]; /* Bytes 23-31 */
+ DAC960_V2_DataTransferMemoryAddress_T
+ DataTransferMemoryAddress; /* Bytes 32-63 */
+ } SetDeviceState;
+ struct {
+ DAC960_V2_CommandIdentifier_T CommandIdentifier; /* Bytes 0-1 */
+ DAC960_V2_CommandOpcode_T CommandOpcode; /* Byte 2 */
+ DAC960_V2_CommandControlBits_T CommandControlBits; /* Byte 3 */
+ DAC960_ByteCount32_T DataTransferSize:24; /* Bytes 4-6 */
+ unsigned char DataTransferPageNumber; /* Byte 7 */
+ DAC960_BusAddress64_T RequestSenseBusAddress; /* Bytes 8-15 */
+ DAC960_V2_LogicalDevice_T LogicalDevice; /* Bytes 16-18 */
+ DAC960_V2_CommandTimeout_T CommandTimeout; /* Byte 19 */
+ unsigned char RequestSenseSize; /* Byte 20 */
+ unsigned char IOCTL_Opcode; /* Byte 21 */
+ bool RestoreConsistency:1; /* Byte 22 Bit 0 */
+ bool InitializedAreaOnly:1; /* Byte 22 Bit 1 */
+ unsigned char :6; /* Byte 22 Bits 2-7 */
+ unsigned char Reserved[9]; /* Bytes 23-31 */
+ DAC960_V2_DataTransferMemoryAddress_T
+ DataTransferMemoryAddress; /* Bytes 32-63 */
+ } ConsistencyCheck;
+ struct {
+ DAC960_V2_CommandIdentifier_T CommandIdentifier; /* Bytes 0-1 */
+ DAC960_V2_CommandOpcode_T CommandOpcode; /* Byte 2 */
+ DAC960_V2_CommandControlBits_T CommandControlBits; /* Byte 3 */
+ unsigned char FirstCommandMailboxSizeKB; /* Byte 4 */
+ unsigned char FirstStatusMailboxSizeKB; /* Byte 5 */
+ unsigned char SecondCommandMailboxSizeKB; /* Byte 6 */
+ unsigned char SecondStatusMailboxSizeKB; /* Byte 7 */
+ DAC960_BusAddress64_T RequestSenseBusAddress; /* Bytes 8-15 */
+ unsigned int :24; /* Bytes 16-18 */
+ DAC960_V2_CommandTimeout_T CommandTimeout; /* Byte 19 */
+ unsigned char RequestSenseSize; /* Byte 20 */
+ unsigned char IOCTL_Opcode; /* Byte 21 */
+ unsigned char HealthStatusBufferSizeKB; /* Byte 22 */
+ unsigned char :8; /* Byte 23 */
+ DAC960_BusAddress64_T HealthStatusBufferBusAddress; /* Bytes 24-31 */
+ DAC960_BusAddress64_T FirstCommandMailboxBusAddress; /* Bytes 32-39 */
+ DAC960_BusAddress64_T FirstStatusMailboxBusAddress; /* Bytes 40-47 */
+ DAC960_BusAddress64_T SecondCommandMailboxBusAddress; /* Bytes 48-55 */
+ DAC960_BusAddress64_T SecondStatusMailboxBusAddress; /* Bytes 56-63 */
+ } SetMemoryMailbox;
+ struct {
+ DAC960_V2_CommandIdentifier_T CommandIdentifier; /* Bytes 0-1 */
+ DAC960_V2_CommandOpcode_T CommandOpcode; /* Byte 2 */
+ DAC960_V2_CommandControlBits_T CommandControlBits; /* Byte 3 */
+ DAC960_ByteCount32_T DataTransferSize:24; /* Bytes 4-6 */
+ unsigned char DataTransferPageNumber; /* Byte 7 */
+ DAC960_BusAddress64_T RequestSenseBusAddress; /* Bytes 8-15 */
+ DAC960_V2_PhysicalDevice_T PhysicalDevice; /* Bytes 16-18 */
+ DAC960_V2_CommandTimeout_T CommandTimeout; /* Byte 19 */
+ unsigned char RequestSenseSize; /* Byte 20 */
+ unsigned char IOCTL_Opcode; /* Byte 21 */
+ DAC960_V2_OperationDevice_T OperationDevice; /* Byte 22 */
+ unsigned char Reserved[9]; /* Bytes 23-31 */
+ DAC960_V2_DataTransferMemoryAddress_T
+ DataTransferMemoryAddress; /* Bytes 32-63 */
+ } DeviceOperation;
+}
+DAC960_V2_CommandMailbox_T;
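+
+/*
+  Illustrative sketch (not part of the original driver): how the
+  ControllerInfo variant of the Command Mailbox might be prepared for a
+  Get Controller Info IOCTL, assuming the reply buffer has already been
+  mapped for DMA by the caller.
+*/
+
+static inline void DAC960_V2_SketchControllerInfoMailbox(
+  DAC960_V2_CommandMailbox_T *CommandMailbox, DAC960_BusAddress64_T InfoDMA)
+{
+  memset(CommandMailbox, 0, sizeof(DAC960_V2_CommandMailbox_T));
+  CommandMailbox->ControllerInfo.CommandIdentifier = 1;
+  CommandMailbox->ControllerInfo.CommandOpcode = DAC960_V2_IOCTL;
+  CommandMailbox->ControllerInfo.CommandControlBits
+                 .DataTransferControllerToHost = true;
+  CommandMailbox->ControllerInfo.CommandControlBits
+                 .NoAutoRequestSense = true;
+  CommandMailbox->ControllerInfo.DataTransferSize =
+    sizeof(DAC960_V2_ControllerInfo_T);
+  CommandMailbox->ControllerInfo.ControllerNumber = 0;
+  CommandMailbox->ControllerInfo.IOCTL_Opcode = DAC960_V2_GetControllerInfo;
+  CommandMailbox->ControllerInfo.DataTransferMemoryAddress
+                 .ScatterGatherSegments[0].SegmentDataPointer = InfoDMA;
+  CommandMailbox->ControllerInfo.DataTransferMemoryAddress
+                 .ScatterGatherSegments[0].SegmentByteCount =
+    sizeof(DAC960_V2_ControllerInfo_T);
+}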
+
+
+/*
+ Define the DAC960 Driver IOCTL requests.
+*/
+
+#define DAC960_IOCTL_GET_CONTROLLER_COUNT 0xDAC001
+#define DAC960_IOCTL_GET_CONTROLLER_INFO 0xDAC002
+#define DAC960_IOCTL_V1_EXECUTE_COMMAND 0xDAC003
+#define DAC960_IOCTL_V2_EXECUTE_COMMAND 0xDAC004
+#define DAC960_IOCTL_V2_GET_HEALTH_STATUS 0xDAC005
+
+
+/*
+ Define the DAC960_IOCTL_GET_CONTROLLER_INFO reply structure.
+*/
+
+typedef struct DAC960_ControllerInfo
+{
+ unsigned char ControllerNumber;
+ unsigned char FirmwareType;
+ unsigned char Channels;
+ unsigned char Targets;
+ unsigned char PCI_Bus;
+ unsigned char PCI_Device;
+ unsigned char PCI_Function;
+ unsigned char IRQ_Channel;
+ DAC960_PCI_Address_T PCI_Address;
+ unsigned char ModelName[20];
+ unsigned char FirmwareVersion[12];
+}
+DAC960_ControllerInfo_T;
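+
+/*
+  Illustrative sketch (not part of the original driver) of user mode usage of
+  the Driver IOCTL requests above.  It assumes the GAM device node is
+  available as /dev/dac960_gam and that the structures in this file are
+  visible to user space; it is disabled so it cannot affect kernel builds.
+*/
+
+#if 0
+#include <fcntl.h>
+#include <stdio.h>
+#include <sys/ioctl.h>
+
+int main(void)
+{
+  DAC960_ControllerInfo_T ControllerInfo = { .ControllerNumber = 0 };
+  int ControllerCount, fd = open("/dev/dac960_gam", O_RDONLY);
+  if (fd < 0) return 1;
+  ControllerCount = ioctl(fd, DAC960_IOCTL_GET_CONTROLLER_COUNT, 0);
+  if (ControllerCount > 0 &&
+      ioctl(fd, DAC960_IOCTL_GET_CONTROLLER_INFO, &ControllerInfo) == 0)
+    printf("%d controller(s); controller 0 is %s (firmware %s)\n",
+           ControllerCount, (char *) ControllerInfo.ModelName,
+           (char *) ControllerInfo.FirmwareVersion);
+  return 0;
+}
+#endif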
+
+
+/*
+ Define the User Mode DAC960_IOCTL_V1_EXECUTE_COMMAND request structure.
+*/
+
+typedef struct DAC960_V1_UserCommand
+{
+ unsigned char ControllerNumber;
+ DAC960_V1_CommandMailbox_T CommandMailbox;
+ int DataTransferLength;
+ void __user *DataTransferBuffer;
+ DAC960_V1_DCDB_T __user *DCDB;
+}
+DAC960_V1_UserCommand_T;
+
+
+/*
+ Define the Kernel Mode DAC960_IOCTL_V1_EXECUTE_COMMAND request structure.
+*/
+
+typedef struct DAC960_V1_KernelCommand
+{
+ unsigned char ControllerNumber;
+ DAC960_V1_CommandMailbox_T CommandMailbox;
+ int DataTransferLength;
+ void *DataTransferBuffer;
+ DAC960_V1_DCDB_T *DCDB;
+ DAC960_V1_CommandStatus_T CommandStatus;
+ void (*CompletionFunction)(struct DAC960_V1_KernelCommand *);
+ void *CompletionData;
+}
+DAC960_V1_KernelCommand_T;
+
+
+/*
+ Define the User Mode DAC960_IOCTL_V2_EXECUTE_COMMAND request structure.
+*/
+
+typedef struct DAC960_V2_UserCommand
+{
+ unsigned char ControllerNumber;
+ DAC960_V2_CommandMailbox_T CommandMailbox;
+ int DataTransferLength;
+ int RequestSenseLength;
+ void __user *DataTransferBuffer;
+ void __user *RequestSenseBuffer;
+}
+DAC960_V2_UserCommand_T;
+
+
+/*
+ Define the Kernel Mode DAC960_IOCTL_V2_EXECUTE_COMMAND request structure.
+*/
+
+typedef struct DAC960_V2_KernelCommand
+{
+ unsigned char ControllerNumber;
+ DAC960_V2_CommandMailbox_T CommandMailbox;
+ int DataTransferLength;
+ int RequestSenseLength;
+ void *DataTransferBuffer;
+ void *RequestSenseBuffer;
+ DAC960_V2_CommandStatus_T CommandStatus;
+ void (*CompletionFunction)(struct DAC960_V2_KernelCommand *);
+ void *CompletionData;
+}
+DAC960_V2_KernelCommand_T;
+
+
+/*
+ Define the User Mode DAC960_IOCTL_V2_GET_HEALTH_STATUS request structure.
+*/
+
+typedef struct DAC960_V2_GetHealthStatus
+{
+ unsigned char ControllerNumber;
+ DAC960_V2_HealthStatusBuffer_T __user *HealthStatusBuffer;
+}
+DAC960_V2_GetHealthStatus_T;
+
+
+/*
+ Import the Kernel Mode IOCTL interface.
+*/
+
+extern int DAC960_KernelIOCTL(unsigned int Request, void *Argument);
+
+
+/*
+ DAC960_DriverVersion protects the private portion of this file.
+*/
+
+#ifdef DAC960_DriverVersion
+
+
+/*
+ Define the maximum Driver Queue Depth and Controller Queue Depth supported
+ by DAC960 V1 and V2 Firmware Controllers.
+*/
+
+#define DAC960_MaxDriverQueueDepth 511
+#define DAC960_MaxControllerQueueDepth 512
+
+
+/*
+ Define the maximum number of Scatter/Gather Segments supported for any
+ DAC960 V1 and V2 Firmware controller.
+*/
+
+#define DAC960_V1_ScatterGatherLimit 33
+#define DAC960_V2_ScatterGatherLimit 128
+
+
+/*
+ Define the number of Command Mailboxes and Status Mailboxes used by the
+ DAC960 V1 and V2 Firmware Memory Mailbox Interface.
+*/
+
+#define DAC960_V1_CommandMailboxCount 256
+#define DAC960_V1_StatusMailboxCount 1024
+#define DAC960_V2_CommandMailboxCount 512
+#define DAC960_V2_StatusMailboxCount 512
+
+
+/*
+ Define the DAC960 Controller Monitoring Timer Interval.
+*/
+
+#define DAC960_MonitoringTimerInterval (10 * HZ)
+
+
+/*
+ Define the DAC960 Controller Secondary Monitoring Interval.
+*/
+
+#define DAC960_SecondaryMonitoringInterval (60 * HZ)
+
+
+/*
+ Define the DAC960 Controller Health Status Monitoring Interval.
+*/
+
+#define DAC960_HealthStatusMonitoringInterval (1 * HZ)
+
+
+/*
+ Define the DAC960 Controller Progress Reporting Interval.
+*/
+
+#define DAC960_ProgressReportingInterval (60 * HZ)
+
+
+/*
+ Define the maximum number of Partitions allowed for each Logical Drive.
+*/
+
+#define DAC960_MaxPartitions 8
+#define DAC960_MaxPartitionsBits 3
+
+/*
+ Define the DAC960 Controller fixed Block Size and Block Size Bits.
+*/
+
+#define DAC960_BlockSize 512
+#define DAC960_BlockSizeBits 9
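+
+/*
+  Illustrative sketch (not part of the original driver): since the block size
+  is fixed at 512 bytes (1 << 9), byte counts convert to block counts by
+  shifting with DAC960_BlockSizeBits.
+*/
+
+static inline unsigned int DAC960_SketchBytesToBlocks(unsigned int ByteCount)
+{
+  return ByteCount >> DAC960_BlockSizeBits;
+}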
+
+
+/*
+ Define the number of Command structures that should be allocated as a
+ group to optimize kernel memory allocation.
+*/
+
+#define DAC960_V1_CommandAllocationGroupSize 11
+#define DAC960_V2_CommandAllocationGroupSize 29
+
+
+/*
+ Define the Controller Line Buffer, Progress Buffer, User Message, and
+ Initial Status Buffer sizes.
+*/
+
+#define DAC960_LineBufferSize 100
+#define DAC960_ProgressBufferSize 200
+#define DAC960_UserMessageSize 200
+#define DAC960_InitialStatusBufferSize (8192-32)
+
+
+/*
+ Define the DAC960 Controller Firmware Types.
+*/
+
+typedef enum
+{
+ DAC960_V1_Controller = 1,
+ DAC960_V2_Controller = 2
+}
+DAC960_FirmwareType_T;
+
+
+/*
+ Define the DAC960 Controller Hardware Types.
+*/
+
+typedef enum
+{
+ DAC960_BA_Controller = 1, /* eXtremeRAID 2000 */
+ DAC960_LP_Controller = 2, /* AcceleRAID 352 */
+ DAC960_LA_Controller = 3, /* DAC1164P */
+ DAC960_PG_Controller = 4, /* DAC960PTL/PJ/PG */
+ DAC960_PD_Controller = 5, /* DAC960PU/PD/PL/P */
+ DAC960_P_Controller = 6, /* DAC960PU/PD/PL/P */
+ DAC960_GEM_Controller = 7, /* AcceleRAID 4/5/600 */
+}
+DAC960_HardwareType_T;
+
+
+/*
+ Define the Driver Message Levels.
+*/
+
+typedef enum DAC960_MessageLevel
+{
+ DAC960_AnnounceLevel = 0,
+ DAC960_InfoLevel = 1,
+ DAC960_NoticeLevel = 2,
+ DAC960_WarningLevel = 3,
+ DAC960_ErrorLevel = 4,
+ DAC960_ProgressLevel = 5,
+ DAC960_CriticalLevel = 6,
+ DAC960_UserCriticalLevel = 7
+}
+DAC960_MessageLevel_T;
+
+static char
+ *DAC960_MessageLevelMap[] =
+ { KERN_NOTICE, KERN_NOTICE, KERN_NOTICE, KERN_WARNING,
+ KERN_ERR, KERN_CRIT, KERN_CRIT, KERN_CRIT };
+
+
+/*
+ Define Driver Message macros.
+*/
+
+#define DAC960_Announce(Format, Arguments...) \
+ DAC960_Message(DAC960_AnnounceLevel, Format, ##Arguments)
+
+#define DAC960_Info(Format, Arguments...) \
+ DAC960_Message(DAC960_InfoLevel, Format, ##Arguments)
+
+#define DAC960_Notice(Format, Arguments...) \
+ DAC960_Message(DAC960_NoticeLevel, Format, ##Arguments)
+
+#define DAC960_Warning(Format, Arguments...) \
+ DAC960_Message(DAC960_WarningLevel, Format, ##Arguments)
+
+#define DAC960_Error(Format, Arguments...) \
+ DAC960_Message(DAC960_ErrorLevel, Format, ##Arguments)
+
+#define DAC960_Progress(Format, Arguments...) \
+ DAC960_Message(DAC960_ProgressLevel, Format, ##Arguments)
+
+#define DAC960_Critical(Format, Arguments...) \
+ DAC960_Message(DAC960_CriticalLevel, Format, ##Arguments)
+
+#define DAC960_UserCritical(Format, Arguments...) \
+ DAC960_Message(DAC960_UserCriticalLevel, Format, ##Arguments)
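+
+/*
+  Illustrative sketch (not part of the original driver): DAC960_Message
+  itself is implemented in DAC960.c; conceptually it prefixes the formatted
+  message with the printk level string selected by DAC960_MessageLevelMap,
+  roughly as follows.
+*/
+
+static inline void DAC960_SketchMessage(DAC960_MessageLevel_T MessageLevel,
+                                        const char *Message)
+{
+  printk("%sDAC960: %s", DAC960_MessageLevelMap[MessageLevel], Message);
+}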
+
+
+struct DAC960_privdata {
+ DAC960_HardwareType_T HardwareType;
+ DAC960_FirmwareType_T FirmwareType;
+ irq_handler_t InterruptHandler;
+ unsigned int MemoryWindowSize;
+};
+
+
+/*
+ Define the DAC960 V1 Firmware Controller Status Mailbox structure.
+*/
+
+typedef union DAC960_V1_StatusMailbox
+{
+ unsigned int Word; /* Word 0 */
+ struct {
+ DAC960_V1_CommandIdentifier_T CommandIdentifier; /* Byte 0 */
+ unsigned char :7; /* Byte 1 Bits 0-6 */
+ bool Valid:1; /* Byte 1 Bit 7 */
+ DAC960_V1_CommandStatus_T CommandStatus; /* Bytes 2-3 */
+ } Fields;
+}
+DAC960_V1_StatusMailbox_T;
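+
+/*
+  Illustrative sketch (not part of the original driver): the memory mailbox
+  interrupt handlers consume completions by testing the Valid bit of the
+  next Status Mailbox, recording the identifier and status, clearing the
+  slot, and wrapping around at the end of the ring, roughly as follows.
+*/
+
+static inline void DAC960_V1_SketchDrainStatusMailboxes(
+  DAC960_V1_StatusMailbox_T **NextStatusMailbox,
+  DAC960_V1_StatusMailbox_T *FirstStatusMailbox,
+  DAC960_V1_StatusMailbox_T *LastStatusMailbox)
+{
+  while ((*NextStatusMailbox)->Fields.Valid)
+    {
+      DAC960_V1_CommandIdentifier_T CommandIdentifier =
+        (*NextStatusMailbox)->Fields.CommandIdentifier;
+      DAC960_V1_CommandStatus_T CommandStatus =
+        (*NextStatusMailbox)->Fields.CommandStatus;
+      /* ... look up the command by CommandIdentifier and complete it
+             with CommandStatus ... */
+      (void) CommandIdentifier;
+      (void) CommandStatus;
+      (*NextStatusMailbox)->Word = 0;
+      if (++(*NextStatusMailbox) > LastStatusMailbox)
+        *NextStatusMailbox = FirstStatusMailbox;
+    }
+}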
+
+
+/*
+ Define the DAC960 V2 Firmware Controller Status Mailbox structure.
+*/
+
+typedef union DAC960_V2_StatusMailbox
+{
+ unsigned int Words[2]; /* Words 0-1 */
+ struct {
+ DAC960_V2_CommandIdentifier_T CommandIdentifier; /* Bytes 0-1 */
+ DAC960_V2_CommandStatus_T CommandStatus; /* Byte 2 */
+ unsigned char RequestSenseLength; /* Byte 3 */
+ int DataTransferResidue; /* Bytes 4-7 */
+ } Fields;
+}
+DAC960_V2_StatusMailbox_T;
+
+
+/*
+ Define the DAC960 Driver Command Types.
+*/
+
+typedef enum
+{
+ DAC960_ReadCommand = 1,
+ DAC960_WriteCommand = 2,
+ DAC960_ReadRetryCommand = 3,
+ DAC960_WriteRetryCommand = 4,
+ DAC960_MonitoringCommand = 5,
+ DAC960_ImmediateCommand = 6,
+ DAC960_QueuedCommand = 7
+}
+DAC960_CommandType_T;
+
+
+/*
+ Define the DAC960 Driver Command structure.
+*/
+
+typedef struct DAC960_Command
+{
+ int CommandIdentifier;
+ DAC960_CommandType_T CommandType;
+ struct DAC960_Controller *Controller;
+ struct DAC960_Command *Next;
+ struct completion *Completion;
+ unsigned int LogicalDriveNumber;
+ unsigned int BlockNumber;
+ unsigned int BlockCount;
+ unsigned int SegmentCount;
+ int DmaDirection;
+ struct scatterlist *cmd_sglist;
+ struct request *Request;
+ union {
+ struct {
+ DAC960_V1_CommandMailbox_T CommandMailbox;
+ DAC960_V1_KernelCommand_T *KernelCommand;
+ DAC960_V1_CommandStatus_T CommandStatus;
+ DAC960_V1_ScatterGatherSegment_T *ScatterGatherList;
+ dma_addr_t ScatterGatherListDMA;
+ struct scatterlist ScatterList[DAC960_V1_ScatterGatherLimit];
+ unsigned int EndMarker[0];
+ } V1;
+ struct {
+ DAC960_V2_CommandMailbox_T CommandMailbox;
+ DAC960_V2_KernelCommand_T *KernelCommand;
+ DAC960_V2_CommandStatus_T CommandStatus;
+ unsigned char RequestSenseLength;
+ int DataTransferResidue;
+ DAC960_V2_ScatterGatherSegment_T *ScatterGatherList;
+ dma_addr_t ScatterGatherListDMA;
+ DAC960_SCSI_RequestSense_T *RequestSense;
+ dma_addr_t RequestSenseDMA;
+ struct scatterlist ScatterList[DAC960_V2_ScatterGatherLimit];
+ unsigned int EndMarker[0];
+ } V2;
+ } FW;
+}
+DAC960_Command_T;
+
+
+/*
+ Define the DAC960 Driver Controller structure.
+*/
+
+typedef struct DAC960_Controller
+{
+ void __iomem *BaseAddress;
+ void __iomem *MemoryMappedAddress;
+ DAC960_FirmwareType_T FirmwareType;
+ DAC960_HardwareType_T HardwareType;
+ DAC960_IO_Address_T IO_Address;
+ DAC960_PCI_Address_T PCI_Address;
+ struct pci_dev *PCIDevice;
+ unsigned char ControllerNumber;
+ unsigned char ControllerName[4];
+ unsigned char ModelName[20];
+ unsigned char FullModelName[28];
+ unsigned char FirmwareVersion[12];
+ unsigned char Bus;
+ unsigned char Device;
+ unsigned char Function;
+ unsigned char IRQ_Channel;
+ unsigned char Channels;
+ unsigned char Targets;
+ unsigned char MemorySize;
+ unsigned char LogicalDriveCount;
+ unsigned short CommandAllocationGroupSize;
+ unsigned short ControllerQueueDepth;
+ unsigned short DriverQueueDepth;
+ unsigned short MaxBlocksPerCommand;
+ unsigned short ControllerScatterGatherLimit;
+ unsigned short DriverScatterGatherLimit;
+ u64 BounceBufferLimit;
+ unsigned int CombinedStatusBufferLength;
+ unsigned int InitialStatusLength;
+ unsigned int CurrentStatusLength;
+ unsigned int ProgressBufferLength;
+ unsigned int UserStatusLength;
+ struct dma_loaf DmaPages;
+ unsigned long MonitoringTimerCount;
+ unsigned long PrimaryMonitoringTime;
+ unsigned long SecondaryMonitoringTime;
+ unsigned long ShutdownMonitoringTimer;
+ unsigned long LastProgressReportTime;
+ unsigned long LastCurrentStatusTime;
+ bool ControllerInitialized;
+ bool MonitoringCommandDeferred;
+ bool EphemeralProgressMessage;
+ bool DriveSpinUpMessageDisplayed;
+ bool MonitoringAlertMode;
+ bool SuppressEnclosureMessages;
+ struct timer_list MonitoringTimer;
+ struct gendisk *disks[DAC960_MaxLogicalDrives];
+ struct pci_pool *ScatterGatherPool;
+ DAC960_Command_T *FreeCommands;
+ unsigned char *CombinedStatusBuffer;
+ unsigned char *CurrentStatusBuffer;
+ struct request_queue *RequestQueue[DAC960_MaxLogicalDrives];
+ int req_q_index;
+ spinlock_t queue_lock;
+ wait_queue_head_t CommandWaitQueue;
+ wait_queue_head_t HealthStatusWaitQueue;
+ DAC960_Command_T InitialCommand;
+ DAC960_Command_T *Commands[DAC960_MaxDriverQueueDepth];
+ struct proc_dir_entry *ControllerProcEntry;
+ bool LogicalDriveInitiallyAccessible[DAC960_MaxLogicalDrives];
+ void (*QueueCommand)(DAC960_Command_T *Command);
+ bool (*ReadControllerConfiguration)(struct DAC960_Controller *);
+ bool (*ReadDeviceConfiguration)(struct DAC960_Controller *);
+ bool (*ReportDeviceConfiguration)(struct DAC960_Controller *);
+ void (*QueueReadWriteCommand)(DAC960_Command_T *Command);
+ union {
+ struct {
+ unsigned char GeometryTranslationHeads;
+ unsigned char GeometryTranslationSectors;
+ unsigned char PendingRebuildFlag;
+ unsigned short StripeSize;
+ unsigned short SegmentSize;
+ unsigned short NewEventLogSequenceNumber;
+ unsigned short OldEventLogSequenceNumber;
+ unsigned short DeviceStateChannel;
+ unsigned short DeviceStateTargetID;
+ bool DualModeMemoryMailboxInterface;
+ bool BackgroundInitializationStatusSupported;
+ bool SAFTE_EnclosureManagementEnabled;
+ bool NeedLogicalDriveInformation;
+ bool NeedErrorTableInformation;
+ bool NeedDeviceStateInformation;
+ bool NeedDeviceInquiryInformation;
+ bool NeedDeviceSerialNumberInformation;
+ bool NeedRebuildProgress;
+ bool NeedConsistencyCheckProgress;
+ bool NeedBackgroundInitializationStatus;
+ bool StartDeviceStateScan;
+ bool RebuildProgressFirst;
+ bool RebuildFlagPending;
+ bool RebuildStatusPending;
+
+ dma_addr_t FirstCommandMailboxDMA;
+ DAC960_V1_CommandMailbox_T *FirstCommandMailbox;
+ DAC960_V1_CommandMailbox_T *LastCommandMailbox;
+ DAC960_V1_CommandMailbox_T *NextCommandMailbox;
+ DAC960_V1_CommandMailbox_T *PreviousCommandMailbox1;
+ DAC960_V1_CommandMailbox_T *PreviousCommandMailbox2;
+
+ dma_addr_t FirstStatusMailboxDMA;
+ DAC960_V1_StatusMailbox_T *FirstStatusMailbox;
+ DAC960_V1_StatusMailbox_T *LastStatusMailbox;
+ DAC960_V1_StatusMailbox_T *NextStatusMailbox;
+
+ DAC960_V1_DCDB_T *MonitoringDCDB;
+ dma_addr_t MonitoringDCDB_DMA;
+
+ DAC960_V1_Enquiry_T Enquiry;
+ DAC960_V1_Enquiry_T *NewEnquiry;
+ dma_addr_t NewEnquiryDMA;
+
+ DAC960_V1_ErrorTable_T ErrorTable;
+ DAC960_V1_ErrorTable_T *NewErrorTable;
+ dma_addr_t NewErrorTableDMA;
+
+ DAC960_V1_EventLogEntry_T *EventLogEntry;
+ dma_addr_t EventLogEntryDMA;
+
+ DAC960_V1_RebuildProgress_T *RebuildProgress;
+ dma_addr_t RebuildProgressDMA;
+ DAC960_V1_CommandStatus_T LastRebuildStatus;
+ DAC960_V1_CommandStatus_T PendingRebuildStatus;
+
+ DAC960_V1_LogicalDriveInformationArray_T LogicalDriveInformation;
+ DAC960_V1_LogicalDriveInformationArray_T *NewLogicalDriveInformation;
+ dma_addr_t NewLogicalDriveInformationDMA;
+
+ DAC960_V1_BackgroundInitializationStatus_T
+ *BackgroundInitializationStatus;
+ dma_addr_t BackgroundInitializationStatusDMA;
+ DAC960_V1_BackgroundInitializationStatus_T
+ LastBackgroundInitializationStatus;
+
+ DAC960_V1_DeviceState_T
+ DeviceState[DAC960_V1_MaxChannels][DAC960_V1_MaxTargets];
+ DAC960_V1_DeviceState_T *NewDeviceState;
+ dma_addr_t NewDeviceStateDMA;
+
+ DAC960_SCSI_Inquiry_T
+ InquiryStandardData[DAC960_V1_MaxChannels][DAC960_V1_MaxTargets];
+ DAC960_SCSI_Inquiry_T *NewInquiryStandardData;
+ dma_addr_t NewInquiryStandardDataDMA;
+
+ DAC960_SCSI_Inquiry_UnitSerialNumber_T
+ InquiryUnitSerialNumber[DAC960_V1_MaxChannels][DAC960_V1_MaxTargets];
+ DAC960_SCSI_Inquiry_UnitSerialNumber_T *NewInquiryUnitSerialNumber;
+ dma_addr_t NewInquiryUnitSerialNumberDMA;
+
+ int DeviceResetCount[DAC960_V1_MaxChannels][DAC960_V1_MaxTargets];
+ bool DirectCommandActive[DAC960_V1_MaxChannels][DAC960_V1_MaxTargets];
+ } V1;
+ struct {
+ unsigned int StatusChangeCounter;
+ unsigned int NextEventSequenceNumber;
+ unsigned int PhysicalDeviceIndex;
+ bool NeedLogicalDeviceInformation;
+ bool NeedPhysicalDeviceInformation;
+ bool NeedDeviceSerialNumberInformation;
+ bool StartLogicalDeviceInformationScan;
+ bool StartPhysicalDeviceInformationScan;
+ struct pci_pool *RequestSensePool;
+
+ dma_addr_t FirstCommandMailboxDMA;
+ DAC960_V2_CommandMailbox_T *FirstCommandMailbox;
+ DAC960_V2_CommandMailbox_T *LastCommandMailbox;
+ DAC960_V2_CommandMailbox_T *NextCommandMailbox;
+ DAC960_V2_CommandMailbox_T *PreviousCommandMailbox1;
+ DAC960_V2_CommandMailbox_T *PreviousCommandMailbox2;
+
+ dma_addr_t FirstStatusMailboxDMA;
+ DAC960_V2_StatusMailbox_T *FirstStatusMailbox;
+ DAC960_V2_StatusMailbox_T *LastStatusMailbox;
+ DAC960_V2_StatusMailbox_T *NextStatusMailbox;
+
+ dma_addr_t HealthStatusBufferDMA;
+ DAC960_V2_HealthStatusBuffer_T *HealthStatusBuffer;
+
+ DAC960_V2_ControllerInfo_T ControllerInformation;
+ DAC960_V2_ControllerInfo_T *NewControllerInformation;
+ dma_addr_t NewControllerInformationDMA;
+
+ DAC960_V2_LogicalDeviceInfo_T
+ *LogicalDeviceInformation[DAC960_MaxLogicalDrives];
+ DAC960_V2_LogicalDeviceInfo_T *NewLogicalDeviceInformation;
+ dma_addr_t NewLogicalDeviceInformationDMA;
+
+ DAC960_V2_PhysicalDeviceInfo_T
+ *PhysicalDeviceInformation[DAC960_V2_MaxPhysicalDevices];
+ DAC960_V2_PhysicalDeviceInfo_T *NewPhysicalDeviceInformation;
+ dma_addr_t NewPhysicalDeviceInformationDMA;
+
+ DAC960_SCSI_Inquiry_UnitSerialNumber_T *NewInquiryUnitSerialNumber;
+ dma_addr_t NewInquiryUnitSerialNumberDMA;
+ DAC960_SCSI_Inquiry_UnitSerialNumber_T
+ *InquiryUnitSerialNumber[DAC960_V2_MaxPhysicalDevices];
+
+ DAC960_V2_Event_T *Event;
+ dma_addr_t EventDMA;
+
+ DAC960_V2_PhysicalToLogicalDevice_T *PhysicalToLogicalDevice;
+ dma_addr_t PhysicalToLogicalDeviceDMA;
+
+ DAC960_V2_PhysicalDevice_T
+ LogicalDriveToVirtualDevice[DAC960_MaxLogicalDrives];
+ bool LogicalDriveFoundDuringScan[DAC960_MaxLogicalDrives];
+ } V2;
+ } FW;
+ unsigned char ProgressBuffer[DAC960_ProgressBufferSize];
+ unsigned char UserStatusBuffer[DAC960_UserMessageSize];
+}
+DAC960_Controller_T;
+
+
+/*
+ Simplify access to Firmware Version Dependent Data Structure Components
+ and Functions.
+*/
+
+#define V1 FW.V1
+#define V2 FW.V2
+#define DAC960_QueueCommand(Command) \
+ (Controller->QueueCommand)(Command)
+#define DAC960_ReadControllerConfiguration(Controller) \
+ (Controller->ReadControllerConfiguration)(Controller)
+#define DAC960_ReadDeviceConfiguration(Controller) \
+ (Controller->ReadDeviceConfiguration)(Controller)
+#define DAC960_ReportDeviceConfiguration(Controller) \
+ (Controller->ReportDeviceConfiguration)(Controller)
+#define DAC960_QueueReadWriteCommand(Command) \
+ (Controller->QueueReadWriteCommand)(Command)
+
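+/*
+  A minimal usage sketch, kept under #if 0 and not part of the driver proper,
+  showing how the dispatch macros above are intended to be called.  The
+  function name is hypothetical; the point is that the macros expand to
+  indirect calls through the Controller's method pointers, and that
+  DAC960_QueueCommand and DAC960_QueueReadWriteCommand implicitly reference a
+  variable named Controller, which must therefore be in scope at the call
+  site.
+*/
+
+#if 0
+static void DAC960_ExampleDispatch(DAC960_Controller_T *Controller,
+                                   DAC960_Command_T *Command)
+{
+  /* Expands to (Controller->ReadControllerConfiguration)(Controller). */
+  if (!DAC960_ReadControllerConfiguration(Controller))
+    return;
+  /* Expands to (Controller->QueueCommand)(Command); note the implicit use
+     of the local variable Controller by the macro. */
+  DAC960_QueueCommand(Command);
+}
+#endif
+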
+/*
+ * dma_addr_writeql is provided to write dma_addr_t types
+ * to a 64-bit pci address space register. The controller
+ * will accept having the register written as two 32-bit
+ * values.
+ *
+ * In HIGHMEM kernels, dma_addr_t is a 64-bit value.
+ * Without HIGHMEM, dma_addr_t is a 32-bit value.
+ *
+ * The compiler should always fix up the assignment
+ * to u.wq appropriately, depending upon the size of
+ * dma_addr_t.
+ */
+static inline
+void dma_addr_writeql(dma_addr_t addr, void __iomem *write_address)
+{
+ union {
+ u64 wq;
+ uint wl[2];
+ } u;
+
+ u.wq = addr;
+
+ writel(u.wl[0], write_address);
+ writel(u.wl[1], write_address + 4);
+}
+
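+/*
+  Worked example, assuming the little-endian layout the union punning above
+  relies on: for addr = 0x0000000280001000, u.wl[0] holds the low word
+  0x80001000 and u.wl[1] the high word 0x00000002, so write_address receives
+  the low half of the bus address and write_address + 4 the high half.
+*/
+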
+/*
+ Define the DAC960 GEM Series Controller Interface Register Offsets.
+ */
+
+#define DAC960_GEM_RegisterWindowSize 0x600
+
+typedef enum
+{
+ DAC960_GEM_InboundDoorBellRegisterReadSetOffset = 0x214,
+ DAC960_GEM_InboundDoorBellRegisterClearOffset = 0x218,
+ DAC960_GEM_OutboundDoorBellRegisterReadSetOffset = 0x224,
+ DAC960_GEM_OutboundDoorBellRegisterClearOffset = 0x228,
+ DAC960_GEM_InterruptStatusRegisterOffset = 0x208,
+ DAC960_GEM_InterruptMaskRegisterReadSetOffset = 0x22C,
+ DAC960_GEM_InterruptMaskRegisterClearOffset = 0x230,
+ DAC960_GEM_CommandMailboxBusAddressOffset = 0x510,
+ DAC960_GEM_CommandStatusOffset = 0x518,
+ DAC960_GEM_ErrorStatusRegisterReadSetOffset = 0x224,
+ DAC960_GEM_ErrorStatusRegisterClearOffset = 0x228,
+}
+DAC960_GEM_RegisterOffsets_T;
+
+/*
+ Define the structure of the DAC960 GEM Series Inbound Door Bell Register.
+ */
+
+typedef union DAC960_GEM_InboundDoorBellRegister
+{
+ unsigned int All;
+ struct {
+ unsigned int :24;
+ bool HardwareMailboxNewCommand:1;
+ bool AcknowledgeHardwareMailboxStatus:1;
+ bool GenerateInterrupt:1;
+ bool ControllerReset:1;
+ bool MemoryMailboxNewCommand:1;
+ unsigned int :3;
+ } Write;
+ struct {
+ unsigned int :24;
+ bool HardwareMailboxFull:1;
+ bool InitializationInProgress:1;
+ unsigned int :6;
+ } Read;
+}
+DAC960_GEM_InboundDoorBellRegister_T;
+
+/*
+ Define the structure of the DAC960 GEM Series Outbound Door Bell Register.
+ */
+typedef union DAC960_GEM_OutboundDoorBellRegister
+{
+ unsigned int All;
+ struct {
+ unsigned int :24;
+ bool AcknowledgeHardwareMailboxInterrupt:1;
+ bool AcknowledgeMemoryMailboxInterrupt:1;
+ unsigned int :6;
+ } Write;
+ struct {
+ unsigned int :24;
+ bool HardwareMailboxStatusAvailable:1;
+ bool MemoryMailboxStatusAvailable:1;
+ unsigned int :6;
+ } Read;
+}
+DAC960_GEM_OutboundDoorBellRegister_T;
+
+/*
+ Define the structure of the DAC960 GEM Series Interrupt Mask Register.
+ */
+typedef union DAC960_GEM_InterruptMaskRegister
+{
+ unsigned int All;
+ struct {
+ unsigned int :16;
+ unsigned int :8;
+ unsigned int HardwareMailboxInterrupt:1;
+ unsigned int MemoryMailboxInterrupt:1;
+ unsigned int :6;
+ } Bits;
+}
+DAC960_GEM_InterruptMaskRegister_T;
+
+/*
+ Define the structure of the DAC960 GEM Series Error Status Register.
+ */
+
+typedef union DAC960_GEM_ErrorStatusRegister
+{
+ unsigned int All;
+ struct {
+ unsigned int :24;
+ unsigned int :5;
+ bool ErrorStatusPending:1;
+ unsigned int :2;
+ } Bits;
+}
+DAC960_GEM_ErrorStatusRegister_T;
+
+/*
+ Define inline functions to provide an abstraction for reading and writing the
+ DAC960 GEM Series Controller Interface Registers.
+*/
+
+static inline
+void DAC960_GEM_HardwareMailboxNewCommand(void __iomem *ControllerBaseAddress)
+{
+ DAC960_GEM_InboundDoorBellRegister_T InboundDoorBellRegister;
+ InboundDoorBellRegister.All = 0;
+ InboundDoorBellRegister.Write.HardwareMailboxNewCommand = true;
+ writel(InboundDoorBellRegister.All,
+ ControllerBaseAddress + DAC960_GEM_InboundDoorBellRegisterReadSetOffset);
+}
+
+static inline
+void DAC960_GEM_AcknowledgeHardwareMailboxStatus(void __iomem *ControllerBaseAddress)
+{
+ DAC960_GEM_InboundDoorBellRegister_T InboundDoorBellRegister;
+ InboundDoorBellRegister.All = 0;
+ InboundDoorBellRegister.Write.AcknowledgeHardwareMailboxStatus = true;
+ writel(InboundDoorBellRegister.All,
+ ControllerBaseAddress + DAC960_GEM_InboundDoorBellRegisterClearOffset);
+}
+
+static inline
+void DAC960_GEM_GenerateInterrupt(void __iomem *ControllerBaseAddress)
+{
+ DAC960_GEM_InboundDoorBellRegister_T InboundDoorBellRegister;
+ InboundDoorBellRegister.All = 0;
+ InboundDoorBellRegister.Write.GenerateInterrupt = true;
+ writel(InboundDoorBellRegister.All,
+ ControllerBaseAddress + DAC960_GEM_InboundDoorBellRegisterReadSetOffset);
+}
+
+static inline
+void DAC960_GEM_ControllerReset(void __iomem *ControllerBaseAddress)
+{
+ DAC960_GEM_InboundDoorBellRegister_T InboundDoorBellRegister;
+ InboundDoorBellRegister.All = 0;
+ InboundDoorBellRegister.Write.ControllerReset = true;
+ writel(InboundDoorBellRegister.All,
+ ControllerBaseAddress + DAC960_GEM_InboundDoorBellRegisterReadSetOffset);
+}
+
+static inline
+void DAC960_GEM_MemoryMailboxNewCommand(void __iomem *ControllerBaseAddress)
+{
+ DAC960_GEM_InboundDoorBellRegister_T InboundDoorBellRegister;
+ InboundDoorBellRegister.All = 0;
+ InboundDoorBellRegister.Write.MemoryMailboxNewCommand = true;
+ writel(InboundDoorBellRegister.All,
+ ControllerBaseAddress + DAC960_GEM_InboundDoorBellRegisterReadSetOffset);
+}
+
+static inline
+bool DAC960_GEM_HardwareMailboxFullP(void __iomem *ControllerBaseAddress)
+{
+ DAC960_GEM_InboundDoorBellRegister_T InboundDoorBellRegister;
+ InboundDoorBellRegister.All =
+ readl(ControllerBaseAddress +
+ DAC960_GEM_InboundDoorBellRegisterReadSetOffset);
+ return InboundDoorBellRegister.Read.HardwareMailboxFull;
+}
+
+static inline
+bool DAC960_GEM_InitializationInProgressP(void __iomem *ControllerBaseAddress)
+{
+ DAC960_GEM_InboundDoorBellRegister_T InboundDoorBellRegister;
+ InboundDoorBellRegister.All =
+ readl(ControllerBaseAddress +
+ DAC960_GEM_InboundDoorBellRegisterReadSetOffset);
+ return InboundDoorBellRegister.Read.InitializationInProgress;
+}
+
+static inline
+void DAC960_GEM_AcknowledgeHardwareMailboxInterrupt(void __iomem *ControllerBaseAddress)
+{
+ DAC960_GEM_OutboundDoorBellRegister_T OutboundDoorBellRegister;
+ OutboundDoorBellRegister.All = 0;
+ OutboundDoorBellRegister.Write.AcknowledgeHardwareMailboxInterrupt = true;
+ writel(OutboundDoorBellRegister.All,
+ ControllerBaseAddress + DAC960_GEM_OutboundDoorBellRegisterClearOffset);
+}
+
+static inline
+void DAC960_GEM_AcknowledgeMemoryMailboxInterrupt(void __iomem *ControllerBaseAddress)
+{
+ DAC960_GEM_OutboundDoorBellRegister_T OutboundDoorBellRegister;
+ OutboundDoorBellRegister.All = 0;
+ OutboundDoorBellRegister.Write.AcknowledgeMemoryMailboxInterrupt = true;
+ writel(OutboundDoorBellRegister.All,
+ ControllerBaseAddress + DAC960_GEM_OutboundDoorBellRegisterClearOffset);
+}
+
+static inline
+void DAC960_GEM_AcknowledgeInterrupt(void __iomem *ControllerBaseAddress)
+{
+ DAC960_GEM_OutboundDoorBellRegister_T OutboundDoorBellRegister;
+ OutboundDoorBellRegister.All = 0;
+ OutboundDoorBellRegister.Write.AcknowledgeHardwareMailboxInterrupt = true;
+ OutboundDoorBellRegister.Write.AcknowledgeMemoryMailboxInterrupt = true;
+ writel(OutboundDoorBellRegister.All,
+ ControllerBaseAddress + DAC960_GEM_OutboundDoorBellRegisterClearOffset);
+}
+
+static inline
+bool DAC960_GEM_HardwareMailboxStatusAvailableP(void __iomem *ControllerBaseAddress)
+{
+ DAC960_GEM_OutboundDoorBellRegister_T OutboundDoorBellRegister;
+ OutboundDoorBellRegister.All =
+ readl(ControllerBaseAddress +
+ DAC960_GEM_OutboundDoorBellRegisterReadSetOffset);
+ return OutboundDoorBellRegister.Read.HardwareMailboxStatusAvailable;
+}
+
+static inline
+bool DAC960_GEM_MemoryMailboxStatusAvailableP(void __iomem *ControllerBaseAddress)
+{
+ DAC960_GEM_OutboundDoorBellRegister_T OutboundDoorBellRegister;
+ OutboundDoorBellRegister.All =
+ readl(ControllerBaseAddress +
+ DAC960_GEM_OutboundDoorBellRegisterReadSetOffset);
+ return OutboundDoorBellRegister.Read.MemoryMailboxStatusAvailable;
+}
+
+static inline
+void DAC960_GEM_EnableInterrupts(void __iomem *ControllerBaseAddress)
+{
+ DAC960_GEM_InterruptMaskRegister_T InterruptMaskRegister;
+ InterruptMaskRegister.All = 0;
+ InterruptMaskRegister.Bits.HardwareMailboxInterrupt = true;
+ InterruptMaskRegister.Bits.MemoryMailboxInterrupt = true;
+ writel(InterruptMaskRegister.All,
+ ControllerBaseAddress + DAC960_GEM_InterruptMaskRegisterClearOffset);
+}
+
+static inline
+void DAC960_GEM_DisableInterrupts(void __iomem *ControllerBaseAddress)
+{
+ DAC960_GEM_InterruptMaskRegister_T InterruptMaskRegister;
+ InterruptMaskRegister.All = 0;
+ InterruptMaskRegister.Bits.HardwareMailboxInterrupt = true;
+ InterruptMaskRegister.Bits.MemoryMailboxInterrupt = true;
+ writel(InterruptMaskRegister.All,
+ ControllerBaseAddress + DAC960_GEM_InterruptMaskRegisterReadSetOffset);
+}
+
+static inline
+bool DAC960_GEM_InterruptsEnabledP(void __iomem *ControllerBaseAddress)
+{
+ DAC960_GEM_InterruptMaskRegister_T InterruptMaskRegister;
+ InterruptMaskRegister.All =
+ readl(ControllerBaseAddress +
+ DAC960_GEM_InterruptMaskRegisterReadSetOffset);
+ return !(InterruptMaskRegister.Bits.HardwareMailboxInterrupt ||
+ InterruptMaskRegister.Bits.MemoryMailboxInterrupt);
+}
+
+static inline
+void DAC960_GEM_WriteCommandMailbox(DAC960_V2_CommandMailbox_T
+ *MemoryCommandMailbox,
+ DAC960_V2_CommandMailbox_T
+ *CommandMailbox)
+{
+ memcpy(&MemoryCommandMailbox->Words[1], &CommandMailbox->Words[1],
+ sizeof(DAC960_V2_CommandMailbox_T) - sizeof(unsigned int));
+ wmb();
+ MemoryCommandMailbox->Words[0] = CommandMailbox->Words[0];
+ mb();
+}
+
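+/*
+  Note on the ordering in DAC960_GEM_WriteCommandMailbox above (and in the
+  analogous BA, LP, LA and PG helpers later in this file): Words[1] onward
+  are copied first and wmb() forces them out before Words[0] is stored,
+  because word 0 carries the command opcode and identifier that mark the
+  memory mailbox slot as holding a valid command.  Writing word 0 last keeps
+  the controller from picking up a partially written mailbox.
+*/
+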
+static inline
+void DAC960_GEM_WriteHardwareMailbox(void __iomem *ControllerBaseAddress,
+ dma_addr_t CommandMailboxDMA)
+{
+ dma_addr_writeql(CommandMailboxDMA,
+ ControllerBaseAddress +
+ DAC960_GEM_CommandMailboxBusAddressOffset);
+}
+
+static inline DAC960_V2_CommandIdentifier_T
+DAC960_GEM_ReadCommandIdentifier(void __iomem *ControllerBaseAddress)
+{
+ return readw(ControllerBaseAddress + DAC960_GEM_CommandStatusOffset);
+}
+
+static inline DAC960_V2_CommandStatus_T
+DAC960_GEM_ReadCommandStatus(void __iomem *ControllerBaseAddress)
+{
+ return readw(ControllerBaseAddress + DAC960_GEM_CommandStatusOffset + 2);
+}
+
+static inline bool
+DAC960_GEM_ReadErrorStatus(void __iomem *ControllerBaseAddress,
+ unsigned char *ErrorStatus,
+ unsigned char *Parameter0,
+ unsigned char *Parameter1)
+{
+ DAC960_GEM_ErrorStatusRegister_T ErrorStatusRegister;
+ ErrorStatusRegister.All =
+ readl(ControllerBaseAddress + DAC960_GEM_ErrorStatusRegisterReadSetOffset);
+ if (!ErrorStatusRegister.Bits.ErrorStatusPending) return false;
+ ErrorStatusRegister.Bits.ErrorStatusPending = false;
+ *ErrorStatus = ErrorStatusRegister.All;
+ *Parameter0 =
+ readb(ControllerBaseAddress + DAC960_GEM_CommandMailboxBusAddressOffset + 0);
+ *Parameter1 =
+ readb(ControllerBaseAddress + DAC960_GEM_CommandMailboxBusAddressOffset + 1);
+ writel(0x03000000, ControllerBaseAddress +
+ DAC960_GEM_ErrorStatusRegisterClearOffset);
+ return true;
+}
+
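+/*
+  A minimal sketch, kept under #if 0, of how the GEM helpers above combine
+  into a synchronous hardware mailbox command: wait for a free mailbox, post
+  the command's bus address, ring the doorbell, poll for status, then
+  acknowledge.  The function name, the busy-wait loops and the udelay()
+  interval are illustrative only; the driver proper adds timeouts and
+  interrupt driven completion.
+*/
+
+#if 0
+static DAC960_V2_CommandStatus_T
+DAC960_GEM_ExampleHardwareMailboxCommand(void __iomem *ControllerBaseAddress,
+                                         dma_addr_t CommandMailboxDMA)
+{
+  DAC960_V2_CommandStatus_T CommandStatus;
+  /* Wait for the hardware mailbox to become free. */
+  while (DAC960_GEM_HardwareMailboxFullP(ControllerBaseAddress))
+    udelay(1);
+  DAC960_GEM_WriteHardwareMailbox(ControllerBaseAddress, CommandMailboxDMA);
+  DAC960_GEM_HardwareMailboxNewCommand(ControllerBaseAddress);
+  /* Poll for completion status, then read and acknowledge it. */
+  while (!DAC960_GEM_HardwareMailboxStatusAvailableP(ControllerBaseAddress))
+    udelay(1);
+  CommandStatus = DAC960_GEM_ReadCommandStatus(ControllerBaseAddress);
+  DAC960_GEM_AcknowledgeHardwareMailboxInterrupt(ControllerBaseAddress);
+  DAC960_GEM_AcknowledgeHardwareMailboxStatus(ControllerBaseAddress);
+  return CommandStatus;
+}
+#endif
+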
+/*
+ Define the DAC960 BA Series Controller Interface Register Offsets.
+*/
+
+#define DAC960_BA_RegisterWindowSize 0x80
+
+typedef enum
+{
+ DAC960_BA_InboundDoorBellRegisterOffset = 0x60,
+ DAC960_BA_OutboundDoorBellRegisterOffset = 0x61,
+ DAC960_BA_InterruptStatusRegisterOffset = 0x30,
+ DAC960_BA_InterruptMaskRegisterOffset = 0x34,
+ DAC960_BA_CommandMailboxBusAddressOffset = 0x50,
+ DAC960_BA_CommandStatusOffset = 0x58,
+ DAC960_BA_ErrorStatusRegisterOffset = 0x63
+}
+DAC960_BA_RegisterOffsets_T;
+
+
+/*
+ Define the structure of the DAC960 BA Series Inbound Door Bell Register.
+*/
+
+typedef union DAC960_BA_InboundDoorBellRegister
+{
+ unsigned char All;
+ struct {
+ bool HardwareMailboxNewCommand:1; /* Bit 0 */
+ bool AcknowledgeHardwareMailboxStatus:1; /* Bit 1 */
+ bool GenerateInterrupt:1; /* Bit 2 */
+ bool ControllerReset:1; /* Bit 3 */
+ bool MemoryMailboxNewCommand:1; /* Bit 4 */
+ unsigned char :3; /* Bits 5-7 */
+ } Write;
+ struct {
+ bool HardwareMailboxEmpty:1; /* Bit 0 */
+ bool InitializationNotInProgress:1; /* Bit 1 */
+ unsigned char :6; /* Bits 2-7 */
+ } Read;
+}
+DAC960_BA_InboundDoorBellRegister_T;
+
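+/*
+  Unlike the GEM and LP inbound door bells, the BA (and LA) read view reports
+  HardwareMailboxEmpty and InitializationNotInProgress, i.e. the bits carry
+  the inverse sense; the FullP and InitializationInProgressP helpers below
+  negate them accordingly.
+*/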
+
+/*
+ Define the structure of the DAC960 BA Series Outbound Door Bell Register.
+*/
+
+typedef union DAC960_BA_OutboundDoorBellRegister
+{
+ unsigned char All;
+ struct {
+ bool AcknowledgeHardwareMailboxInterrupt:1; /* Bit 0 */
+ bool AcknowledgeMemoryMailboxInterrupt:1; /* Bit 1 */
+ unsigned char :6; /* Bits 2-7 */
+ } Write;
+ struct {
+ bool HardwareMailboxStatusAvailable:1; /* Bit 0 */
+ bool MemoryMailboxStatusAvailable:1; /* Bit 1 */
+ unsigned char :6; /* Bits 2-7 */
+ } Read;
+}
+DAC960_BA_OutboundDoorBellRegister_T;
+
+
+/*
+ Define the structure of the DAC960 BA Series Interrupt Mask Register.
+*/
+
+typedef union DAC960_BA_InterruptMaskRegister
+{
+ unsigned char All;
+ struct {
+ unsigned int :2; /* Bits 0-1 */
+ bool DisableInterrupts:1; /* Bit 2 */
+ bool DisableInterruptsI2O:1; /* Bit 3 */
+ unsigned int :4; /* Bits 4-7 */
+ } Bits;
+}
+DAC960_BA_InterruptMaskRegister_T;
+
+
+/*
+ Define the structure of the DAC960 BA Series Error Status Register.
+*/
+
+typedef union DAC960_BA_ErrorStatusRegister
+{
+ unsigned char All;
+ struct {
+ unsigned int :2; /* Bits 0-1 */
+ bool ErrorStatusPending:1; /* Bit 2 */
+ unsigned int :5; /* Bits 3-7 */
+ } Bits;
+}
+DAC960_BA_ErrorStatusRegister_T;
+
+
+/*
+ Define inline functions to provide an abstraction for reading and writing the
+ DAC960 BA Series Controller Interface Registers.
+*/
+
+static inline
+void DAC960_BA_HardwareMailboxNewCommand(void __iomem *ControllerBaseAddress)
+{
+ DAC960_BA_InboundDoorBellRegister_T InboundDoorBellRegister;
+ InboundDoorBellRegister.All = 0;
+ InboundDoorBellRegister.Write.HardwareMailboxNewCommand = true;
+ writeb(InboundDoorBellRegister.All,
+ ControllerBaseAddress + DAC960_BA_InboundDoorBellRegisterOffset);
+}
+
+static inline
+void DAC960_BA_AcknowledgeHardwareMailboxStatus(void __iomem *ControllerBaseAddress)
+{
+ DAC960_BA_InboundDoorBellRegister_T InboundDoorBellRegister;
+ InboundDoorBellRegister.All = 0;
+ InboundDoorBellRegister.Write.AcknowledgeHardwareMailboxStatus = true;
+ writeb(InboundDoorBellRegister.All,
+ ControllerBaseAddress + DAC960_BA_InboundDoorBellRegisterOffset);
+}
+
+static inline
+void DAC960_BA_GenerateInterrupt(void __iomem *ControllerBaseAddress)
+{
+ DAC960_BA_InboundDoorBellRegister_T InboundDoorBellRegister;
+ InboundDoorBellRegister.All = 0;
+ InboundDoorBellRegister.Write.GenerateInterrupt = true;
+ writeb(InboundDoorBellRegister.All,
+ ControllerBaseAddress + DAC960_BA_InboundDoorBellRegisterOffset);
+}
+
+static inline
+void DAC960_BA_ControllerReset(void __iomem *ControllerBaseAddress)
+{
+ DAC960_BA_InboundDoorBellRegister_T InboundDoorBellRegister;
+ InboundDoorBellRegister.All = 0;
+ InboundDoorBellRegister.Write.ControllerReset = true;
+ writeb(InboundDoorBellRegister.All,
+ ControllerBaseAddress + DAC960_BA_InboundDoorBellRegisterOffset);
+}
+
+static inline
+void DAC960_BA_MemoryMailboxNewCommand(void __iomem *ControllerBaseAddress)
+{
+ DAC960_BA_InboundDoorBellRegister_T InboundDoorBellRegister;
+ InboundDoorBellRegister.All = 0;
+ InboundDoorBellRegister.Write.MemoryMailboxNewCommand = true;
+ writeb(InboundDoorBellRegister.All,
+ ControllerBaseAddress + DAC960_BA_InboundDoorBellRegisterOffset);
+}
+
+static inline
+bool DAC960_BA_HardwareMailboxFullP(void __iomem *ControllerBaseAddress)
+{
+ DAC960_BA_InboundDoorBellRegister_T InboundDoorBellRegister;
+ InboundDoorBellRegister.All =
+ readb(ControllerBaseAddress + DAC960_BA_InboundDoorBellRegisterOffset);
+ return !InboundDoorBellRegister.Read.HardwareMailboxEmpty;
+}
+
+static inline
+bool DAC960_BA_InitializationInProgressP(void __iomem *ControllerBaseAddress)
+{
+ DAC960_BA_InboundDoorBellRegister_T InboundDoorBellRegister;
+ InboundDoorBellRegister.All =
+ readb(ControllerBaseAddress + DAC960_BA_InboundDoorBellRegisterOffset);
+ return !InboundDoorBellRegister.Read.InitializationNotInProgress;
+}
+
+static inline
+void DAC960_BA_AcknowledgeHardwareMailboxInterrupt(void __iomem *ControllerBaseAddress)
+{
+ DAC960_BA_OutboundDoorBellRegister_T OutboundDoorBellRegister;
+ OutboundDoorBellRegister.All = 0;
+ OutboundDoorBellRegister.Write.AcknowledgeHardwareMailboxInterrupt = true;
+ writeb(OutboundDoorBellRegister.All,
+ ControllerBaseAddress + DAC960_BA_OutboundDoorBellRegisterOffset);
+}
+
+static inline
+void DAC960_BA_AcknowledgeMemoryMailboxInterrupt(void __iomem *ControllerBaseAddress)
+{
+ DAC960_BA_OutboundDoorBellRegister_T OutboundDoorBellRegister;
+ OutboundDoorBellRegister.All = 0;
+ OutboundDoorBellRegister.Write.AcknowledgeMemoryMailboxInterrupt = true;
+ writeb(OutboundDoorBellRegister.All,
+ ControllerBaseAddress + DAC960_BA_OutboundDoorBellRegisterOffset);
+}
+
+static inline
+void DAC960_BA_AcknowledgeInterrupt(void __iomem *ControllerBaseAddress)
+{
+ DAC960_BA_OutboundDoorBellRegister_T OutboundDoorBellRegister;
+ OutboundDoorBellRegister.All = 0;
+ OutboundDoorBellRegister.Write.AcknowledgeHardwareMailboxInterrupt = true;
+ OutboundDoorBellRegister.Write.AcknowledgeMemoryMailboxInterrupt = true;
+ writeb(OutboundDoorBellRegister.All,
+ ControllerBaseAddress + DAC960_BA_OutboundDoorBellRegisterOffset);
+}
+
+static inline
+bool DAC960_BA_HardwareMailboxStatusAvailableP(void __iomem *ControllerBaseAddress)
+{
+ DAC960_BA_OutboundDoorBellRegister_T OutboundDoorBellRegister;
+ OutboundDoorBellRegister.All =
+ readb(ControllerBaseAddress + DAC960_BA_OutboundDoorBellRegisterOffset);
+ return OutboundDoorBellRegister.Read.HardwareMailboxStatusAvailable;
+}
+
+static inline
+bool DAC960_BA_MemoryMailboxStatusAvailableP(void __iomem *ControllerBaseAddress)
+{
+ DAC960_BA_OutboundDoorBellRegister_T OutboundDoorBellRegister;
+ OutboundDoorBellRegister.All =
+ readb(ControllerBaseAddress + DAC960_BA_OutboundDoorBellRegisterOffset);
+ return OutboundDoorBellRegister.Read.MemoryMailboxStatusAvailable;
+}
+
+static inline
+void DAC960_BA_EnableInterrupts(void __iomem *ControllerBaseAddress)
+{
+ DAC960_BA_InterruptMaskRegister_T InterruptMaskRegister;
+ InterruptMaskRegister.All = 0xFF;
+ InterruptMaskRegister.Bits.DisableInterrupts = false;
+ InterruptMaskRegister.Bits.DisableInterruptsI2O = true;
+ writeb(InterruptMaskRegister.All,
+ ControllerBaseAddress + DAC960_BA_InterruptMaskRegisterOffset);
+}
+
+static inline
+void DAC960_BA_DisableInterrupts(void __iomem *ControllerBaseAddress)
+{
+ DAC960_BA_InterruptMaskRegister_T InterruptMaskRegister;
+ InterruptMaskRegister.All = 0xFF;
+ InterruptMaskRegister.Bits.DisableInterrupts = true;
+ InterruptMaskRegister.Bits.DisableInterruptsI2O = true;
+ writeb(InterruptMaskRegister.All,
+ ControllerBaseAddress + DAC960_BA_InterruptMaskRegisterOffset);
+}
+
+static inline
+bool DAC960_BA_InterruptsEnabledP(void __iomem *ControllerBaseAddress)
+{
+ DAC960_BA_InterruptMaskRegister_T InterruptMaskRegister;
+ InterruptMaskRegister.All =
+ readb(ControllerBaseAddress + DAC960_BA_InterruptMaskRegisterOffset);
+ return !InterruptMaskRegister.Bits.DisableInterrupts;
+}
+
+static inline
+void DAC960_BA_WriteCommandMailbox(DAC960_V2_CommandMailbox_T
+ *MemoryCommandMailbox,
+ DAC960_V2_CommandMailbox_T
+ *CommandMailbox)
+{
+ memcpy(&MemoryCommandMailbox->Words[1], &CommandMailbox->Words[1],
+ sizeof(DAC960_V2_CommandMailbox_T) - sizeof(unsigned int));
+ wmb();
+ MemoryCommandMailbox->Words[0] = CommandMailbox->Words[0];
+ mb();
+}
+
+
+static inline
+void DAC960_BA_WriteHardwareMailbox(void __iomem *ControllerBaseAddress,
+ dma_addr_t CommandMailboxDMA)
+{
+ dma_addr_writeql(CommandMailboxDMA,
+ ControllerBaseAddress +
+ DAC960_BA_CommandMailboxBusAddressOffset);
+}
+
+static inline DAC960_V2_CommandIdentifier_T
+DAC960_BA_ReadCommandIdentifier(void __iomem *ControllerBaseAddress)
+{
+ return readw(ControllerBaseAddress + DAC960_BA_CommandStatusOffset);
+}
+
+static inline DAC960_V2_CommandStatus_T
+DAC960_BA_ReadCommandStatus(void __iomem *ControllerBaseAddress)
+{
+ return readw(ControllerBaseAddress + DAC960_BA_CommandStatusOffset + 2);
+}
+
+static inline bool
+DAC960_BA_ReadErrorStatus(void __iomem *ControllerBaseAddress,
+ unsigned char *ErrorStatus,
+ unsigned char *Parameter0,
+ unsigned char *Parameter1)
+{
+ DAC960_BA_ErrorStatusRegister_T ErrorStatusRegister;
+ ErrorStatusRegister.All =
+ readb(ControllerBaseAddress + DAC960_BA_ErrorStatusRegisterOffset);
+ if (!ErrorStatusRegister.Bits.ErrorStatusPending) return false;
+ ErrorStatusRegister.Bits.ErrorStatusPending = false;
+ *ErrorStatus = ErrorStatusRegister.All;
+ *Parameter0 =
+ readb(ControllerBaseAddress + DAC960_BA_CommandMailboxBusAddressOffset + 0);
+ *Parameter1 =
+ readb(ControllerBaseAddress + DAC960_BA_CommandMailboxBusAddressOffset + 1);
+ writeb(0xFF, ControllerBaseAddress + DAC960_BA_ErrorStatusRegisterOffset);
+ return true;
+}
+
+
+/*
+ Define the DAC960 LP Series Controller Interface Register Offsets.
+*/
+
+#define DAC960_LP_RegisterWindowSize 0x80
+
+typedef enum
+{
+ DAC960_LP_InboundDoorBellRegisterOffset = 0x20,
+ DAC960_LP_OutboundDoorBellRegisterOffset = 0x2C,
+ DAC960_LP_InterruptStatusRegisterOffset = 0x30,
+ DAC960_LP_InterruptMaskRegisterOffset = 0x34,
+ DAC960_LP_CommandMailboxBusAddressOffset = 0x10,
+ DAC960_LP_CommandStatusOffset = 0x18,
+ DAC960_LP_ErrorStatusRegisterOffset = 0x2E
+}
+DAC960_LP_RegisterOffsets_T;
+
+
+/*
+ Define the structure of the DAC960 LP Series Inbound Door Bell Register.
+*/
+
+typedef union DAC960_LP_InboundDoorBellRegister
+{
+ unsigned char All;
+ struct {
+ bool HardwareMailboxNewCommand:1; /* Bit 0 */
+ bool AcknowledgeHardwareMailboxStatus:1; /* Bit 1 */
+ bool GenerateInterrupt:1; /* Bit 2 */
+ bool ControllerReset:1; /* Bit 3 */
+ bool MemoryMailboxNewCommand:1; /* Bit 4 */
+ unsigned char :3; /* Bits 5-7 */
+ } Write;
+ struct {
+ bool HardwareMailboxFull:1; /* Bit 0 */
+ bool InitializationInProgress:1; /* Bit 1 */
+ unsigned char :6; /* Bits 2-7 */
+ } Read;
+}
+DAC960_LP_InboundDoorBellRegister_T;
+
+
+/*
+ Define the structure of the DAC960 LP Series Outbound Door Bell Register.
+*/
+
+typedef union DAC960_LP_OutboundDoorBellRegister
+{
+ unsigned char All;
+ struct {
+ bool AcknowledgeHardwareMailboxInterrupt:1; /* Bit 0 */
+ bool AcknowledgeMemoryMailboxInterrupt:1; /* Bit 1 */
+ unsigned char :6; /* Bits 2-7 */
+ } Write;
+ struct {
+ bool HardwareMailboxStatusAvailable:1; /* Bit 0 */
+ bool MemoryMailboxStatusAvailable:1; /* Bit 1 */
+ unsigned char :6; /* Bits 2-7 */
+ } Read;
+}
+DAC960_LP_OutboundDoorBellRegister_T;
+
+
+/*
+ Define the structure of the DAC960 LP Series Interrupt Mask Register.
+*/
+
+typedef union DAC960_LP_InterruptMaskRegister
+{
+ unsigned char All;
+ struct {
+ unsigned int :2; /* Bits 0-1 */
+ bool DisableInterrupts:1; /* Bit 2 */
+ unsigned int :5; /* Bits 3-7 */
+ } Bits;
+}
+DAC960_LP_InterruptMaskRegister_T;
+
+
+/*
+ Define the structure of the DAC960 LP Series Error Status Register.
+*/
+
+typedef union DAC960_LP_ErrorStatusRegister
+{
+ unsigned char All;
+ struct {
+ unsigned int :2; /* Bits 0-1 */
+ bool ErrorStatusPending:1; /* Bit 2 */
+ unsigned int :5; /* Bits 3-7 */
+ } Bits;
+}
+DAC960_LP_ErrorStatusRegister_T;
+
+
+/*
+ Define inline functions to provide an abstraction for reading and writing the
+ DAC960 LP Series Controller Interface Registers.
+*/
+
+static inline
+void DAC960_LP_HardwareMailboxNewCommand(void __iomem *ControllerBaseAddress)
+{
+ DAC960_LP_InboundDoorBellRegister_T InboundDoorBellRegister;
+ InboundDoorBellRegister.All = 0;
+ InboundDoorBellRegister.Write.HardwareMailboxNewCommand = true;
+ writeb(InboundDoorBellRegister.All,
+ ControllerBaseAddress + DAC960_LP_InboundDoorBellRegisterOffset);
+}
+
+static inline
+void DAC960_LP_AcknowledgeHardwareMailboxStatus(void __iomem *ControllerBaseAddress)
+{
+ DAC960_LP_InboundDoorBellRegister_T InboundDoorBellRegister;
+ InboundDoorBellRegister.All = 0;
+ InboundDoorBellRegister.Write.AcknowledgeHardwareMailboxStatus = true;
+ writeb(InboundDoorBellRegister.All,
+ ControllerBaseAddress + DAC960_LP_InboundDoorBellRegisterOffset);
+}
+
+static inline
+void DAC960_LP_GenerateInterrupt(void __iomem *ControllerBaseAddress)
+{
+ DAC960_LP_InboundDoorBellRegister_T InboundDoorBellRegister;
+ InboundDoorBellRegister.All = 0;
+ InboundDoorBellRegister.Write.GenerateInterrupt = true;
+ writeb(InboundDoorBellRegister.All,
+ ControllerBaseAddress + DAC960_LP_InboundDoorBellRegisterOffset);
+}
+
+static inline
+void DAC960_LP_ControllerReset(void __iomem *ControllerBaseAddress)
+{
+ DAC960_LP_InboundDoorBellRegister_T InboundDoorBellRegister;
+ InboundDoorBellRegister.All = 0;
+ InboundDoorBellRegister.Write.ControllerReset = true;
+ writeb(InboundDoorBellRegister.All,
+ ControllerBaseAddress + DAC960_LP_InboundDoorBellRegisterOffset);
+}
+
+static inline
+void DAC960_LP_MemoryMailboxNewCommand(void __iomem *ControllerBaseAddress)
+{
+ DAC960_LP_InboundDoorBellRegister_T InboundDoorBellRegister;
+ InboundDoorBellRegister.All = 0;
+ InboundDoorBellRegister.Write.MemoryMailboxNewCommand = true;
+ writeb(InboundDoorBellRegister.All,
+ ControllerBaseAddress + DAC960_LP_InboundDoorBellRegisterOffset);
+}
+
+static inline
+bool DAC960_LP_HardwareMailboxFullP(void __iomem *ControllerBaseAddress)
+{
+ DAC960_LP_InboundDoorBellRegister_T InboundDoorBellRegister;
+ InboundDoorBellRegister.All =
+ readb(ControllerBaseAddress + DAC960_LP_InboundDoorBellRegisterOffset);
+ return InboundDoorBellRegister.Read.HardwareMailboxFull;
+}
+
+static inline
+bool DAC960_LP_InitializationInProgressP(void __iomem *ControllerBaseAddress)
+{
+ DAC960_LP_InboundDoorBellRegister_T InboundDoorBellRegister;
+ InboundDoorBellRegister.All =
+ readb(ControllerBaseAddress + DAC960_LP_InboundDoorBellRegisterOffset);
+ return InboundDoorBellRegister.Read.InitializationInProgress;
+}
+
+static inline
+void DAC960_LP_AcknowledgeHardwareMailboxInterrupt(void __iomem *ControllerBaseAddress)
+{
+ DAC960_LP_OutboundDoorBellRegister_T OutboundDoorBellRegister;
+ OutboundDoorBellRegister.All = 0;
+ OutboundDoorBellRegister.Write.AcknowledgeHardwareMailboxInterrupt = true;
+ writeb(OutboundDoorBellRegister.All,
+ ControllerBaseAddress + DAC960_LP_OutboundDoorBellRegisterOffset);
+}
+
+static inline
+void DAC960_LP_AcknowledgeMemoryMailboxInterrupt(void __iomem *ControllerBaseAddress)
+{
+ DAC960_LP_OutboundDoorBellRegister_T OutboundDoorBellRegister;
+ OutboundDoorBellRegister.All = 0;
+ OutboundDoorBellRegister.Write.AcknowledgeMemoryMailboxInterrupt = true;
+ writeb(OutboundDoorBellRegister.All,
+ ControllerBaseAddress + DAC960_LP_OutboundDoorBellRegisterOffset);
+}
+
+static inline
+void DAC960_LP_AcknowledgeInterrupt(void __iomem *ControllerBaseAddress)
+{
+ DAC960_LP_OutboundDoorBellRegister_T OutboundDoorBellRegister;
+ OutboundDoorBellRegister.All = 0;
+ OutboundDoorBellRegister.Write.AcknowledgeHardwareMailboxInterrupt = true;
+ OutboundDoorBellRegister.Write.AcknowledgeMemoryMailboxInterrupt = true;
+ writeb(OutboundDoorBellRegister.All,
+ ControllerBaseAddress + DAC960_LP_OutboundDoorBellRegisterOffset);
+}
+
+static inline
+bool DAC960_LP_HardwareMailboxStatusAvailableP(void __iomem *ControllerBaseAddress)
+{
+ DAC960_LP_OutboundDoorBellRegister_T OutboundDoorBellRegister;
+ OutboundDoorBellRegister.All =
+ readb(ControllerBaseAddress + DAC960_LP_OutboundDoorBellRegisterOffset);
+ return OutboundDoorBellRegister.Read.HardwareMailboxStatusAvailable;
+}
+
+static inline
+bool DAC960_LP_MemoryMailboxStatusAvailableP(void __iomem *ControllerBaseAddress)
+{
+ DAC960_LP_OutboundDoorBellRegister_T OutboundDoorBellRegister;
+ OutboundDoorBellRegister.All =
+ readb(ControllerBaseAddress + DAC960_LP_OutboundDoorBellRegisterOffset);
+ return OutboundDoorBellRegister.Read.MemoryMailboxStatusAvailable;
+}
+
+static inline
+void DAC960_LP_EnableInterrupts(void __iomem *ControllerBaseAddress)
+{
+ DAC960_LP_InterruptMaskRegister_T InterruptMaskRegister;
+ InterruptMaskRegister.All = 0xFF;
+ InterruptMaskRegister.Bits.DisableInterrupts = false;
+ writeb(InterruptMaskRegister.All,
+ ControllerBaseAddress + DAC960_LP_InterruptMaskRegisterOffset);
+}
+
+static inline
+void DAC960_LP_DisableInterrupts(void __iomem *ControllerBaseAddress)
+{
+ DAC960_LP_InterruptMaskRegister_T InterruptMaskRegister;
+ InterruptMaskRegister.All = 0xFF;
+ InterruptMaskRegister.Bits.DisableInterrupts = true;
+ writeb(InterruptMaskRegister.All,
+ ControllerBaseAddress + DAC960_LP_InterruptMaskRegisterOffset);
+}
+
+static inline
+bool DAC960_LP_InterruptsEnabledP(void __iomem *ControllerBaseAddress)
+{
+ DAC960_LP_InterruptMaskRegister_T InterruptMaskRegister;
+ InterruptMaskRegister.All =
+ readb(ControllerBaseAddress + DAC960_LP_InterruptMaskRegisterOffset);
+ return !InterruptMaskRegister.Bits.DisableInterrupts;
+}
+
+static inline
+void DAC960_LP_WriteCommandMailbox(DAC960_V2_CommandMailbox_T
+ *MemoryCommandMailbox,
+ DAC960_V2_CommandMailbox_T
+ *CommandMailbox)
+{
+ memcpy(&MemoryCommandMailbox->Words[1], &CommandMailbox->Words[1],
+ sizeof(DAC960_V2_CommandMailbox_T) - sizeof(unsigned int));
+ wmb();
+ MemoryCommandMailbox->Words[0] = CommandMailbox->Words[0];
+ mb();
+}
+
+static inline
+void DAC960_LP_WriteHardwareMailbox(void __iomem *ControllerBaseAddress,
+ dma_addr_t CommandMailboxDMA)
+{
+ dma_addr_writeql(CommandMailboxDMA,
+ ControllerBaseAddress +
+ DAC960_LP_CommandMailboxBusAddressOffset);
+}
+
+static inline DAC960_V2_CommandIdentifier_T
+DAC960_LP_ReadCommandIdentifier(void __iomem *ControllerBaseAddress)
+{
+ return readw(ControllerBaseAddress + DAC960_LP_CommandStatusOffset);
+}
+
+static inline DAC960_V2_CommandStatus_T
+DAC960_LP_ReadCommandStatus(void __iomem *ControllerBaseAddress)
+{
+ return readw(ControllerBaseAddress + DAC960_LP_CommandStatusOffset + 2);
+}
+
+static inline bool
+DAC960_LP_ReadErrorStatus(void __iomem *ControllerBaseAddress,
+ unsigned char *ErrorStatus,
+ unsigned char *Parameter0,
+ unsigned char *Parameter1)
+{
+ DAC960_LP_ErrorStatusRegister_T ErrorStatusRegister;
+ ErrorStatusRegister.All =
+ readb(ControllerBaseAddress + DAC960_LP_ErrorStatusRegisterOffset);
+ if (!ErrorStatusRegister.Bits.ErrorStatusPending) return false;
+ ErrorStatusRegister.Bits.ErrorStatusPending = false;
+ *ErrorStatus = ErrorStatusRegister.All;
+ *Parameter0 =
+ readb(ControllerBaseAddress + DAC960_LP_CommandMailboxBusAddressOffset + 0);
+ *Parameter1 =
+ readb(ControllerBaseAddress + DAC960_LP_CommandMailboxBusAddressOffset + 1);
+ writeb(0xFF, ControllerBaseAddress + DAC960_LP_ErrorStatusRegisterOffset);
+ return true;
+}
+
+
+/*
+ Define the DAC960 LA Series Controller Interface Register Offsets.
+*/
+
+#define DAC960_LA_RegisterWindowSize 0x80
+
+typedef enum
+{
+ DAC960_LA_InboundDoorBellRegisterOffset = 0x60,
+ DAC960_LA_OutboundDoorBellRegisterOffset = 0x61,
+ DAC960_LA_InterruptMaskRegisterOffset = 0x34,
+ DAC960_LA_CommandOpcodeRegisterOffset = 0x50,
+ DAC960_LA_CommandIdentifierRegisterOffset = 0x51,
+ DAC960_LA_MailboxRegister2Offset = 0x52,
+ DAC960_LA_MailboxRegister3Offset = 0x53,
+ DAC960_LA_MailboxRegister4Offset = 0x54,
+ DAC960_LA_MailboxRegister5Offset = 0x55,
+ DAC960_LA_MailboxRegister6Offset = 0x56,
+ DAC960_LA_MailboxRegister7Offset = 0x57,
+ DAC960_LA_MailboxRegister8Offset = 0x58,
+ DAC960_LA_MailboxRegister9Offset = 0x59,
+ DAC960_LA_MailboxRegister10Offset = 0x5A,
+ DAC960_LA_MailboxRegister11Offset = 0x5B,
+ DAC960_LA_MailboxRegister12Offset = 0x5C,
+ DAC960_LA_StatusCommandIdentifierRegOffset = 0x5D,
+ DAC960_LA_StatusRegisterOffset = 0x5E,
+ DAC960_LA_ErrorStatusRegisterOffset = 0x63
+}
+DAC960_LA_RegisterOffsets_T;
+
+
+/*
+ Define the structure of the DAC960 LA Series Inbound Door Bell Register.
+*/
+
+typedef union DAC960_LA_InboundDoorBellRegister
+{
+ unsigned char All;
+ struct {
+ bool HardwareMailboxNewCommand:1; /* Bit 0 */
+ bool AcknowledgeHardwareMailboxStatus:1; /* Bit 1 */
+ bool GenerateInterrupt:1; /* Bit 2 */
+ bool ControllerReset:1; /* Bit 3 */
+ bool MemoryMailboxNewCommand:1; /* Bit 4 */
+ unsigned char :3; /* Bits 5-7 */
+ } Write;
+ struct {
+ bool HardwareMailboxEmpty:1; /* Bit 0 */
+ bool InitializationNotInProgress:1; /* Bit 1 */
+ unsigned char :6; /* Bits 2-7 */
+ } Read;
+}
+DAC960_LA_InboundDoorBellRegister_T;
+
+
+/*
+ Define the structure of the DAC960 LA Series Outbound Door Bell Register.
+*/
+
+typedef union DAC960_LA_OutboundDoorBellRegister
+{
+ unsigned char All;
+ struct {
+ bool AcknowledgeHardwareMailboxInterrupt:1; /* Bit 0 */
+ bool AcknowledgeMemoryMailboxInterrupt:1; /* Bit 1 */
+ unsigned char :6; /* Bits 2-7 */
+ } Write;
+ struct {
+ bool HardwareMailboxStatusAvailable:1; /* Bit 0 */
+ bool MemoryMailboxStatusAvailable:1; /* Bit 1 */
+ unsigned char :6; /* Bits 2-7 */
+ } Read;
+}
+DAC960_LA_OutboundDoorBellRegister_T;
+
+
+/*
+ Define the structure of the DAC960 LA Series Interrupt Mask Register.
+*/
+
+typedef union DAC960_LA_InterruptMaskRegister
+{
+ unsigned char All;
+ struct {
+ unsigned char :2; /* Bits 0-1 */
+ bool DisableInterrupts:1; /* Bit 2 */
+ unsigned char :5; /* Bits 3-7 */
+ } Bits;
+}
+DAC960_LA_InterruptMaskRegister_T;
+
+
+/*
+ Define the structure of the DAC960 LA Series Error Status Register.
+*/
+
+typedef union DAC960_LA_ErrorStatusRegister
+{
+ unsigned char All;
+ struct {
+ unsigned int :2; /* Bits 0-1 */
+ bool ErrorStatusPending:1; /* Bit 2 */
+ unsigned int :5; /* Bits 3-7 */
+ } Bits;
+}
+DAC960_LA_ErrorStatusRegister_T;
+
+
+/*
+ Define inline functions to provide an abstraction for reading and writing the
+ DAC960 LA Series Controller Interface Registers.
+*/
+
+static inline
+void DAC960_LA_HardwareMailboxNewCommand(void __iomem *ControllerBaseAddress)
+{
+ DAC960_LA_InboundDoorBellRegister_T InboundDoorBellRegister;
+ InboundDoorBellRegister.All = 0;
+ InboundDoorBellRegister.Write.HardwareMailboxNewCommand = true;
+ writeb(InboundDoorBellRegister.All,
+ ControllerBaseAddress + DAC960_LA_InboundDoorBellRegisterOffset);
+}
+
+static inline
+void DAC960_LA_AcknowledgeHardwareMailboxStatus(void __iomem *ControllerBaseAddress)
+{
+ DAC960_LA_InboundDoorBellRegister_T InboundDoorBellRegister;
+ InboundDoorBellRegister.All = 0;
+ InboundDoorBellRegister.Write.AcknowledgeHardwareMailboxStatus = true;
+ writeb(InboundDoorBellRegister.All,
+ ControllerBaseAddress + DAC960_LA_InboundDoorBellRegisterOffset);
+}
+
+static inline
+void DAC960_LA_GenerateInterrupt(void __iomem *ControllerBaseAddress)
+{
+ DAC960_LA_InboundDoorBellRegister_T InboundDoorBellRegister;
+ InboundDoorBellRegister.All = 0;
+ InboundDoorBellRegister.Write.GenerateInterrupt = true;
+ writeb(InboundDoorBellRegister.All,
+ ControllerBaseAddress + DAC960_LA_InboundDoorBellRegisterOffset);
+}
+
+static inline
+void DAC960_LA_ControllerReset(void __iomem *ControllerBaseAddress)
+{
+ DAC960_LA_InboundDoorBellRegister_T InboundDoorBellRegister;
+ InboundDoorBellRegister.All = 0;
+ InboundDoorBellRegister.Write.ControllerReset = true;
+ writeb(InboundDoorBellRegister.All,
+ ControllerBaseAddress + DAC960_LA_InboundDoorBellRegisterOffset);
+}
+
+static inline
+void DAC960_LA_MemoryMailboxNewCommand(void __iomem *ControllerBaseAddress)
+{
+ DAC960_LA_InboundDoorBellRegister_T InboundDoorBellRegister;
+ InboundDoorBellRegister.All = 0;
+ InboundDoorBellRegister.Write.MemoryMailboxNewCommand = true;
+ writeb(InboundDoorBellRegister.All,
+ ControllerBaseAddress + DAC960_LA_InboundDoorBellRegisterOffset);
+}
+
+static inline
+bool DAC960_LA_HardwareMailboxFullP(void __iomem *ControllerBaseAddress)
+{
+ DAC960_LA_InboundDoorBellRegister_T InboundDoorBellRegister;
+ InboundDoorBellRegister.All =
+ readb(ControllerBaseAddress + DAC960_LA_InboundDoorBellRegisterOffset);
+ return !InboundDoorBellRegister.Read.HardwareMailboxEmpty;
+}
+
+static inline
+bool DAC960_LA_InitializationInProgressP(void __iomem *ControllerBaseAddress)
+{
+ DAC960_LA_InboundDoorBellRegister_T InboundDoorBellRegister;
+ InboundDoorBellRegister.All =
+ readb(ControllerBaseAddress + DAC960_LA_InboundDoorBellRegisterOffset);
+ return !InboundDoorBellRegister.Read.InitializationNotInProgress;
+}
+
+static inline
+void DAC960_LA_AcknowledgeHardwareMailboxInterrupt(void __iomem *ControllerBaseAddress)
+{
+ DAC960_LA_OutboundDoorBellRegister_T OutboundDoorBellRegister;
+ OutboundDoorBellRegister.All = 0;
+ OutboundDoorBellRegister.Write.AcknowledgeHardwareMailboxInterrupt = true;
+ writeb(OutboundDoorBellRegister.All,
+ ControllerBaseAddress + DAC960_LA_OutboundDoorBellRegisterOffset);
+}
+
+static inline
+void DAC960_LA_AcknowledgeMemoryMailboxInterrupt(void __iomem *ControllerBaseAddress)
+{
+ DAC960_LA_OutboundDoorBellRegister_T OutboundDoorBellRegister;
+ OutboundDoorBellRegister.All = 0;
+ OutboundDoorBellRegister.Write.AcknowledgeMemoryMailboxInterrupt = true;
+ writeb(OutboundDoorBellRegister.All,
+ ControllerBaseAddress + DAC960_LA_OutboundDoorBellRegisterOffset);
+}
+
+static inline
+void DAC960_LA_AcknowledgeInterrupt(void __iomem *ControllerBaseAddress)
+{
+ DAC960_LA_OutboundDoorBellRegister_T OutboundDoorBellRegister;
+ OutboundDoorBellRegister.All = 0;
+ OutboundDoorBellRegister.Write.AcknowledgeHardwareMailboxInterrupt = true;
+ OutboundDoorBellRegister.Write.AcknowledgeMemoryMailboxInterrupt = true;
+ writeb(OutboundDoorBellRegister.All,
+ ControllerBaseAddress + DAC960_LA_OutboundDoorBellRegisterOffset);
+}
+
+static inline
+bool DAC960_LA_HardwareMailboxStatusAvailableP(void __iomem *ControllerBaseAddress)
+{
+ DAC960_LA_OutboundDoorBellRegister_T OutboundDoorBellRegister;
+ OutboundDoorBellRegister.All =
+ readb(ControllerBaseAddress + DAC960_LA_OutboundDoorBellRegisterOffset);
+ return OutboundDoorBellRegister.Read.HardwareMailboxStatusAvailable;
+}
+
+static inline
+bool DAC960_LA_MemoryMailboxStatusAvailableP(void __iomem *ControllerBaseAddress)
+{
+ DAC960_LA_OutboundDoorBellRegister_T OutboundDoorBellRegister;
+ OutboundDoorBellRegister.All =
+ readb(ControllerBaseAddress + DAC960_LA_OutboundDoorBellRegisterOffset);
+ return OutboundDoorBellRegister.Read.MemoryMailboxStatusAvailable;
+}
+
+static inline
+void DAC960_LA_EnableInterrupts(void __iomem *ControllerBaseAddress)
+{
+ DAC960_LA_InterruptMaskRegister_T InterruptMaskRegister;
+ InterruptMaskRegister.All = 0xFF;
+ InterruptMaskRegister.Bits.DisableInterrupts = false;
+ writeb(InterruptMaskRegister.All,
+ ControllerBaseAddress + DAC960_LA_InterruptMaskRegisterOffset);
+}
+
+static inline
+void DAC960_LA_DisableInterrupts(void __iomem *ControllerBaseAddress)
+{
+ DAC960_LA_InterruptMaskRegister_T InterruptMaskRegister;
+ InterruptMaskRegister.All = 0xFF;
+ InterruptMaskRegister.Bits.DisableInterrupts = true;
+ writeb(InterruptMaskRegister.All,
+ ControllerBaseAddress + DAC960_LA_InterruptMaskRegisterOffset);
+}
+
+static inline
+bool DAC960_LA_InterruptsEnabledP(void __iomem *ControllerBaseAddress)
+{
+ DAC960_LA_InterruptMaskRegister_T InterruptMaskRegister;
+ InterruptMaskRegister.All =
+ readb(ControllerBaseAddress + DAC960_LA_InterruptMaskRegisterOffset);
+ return !InterruptMaskRegister.Bits.DisableInterrupts;
+}
+
+static inline
+void DAC960_LA_WriteCommandMailbox(DAC960_V1_CommandMailbox_T
+ *MemoryCommandMailbox,
+ DAC960_V1_CommandMailbox_T
+ *CommandMailbox)
+{
+ MemoryCommandMailbox->Words[1] = CommandMailbox->Words[1];
+ MemoryCommandMailbox->Words[2] = CommandMailbox->Words[2];
+ MemoryCommandMailbox->Words[3] = CommandMailbox->Words[3];
+ wmb();
+ MemoryCommandMailbox->Words[0] = CommandMailbox->Words[0];
+ mb();
+}
+
+static inline
+void DAC960_LA_WriteHardwareMailbox(void __iomem *ControllerBaseAddress,
+ DAC960_V1_CommandMailbox_T *CommandMailbox)
+{
+ writel(CommandMailbox->Words[0],
+ ControllerBaseAddress + DAC960_LA_CommandOpcodeRegisterOffset);
+ writel(CommandMailbox->Words[1],
+ ControllerBaseAddress + DAC960_LA_MailboxRegister4Offset);
+ writel(CommandMailbox->Words[2],
+ ControllerBaseAddress + DAC960_LA_MailboxRegister8Offset);
+ writeb(CommandMailbox->Bytes[12],
+ ControllerBaseAddress + DAC960_LA_MailboxRegister12Offset);
+}
+
+static inline DAC960_V1_CommandIdentifier_T
+DAC960_LA_ReadStatusCommandIdentifier(void __iomem *ControllerBaseAddress)
+{
+ return readb(ControllerBaseAddress
+ + DAC960_LA_StatusCommandIdentifierRegOffset);
+}
+
+static inline DAC960_V1_CommandStatus_T
+DAC960_LA_ReadStatusRegister(void __iomem *ControllerBaseAddress)
+{
+ return readw(ControllerBaseAddress + DAC960_LA_StatusRegisterOffset);
+}
+
+static inline bool
+DAC960_LA_ReadErrorStatus(void __iomem *ControllerBaseAddress,
+ unsigned char *ErrorStatus,
+ unsigned char *Parameter0,
+ unsigned char *Parameter1)
+{
+ DAC960_LA_ErrorStatusRegister_T ErrorStatusRegister;
+ ErrorStatusRegister.All =
+ readb(ControllerBaseAddress + DAC960_LA_ErrorStatusRegisterOffset);
+ if (!ErrorStatusRegister.Bits.ErrorStatusPending) return false;
+ ErrorStatusRegister.Bits.ErrorStatusPending = false;
+ *ErrorStatus = ErrorStatusRegister.All;
+ *Parameter0 =
+ readb(ControllerBaseAddress + DAC960_LA_CommandOpcodeRegisterOffset);
+ *Parameter1 =
+ readb(ControllerBaseAddress + DAC960_LA_CommandIdentifierRegisterOffset);
+ writeb(0xFF, ControllerBaseAddress + DAC960_LA_ErrorStatusRegisterOffset);
+ return true;
+}
+
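+/*
+  A minimal sketch, kept under #if 0, of the corresponding V1 style hardware
+  mailbox handshake using the LA helpers above: the mailbox words are written
+  into the mailbox registers, the doorbell is rung, and completion is read
+  back through the status register before both acknowledgements.  The
+  function name and the busy-wait loops are illustrative only.
+*/
+
+#if 0
+static DAC960_V1_CommandStatus_T
+DAC960_LA_ExampleHardwareMailboxCommand(void __iomem *ControllerBaseAddress,
+                                        DAC960_V1_CommandMailbox_T *CommandMailbox)
+{
+  DAC960_V1_CommandStatus_T CommandStatus;
+  /* Wait for any previous hardware mailbox command to drain. */
+  while (DAC960_LA_HardwareMailboxFullP(ControllerBaseAddress))
+    udelay(1);
+  DAC960_LA_WriteHardwareMailbox(ControllerBaseAddress, CommandMailbox);
+  DAC960_LA_HardwareMailboxNewCommand(ControllerBaseAddress);
+  /* Poll for completion status, then read and acknowledge it. */
+  while (!DAC960_LA_HardwareMailboxStatusAvailableP(ControllerBaseAddress))
+    udelay(1);
+  CommandStatus = DAC960_LA_ReadStatusRegister(ControllerBaseAddress);
+  DAC960_LA_AcknowledgeHardwareMailboxInterrupt(ControllerBaseAddress);
+  DAC960_LA_AcknowledgeHardwareMailboxStatus(ControllerBaseAddress);
+  return CommandStatus;
+}
+#endif
+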
+/*
+ Define the DAC960 PG Series Controller Interface Register Offsets.
+*/
+
+#define DAC960_PG_RegisterWindowSize 0x2000
+
+typedef enum
+{
+ DAC960_PG_InboundDoorBellRegisterOffset = 0x0020,
+ DAC960_PG_OutboundDoorBellRegisterOffset = 0x002C,
+ DAC960_PG_InterruptMaskRegisterOffset = 0x0034,
+ DAC960_PG_CommandOpcodeRegisterOffset = 0x1000,
+ DAC960_PG_CommandIdentifierRegisterOffset = 0x1001,
+ DAC960_PG_MailboxRegister2Offset = 0x1002,
+ DAC960_PG_MailboxRegister3Offset = 0x1003,
+ DAC960_PG_MailboxRegister4Offset = 0x1004,
+ DAC960_PG_MailboxRegister5Offset = 0x1005,
+ DAC960_PG_MailboxRegister6Offset = 0x1006,
+ DAC960_PG_MailboxRegister7Offset = 0x1007,
+ DAC960_PG_MailboxRegister8Offset = 0x1008,
+ DAC960_PG_MailboxRegister9Offset = 0x1009,
+ DAC960_PG_MailboxRegister10Offset = 0x100A,
+ DAC960_PG_MailboxRegister11Offset = 0x100B,
+ DAC960_PG_MailboxRegister12Offset = 0x100C,
+ DAC960_PG_StatusCommandIdentifierRegOffset = 0x1018,
+ DAC960_PG_StatusRegisterOffset = 0x101A,
+ DAC960_PG_ErrorStatusRegisterOffset = 0x103F
+}
+DAC960_PG_RegisterOffsets_T;
+
+
+/*
+ Define the structure of the DAC960 PG Series Inbound Door Bell Register.
+*/
+
+typedef union DAC960_PG_InboundDoorBellRegister
+{
+ unsigned int All;
+ struct {
+ bool HardwareMailboxNewCommand:1; /* Bit 0 */
+ bool AcknowledgeHardwareMailboxStatus:1; /* Bit 1 */
+ bool GenerateInterrupt:1; /* Bit 2 */
+ bool ControllerReset:1; /* Bit 3 */
+ bool MemoryMailboxNewCommand:1; /* Bit 4 */
+ unsigned int :27; /* Bits 5-31 */
+ } Write;
+ struct {
+ bool HardwareMailboxFull:1; /* Bit 0 */
+ bool InitializationInProgress:1; /* Bit 1 */
+ unsigned int :30; /* Bits 2-31 */
+ } Read;
+}
+DAC960_PG_InboundDoorBellRegister_T;
+
+
+/*
+ Define the structure of the DAC960 PG Series Outbound Door Bell Register.
+*/
+
+typedef union DAC960_PG_OutboundDoorBellRegister
+{
+ unsigned int All;
+ struct {
+ bool AcknowledgeHardwareMailboxInterrupt:1; /* Bit 0 */
+ bool AcknowledgeMemoryMailboxInterrupt:1; /* Bit 1 */
+ unsigned int :30; /* Bits 2-31 */
+ } Write;
+ struct {
+ bool HardwareMailboxStatusAvailable:1; /* Bit 0 */
+ bool MemoryMailboxStatusAvailable:1; /* Bit 1 */
+ unsigned int :30; /* Bits 2-31 */
+ } Read;
+}
+DAC960_PG_OutboundDoorBellRegister_T;
+
+
+/*
+ Define the structure of the DAC960 PG Series Interrupt Mask Register.
+*/
+
+typedef union DAC960_PG_InterruptMaskRegister
+{
+ unsigned int All;
+ struct {
+ unsigned int MessageUnitInterruptMask1:2; /* Bits 0-1 */
+ bool DisableInterrupts:1; /* Bit 2 */
+ unsigned int MessageUnitInterruptMask2:5; /* Bits 3-7 */
+ unsigned int Reserved0:24; /* Bits 8-31 */
+ } Bits;
+}
+DAC960_PG_InterruptMaskRegister_T;
+
+
+/*
+ Define the structure of the DAC960 PG Series Error Status Register.
+*/
+
+typedef union DAC960_PG_ErrorStatusRegister
+{
+ unsigned char All;
+ struct {
+ unsigned int :2; /* Bits 0-1 */
+ bool ErrorStatusPending:1; /* Bit 2 */
+ unsigned int :5; /* Bits 3-7 */
+ } Bits;
+}
+DAC960_PG_ErrorStatusRegister_T;
+
+
+/*
+ Define inline functions to provide an abstraction for reading and writing the
+ DAC960 PG Series Controller Interface Registers.
+*/
+
+static inline
+void DAC960_PG_HardwareMailboxNewCommand(void __iomem *ControllerBaseAddress)
+{
+ DAC960_PG_InboundDoorBellRegister_T InboundDoorBellRegister;
+ InboundDoorBellRegister.All = 0;
+ InboundDoorBellRegister.Write.HardwareMailboxNewCommand = true;
+ writel(InboundDoorBellRegister.All,
+ ControllerBaseAddress + DAC960_PG_InboundDoorBellRegisterOffset);
+}
+
+static inline
+void DAC960_PG_AcknowledgeHardwareMailboxStatus(void __iomem *ControllerBaseAddress)
+{
+ DAC960_PG_InboundDoorBellRegister_T InboundDoorBellRegister;
+ InboundDoorBellRegister.All = 0;
+ InboundDoorBellRegister.Write.AcknowledgeHardwareMailboxStatus = true;
+ writel(InboundDoorBellRegister.All,
+ ControllerBaseAddress + DAC960_PG_InboundDoorBellRegisterOffset);
+}
+
+static inline
+void DAC960_PG_GenerateInterrupt(void __iomem *ControllerBaseAddress)
+{
+ DAC960_PG_InboundDoorBellRegister_T InboundDoorBellRegister;
+ InboundDoorBellRegister.All = 0;
+ InboundDoorBellRegister.Write.GenerateInterrupt = true;
+ writel(InboundDoorBellRegister.All,
+ ControllerBaseAddress + DAC960_PG_InboundDoorBellRegisterOffset);
+}
+
+static inline
+void DAC960_PG_ControllerReset(void __iomem *ControllerBaseAddress)
+{
+ DAC960_PG_InboundDoorBellRegister_T InboundDoorBellRegister;
+ InboundDoorBellRegister.All = 0;
+ InboundDoorBellRegister.Write.ControllerReset = true;
+ writel(InboundDoorBellRegister.All,
+ ControllerBaseAddress + DAC960_PG_InboundDoorBellRegisterOffset);
+}
+
+static inline
+void DAC960_PG_MemoryMailboxNewCommand(void __iomem *ControllerBaseAddress)
+{
+ DAC960_PG_InboundDoorBellRegister_T InboundDoorBellRegister;
+ InboundDoorBellRegister.All = 0;
+ InboundDoorBellRegister.Write.MemoryMailboxNewCommand = true;
+ writel(InboundDoorBellRegister.All,
+ ControllerBaseAddress + DAC960_PG_InboundDoorBellRegisterOffset);
+}
+
+static inline
+bool DAC960_PG_HardwareMailboxFullP(void __iomem *ControllerBaseAddress)
+{
+ DAC960_PG_InboundDoorBellRegister_T InboundDoorBellRegister;
+ InboundDoorBellRegister.All =
+ readl(ControllerBaseAddress + DAC960_PG_InboundDoorBellRegisterOffset);
+ return InboundDoorBellRegister.Read.HardwareMailboxFull;
+}
+
+static inline
+bool DAC960_PG_InitializationInProgressP(void __iomem *ControllerBaseAddress)
+{
+ DAC960_PG_InboundDoorBellRegister_T InboundDoorBellRegister;
+ InboundDoorBellRegister.All =
+ readl(ControllerBaseAddress + DAC960_PG_InboundDoorBellRegisterOffset);
+ return InboundDoorBellRegister.Read.InitializationInProgress;
+}
+
+static inline
+void DAC960_PG_AcknowledgeHardwareMailboxInterrupt(void __iomem *ControllerBaseAddress)
+{
+ DAC960_PG_OutboundDoorBellRegister_T OutboundDoorBellRegister;
+ OutboundDoorBellRegister.All = 0;
+ OutboundDoorBellRegister.Write.AcknowledgeHardwareMailboxInterrupt = true;
+ writel(OutboundDoorBellRegister.All,
+ ControllerBaseAddress + DAC960_PG_OutboundDoorBellRegisterOffset);
+}
+
+static inline
+void DAC960_PG_AcknowledgeMemoryMailboxInterrupt(void __iomem *ControllerBaseAddress)
+{
+ DAC960_PG_OutboundDoorBellRegister_T OutboundDoorBellRegister;
+ OutboundDoorBellRegister.All = 0;
+ OutboundDoorBellRegister.Write.AcknowledgeMemoryMailboxInterrupt = true;
+ writel(OutboundDoorBellRegister.All,
+ ControllerBaseAddress + DAC960_PG_OutboundDoorBellRegisterOffset);
+}
+
+static inline
+void DAC960_PG_AcknowledgeInterrupt(void __iomem *ControllerBaseAddress)
+{
+ DAC960_PG_OutboundDoorBellRegister_T OutboundDoorBellRegister;
+ OutboundDoorBellRegister.All = 0;
+ OutboundDoorBellRegister.Write.AcknowledgeHardwareMailboxInterrupt = true;
+ OutboundDoorBellRegister.Write.AcknowledgeMemoryMailboxInterrupt = true;
+ writel(OutboundDoorBellRegister.All,
+ ControllerBaseAddress + DAC960_PG_OutboundDoorBellRegisterOffset);
+}
+
+static inline
+bool DAC960_PG_HardwareMailboxStatusAvailableP(void __iomem *ControllerBaseAddress)
+{
+ DAC960_PG_OutboundDoorBellRegister_T OutboundDoorBellRegister;
+ OutboundDoorBellRegister.All =
+ readl(ControllerBaseAddress + DAC960_PG_OutboundDoorBellRegisterOffset);
+ return OutboundDoorBellRegister.Read.HardwareMailboxStatusAvailable;
+}
+
+static inline
+bool DAC960_PG_MemoryMailboxStatusAvailableP(void __iomem *ControllerBaseAddress)
+{
+ DAC960_PG_OutboundDoorBellRegister_T OutboundDoorBellRegister;
+ OutboundDoorBellRegister.All =
+ readl(ControllerBaseAddress + DAC960_PG_OutboundDoorBellRegisterOffset);
+ return OutboundDoorBellRegister.Read.MemoryMailboxStatusAvailable;
+}
+
+static inline
+void DAC960_PG_EnableInterrupts(void __iomem *ControllerBaseAddress)
+{
+ DAC960_PG_InterruptMaskRegister_T InterruptMaskRegister;
+ InterruptMaskRegister.All = 0;
+ InterruptMaskRegister.Bits.MessageUnitInterruptMask1 = 0x3;
+ InterruptMaskRegister.Bits.DisableInterrupts = false;
+ InterruptMaskRegister.Bits.MessageUnitInterruptMask2 = 0x1F;
+ writel(InterruptMaskRegister.All,
+ ControllerBaseAddress + DAC960_PG_InterruptMaskRegisterOffset);
+}
+
+static inline
+void DAC960_PG_DisableInterrupts(void __iomem *ControllerBaseAddress)
+{
+ DAC960_PG_InterruptMaskRegister_T InterruptMaskRegister;
+ InterruptMaskRegister.All = 0;
+ InterruptMaskRegister.Bits.MessageUnitInterruptMask1 = 0x3;
+ InterruptMaskRegister.Bits.DisableInterrupts = true;
+ InterruptMaskRegister.Bits.MessageUnitInterruptMask2 = 0x1F;
+ writel(InterruptMaskRegister.All,
+ ControllerBaseAddress + DAC960_PG_InterruptMaskRegisterOffset);
+}
+
+static inline
+bool DAC960_PG_InterruptsEnabledP(void __iomem *ControllerBaseAddress)
+{
+ DAC960_PG_InterruptMaskRegister_T InterruptMaskRegister;
+ InterruptMaskRegister.All =
+ readl(ControllerBaseAddress + DAC960_PG_InterruptMaskRegisterOffset);
+ return !InterruptMaskRegister.Bits.DisableInterrupts;
+}
+
+static inline
+void DAC960_PG_WriteCommandMailbox(DAC960_V1_CommandMailbox_T
+ *MemoryCommandMailbox,
+ DAC960_V1_CommandMailbox_T
+ *CommandMailbox)
+{
+ MemoryCommandMailbox->Words[1] = CommandMailbox->Words[1];
+ MemoryCommandMailbox->Words[2] = CommandMailbox->Words[2];
+ MemoryCommandMailbox->Words[3] = CommandMailbox->Words[3];
+ wmb();
+ MemoryCommandMailbox->Words[0] = CommandMailbox->Words[0];
+ mb();
+}
+
+static inline
+void DAC960_PG_WriteHardwareMailbox(void __iomem *ControllerBaseAddress,
+ DAC960_V1_CommandMailbox_T *CommandMailbox)
+{
+ writel(CommandMailbox->Words[0],
+ ControllerBaseAddress + DAC960_PG_CommandOpcodeRegisterOffset);
+ writel(CommandMailbox->Words[1],
+ ControllerBaseAddress + DAC960_PG_MailboxRegister4Offset);
+ writel(CommandMailbox->Words[2],
+ ControllerBaseAddress + DAC960_PG_MailboxRegister8Offset);
+ writeb(CommandMailbox->Bytes[12],
+ ControllerBaseAddress + DAC960_PG_MailboxRegister12Offset);
+}
+
+static inline DAC960_V1_CommandIdentifier_T
+DAC960_PG_ReadStatusCommandIdentifier(void __iomem *ControllerBaseAddress)
+{
+ return readb(ControllerBaseAddress
+ + DAC960_PG_StatusCommandIdentifierRegOffset);
+}
+
+static inline DAC960_V1_CommandStatus_T
+DAC960_PG_ReadStatusRegister(void __iomem *ControllerBaseAddress)
+{
+ return readw(ControllerBaseAddress + DAC960_PG_StatusRegisterOffset);
+}
+
+static inline bool
+DAC960_PG_ReadErrorStatus(void __iomem *ControllerBaseAddress,
+ unsigned char *ErrorStatus,
+ unsigned char *Parameter0,
+ unsigned char *Parameter1)
+{
+ DAC960_PG_ErrorStatusRegister_T ErrorStatusRegister;
+ ErrorStatusRegister.All =
+ readb(ControllerBaseAddress + DAC960_PG_ErrorStatusRegisterOffset);
+ if (!ErrorStatusRegister.Bits.ErrorStatusPending) return false;
+ ErrorStatusRegister.Bits.ErrorStatusPending = false;
+ *ErrorStatus = ErrorStatusRegister.All;
+ *Parameter0 =
+ readb(ControllerBaseAddress + DAC960_PG_CommandOpcodeRegisterOffset);
+ *Parameter1 =
+ readb(ControllerBaseAddress + DAC960_PG_CommandIdentifierRegisterOffset);
+ writeb(0, ControllerBaseAddress + DAC960_PG_ErrorStatusRegisterOffset);
+ return true;
+}
+
+/*
+ Define the DAC960 PD Series Controller Interface Register Offsets.
+*/
+
+#define DAC960_PD_RegisterWindowSize 0x80
+
+typedef enum
+{
+ DAC960_PD_CommandOpcodeRegisterOffset = 0x00,
+ DAC960_PD_CommandIdentifierRegisterOffset = 0x01,
+ DAC960_PD_MailboxRegister2Offset = 0x02,
+ DAC960_PD_MailboxRegister3Offset = 0x03,
+ DAC960_PD_MailboxRegister4Offset = 0x04,
+ DAC960_PD_MailboxRegister5Offset = 0x05,
+ DAC960_PD_MailboxRegister6Offset = 0x06,
+ DAC960_PD_MailboxRegister7Offset = 0x07,
+ DAC960_PD_MailboxRegister8Offset = 0x08,
+ DAC960_PD_MailboxRegister9Offset = 0x09,
+ DAC960_PD_MailboxRegister10Offset = 0x0A,
+ DAC960_PD_MailboxRegister11Offset = 0x0B,
+ DAC960_PD_MailboxRegister12Offset = 0x0C,
+ DAC960_PD_StatusCommandIdentifierRegOffset = 0x0D,
+ DAC960_PD_StatusRegisterOffset = 0x0E,
+ DAC960_PD_ErrorStatusRegisterOffset = 0x3F,
+ DAC960_PD_InboundDoorBellRegisterOffset = 0x40,
+ DAC960_PD_OutboundDoorBellRegisterOffset = 0x41,
+ DAC960_PD_InterruptEnableRegisterOffset = 0x43
+}
+DAC960_PD_RegisterOffsets_T;
+
+
+/*
+ Define the structure of the DAC960 PD Series Inbound Door Bell Register.
+*/
+
+typedef union DAC960_PD_InboundDoorBellRegister
+{
+ unsigned char All;
+ struct {
+ bool NewCommand:1; /* Bit 0 */
+ bool AcknowledgeStatus:1; /* Bit 1 */
+ bool GenerateInterrupt:1; /* Bit 2 */
+ bool ControllerReset:1; /* Bit 3 */
+ unsigned char :4; /* Bits 4-7 */
+ } Write;
+ struct {
+ bool MailboxFull:1; /* Bit 0 */
+ bool InitializationInProgress:1; /* Bit 1 */
+ unsigned char :6; /* Bits 2-7 */
+ } Read;
+}
+DAC960_PD_InboundDoorBellRegister_T;
+
+
+/*
+ Define the structure of the DAC960 PD Series Outbound Door Bell Register.
+*/
+
+typedef union DAC960_PD_OutboundDoorBellRegister
+{
+ unsigned char All;
+ struct {
+ bool AcknowledgeInterrupt:1; /* Bit 0 */
+ unsigned char :7; /* Bits 1-7 */
+ } Write;
+ struct {
+ bool StatusAvailable:1; /* Bit 0 */
+ unsigned char :7; /* Bits 1-7 */
+ } Read;
+}
+DAC960_PD_OutboundDoorBellRegister_T;
+
+
+/*
+ Define the structure of the DAC960 PD Series Interrupt Enable Register.
+*/
+
+typedef union DAC960_PD_InterruptEnableRegister
+{
+ unsigned char All;
+ struct {
+ bool EnableInterrupts:1; /* Bit 0 */
+ unsigned char :7; /* Bits 1-7 */
+ } Bits;
+}
+DAC960_PD_InterruptEnableRegister_T;
+
+
+/*
+ Define the structure of the DAC960 PD Series Error Status Register.
+*/
+
+typedef union DAC960_PD_ErrorStatusRegister
+{
+ unsigned char All;
+ struct {
+ unsigned int :2; /* Bits 0-1 */
+ bool ErrorStatusPending:1; /* Bit 2 */
+ unsigned int :5; /* Bits 3-7 */
+ } Bits;
+}
+DAC960_PD_ErrorStatusRegister_T;
+
+
+/*
+ Define inline functions to provide an abstraction for reading and writing the
+ DAC960 PD Series Controller Interface Registers.
+*/
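+
+/*
+  A minimal sketch of how these helpers fit together (not the driver's exact
+  code path): a command is normally submitted by waiting until
+  DAC960_PD_MailboxFullP() returns false, calling
+  DAC960_PD_WriteCommandMailbox() and then DAC960_PD_NewCommand(); completion
+  is detected via DAC960_PD_StatusAvailableP() and acknowledged with
+  DAC960_PD_AcknowledgeInterrupt() followed by DAC960_PD_AcknowledgeStatus().
+*/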
+
+static inline
+void DAC960_PD_NewCommand(void __iomem *ControllerBaseAddress)
+{
+ DAC960_PD_InboundDoorBellRegister_T InboundDoorBellRegister;
+ InboundDoorBellRegister.All = 0;
+ InboundDoorBellRegister.Write.NewCommand = true;
+ writeb(InboundDoorBellRegister.All,
+ ControllerBaseAddress + DAC960_PD_InboundDoorBellRegisterOffset);
+}
+
+static inline
+void DAC960_PD_AcknowledgeStatus(void __iomem *ControllerBaseAddress)
+{
+ DAC960_PD_InboundDoorBellRegister_T InboundDoorBellRegister;
+ InboundDoorBellRegister.All = 0;
+ InboundDoorBellRegister.Write.AcknowledgeStatus = true;
+ writeb(InboundDoorBellRegister.All,
+ ControllerBaseAddress + DAC960_PD_InboundDoorBellRegisterOffset);
+}
+
+static inline
+void DAC960_PD_GenerateInterrupt(void __iomem *ControllerBaseAddress)
+{
+ DAC960_PD_InboundDoorBellRegister_T InboundDoorBellRegister;
+ InboundDoorBellRegister.All = 0;
+ InboundDoorBellRegister.Write.GenerateInterrupt = true;
+ writeb(InboundDoorBellRegister.All,
+ ControllerBaseAddress + DAC960_PD_InboundDoorBellRegisterOffset);
+}
+
+static inline
+void DAC960_PD_ControllerReset(void __iomem *ControllerBaseAddress)
+{
+ DAC960_PD_InboundDoorBellRegister_T InboundDoorBellRegister;
+ InboundDoorBellRegister.All = 0;
+ InboundDoorBellRegister.Write.ControllerReset = true;
+ writeb(InboundDoorBellRegister.All,
+ ControllerBaseAddress + DAC960_PD_InboundDoorBellRegisterOffset);
+}
+
+static inline
+bool DAC960_PD_MailboxFullP(void __iomem *ControllerBaseAddress)
+{
+ DAC960_PD_InboundDoorBellRegister_T InboundDoorBellRegister;
+ InboundDoorBellRegister.All =
+ readb(ControllerBaseAddress + DAC960_PD_InboundDoorBellRegisterOffset);
+ return InboundDoorBellRegister.Read.MailboxFull;
+}
+
+static inline
+bool DAC960_PD_InitializationInProgressP(void __iomem *ControllerBaseAddress)
+{
+ DAC960_PD_InboundDoorBellRegister_T InboundDoorBellRegister;
+ InboundDoorBellRegister.All =
+ readb(ControllerBaseAddress + DAC960_PD_InboundDoorBellRegisterOffset);
+ return InboundDoorBellRegister.Read.InitializationInProgress;
+}
+
+static inline
+void DAC960_PD_AcknowledgeInterrupt(void __iomem *ControllerBaseAddress)
+{
+ DAC960_PD_OutboundDoorBellRegister_T OutboundDoorBellRegister;
+ OutboundDoorBellRegister.All = 0;
+ OutboundDoorBellRegister.Write.AcknowledgeInterrupt = true;
+ writeb(OutboundDoorBellRegister.All,
+ ControllerBaseAddress + DAC960_PD_OutboundDoorBellRegisterOffset);
+}
+
+static inline
+bool DAC960_PD_StatusAvailableP(void __iomem *ControllerBaseAddress)
+{
+ DAC960_PD_OutboundDoorBellRegister_T OutboundDoorBellRegister;
+ OutboundDoorBellRegister.All =
+ readb(ControllerBaseAddress + DAC960_PD_OutboundDoorBellRegisterOffset);
+ return OutboundDoorBellRegister.Read.StatusAvailable;
+}
+
+static inline
+void DAC960_PD_EnableInterrupts(void __iomem *ControllerBaseAddress)
+{
+ DAC960_PD_InterruptEnableRegister_T InterruptEnableRegister;
+ InterruptEnableRegister.All = 0;
+ InterruptEnableRegister.Bits.EnableInterrupts = true;
+ writeb(InterruptEnableRegister.All,
+ ControllerBaseAddress + DAC960_PD_InterruptEnableRegisterOffset);
+}
+
+static inline
+void DAC960_PD_DisableInterrupts(void __iomem *ControllerBaseAddress)
+{
+ DAC960_PD_InterruptEnableRegister_T InterruptEnableRegister;
+ InterruptEnableRegister.All = 0;
+ InterruptEnableRegister.Bits.EnableInterrupts = false;
+ writeb(InterruptEnableRegister.All,
+ ControllerBaseAddress + DAC960_PD_InterruptEnableRegisterOffset);
+}
+
+static inline
+bool DAC960_PD_InterruptsEnabledP(void __iomem *ControllerBaseAddress)
+{
+ DAC960_PD_InterruptEnableRegister_T InterruptEnableRegister;
+ InterruptEnableRegister.All =
+ readb(ControllerBaseAddress + DAC960_PD_InterruptEnableRegisterOffset);
+ return InterruptEnableRegister.Bits.EnableInterrupts;
+}
+
+static inline
+void DAC960_PD_WriteCommandMailbox(void __iomem *ControllerBaseAddress,
+ DAC960_V1_CommandMailbox_T *CommandMailbox)
+{
+ writel(CommandMailbox->Words[0],
+ ControllerBaseAddress + DAC960_PD_CommandOpcodeRegisterOffset);
+ writel(CommandMailbox->Words[1],
+ ControllerBaseAddress + DAC960_PD_MailboxRegister4Offset);
+ writel(CommandMailbox->Words[2],
+ ControllerBaseAddress + DAC960_PD_MailboxRegister8Offset);
+ writeb(CommandMailbox->Bytes[12],
+ ControllerBaseAddress + DAC960_PD_MailboxRegister12Offset);
+}
+
+static inline DAC960_V1_CommandIdentifier_T
+DAC960_PD_ReadStatusCommandIdentifier(void __iomem *ControllerBaseAddress)
+{
+ return readb(ControllerBaseAddress
+ + DAC960_PD_StatusCommandIdentifierRegOffset);
+}
+
+static inline DAC960_V1_CommandStatus_T
+DAC960_PD_ReadStatusRegister(void __iomem *ControllerBaseAddress)
+{
+ return readw(ControllerBaseAddress + DAC960_PD_StatusRegisterOffset);
+}
+
+static inline bool
+DAC960_PD_ReadErrorStatus(void __iomem *ControllerBaseAddress,
+ unsigned char *ErrorStatus,
+ unsigned char *Parameter0,
+ unsigned char *Parameter1)
+{
+ DAC960_PD_ErrorStatusRegister_T ErrorStatusRegister;
+ ErrorStatusRegister.All =
+ readb(ControllerBaseAddress + DAC960_PD_ErrorStatusRegisterOffset);
+ if (!ErrorStatusRegister.Bits.ErrorStatusPending) return false;
+ ErrorStatusRegister.Bits.ErrorStatusPending = false;
+ *ErrorStatus = ErrorStatusRegister.All;
+ *Parameter0 =
+ readb(ControllerBaseAddress + DAC960_PD_CommandOpcodeRegisterOffset);
+ *Parameter1 =
+ readb(ControllerBaseAddress + DAC960_PD_CommandIdentifierRegisterOffset);
+ writeb(0, ControllerBaseAddress + DAC960_PD_ErrorStatusRegisterOffset);
+ return true;
+}
+
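+/*
+  The older DAC960 P series returns Enquiry and Device State data, and takes
+  read/write commands, in a slightly different layout than the PD series.
+  The helpers below repack those buffers and mailboxes in place so that the
+  rest of the driver can treat both models through the PD interface.
+*/
+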
+static inline void DAC960_P_To_PD_TranslateEnquiry(void *Enquiry)
+{
+ memcpy(Enquiry + 132, Enquiry + 36, 64);
+ memset(Enquiry + 36, 0, 96);
+}
+
+static inline void DAC960_P_To_PD_TranslateDeviceState(void *DeviceState)
+{
+ memcpy(DeviceState + 2, DeviceState + 3, 1);
+ memmove(DeviceState + 4, DeviceState + 5, 2);
+ memmove(DeviceState + 6, DeviceState + 8, 4);
+}
+
+static inline
+void DAC960_PD_To_P_TranslateReadWriteCommand(DAC960_V1_CommandMailbox_T
+ *CommandMailbox)
+{
+ int LogicalDriveNumber = CommandMailbox->Type5.LD.LogicalDriveNumber;
+ CommandMailbox->Bytes[3] &= 0x7;
+ CommandMailbox->Bytes[3] |= CommandMailbox->Bytes[7] << 6;
+ CommandMailbox->Bytes[7] = LogicalDriveNumber;
+}
+
+static inline
+void DAC960_P_To_PD_TranslateReadWriteCommand(DAC960_V1_CommandMailbox_T
+ *CommandMailbox)
+{
+ int LogicalDriveNumber = CommandMailbox->Bytes[7];
+ CommandMailbox->Bytes[7] = CommandMailbox->Bytes[3] >> 6;
+ CommandMailbox->Bytes[3] &= 0x7;
+ CommandMailbox->Bytes[3] |= LogicalDriveNumber << 3;
+}
+
+
+/*
+ Define prototypes for the forward referenced DAC960 Driver Internal Functions.
+*/
+
+static void DAC960_FinalizeController(DAC960_Controller_T *);
+static void DAC960_V1_QueueReadWriteCommand(DAC960_Command_T *);
+static void DAC960_V2_QueueReadWriteCommand(DAC960_Command_T *);
+static void DAC960_RequestFunction(struct request_queue *);
+static irqreturn_t DAC960_BA_InterruptHandler(int, void *);
+static irqreturn_t DAC960_LP_InterruptHandler(int, void *);
+static irqreturn_t DAC960_LA_InterruptHandler(int, void *);
+static irqreturn_t DAC960_PG_InterruptHandler(int, void *);
+static irqreturn_t DAC960_PD_InterruptHandler(int, void *);
+static irqreturn_t DAC960_P_InterruptHandler(int, void *);
+static void DAC960_V1_QueueMonitoringCommand(DAC960_Command_T *);
+static void DAC960_V2_QueueMonitoringCommand(DAC960_Command_T *);
+static void DAC960_MonitoringTimerFunction(unsigned long);
+static void DAC960_Message(DAC960_MessageLevel_T, unsigned char *,
+ DAC960_Controller_T *, ...);
+static void DAC960_CreateProcEntries(DAC960_Controller_T *);
+static void DAC960_DestroyProcEntries(DAC960_Controller_T *);
+
+#endif /* DAC960_DriverVersion */
diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig
new file mode 100644
index 00000000..717d6e4e
--- /dev/null
+++ b/drivers/block/Kconfig
@@ -0,0 +1,530 @@
+#
+# Block device driver configuration
+#
+
+menuconfig BLK_DEV
+ bool "Block devices"
+ depends on BLOCK
+ default y
+ ---help---
+ Say Y here to get to see options for various different block device
+ drivers. This option alone does not add any kernel code.
+
+ If you say N, all options in this submenu will be skipped and disabled;
+ only do this if you know what you are doing.
+
+if BLK_DEV
+
+config BLK_DEV_FD
+ tristate "Normal floppy disk support"
+ depends on ARCH_MAY_HAVE_PC_FDC
+ ---help---
+ If you want to use the floppy disk drive(s) of your PC under Linux,
+ say Y. Information about this driver, especially important for IBM
+ Thinkpad users, is contained in
+ <file:Documentation/blockdev/floppy.txt>.
+ That file also contains the location of the Floppy driver FAQ as
+ well as the location of the fdutils package used to configure additional
+ parameters of the driver at run time.
+
+ To compile this driver as a module, choose M here: the
+ module will be called floppy.
+
+config AMIGA_FLOPPY
+ tristate "Amiga floppy support"
+ depends on AMIGA
+
+config ATARI_FLOPPY
+ tristate "Atari floppy support"
+ depends on ATARI
+
+config MAC_FLOPPY
+ tristate "Support for PowerMac floppy"
+ depends on PPC_PMAC && !PPC_PMAC64
+ help
+ If you have a SWIM-3 (Super Woz Integrated Machine 3; from Apple)
+ floppy controller, say Y here. Most commonly found in PowerMacs.
+
+config BLK_DEV_SWIM
+ tristate "Support for SWIM Macintosh floppy"
+ depends on M68K && MAC
+ help
+ You should select this option if you want floppy support
+ and you don't have a II, IIfx, Q900, Q950 or AV series.
+
+config AMIGA_Z2RAM
+ tristate "Amiga Zorro II ramdisk support"
+ depends on ZORRO
+ help
+ This enables support for using Chip RAM and Zorro II RAM as a
+ ramdisk or as a swap partition. Say Y if you want to include this
+ driver in the kernel.
+
+ To compile this driver as a module, choose M here: the
+ module will be called z2ram.
+
+config BLK_DEV_XD
+ tristate "XT hard disk support"
+ depends on ISA && ISA_DMA_API
+ select CHECK_SIGNATURE
+ help
+ Very old 8 bit hard disk controllers used in the IBM XT computer
+ will be supported if you say Y here.
+
+ To compile this driver as a module, choose M here: the
+ module will be called xd.
+
+ It's pretty unlikely that you have one of these: say N.
+
+config GDROM
+ tristate "SEGA Dreamcast GD-ROM drive"
+ depends on SH_DREAMCAST
+ help
+ A standard SEGA Dreamcast comes with a modified CD ROM drive called a
+ "GD-ROM" by SEGA to signify it is capable of reading special disks
+ with up to 1 GB of data. This drive will also read standard CD ROM
+ disks. Select this option to access any disks in your GD ROM drive.
+ Most users will want to say "Y" here.
+ You can also build this as a module which will be called gdrom.
+
+config PARIDE
+ tristate "Parallel port IDE device support"
+ depends on PARPORT_PC
+ ---help---
+ There are many external CD-ROM and disk devices that connect through
+ your computer's parallel port. Most of them are actually IDE devices
+ using a parallel port IDE adapter. This option enables the PARIDE
+ subsystem which contains drivers for many of these external drives.
+ Read <file:Documentation/blockdev/paride.txt> for more information.
+
+ If you have said Y to the "Parallel-port support" configuration
+ option, you may share a single port between your printer and other
+ parallel port devices. Answer Y to build PARIDE support into your
+ kernel, or M if you would like to build it as a loadable module. If
+ your parallel port support is in a loadable module, you must build
+ PARIDE as a module. If you built PARIDE support into your kernel,
+ you may still build the individual protocol modules and high-level
+ drivers as loadable modules. If you build this support as a module,
+ it will be called paride.
+
+ To use the PARIDE support, you must say Y or M here and also to at
+ least one high-level driver (e.g. "Parallel port IDE disks",
+ "Parallel port ATAPI CD-ROMs", "Parallel port ATAPI disks" etc.) and
+ to at least one protocol driver (e.g. "ATEN EH-100 protocol",
+ "MicroSolutions backpack protocol", "DataStor Commuter protocol"
+ etc.).
+
+source "drivers/block/paride/Kconfig"
+
+config BLK_CPQ_DA
+ tristate "Compaq SMART2 support"
+ depends on PCI && VIRT_TO_BUS
+ help
+ This is the driver for Compaq Smart Array controllers. Everyone
+ using these boards should say Y here. See the file
+ <file:Documentation/blockdev/cpqarray.txt> for the current list of
+ boards supported by this driver, and for further information on the
+ use of this driver.
+
+config BLK_CPQ_CISS_DA
+ tristate "Compaq Smart Array 5xxx support"
+ depends on PCI
+ help
+ This is the driver for Compaq Smart Array 5xxx controllers.
+ Everyone using these boards should say Y here.
+ See <file:Documentation/blockdev/cciss.txt> for the current list of
+ boards supported by this driver, and for further information
+ on the use of this driver.
+
+config CISS_SCSI_TAPE
+ bool "SCSI tape drive support for Smart Array 5xxx"
+ depends on BLK_CPQ_CISS_DA && PROC_FS
+ depends on SCSI=y || SCSI=BLK_CPQ_CISS_DA
+ help
+ When enabled (Y), this option allows SCSI tape drives and SCSI medium
+ changers (tape robots) to be accessed via a Compaq 5xxx array
+ controller. (See <file:Documentation/blockdev/cciss.txt> for more details.)
+
+ "SCSI support" and "SCSI tape support" must also be enabled for this
+ option to work.
+
+ When this option is disabled (N), the SCSI portion of the driver
+ is not compiled.
+
+config BLK_DEV_DAC960
+ tristate "Mylex DAC960/DAC1100 PCI RAID Controller support"
+ depends on PCI
+ help
+ This driver adds support for the Mylex DAC960, AcceleRAID, and
+ eXtremeRAID PCI RAID controllers. See the file
+ <file:Documentation/blockdev/README.DAC960> for further information
+ about this driver.
+
+ To compile this driver as a module, choose M here: the
+ module will be called DAC960.
+
+config BLK_DEV_UMEM
+ tristate "Micro Memory MM5415 Battery Backed RAM support (EXPERIMENTAL)"
+ depends on PCI && EXPERIMENTAL
+ ---help---
+ Saying Y here will include support for the MM5415 family of
+ battery backed (Non-volatile) RAM cards.
+ <http://www.umem.com/>
+
+ The cards appear as block devices that can be partitioned into
+ as many as 15 partitions.
+
+ To compile this driver as a module, choose M here: the
+ module will be called umem.
+
+ The umem driver has not yet been allocated a MAJOR number, so
+ one is chosen dynamically.
+
+config BLK_DEV_UBD
+ bool "Virtual block device"
+ depends on UML
+ ---help---
+ The User-Mode Linux port includes a driver called UBD which will let
+ you access arbitrary files on the host computer as block devices.
+ Unless you know that you do not need such virtual block devices say
+ Y here.
+
+config BLK_DEV_UBD_SYNC
+ bool "Always do synchronous disk IO for UBD"
+ depends on BLK_DEV_UBD
+ ---help---
+ Writes to the virtual block device are not immediately written to the
+ host's disk; this may cause problems if, for example, the User-Mode
+ Linux 'Virtual Machine' uses a journalling filesystem and the host
+ computer crashes.
+
+ Synchronous operation (i.e. always writing data to the host's disk
+ immediately) is configurable on a per-UBD basis by using a special
+ kernel command line option. Alternatively, you can say Y here to
+ turn on synchronous operation by default for all block devices.
+
+ If you're running a journalling file system (like reiserfs, for
+ example) in your virtual machine, you will want to say Y here. If
+ you care for the safety of the data in your virtual machine, Y is a
+ wise choice too. In all other cases (for example, if you're just
+ playing around with User-Mode Linux) you can choose N.
+
+config BLK_DEV_COW_COMMON
+ bool
+ default BLK_DEV_UBD
+
+config BLK_DEV_LOOP
+ tristate "Loopback device support"
+ ---help---
+ Saying Y here will allow you to use a regular file as a block
+ device; you can then create a file system on that block device and
+ mount it just as you would mount other block devices such as hard
+ drive partitions, CD-ROM drives or floppy drives. The loop devices
+ are block special device files with major number 7 and typically
+ called /dev/loop0, /dev/loop1 etc.
+
+ This is useful if you want to check an ISO 9660 file system before
+ burning the CD, or if you want to use floppy images without first
+ writing them to floppy. Furthermore, some Linux distributions avoid
+ the need for a dedicated Linux partition by keeping their complete
+ root file system inside a DOS FAT file using this loop device
+ driver.
+
+ To use the loop device, you need the losetup utility, found in the
+ util-linux package, see
+ <ftp://ftp.kernel.org/pub/linux/utils/util-linux/>.
+
+ The loop device driver can also be used to "hide" a file system in
+ a disk partition, floppy, or regular file, either using encryption
+ (scrambling the data) or steganography (hiding the data in the low
+ bits of, say, a sound file). This is also safe if the file resides
+ on a remote file server.
+
+ There are several ways of encrypting disks. Some of these require
+ kernel patches. The vanilla kernel offers the cryptoloop option
+ and a Device Mapper target (which is superior, as it supports all
+ file systems). If you want to use the cryptoloop, say Y to both
+ LOOP and CRYPTOLOOP, and make sure you have a recent (version 2.12
+ or later) version of util-linux. Additionally, be aware that
+ the cryptoloop is not safe for storing journaled filesystems.
+
+ Note that this loop device has nothing to do with the loopback
+ device used for network connections from the machine to itself.
+
+ To compile this driver as a module, choose M here: the
+ module will be called loop.
+
+ Most users will answer N here.
+
+config BLK_DEV_CRYPTOLOOP
+ tristate "Cryptoloop Support"
+ select CRYPTO
+ select CRYPTO_CBC
+ depends on BLK_DEV_LOOP
+ ---help---
+ Say Y here if you want to be able to use the ciphers that are
+ provided by the CryptoAPI as loop transformation. This might be
+ used as hard disk encryption.
+
+ WARNING: This device is not safe for journaled file systems like
+ ext3 or Reiserfs. Please use the Device Mapper crypto module
+ instead, which can be configured to be on-disk compatible with the
+ cryptoloop device.
+
+source "drivers/block/drbd/Kconfig"
+
+config BLK_DEV_NBD
+ tristate "Network block device support"
+ depends on NET
+ ---help---
+ Saying Y here will allow your computer to be a client for network
+ block devices, i.e. it will be able to use block devices exported by
+ servers (mount file systems on them etc.). Communication between
+ client and server works over TCP/IP networking, but to the client
+ program this is hidden: it looks like a regular local file access to
+ a block device special file such as /dev/nd0.
+
+ Network block devices also allow you to run a block device in
+ userland (making server and client physically the same computer,
+ communicating using the loopback network device).
+
+ Read <file:Documentation/blockdev/nbd.txt> for more information,
+ especially about where to find the server code, which runs in user
+ space and does not need special kernel support.
+
+ Note that this has nothing to do with the network file systems NFS
+ or Coda; you can say N here even if you intend to use NFS or Coda.
+
+ To compile this driver as a module, choose M here: the
+ module will be called nbd.
+
+ If unsure, say N.
+
+config BLK_DEV_OSD
+ tristate "OSD object-as-blkdev support"
+ depends on SCSI_OSD_ULD
+ ---help---
+ Saying Y or M here will allow the exporting of a single SCSI
+ OSD (object-based storage) object as a Linux block device.
+
+ For example, if you create a 2G object on an OSD device,
+ you can then use this module to present that 2G object as
+ a Linux block device.
+
+ To compile this driver as a module, choose M here: the
+ module will be called osdblk.
+
+ If unsure, say N.
+
+config BLK_DEV_SX8
+ tristate "Promise SATA SX8 support"
+ depends on PCI
+ ---help---
+ Saying Y or M here will enable support for the
+ Promise SATA SX8 controllers.
+
+ Use devices /dev/sx8/$N and /dev/sx8/$Np$M.
+
+config BLK_DEV_UB
+ tristate "Low Performance USB Block driver"
+ depends on USB
+ help
+ This driver supports certain USB attached storage devices
+ such as flash keys.
+
+ If you enable this driver, it is recommended to avoid conflicts
+ with usb-storage by enabling USB_LIBUSUAL.
+
+ If unsure, say N.
+
+config BLK_DEV_RAM
+ tristate "RAM block device support"
+ ---help---
+ Saying Y here will allow you to use a portion of your RAM memory as
+ a block device, so that you can make file systems on it, read and
+ write to it and do all the other things that you can do with normal
+ block devices (such as hard drives). It is usually used to load and
+ store a copy of a minimal root file system off of a floppy into RAM
+ during the initial install of Linux.
+
+ Note that the kernel command line option "ramdisk=XX" is now obsolete.
+ For details, read <file:Documentation/blockdev/ramdisk.txt>.
+
+ To compile this driver as a module, choose M here: the
+ module will be called rd.
+
+ Most normal users won't need the RAM disk functionality, and can
+ thus say N here.
+
+config BLK_DEV_RAM_COUNT
+ int "Default number of RAM disks"
+ default "16"
+ depends on BLK_DEV_RAM
+ help
+ The default value is 16 RAM disks. Change this if you know what you
+ are doing. If you boot from a filesystem that needs to be extracted
+ in memory, you will need at least one RAM disk (e.g. root on cramfs).
+
+config BLK_DEV_RAM_SIZE
+ int "Default RAM disk size (kbytes)"
+ depends on BLK_DEV_RAM
+ default "4096"
+ help
+ The default value is 4096 kilobytes. Only change this if you know
+ what you are doing.
+
+config BLK_DEV_XIP
+ bool "Support XIP filesystems on RAM block device"
+ depends on BLK_DEV_RAM
+ default n
+ help
+ Support XIP filesystems (such as ext2 with XIP support enabled) on
+ top of the RAM block device. This will slightly enlarge the kernel, and
+ will prevent RAM block device backing store memory from being
+ allocated from highmem (only a problem for highmem systems).
+
+config CDROM_PKTCDVD
+ tristate "Packet writing on CD/DVD media"
+ depends on !UML
+ help
+ If you have a CDROM/DVD drive that supports packet writing, say
+ Y to include support. It should work with any MMC/Mt Fuji
+ compliant ATAPI or SCSI drive, which is just about any newer
+ DVD/CD writer.
+
+ Currently only writing to CD-RW, DVD-RW, DVD+RW and DVD-RAM discs
+ is possible.
+ DVD-RW disks must be in restricted overwrite mode.
+
+ See the file <file:Documentation/cdrom/packet-writing.txt>
+ for further information on the use of this driver.
+
+ To compile this driver as a module, choose M here: the
+ module will be called pktcdvd.
+
+config CDROM_PKTCDVD_BUFFERS
+ int "Free buffers for data gathering"
+ depends on CDROM_PKTCDVD
+ default "8"
+ help
+ This controls the maximum number of active concurrent packets. More
+ concurrent packets can increase write performance, but also require
+ more memory. Each concurrent packet will require approximately 64 KB
+ of non-swappable kernel memory, memory which will be allocated when
+ a disc is opened for writing.
+
+config CDROM_PKTCDVD_WCACHE
+ bool "Enable write caching (EXPERIMENTAL)"
+ depends on CDROM_PKTCDVD && EXPERIMENTAL
+ help
+ If enabled, write caching will be set for the CD-R/W device. For now
+ this option is dangerous unless the CD-RW media is known good, as we
+ don't do deferred write error handling yet.
+
+config ATA_OVER_ETH
+ tristate "ATA over Ethernet support"
+ depends on NET
+ help
+ This driver provides support for ATA over Ethernet block
+ devices like the Coraid EtherDrive (R) Storage Blade.
+
+config MG_DISK
+ tristate "mGine mflash, gflash support"
+ depends on ARM && GPIOLIB
+ help
+ mGine mFlash (gFlash) block device driver
+
+config MG_DISK_RES
+ int "Size of reserved area before MBR"
+ depends on MG_DISK
+ default 0
+ help
+ Define the size of the reserved area that is usually used for boot
+ code. The unit is KB. All block device operations will use this value
+ as the start offset.
+ Example:
+ 1024 => 1 MB
+
+config SUNVDC
+ tristate "Sun Virtual Disk Client support"
+ depends on SUN_LDOMS
+ help
+ Support for virtual disk devices as a client under Sun
+ Logical Domains.
+
+source "drivers/s390/block/Kconfig"
+
+config XILINX_SYSACE
+ tristate "Xilinx SystemACE support"
+ depends on 4xx || MICROBLAZE
+ help
+ Include support for the Xilinx SystemACE CompactFlash interface
+
+config XEN_BLKDEV_FRONTEND
+ tristate "Xen virtual block device support"
+ depends on XEN
+ default y
+ select XEN_XENBUS_FRONTEND
+ help
+ This driver implements the front-end of the Xen virtual
+ block device driver. It communicates with a back-end driver
+ in another domain which drives the actual block device.
+
+config XEN_BLKDEV_BACKEND
+ tristate "Block-device backend driver"
+ depends on XEN_BACKEND
+ help
+ The block-device backend driver allows the kernel to export its
+ block devices to other guests via a high-performance shared-memory
+ interface.
+
+ The corresponding Linux frontend driver is enabled by the
+ CONFIG_XEN_BLKDEV_FRONTEND configuration option.
+
+ The backend driver attaches itself to any block device specified
+ in the XenBus configuration. There are no limits on what the block
+ device can be, as long as it has a major and minor number.
+
+ If you are compiling a kernel to run in a Xen block backend driver
+ domain (often this is domain 0) you should say Y here. To
+ compile this driver as a module, choose M here: the module
+ will be called xen-blkback.
+
+
+config VIRTIO_BLK
+ tristate "Virtio block driver (EXPERIMENTAL)"
+ depends on EXPERIMENTAL && VIRTIO
+ ---help---
+ This is the virtual block driver for virtio. It can be used with
+ lguest or QEMU based VMMs (like KVM or Xen). Say Y or M.
+
+config BLK_DEV_HD
+ bool "Very old hard disk (MFM/RLL/IDE) driver"
+ depends on HAVE_IDE
+ depends on !ARM || ARCH_RPC || ARCH_SHARK || BROKEN
+ help
+ This is a very old hard disk driver that lacks the enhanced
+ functionality of the newer ones.
+
+ It is required for systems with ancient MFM/RLL/ESDI drives.
+
+ If unsure, say N.
+
+config BLK_DEV_RBD
+ tristate "Rados block device (RBD)"
+ depends on INET && EXPERIMENTAL && BLOCK
+ select CEPH_LIB
+ select LIBCRC32C
+ select CRYPTO_AES
+ select CRYPTO
+ default n
+ help
+ Say Y here if you want to include the Rados block device, which stripes
+ a block device over objects stored in the Ceph distributed object
+ store.
+
+ More information at http://ceph.newdream.net/.
+
+ If unsure, say N.
+
+endif # BLK_DEV
diff --git a/drivers/block/Makefile b/drivers/block/Makefile
new file mode 100644
index 00000000..76646e9a
--- /dev/null
+++ b/drivers/block/Makefile
@@ -0,0 +1,43 @@
+#
+# Makefile for the kernel block device drivers.
+#
+# 12 June 2000, Christoph Hellwig <hch@infradead.org>
+# Rewritten to use lists instead of if-statements.
+#
+
+obj-$(CONFIG_MAC_FLOPPY) += swim3.o
+obj-$(CONFIG_BLK_DEV_SWIM) += swim_mod.o
+obj-$(CONFIG_BLK_DEV_FD) += floppy.o
+obj-$(CONFIG_AMIGA_FLOPPY) += amiflop.o
+obj-$(CONFIG_PS3_DISK) += ps3disk.o
+obj-$(CONFIG_PS3_VRAM) += ps3vram.o
+obj-$(CONFIG_ATARI_FLOPPY) += ataflop.o
+obj-$(CONFIG_AMIGA_Z2RAM) += z2ram.o
+obj-$(CONFIG_BLK_DEV_RAM) += brd.o
+obj-$(CONFIG_BLK_DEV_LOOP) += loop.o
+obj-$(CONFIG_BLK_DEV_XD) += xd.o
+obj-$(CONFIG_BLK_CPQ_DA) += cpqarray.o
+obj-$(CONFIG_BLK_CPQ_CISS_DA) += cciss.o
+obj-$(CONFIG_BLK_DEV_DAC960) += DAC960.o
+obj-$(CONFIG_XILINX_SYSACE) += xsysace.o
+obj-$(CONFIG_CDROM_PKTCDVD) += pktcdvd.o
+obj-$(CONFIG_MG_DISK) += mg_disk.o
+obj-$(CONFIG_SUNVDC) += sunvdc.o
+obj-$(CONFIG_BLK_DEV_OSD) += osdblk.o
+
+obj-$(CONFIG_BLK_DEV_UMEM) += umem.o
+obj-$(CONFIG_BLK_DEV_NBD) += nbd.o
+obj-$(CONFIG_BLK_DEV_CRYPTOLOOP) += cryptoloop.o
+obj-$(CONFIG_VIRTIO_BLK) += virtio_blk.o
+
+obj-$(CONFIG_VIODASD) += viodasd.o
+obj-$(CONFIG_BLK_DEV_SX8) += sx8.o
+obj-$(CONFIG_BLK_DEV_UB) += ub.o
+obj-$(CONFIG_BLK_DEV_HD) += hd.o
+
+obj-$(CONFIG_XEN_BLKDEV_FRONTEND) += xen-blkfront.o
+obj-$(CONFIG_XEN_BLKDEV_BACKEND) += xen-blkback/
+obj-$(CONFIG_BLK_DEV_DRBD) += drbd/
+obj-$(CONFIG_BLK_DEV_RBD) += rbd.o
+
+swim_mod-y := swim.o swim_asm.o
diff --git a/drivers/block/amiflop.c b/drivers/block/amiflop.c
new file mode 100644
index 00000000..8eba86bb
--- /dev/null
+++ b/drivers/block/amiflop.c
@@ -0,0 +1,1895 @@
+/*
+ * linux/amiga/amiflop.c
+ *
+ * Copyright (C) 1993 Greg Harp
+ * Portions of this driver are based on code contributed by Brad Pepers
+ *
+ * revised 28.5.95 by Joerg Dorchain
+ * - now no bugs(?) any more for both HD & DD
+ * - added support for 40 Track 5.25" drives, 80-track hopefully behaves
+ * like 3.5" dd (no way to test - are there any 5.25" drives out there
+ * that work on an A4000?)
+ * - wrote formatting routine (maybe dirty, but works)
+ *
+ * june/july 1995 added ms-dos support by Joerg Dorchain
+ * (portions based on messydos.device and various contributors)
+ * - currently only 9 and 18 sector disks
+ *
+ * - fixed a bug with the internal trackbuffer when using multiple
+ * disks at the same time
+ * - made formatting a bit safer
+ * - added command line and machine based default for "silent" df0
+ *
+ * december 1995 adapted for 1.2.13pl4 by Joerg Dorchain
+ * - works but I think it's inefficient. (look in redo_fd_request)
+ * But the changes were very efficient. (only three and a half lines)
+ *
+ * january 1996 added special ioctl for tracking down read/write problems
+ * - usage: ioctl(d, RAW_TRACK, ptr); the raw track buffer (MFM-encoded data)
+ * is copied to the area ptr points to (the area should be large enough since
+ * no checking is done - 30K is currently sufficient). Returns the actual
+ * size of the trackbuffer.
+ * - replaced udelays() by a timer (CIAA timer B) for the waits
+ * needed for the disk mechanic.
+ *
+ * february 1996 fixed error recovery and multiple disk access
+ * - both got broken the first time I tampered with the driver :-(
+ * - still not safe, but better than before
+ *
+ * revised March 3rd, 1996 by Jes Sorensen for use in the 1.3.28 kernel.
+ * - Minor changes to accept the kdev_t.
+ * - Replaced some more udelays with ms_delays. Udelay is just a loop,
+ * and so the delay will be different depending on the given
+ * processor :-(
+ * - The driver could use a major cleanup because of the new
+ * major/minor handling that came with kdev_t. It seems to work for
+ * the time being, but I can't guarantee that it will stay like
+ * that when we start using 16 (24?) bit minors.
+ *
+ * restructured jan 1997 by Joerg Dorchain
+ * - Fixed Bug accessing multiple disks
+ * - some code cleanup
+ * - added trackbuffer for each drive to speed things up
+ * - fixed some race conditions (who finds the next may send it to me ;-)
+ */
+
+#include <linux/module.h>
+#include <linux/slab.h>
+
+#include <linux/fd.h>
+#include <linux/hdreg.h>
+#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/mutex.h>
+#include <linux/amifdreg.h>
+#include <linux/amifd.h>
+#include <linux/buffer_head.h>
+#include <linux/blkdev.h>
+#include <linux/elevator.h>
+#include <linux/interrupt.h>
+#include <linux/platform_device.h>
+
+#include <asm/setup.h>
+#include <asm/uaccess.h>
+#include <asm/amigahw.h>
+#include <asm/amigaints.h>
+#include <asm/irq.h>
+
+#undef DEBUG /* print _LOTS_ of infos */
+
+#define RAW_IOCTL
+#ifdef RAW_IOCTL
+#define IOCTL_RAW_TRACK 0x5254524B /* 'RTRK' */
+#endif
+
+/*
+ * Defines
+ */
+
+/*
+ * Error codes
+ */
+#define FD_OK 0 /* operation succeeded */
+#define FD_ERROR -1 /* general error (seek, read, write, etc) */
+#define FD_NOUNIT 1 /* unit does not exist */
+#define FD_UNITBUSY 2 /* unit already active */
+#define FD_NOTACTIVE 3 /* unit is not active */
+#define FD_NOTREADY 4 /* unit is not ready (motor not on/no disk) */
+
+#define MFM_NOSYNC 1
+#define MFM_HEADER 2
+#define MFM_DATA 3
+#define MFM_TRACK 4
+
+/*
+ * Floppy ID values
+ */
+#define FD_NODRIVE 0x00000000 /* response when no unit is present */
+#define FD_DD_3 0xffffffff /* double-density 3.5" (880K) drive */
+#define FD_HD_3 0x55555555 /* high-density 3.5" (1760K) drive */
+#define FD_DD_5 0xaaaaaaaa /* double-density 5.25" (440K) drive */
+
+static DEFINE_MUTEX(amiflop_mutex);
+static unsigned long int fd_def_df0 = FD_DD_3; /* default for df0 if it doesn't identify */
+
+module_param(fd_def_df0, ulong, 0);
+MODULE_LICENSE("GPL");
+
+/*
+ * Macros
+ */
+#define MOTOR_ON (ciab.prb &= ~DSKMOTOR)
+#define MOTOR_OFF (ciab.prb |= DSKMOTOR)
+#define SELECT(mask) (ciab.prb &= ~mask)
+#define DESELECT(mask) (ciab.prb |= mask)
+#define SELMASK(drive) (1 << (3 + (drive & 3)))
+
+static struct fd_drive_type drive_types[] = {
+/* code name tr he rdsz wrsz sm pc1 pc2 sd st st*/
+/* warning: times are now in milliseconds (ms) */
+{ FD_DD_3, "DD 3.5", 80, 2, 14716, 13630, 1, 80,161, 3, 18, 1},
+{ FD_HD_3, "HD 3.5", 80, 2, 28344, 27258, 2, 80,161, 3, 18, 1},
+{ FD_DD_5, "DD 5.25", 40, 2, 14716, 13630, 1, 40, 81, 6, 30, 2},
+{ FD_NODRIVE, "No Drive", 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
+};
+static int num_dr_types = ARRAY_SIZE(drive_types);
+
+static int amiga_read(int), dos_read(int);
+static void amiga_write(int), dos_write(int);
+static struct fd_data_type data_types[] = {
+ { "Amiga", 11 , amiga_read, amiga_write},
+ { "MS-Dos", 9, dos_read, dos_write}
+};
+
+/* current info on each unit */
+static struct amiga_floppy_struct unit[FD_MAX_UNITS];
+
+static struct timer_list flush_track_timer[FD_MAX_UNITS];
+static struct timer_list post_write_timer;
+static struct timer_list motor_on_timer;
+static struct timer_list motor_off_timer[FD_MAX_UNITS];
+static int on_attempts;
+
+/* Synchronization of FDC access */
+/* request loop (trackbuffer) */
+static volatile int fdc_busy = -1;
+static volatile int fdc_nested;
+static DECLARE_WAIT_QUEUE_HEAD(fdc_wait);
+
+static DECLARE_COMPLETION(motor_on_completion);
+
+static volatile int selected = -1; /* currently selected drive */
+
+static int writepending;
+static int writefromint;
+static char *raw_buf;
+static int fdc_queue;
+
+static DEFINE_SPINLOCK(amiflop_lock);
+
+#define RAW_BUF_SIZE 30000 /* size of raw disk data */
+
+/*
+ * These are global variables, as that's the easiest way to give
+ * information to interrupts. They are the data used for the current
+ * request.
+ */
+static volatile char block_flag;
+static DECLARE_WAIT_QUEUE_HEAD(wait_fd_block);
+
+/* MS-Dos MFM Coding tables (should go quick and easy) */
+static unsigned char mfmencode[16]={
+ 0x2a, 0x29, 0x24, 0x25, 0x12, 0x11, 0x14, 0x15,
+ 0x4a, 0x49, 0x44, 0x45, 0x52, 0x51, 0x54, 0x55
+};
+static unsigned char mfmdecode[128];
+
+/* floppy internal millisecond timer stuff */
+static DECLARE_COMPLETION(ms_wait_completion);
+#define MS_TICKS ((amiga_eclock+50)/1000)
+
+/*
+ * Note that MAX_ERRORS=X doesn't imply that we retry every bad read
+ * max X times - some types of errors increase the errorcount by 2 or
+ * even 3, so we might actually retry only X/2 times before giving up.
+ */
+#define MAX_ERRORS 12
+
+#define custom amiga_custom
+
+/* Prevent "aliased" accesses. */
+static int fd_ref[4] = { 0,0,0,0 };
+static int fd_device[4] = { 0, 0, 0, 0 };
+
+/*
+ * Here come the actual hardware access and helper functions.
+ * They are not reentrant and single threaded because all drives
+ * share the same hardware and the same trackbuffer.
+ */
+
+/* Milliseconds timer */
+
+static irqreturn_t ms_isr(int irq, void *dummy)
+{
+ complete(&ms_wait_completion);
+ return IRQ_HANDLED;
+}
+
+/* all waits are queued up
+ A more generic routine would do a schedule a la timer.device */
+static void ms_delay(int ms)
+{
+ int ticks;
+ static DEFINE_MUTEX(mutex);
+
+ if (ms > 0) {
+ mutex_lock(&mutex);
+ ticks = MS_TICKS*ms-1;
+ ciaa.tblo=ticks%256;
+ ciaa.tbhi=ticks/256;
+ ciaa.crb=0x19; /* count eclock, force load, one-shot, start */
+ wait_for_completion(&ms_wait_completion);
+ mutex_unlock(&mutex);
+ }
+}
+
+/* Hardware semaphore */
+
+/* returns true when we would get the semaphore */
+static inline int try_fdc(int drive)
+{
+ drive &= 3;
+ return ((fdc_busy < 0) || (fdc_busy == drive));
+}
+
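+/* get_fdc() sleeps until the FDC is free or already owned by this drive and
+   then takes (or nests) ownership; rel_fdc() drops one nesting level and
+   wakes any waiters once the count reaches zero, so every get_fdc() must be
+   paired with exactly one rel_fdc(). */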
+static void get_fdc(int drive)
+{
+ unsigned long flags;
+
+ drive &= 3;
+#ifdef DEBUG
+ printk("get_fdc: drive %d fdc_busy %d fdc_nested %d\n",drive,fdc_busy,fdc_nested);
+#endif
+ local_irq_save(flags);
+ wait_event(fdc_wait, try_fdc(drive));
+ fdc_busy = drive;
+ fdc_nested++;
+ local_irq_restore(flags);
+}
+
+static inline void rel_fdc(void)
+{
+#ifdef DEBUG
+ if (fdc_nested == 0)
+ printk("fd: unmatched rel_fdc\n");
+ printk("rel_fdc: fdc_busy %d fdc_nested %d\n",fdc_busy,fdc_nested);
+#endif
+ fdc_nested--;
+ if (fdc_nested == 0) {
+ fdc_busy = -1;
+ wake_up(&fdc_wait);
+ }
+}
+
+static void fd_select (int drive)
+{
+ unsigned char prb = ~0;
+
+ drive&=3;
+#ifdef DEBUG
+ printk("selecting %d\n",drive);
+#endif
+ if (drive == selected)
+ return;
+ get_fdc(drive);
+ selected = drive;
+
+ if (unit[drive].track % 2 != 0)
+ prb &= ~DSKSIDE;
+ if (unit[drive].motor == 1)
+ prb &= ~DSKMOTOR;
+ ciab.prb |= (SELMASK(0)|SELMASK(1)|SELMASK(2)|SELMASK(3));
+ ciab.prb = prb;
+ prb &= ~SELMASK(drive);
+ ciab.prb = prb;
+ rel_fdc();
+}
+
+static void fd_deselect (int drive)
+{
+ unsigned char prb;
+ unsigned long flags;
+
+ drive&=3;
+#ifdef DEBUG
+ printk("deselecting %d\n",drive);
+#endif
+ if (drive != selected) {
+ printk(KERN_WARNING "Deselecting drive %d while %d was selected!\n",drive,selected);
+ return;
+ }
+
+ get_fdc(drive);
+ local_irq_save(flags);
+
+ selected = -1;
+
+ prb = ciab.prb;
+ prb |= (SELMASK(0)|SELMASK(1)|SELMASK(2)|SELMASK(3));
+ ciab.prb = prb;
+
+ local_irq_restore (flags);
+ rel_fdc();
+
+}
+
+static void motor_on_callback(unsigned long nr)
+{
+ if (!(ciaa.pra & DSKRDY) || --on_attempts == 0) {
+ complete_all(&motor_on_completion);
+ } else {
+ motor_on_timer.expires = jiffies + HZ/10;
+ add_timer(&motor_on_timer);
+ }
+}
+
+static int fd_motor_on(int nr)
+{
+ nr &= 3;
+
+ del_timer(motor_off_timer + nr);
+
+ if (!unit[nr].motor) {
+ unit[nr].motor = 1;
+ fd_select(nr);
+
+ INIT_COMPLETION(motor_on_completion);
+ motor_on_timer.data = nr;
+ mod_timer(&motor_on_timer, jiffies + HZ/2);
+
+ on_attempts = 10;
+ wait_for_completion(&motor_on_completion);
+ fd_deselect(nr);
+ }
+
+ if (on_attempts == 0) {
+ on_attempts = -1;
+#if 0
+ printk (KERN_ERR "motor_on failed, turning motor off\n");
+ fd_motor_off (nr);
+ return 0;
+#else
+ printk (KERN_WARNING "DSKRDY not set after 1.5 seconds - assuming drive is spinning notwithstanding\n");
+#endif
+ }
+
+ return 1;
+}
+
+static void fd_motor_off(unsigned long drive)
+{
+ long calledfromint;
+#ifdef MODULE
+ long decusecount;
+
+ decusecount = drive & 0x40000000;
+#endif
+ calledfromint = drive & 0x80000000;
+ drive&=3;
+ if (calledfromint && !try_fdc(drive)) {
+ /* We would be blocked in an interrupt, so try again later */
+ motor_off_timer[drive].expires = jiffies + 1;
+ add_timer(motor_off_timer + drive);
+ return;
+ }
+ unit[drive].motor = 0;
+ fd_select(drive);
+ udelay (1);
+ fd_deselect(drive);
+}
+
+static void floppy_off (unsigned int nr)
+{
+ int drive;
+
+ drive = nr & 3;
+ /* called this way it is always from interrupt */
+ motor_off_timer[drive].data = nr | 0x80000000;
+ mod_timer(motor_off_timer + drive, jiffies + 3*HZ);
+}
+
+static int fd_calibrate(int drive)
+{
+ unsigned char prb;
+ int n;
+
+ drive &= 3;
+ get_fdc(drive);
+ if (!fd_motor_on (drive))
+ return 0;
+ fd_select (drive);
+ prb = ciab.prb;
+ prb |= DSKSIDE;
+ prb &= ~DSKDIREC;
+ ciab.prb = prb;
+ for (n = unit[drive].type->tracks/2; n != 0; --n) {
+ if (ciaa.pra & DSKTRACK0)
+ break;
+ prb &= ~DSKSTEP;
+ ciab.prb = prb;
+ prb |= DSKSTEP;
+ udelay (2);
+ ciab.prb = prb;
+ ms_delay(unit[drive].type->step_delay);
+ }
+ ms_delay (unit[drive].type->settle_time);
+ prb |= DSKDIREC;
+ n = unit[drive].type->tracks + 20;
+ for (;;) {
+ prb &= ~DSKSTEP;
+ ciab.prb = prb;
+ prb |= DSKSTEP;
+ udelay (2);
+ ciab.prb = prb;
+ ms_delay(unit[drive].type->step_delay + 1);
+ if ((ciaa.pra & DSKTRACK0) == 0)
+ break;
+ if (--n == 0) {
+ printk (KERN_ERR "fd%d: calibrate failed, turning motor off\n", drive);
+ fd_motor_off (drive);
+ unit[drive].track = -1;
+ rel_fdc();
+ return 0;
+ }
+ }
+ unit[drive].track = 0;
+ ms_delay(unit[drive].type->settle_time);
+
+ rel_fdc();
+ fd_deselect(drive);
+ return 1;
+}
+
+static int fd_seek(int drive, int track)
+{
+ unsigned char prb;
+ int cnt;
+
+#ifdef DEBUG
+ printk("seeking drive %d to track %d\n",drive,track);
+#endif
+ drive &= 3;
+ get_fdc(drive);
+ if (unit[drive].track == track) {
+ rel_fdc();
+ return 1;
+ }
+ if (!fd_motor_on(drive)) {
+ rel_fdc();
+ return 0;
+ }
+ if (unit[drive].track < 0 && !fd_calibrate(drive)) {
+ rel_fdc();
+ return 0;
+ }
+
+ fd_select (drive);
+ cnt = unit[drive].track/2 - track/2;
+ prb = ciab.prb;
+ prb |= DSKSIDE | DSKDIREC;
+ if (track % 2 != 0)
+ prb &= ~DSKSIDE;
+ if (cnt < 0) {
+ cnt = - cnt;
+ prb &= ~DSKDIREC;
+ }
+ ciab.prb = prb;
+ if (track % 2 != unit[drive].track % 2)
+ ms_delay (unit[drive].type->side_time);
+ unit[drive].track = track;
+ if (cnt == 0) {
+ rel_fdc();
+ fd_deselect(drive);
+ return 1;
+ }
+ do {
+ prb &= ~DSKSTEP;
+ ciab.prb = prb;
+ prb |= DSKSTEP;
+ udelay (1);
+ ciab.prb = prb;
+ ms_delay (unit[drive].type->step_delay);
+ } while (--cnt != 0);
+ ms_delay (unit[drive].type->settle_time);
+
+ rel_fdc();
+ fd_deselect(drive);
+ return 1;
+}
+
+static unsigned long fd_get_drive_id(int drive)
+{
+ int i;
+ ulong id = 0;
+
+ drive&=3;
+ get_fdc(drive);
+ /* set up for ID */
+ MOTOR_ON;
+ udelay(2);
+ SELECT(SELMASK(drive));
+ udelay(2);
+ DESELECT(SELMASK(drive));
+ udelay(2);
+ MOTOR_OFF;
+ udelay(2);
+ SELECT(SELMASK(drive));
+ udelay(2);
+ DESELECT(SELMASK(drive));
+ udelay(2);
+
+ /* loop and read disk ID */
+ for (i=0; i<32; i++) {
+ SELECT(SELMASK(drive));
+ udelay(2);
+
+ /* read and store value of DSKRDY */
+ id <<= 1;
+ id |= (ciaa.pra & DSKRDY) ? 0 : 1; /* cia regs are low-active! */
+
+ DESELECT(SELMASK(drive));
+ }
+
+ rel_fdc();
+
+ /*
+ * RB: At least A500/A2000's df0: don't identify themselves.
+ * As every (real) Amiga has at least a 3.5" DD drive as df0:
+ * we default to that if df0: doesn't identify as a certain
+ * type.
+ */
+ if(drive == 0 && id == FD_NODRIVE)
+ {
+ id = fd_def_df0;
+ printk(KERN_NOTICE "fd: drive 0 didn't identify, setting default %08lx\n", (ulong)fd_def_df0);
+ }
+ /* return the ID value */
+ return (id);
+}
+
+static irqreturn_t fd_block_done(int irq, void *dummy)
+{
+ if (block_flag)
+ custom.dsklen = 0x4000;
+
+ if (block_flag == 2) { /* writing */
+ writepending = 2;
+ post_write_timer.expires = jiffies + 1; /* at least 2 ms */
+ post_write_timer.data = selected;
+ add_timer(&post_write_timer);
+ }
+ else { /* reading */
+ block_flag = 0;
+ wake_up (&wait_fd_block);
+ }
+ return IRQ_HANDLED;
+}
+
+static void raw_read(int drive)
+{
+ drive&=3;
+ get_fdc(drive);
+ wait_event(wait_fd_block, !block_flag);
+ fd_select(drive);
+ /* setup adkcon bits correctly */
+ custom.adkcon = ADK_MSBSYNC;
+ custom.adkcon = ADK_SETCLR|ADK_WORDSYNC|ADK_FAST;
+
+ custom.dsksync = MFM_SYNC;
+
+ custom.dsklen = 0;
+ custom.dskptr = (u_char *)ZTWO_PADDR((u_char *)raw_buf);
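+	/*
+	 * DSKLEN is written twice on purpose: the Amiga disk controller is
+	 * documented to start DMA only after the length/enable value has been
+	 * written two times in a row (a precaution against stray writes); the
+	 * same pattern appears in raw_write() below.
+	 */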
+ custom.dsklen = unit[drive].type->read_size/sizeof(short) | DSKLEN_DMAEN;
+ custom.dsklen = unit[drive].type->read_size/sizeof(short) | DSKLEN_DMAEN;
+
+ block_flag = 1;
+
+ wait_event(wait_fd_block, !block_flag);
+
+ custom.dsklen = 0;
+ fd_deselect(drive);
+ rel_fdc();
+}
+
+static int raw_write(int drive)
+{
+ ushort adk;
+
+ drive&=3;
+ get_fdc(drive); /* corresponds to rel_fdc() in post_write() */
+ if ((ciaa.pra & DSKPROT) == 0) {
+ rel_fdc();
+ return 0;
+ }
+ wait_event(wait_fd_block, !block_flag);
+ fd_select(drive);
+ /* clear adkcon bits */
+ custom.adkcon = ADK_PRECOMP1|ADK_PRECOMP0|ADK_WORDSYNC|ADK_MSBSYNC;
+ /* set appropriate adkcon bits */
+ adk = ADK_SETCLR|ADK_FAST;
+ if ((ulong)unit[drive].track >= unit[drive].type->precomp2)
+ adk |= ADK_PRECOMP1;
+ else if ((ulong)unit[drive].track >= unit[drive].type->precomp1)
+ adk |= ADK_PRECOMP0;
+ custom.adkcon = adk;
+
+ custom.dsklen = DSKLEN_WRITE;
+ custom.dskptr = (u_char *)ZTWO_PADDR((u_char *)raw_buf);
+ custom.dsklen = unit[drive].type->write_size/sizeof(short) | DSKLEN_DMAEN|DSKLEN_WRITE;
+ custom.dsklen = unit[drive].type->write_size/sizeof(short) | DSKLEN_DMAEN|DSKLEN_WRITE;
+
+ block_flag = 2;
+ return 1;
+}
+
+/*
+ * to be called at least 2ms after the write has finished but before any
+ * other access to the hardware.
+ */
+static void post_write (unsigned long drive)
+{
+#ifdef DEBUG
+ printk("post_write for drive %ld\n",drive);
+#endif
+ drive &= 3;
+ custom.dsklen = 0;
+ block_flag = 0;
+ writepending = 0;
+ writefromint = 0;
+ unit[drive].dirty = 0;
+ wake_up(&wait_fd_block);
+ fd_deselect(drive);
+ rel_fdc(); /* corresponds to get_fdc() in raw_write */
+}
+
+
+/*
+ * The following functions are to convert the block contents into raw data
+ * written to disk and vice versa.
+ * (Add other formats here ;-))
+ */
+
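+/*
+ * Scan the raw MFM stream for the 0x4489 sync word; on success return a
+ * pointer just past the run of sync words, or 0 if 'end' is reached first.
+ */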
+static unsigned long scan_sync(unsigned long raw, unsigned long end)
+{
+ ushort *ptr = (ushort *)raw, *endp = (ushort *)end;
+
+ while (ptr < endp && *ptr++ != 0x4489)
+ ;
+ if (ptr < endp) {
+ while (*ptr == 0x4489 && ptr < endp)
+ ptr++;
+ return (ulong)ptr;
+ }
+ return 0;
+}
+
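+/*
+ * Sector checksum used in the Amiga track format: XOR all longwords of the
+ * field and fold the odd bit positions onto the even (0x55555555) ones, so
+ * only the data-bit positions of the result remain set.
+ */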
+static inline unsigned long checksum(unsigned long *addr, int len)
+{
+ unsigned long csum = 0;
+
+ len /= sizeof(*addr);
+ while (len-- > 0)
+ csum ^= *addr++;
+ csum = ((csum>>1) & 0x55555555) ^ (csum & 0x55555555);
+
+ return csum;
+}
+
+static unsigned long decode (unsigned long *data, unsigned long *raw,
+ int len)
+{
+ ulong *odd, *even;
+
+ /* convert length from bytes to longwords */
+ len >>= 2;
+ odd = raw;
+ even = odd + len;
+
+ /* prepare return pointer */
+ raw += len * 2;
+
+ do {
+ *data++ = ((*odd++ & 0x55555555) << 1) | (*even++ & 0x55555555);
+ } while (--len != 0);
+
+ return (ulong)raw;
+}
+
+struct header {
+ unsigned char magic;
+ unsigned char track;
+ unsigned char sect;
+ unsigned char ord;
+ unsigned char labels[16];
+ unsigned long hdrchk;
+ unsigned long datachk;
+};
+
+static int amiga_read(int drive)
+{
+ unsigned long raw;
+ unsigned long end;
+ int scnt;
+ unsigned long csum;
+ struct header hdr;
+
+ drive&=3;
+ raw = (long) raw_buf;
+ end = raw + unit[drive].type->read_size;
+
+ for (scnt = 0;scnt < unit[drive].dtype->sects * unit[drive].type->sect_mult; scnt++) {
+ if (!(raw = scan_sync(raw, end))) {
+ printk (KERN_INFO "can't find sync for sector %d\n", scnt);
+ return MFM_NOSYNC;
+ }
+
+ raw = decode ((ulong *)&hdr.magic, (ulong *)raw, 4);
+ raw = decode ((ulong *)&hdr.labels, (ulong *)raw, 16);
+ raw = decode ((ulong *)&hdr.hdrchk, (ulong *)raw, 4);
+ raw = decode ((ulong *)&hdr.datachk, (ulong *)raw, 4);
+ csum = checksum((ulong *)&hdr,
+ (char *)&hdr.hdrchk-(char *)&hdr);
+
+#ifdef DEBUG
+ printk ("(%x,%d,%d,%d) (%lx,%lx,%lx,%lx) %lx %lx\n",
+ hdr.magic, hdr.track, hdr.sect, hdr.ord,
+ *(ulong *)&hdr.labels[0], *(ulong *)&hdr.labels[4],
+ *(ulong *)&hdr.labels[8], *(ulong *)&hdr.labels[12],
+ hdr.hdrchk, hdr.datachk);
+#endif
+
+ if (hdr.hdrchk != csum) {
+ printk(KERN_INFO "MFM_HEADER: %08lx,%08lx\n", hdr.hdrchk, csum);
+ return MFM_HEADER;
+ }
+
+ /* verify track */
+ if (hdr.track != unit[drive].track) {
+ printk(KERN_INFO "MFM_TRACK: %d, %d\n", hdr.track, unit[drive].track);
+ return MFM_TRACK;
+ }
+
+ raw = decode ((ulong *)(unit[drive].trackbuf + hdr.sect*512),
+ (ulong *)raw, 512);
+ csum = checksum((ulong *)(unit[drive].trackbuf + hdr.sect*512), 512);
+
+ if (hdr.datachk != csum) {
+ printk(KERN_INFO "MFM_DATA: (%x:%d:%d:%d) sc=%d %lx, %lx\n",
+ hdr.magic, hdr.track, hdr.sect, hdr.ord, scnt,
+ hdr.datachk, csum);
+ printk (KERN_INFO "data=(%lx,%lx,%lx,%lx)\n",
+ ((ulong *)(unit[drive].trackbuf+hdr.sect*512))[0],
+ ((ulong *)(unit[drive].trackbuf+hdr.sect*512))[1],
+ ((ulong *)(unit[drive].trackbuf+hdr.sect*512))[2],
+ ((ulong *)(unit[drive].trackbuf+hdr.sect*512))[3]);
+ return MFM_DATA;
+ }
+ }
+
+ return 0;
+}
+
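+/*
+ * MFM-encode one longword: the data bits sit in the even (0x55555555) bit
+ * positions and a clock bit is inserted in each odd position whose two
+ * neighbouring data bits are both zero, taking the last bit of the
+ * previously encoded longword into account for bit 31.
+ */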
+static void encode(unsigned long data, unsigned long *dest)
+{
+ unsigned long data2;
+
+ data &= 0x55555555;
+ data2 = data ^ 0x55555555;
+ data |= ((data2 >> 1) | 0x80000000) & (data2 << 1);
+
+ if (*(dest - 1) & 0x00000001)
+ data &= 0x7FFFFFFF;
+
+ *dest = data;
+}
+
+static void encode_block(unsigned long *dest, unsigned long *src, int len)
+{
+ int cnt, to_cnt = 0;
+ unsigned long data;
+
+ /* odd bits */
+ for (cnt = 0; cnt < len / 4; cnt++) {
+ data = src[cnt] >> 1;
+ encode(data, dest + to_cnt++);
+ }
+
+ /* even bits */
+ for (cnt = 0; cnt < len / 4; cnt++) {
+ data = src[cnt];
+ encode(data, dest + to_cnt++);
+ }
+}
+
+static unsigned long *putsec(int disk, unsigned long *raw, int cnt)
+{
+ struct header hdr;
+ int i;
+
+ disk&=3;
+ *raw = (raw[-1]&1) ? 0x2AAAAAAA : 0xAAAAAAAA;
+ raw++;
+ *raw++ = 0x44894489;
+
+ hdr.magic = 0xFF;
+ hdr.track = unit[disk].track;
+ hdr.sect = cnt;
+ hdr.ord = unit[disk].dtype->sects * unit[disk].type->sect_mult - cnt;
+ for (i = 0; i < 16; i++)
+ hdr.labels[i] = 0;
+ hdr.hdrchk = checksum((ulong *)&hdr,
+ (char *)&hdr.hdrchk-(char *)&hdr);
+ hdr.datachk = checksum((ulong *)(unit[disk].trackbuf+cnt*512), 512);
+
+ encode_block(raw, (ulong *)&hdr.magic, 4);
+ raw += 2;
+ encode_block(raw, (ulong *)&hdr.labels, 16);
+ raw += 8;
+ encode_block(raw, (ulong *)&hdr.hdrchk, 4);
+ raw += 2;
+ encode_block(raw, (ulong *)&hdr.datachk, 4);
+ raw += 2;
+ encode_block(raw, (ulong *)(unit[disk].trackbuf+cnt*512), 512);
+ raw += 256;
+
+ return raw;
+}
+
+static void amiga_write(int disk)
+{
+ unsigned int cnt;
+ unsigned long *ptr = (unsigned long *)raw_buf;
+
+ disk&=3;
+ /* gap space */
+ for (cnt = 0; cnt < 415 * unit[disk].type->sect_mult; cnt++)
+ *ptr++ = 0xaaaaaaaa;
+
+ /* sectors */
+ for (cnt = 0; cnt < unit[disk].dtype->sects * unit[disk].type->sect_mult; cnt++)
+ ptr = putsec (disk, ptr, cnt);
+ *(ushort *)ptr = (ptr[-1]&1) ? 0x2AA8 : 0xAAA8;
+}
+
+
+struct dos_header {
+ unsigned char track, /* 0-80 */
+ side, /* 0-1 */
+ sec, /* 0-...*/
+ len_desc;/* 2 */
+ unsigned short crc; /* on 68000 we got an alignment problem,
+ but this compiler solves it by silently
+ adding a pad byte so the data won't fit,
+ and this took about 3h to discover.... */
+ unsigned char gap1[22]; /* for longword-alignedness (0x4e) */
+};
+
+/* crc routines are borrowed from the messydos-handler */
+
+/* excerpt from the messydos-device
+; The CRC is computed not only over the actual data, but including
+; the SYNC mark (3 * $a1) and the 'ID/DATA - Address Mark' ($fe/$fb).
+; As we don't read or encode these fields into our buffers, we have to
+; preload the registers containing the CRC with the values they would have
+; after stepping over these fields.
+;
+; How CRCs "really" work:
+;
+; First, you should regard a bitstring as a series of coefficients of
+; polynomials. We calculate with these polynomials in modulo-2
+; arithmetic, in which both add and subtract are done the same as
+; exclusive-or. Now, we modify our data (a very long polynomial) in
+; such a way that it becomes divisible by the CCITT-standard 16-bit
+; 16 12 5
+; polynomial: x + x + x + 1, represented by $11021. The easiest
+; way to do this would be to multiply (using proper arithmetic) our
+; datablock with $11021. So we have:
+; data * $11021 =
+; data * ($10000 + $1021) =
+; data * $10000 + data * $1021
+; The left part of this is simple: Just add two 0 bytes. But then
+; the right part (data $1021) remains difficult and even could have
+; a carry into the left part. The solution is to use a modified
+; multiplication, which has a result that is not correct, but with
+; a difference of any multiple of $11021. We then only need to keep
+; the 16 least significant bits of the result.
+;
+; The following algorithm does this for us:
+;
+; unsigned char *data, c, crclo, crchi;
+; while (not done) {
+; c = *data++ + crchi;
+; crchi = (@ c) >> 8 + crclo;
+; crclo = @ c;
+; }
+;
+; Remember, + is done with EOR, the @ operator is in two tables (high
+; and low byte separately), which is calculated as
+;
+; $1021 * (c & $F0)
+; xor $1021 * (c & $0F)
+; xor $1021 * (c >> 4) (* is regular multiplication)
+;
+;
+; Anyway, the end result is the same as the remainder of the division of
+; the data by $11021. I am afraid I need to study theory a bit more...
+
+
+My only work was to translate this from Manx assembler to C....
+
+*/
+
+static ushort dos_crc(void * data_a3, int data_d0, int data_d1, int data_d3)
+{
+ static unsigned char CRCTable1[] = {
+ 0x00,0x10,0x20,0x30,0x40,0x50,0x60,0x70,0x81,0x91,0xa1,0xb1,0xc1,0xd1,0xe1,0xf1,
+ 0x12,0x02,0x32,0x22,0x52,0x42,0x72,0x62,0x93,0x83,0xb3,0xa3,0xd3,0xc3,0xf3,0xe3,
+ 0x24,0x34,0x04,0x14,0x64,0x74,0x44,0x54,0xa5,0xb5,0x85,0x95,0xe5,0xf5,0xc5,0xd5,
+ 0x36,0x26,0x16,0x06,0x76,0x66,0x56,0x46,0xb7,0xa7,0x97,0x87,0xf7,0xe7,0xd7,0xc7,
+ 0x48,0x58,0x68,0x78,0x08,0x18,0x28,0x38,0xc9,0xd9,0xe9,0xf9,0x89,0x99,0xa9,0xb9,
+ 0x5a,0x4a,0x7a,0x6a,0x1a,0x0a,0x3a,0x2a,0xdb,0xcb,0xfb,0xeb,0x9b,0x8b,0xbb,0xab,
+ 0x6c,0x7c,0x4c,0x5c,0x2c,0x3c,0x0c,0x1c,0xed,0xfd,0xcd,0xdd,0xad,0xbd,0x8d,0x9d,
+ 0x7e,0x6e,0x5e,0x4e,0x3e,0x2e,0x1e,0x0e,0xff,0xef,0xdf,0xcf,0xbf,0xaf,0x9f,0x8f,
+ 0x91,0x81,0xb1,0xa1,0xd1,0xc1,0xf1,0xe1,0x10,0x00,0x30,0x20,0x50,0x40,0x70,0x60,
+ 0x83,0x93,0xa3,0xb3,0xc3,0xd3,0xe3,0xf3,0x02,0x12,0x22,0x32,0x42,0x52,0x62,0x72,
+ 0xb5,0xa5,0x95,0x85,0xf5,0xe5,0xd5,0xc5,0x34,0x24,0x14,0x04,0x74,0x64,0x54,0x44,
+ 0xa7,0xb7,0x87,0x97,0xe7,0xf7,0xc7,0xd7,0x26,0x36,0x06,0x16,0x66,0x76,0x46,0x56,
+ 0xd9,0xc9,0xf9,0xe9,0x99,0x89,0xb9,0xa9,0x58,0x48,0x78,0x68,0x18,0x08,0x38,0x28,
+ 0xcb,0xdb,0xeb,0xfb,0x8b,0x9b,0xab,0xbb,0x4a,0x5a,0x6a,0x7a,0x0a,0x1a,0x2a,0x3a,
+ 0xfd,0xed,0xdd,0xcd,0xbd,0xad,0x9d,0x8d,0x7c,0x6c,0x5c,0x4c,0x3c,0x2c,0x1c,0x0c,
+ 0xef,0xff,0xcf,0xdf,0xaf,0xbf,0x8f,0x9f,0x6e,0x7e,0x4e,0x5e,0x2e,0x3e,0x0e,0x1e
+ };
+
+ static unsigned char CRCTable2[] = {
+ 0x00,0x21,0x42,0x63,0x84,0xa5,0xc6,0xe7,0x08,0x29,0x4a,0x6b,0x8c,0xad,0xce,0xef,
+ 0x31,0x10,0x73,0x52,0xb5,0x94,0xf7,0xd6,0x39,0x18,0x7b,0x5a,0xbd,0x9c,0xff,0xde,
+ 0x62,0x43,0x20,0x01,0xe6,0xc7,0xa4,0x85,0x6a,0x4b,0x28,0x09,0xee,0xcf,0xac,0x8d,
+ 0x53,0x72,0x11,0x30,0xd7,0xf6,0x95,0xb4,0x5b,0x7a,0x19,0x38,0xdf,0xfe,0x9d,0xbc,
+ 0xc4,0xe5,0x86,0xa7,0x40,0x61,0x02,0x23,0xcc,0xed,0x8e,0xaf,0x48,0x69,0x0a,0x2b,
+ 0xf5,0xd4,0xb7,0x96,0x71,0x50,0x33,0x12,0xfd,0xdc,0xbf,0x9e,0x79,0x58,0x3b,0x1a,
+ 0xa6,0x87,0xe4,0xc5,0x22,0x03,0x60,0x41,0xae,0x8f,0xec,0xcd,0x2a,0x0b,0x68,0x49,
+ 0x97,0xb6,0xd5,0xf4,0x13,0x32,0x51,0x70,0x9f,0xbe,0xdd,0xfc,0x1b,0x3a,0x59,0x78,
+ 0x88,0xa9,0xca,0xeb,0x0c,0x2d,0x4e,0x6f,0x80,0xa1,0xc2,0xe3,0x04,0x25,0x46,0x67,
+ 0xb9,0x98,0xfb,0xda,0x3d,0x1c,0x7f,0x5e,0xb1,0x90,0xf3,0xd2,0x35,0x14,0x77,0x56,
+ 0xea,0xcb,0xa8,0x89,0x6e,0x4f,0x2c,0x0d,0xe2,0xc3,0xa0,0x81,0x66,0x47,0x24,0x05,
+ 0xdb,0xfa,0x99,0xb8,0x5f,0x7e,0x1d,0x3c,0xd3,0xf2,0x91,0xb0,0x57,0x76,0x15,0x34,
+ 0x4c,0x6d,0x0e,0x2f,0xc8,0xe9,0x8a,0xab,0x44,0x65,0x06,0x27,0xc0,0xe1,0x82,0xa3,
+ 0x7d,0x5c,0x3f,0x1e,0xf9,0xd8,0xbb,0x9a,0x75,0x54,0x37,0x16,0xf1,0xd0,0xb3,0x92,
+ 0x2e,0x0f,0x6c,0x4d,0xaa,0x8b,0xe8,0xc9,0x26,0x07,0x64,0x45,0xa2,0x83,0xe0,0xc1,
+ 0x1f,0x3e,0x5d,0x7c,0x9b,0xba,0xd9,0xf8,0x17,0x36,0x55,0x74,0x93,0xb2,0xd1,0xf0
+ };
+
+/* look at the asm code - what looks a bit strange in C is almost as good as handmade */
+ register int i;
+ register unsigned char *CRCT1, *CRCT2, *data, c, crch, crcl;
+
+ CRCT1=CRCTable1;
+ CRCT2=CRCTable2;
+ data=data_a3;
+ crcl=data_d1;
+ crch=data_d0;
+ for (i=data_d3; i>=0; i--) {
+ c = (*data++) ^ crch;
+ crch = CRCT1[c] ^ crcl;
+ crcl = CRCT2[c];
+ }
+ return (crch<<8)|crcl;
+}
+
+static inline ushort dos_hdr_crc (struct dos_header *hdr)
+{
+ return dos_crc(&(hdr->track), 0xb2, 0x30, 3); /* precomputed magic */
+}
+
+static inline ushort dos_data_crc(unsigned char *data)
+{
+ return dos_crc(data, 0xe2, 0x95 ,511); /* precomputed magic */
+}
+
+static inline unsigned char dos_decode_byte(ushort word)
+{
+ register ushort w2;
+ register unsigned char byte;
+ register unsigned char *dec = mfmdecode;
+
+ w2=word;
+ w2>>=8;
+ w2&=127;
+ byte = dec[w2];
+ byte <<= 4;
+ w2 = word & 127;
+ byte |= dec[w2];
+ return byte;
+}
+
+static unsigned long dos_decode(unsigned char *data, unsigned short *raw, int len)
+{
+ int i;
+
+ for (i = 0; i < len; i++)
+ *data++=dos_decode_byte(*raw++);
+ return ((ulong)raw);
+}
+
+#ifdef DEBUG
+static void dbg(unsigned long ptr)
+{
+ printk("raw data @%08lx: %08lx, %08lx ,%08lx, %08lx\n", ptr,
+ ((ulong *)ptr)[0], ((ulong *)ptr)[1],
+ ((ulong *)ptr)[2], ((ulong *)ptr)[3]);
+}
+#endif
+
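+/*
+ * dos_read() walks the raw MFM image of one track: for every sector it
+ * searches for the next sync (scan_sync()), expects 0x5554 (presumably
+ * the MFM encoding of the 0xfe ID address mark) in front of the header
+ * and 0x5545 (the 0xfb data mark) in front of the 512 data bytes, and
+ * verifies both CRCs.
+ */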
+static int dos_read(int drive)
+{
+ unsigned long end;
+ unsigned long raw;
+ int scnt;
+ unsigned short crc,data_crc[2];
+ struct dos_header hdr;
+
+ drive&=3;
+ raw = (long) raw_buf;
+ end = raw + unit[drive].type->read_size;
+
+ for (scnt=0; scnt < unit[drive].dtype->sects * unit[drive].type->sect_mult; scnt++) {
+ do { /* search for the right sync of each sec-hdr */
+ if (!(raw = scan_sync (raw, end))) {
+ printk(KERN_INFO "dos_read: no hdr sync on "
+ "track %d, unit %d for sector %d\n",
+ unit[drive].track,drive,scnt);
+ return MFM_NOSYNC;
+ }
+#ifdef DEBUG
+ dbg(raw);
+#endif
+ } while (*((ushort *)raw)!=0x5554); /* this loop is usually done only once */
+ raw+=2; /* skip over headermark */
+ raw = dos_decode((unsigned char *)&hdr,(ushort *) raw,8);
+ crc = dos_hdr_crc(&hdr);
+
+#ifdef DEBUG
+ printk("(%3d,%d,%2d,%d) %x\n", hdr.track, hdr.side,
+ hdr.sec, hdr.len_desc, hdr.crc);
+#endif
+
+ if (crc != hdr.crc) {
+ printk(KERN_INFO "dos_read: MFM_HEADER %04x,%04x\n",
+ hdr.crc, crc);
+ return MFM_HEADER;
+ }
+ if (hdr.track != unit[drive].track/unit[drive].type->heads) {
+ printk(KERN_INFO "dos_read: MFM_TRACK %d, %d\n",
+ hdr.track,
+ unit[drive].track/unit[drive].type->heads);
+ return MFM_TRACK;
+ }
+
+ if (hdr.side != unit[drive].track%unit[drive].type->heads) {
+ printk(KERN_INFO "dos_read: MFM_SIDE %d, %d\n",
+ hdr.side,
+ unit[drive].track%unit[drive].type->heads);
+ return MFM_TRACK;
+ }
+
+ if (hdr.len_desc != 2) {
+ printk(KERN_INFO "dos_read: unknown sector len "
+ "descriptor %d\n", hdr.len_desc);
+ return MFM_DATA;
+ }
+#ifdef DEBUG
+ printk("hdr accepted\n");
+#endif
+ if (!(raw = scan_sync (raw, end))) {
+ printk(KERN_INFO "dos_read: no data sync on track "
+ "%d, unit %d for sector%d, disk sector %d\n",
+ unit[drive].track, drive, scnt, hdr.sec);
+ return MFM_NOSYNC;
+ }
+#ifdef DEBUG
+ dbg(raw);
+#endif
+
+ if (*((ushort *)raw)!=0x5545) {
+ printk(KERN_INFO "dos_read: no data mark after "
+ "sync (%d,%d,%d,%d) sc=%d\n",
+ hdr.track,hdr.side,hdr.sec,hdr.len_desc,scnt);
+ return MFM_NOSYNC;
+ }
+
+ raw+=2; /* skip data mark (included in checksum) */
+ raw = dos_decode((unsigned char *)(unit[drive].trackbuf + (hdr.sec - 1) * 512), (ushort *) raw, 512);
+ raw = dos_decode((unsigned char *)data_crc,(ushort *) raw,4);
+ crc = dos_data_crc(unit[drive].trackbuf + (hdr.sec - 1) * 512);
+
+ if (crc != data_crc[0]) {
+ printk(KERN_INFO "dos_read: MFM_DATA (%d,%d,%d,%d) "
+ "sc=%d, %x %x\n", hdr.track, hdr.side,
+ hdr.sec, hdr.len_desc, scnt,data_crc[0], crc);
+ printk(KERN_INFO "data=(%lx,%lx,%lx,%lx,...)\n",
+ ((ulong *)(unit[drive].trackbuf+(hdr.sec-1)*512))[0],
+ ((ulong *)(unit[drive].trackbuf+(hdr.sec-1)*512))[1],
+ ((ulong *)(unit[drive].trackbuf+(hdr.sec-1)*512))[2],
+ ((ulong *)(unit[drive].trackbuf+(hdr.sec-1)*512))[3]);
+ return MFM_DATA;
+ }
+ }
+ return 0;
+}
+
+static inline ushort dos_encode_byte(unsigned char byte)
+{
+ register unsigned char *enc, b2, b1;
+ register ushort word;
+
+ enc=mfmencode;
+ b1=byte;
+ b2=b1>>4;
+ b1&=15;
+ word=enc[b2] <<8 | enc [b1];
+ return (word|((word&(256|64)) ? 0: 128));
+}
+
+static void dos_encode_block(ushort *dest, unsigned char *src, int len)
+{
+ int i;
+
+ for (i = 0; i < len; i++) {
+ *dest=dos_encode_byte(*src++);
+ *dest|=((dest[-1]&1)||(*dest&0x4000))? 0: 0x8000;
+ dest++;
+ }
+}
+
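+/*
+ * Raw words used when building a track below: 0xaaaa is the MFM encoding
+ * of a 0x00 gap byte, 0x9254 encodes the 0x4e gap filler and 0x4489 is
+ * the special A1 sync mark; as far as I can tell this follows the
+ * standard IBM/ISO MFM track layout.
+ */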
+static unsigned long *ms_putsec(int drive, unsigned long *raw, int cnt)
+{
+ static struct dos_header hdr={0,0,0,2,0,
+ {78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78}};
+ int i;
+ static ushort crc[2]={0,0x4e4e};
+
+ drive&=3;
+/* id gap 1 */
+/* the MFM word before is always 9254 */
+ for(i=0;i<6;i++)
+ *raw++=0xaaaaaaaa;
+/* 3 sync + 1 headermark */
+ *raw++=0x44894489;
+ *raw++=0x44895554;
+
+/* fill in the variable parts of the header */
+ hdr.track=unit[drive].track/unit[drive].type->heads;
+ hdr.side=unit[drive].track%unit[drive].type->heads;
+ hdr.sec=cnt+1;
+ hdr.crc=dos_hdr_crc(&hdr);
+
+/* header (without "magic") and id gap 2*/
+ dos_encode_block((ushort *)raw,(unsigned char *) &hdr.track,28);
+ raw+=14;
+
+/*id gap 3 */
+ for(i=0;i<6;i++)
+ *raw++=0xaaaaaaaa;
+
+/* 3 syncs and 1 datamark */
+ *raw++=0x44894489;
+ *raw++=0x44895545;
+
+/* data */
+ dos_encode_block((ushort *)raw,
+ (unsigned char *)unit[drive].trackbuf+cnt*512,512);
+ raw+=256;
+
+/*data crc + jd's special gap (long words :-/) */
+ crc[0]=dos_data_crc(unit[drive].trackbuf+cnt*512);
+ dos_encode_block((ushort *) raw,(unsigned char *)crc,4);
+ raw+=2;
+
+/* data gap */
+ for(i=0;i<38;i++)
+ *raw++=0x92549254;
+
+ return raw; /* wrote 652 MFM words */
+}
+
+static void dos_write(int disk)
+{
+ int cnt;
+ unsigned long raw = (unsigned long) raw_buf;
+ unsigned long *ptr=(unsigned long *)raw;
+
+ disk&=3;
+/* really gap4 + indexgap, but we write it first and round it up */
+ for (cnt=0;cnt<425;cnt++)
+ *ptr++=0x92549254;
+
+/* the following is just guessed */
+ if (unit[disk].type->sect_mult==2) /* check for HD-Disks */
+ for(cnt=0;cnt<473;cnt++)
+ *ptr++=0x92549254;
+
+/* now the index marks...*/
+ for (cnt=0;cnt<20;cnt++)
+ *ptr++=0x92549254;
+ for (cnt=0;cnt<6;cnt++)
+ *ptr++=0xaaaaaaaa;
+ *ptr++=0x52245224;
+ *ptr++=0x52245552;
+ for (cnt=0;cnt<20;cnt++)
+ *ptr++=0x92549254;
+
+/* sectors */
+ for(cnt = 0; cnt < unit[disk].dtype->sects * unit[disk].type->sect_mult; cnt++)
+ ptr=ms_putsec(disk,ptr,cnt);
+
+ *(ushort *)ptr = 0xaaa8; /* MFM word before is always 0x9254 */
+}
+
+/*
+ * Here comes the high level stuff (i.e. the filesystem interface)
+ * and helper functions.
+ * Normally this should be the only part that has to be adapted to
+ * different kernel versions.
+ */
+
+/* FIXME: this assumes the drive is still spinning -
+ * which is only true if we complete writing a track within three seconds
+ */
+static void flush_track_callback(unsigned long nr)
+{
+ nr&=3;
+ writefromint = 1;
+ if (!try_fdc(nr)) {
+ /* we might block in an interrupt, so try again later */
+ flush_track_timer[nr].expires = jiffies + 1;
+ add_timer(flush_track_timer + nr);
+ return;
+ }
+ get_fdc(nr);
+ (*unit[nr].dtype->write_fkt)(nr);
+ if (!raw_write(nr)) {
+ printk (KERN_NOTICE "floppy disk write protected\n");
+ writefromint = 0;
+ writepending = 0;
+ }
+ rel_fdc();
+}
+
+static int non_int_flush_track (unsigned long nr)
+{
+ unsigned long flags;
+
+ nr&=3;
+ writefromint = 0;
+ del_timer(&post_write_timer);
+ get_fdc(nr);
+ if (!fd_motor_on(nr)) {
+ writepending = 0;
+ rel_fdc();
+ return 0;
+ }
+ local_irq_save(flags);
+ if (writepending != 2) {
+ local_irq_restore(flags);
+ (*unit[nr].dtype->write_fkt)(nr);
+ if (!raw_write(nr)) {
+ printk (KERN_NOTICE "floppy disk write protected "
+ "in write!\n");
+ writepending = 0;
+ return 0;
+ }
+ wait_event(wait_fd_block, block_flag != 2);
+ }
+ else {
+ local_irq_restore(flags);
+ ms_delay(2); /* 2 ms post_write delay */
+ post_write(nr);
+ }
+ rel_fdc();
+ return 1;
+}
+
+static int get_track(int drive, int track)
+{
+ int error, errcnt;
+
+ drive&=3;
+ if (unit[drive].track == track)
+ return 0;
+ get_fdc(drive);
+ if (!fd_motor_on(drive)) {
+ rel_fdc();
+ return -1;
+ }
+
+ if (unit[drive].dirty == 1) {
+ del_timer (flush_track_timer + drive);
+ non_int_flush_track (drive);
+ }
+ errcnt = 0;
+ while (errcnt < MAX_ERRORS) {
+ if (!fd_seek(drive, track))
+ return -1;
+ raw_read(drive);
+ error = (*unit[drive].dtype->read_fkt)(drive);
+ if (error == 0) {
+ rel_fdc();
+ return 0;
+ }
+ /* Read Error Handling: recalibrate and try again */
+ unit[drive].track = -1;
+ errcnt++;
+ }
+ rel_fdc();
+ return -1;
+}
+
+/*
+ * Round-robin between our available drives, doing one request from each
+ */
+static struct request *set_next_request(void)
+{
+ struct request_queue *q;
+ int cnt = FD_MAX_UNITS;
+ struct request *rq = NULL;
+
+ /* Find next queue we can dispatch from */
+ fdc_queue = fdc_queue + 1;
+ if (fdc_queue == FD_MAX_UNITS)
+ fdc_queue = 0;
+
+ for(cnt = FD_MAX_UNITS; cnt > 0; cnt--) {
+
+ if (unit[fdc_queue].type->code == FD_NODRIVE) {
+ if (++fdc_queue == FD_MAX_UNITS)
+ fdc_queue = 0;
+ continue;
+ }
+
+ q = unit[fdc_queue].gendisk->queue;
+ if (q) {
+ rq = blk_fetch_request(q);
+ if (rq)
+ break;
+ }
+
+ if (++fdc_queue == FD_MAX_UNITS)
+ fdc_queue = 0;
+ }
+
+ return rq;
+}
+
+static void redo_fd_request(void)
+{
+ struct request *rq;
+ unsigned int cnt, block, track, sector;
+ int drive;
+ struct amiga_floppy_struct *floppy;
+ char *data;
+ unsigned long flags;
+ int err;
+
+next_req:
+ rq = set_next_request();
+ if (!rq) {
+ /* Nothing left to do */
+ return;
+ }
+
+ floppy = rq->rq_disk->private_data;
+ drive = floppy - unit;
+
+next_segment:
+ /* Here someone could investigate to be more efficient */
+ for (cnt = 0, err = 0; cnt < blk_rq_cur_sectors(rq); cnt++) {
+#ifdef DEBUG
+ printk("fd: sector %ld + %d requested for %s\n",
+ blk_rq_pos(rq), cnt,
+ (rq_data_dir(rq) == READ) ? "read" : "write");
+#endif
+ block = blk_rq_pos(rq) + cnt;
+ if ((int)block > floppy->blocks) {
+ err = -EIO;
+ break;
+ }
+
+ track = block / (floppy->dtype->sects * floppy->type->sect_mult);
+ sector = block % (floppy->dtype->sects * floppy->type->sect_mult);
+ data = rq->buffer + 512 * cnt;
+#ifdef DEBUG
+ printk("access to track %d, sector %d, with buffer at "
+ "0x%08lx\n", track, sector, data);
+#endif
+
+ if (get_track(drive, track) == -1) {
+ err = -EIO;
+ break;
+ }
+
+ if (rq_data_dir(rq) == READ) {
+ memcpy(data, floppy->trackbuf + sector * 512, 512);
+ } else {
+ memcpy(floppy->trackbuf + sector * 512, data, 512);
+
+ /* keep the drive spinning while writes are scheduled */
+ if (!fd_motor_on(drive)) {
+ err = -EIO;
+ break;
+ }
+ /*
+ * setup a callback to write the track buffer
+ * after a short (1 tick) delay.
+ */
+ local_irq_save(flags);
+
+ floppy->dirty = 1;
+ /* reset the timer */
+ mod_timer (flush_track_timer + drive, jiffies + 1);
+ local_irq_restore(flags);
+ }
+ }
+
+ if (__blk_end_request_cur(rq, err))
+ goto next_segment;
+ goto next_req;
+}
+
+static void do_fd_request(struct request_queue * q)
+{
+ redo_fd_request();
+}
+
+static int fd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
+{
+ int drive = MINOR(bdev->bd_dev) & 3;
+
+ geo->heads = unit[drive].type->heads;
+ geo->sectors = unit[drive].dtype->sects * unit[drive].type->sect_mult;
+ geo->cylinders = unit[drive].type->tracks;
+ return 0;
+}
+
+static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode,
+ unsigned int cmd, unsigned long param)
+{
+ struct amiga_floppy_struct *p = bdev->bd_disk->private_data;
+ int drive = p - unit;
+ static struct floppy_struct getprm;
+ void __user *argp = (void __user *)param;
+
+ switch(cmd){
+ case FDFMTBEG:
+ get_fdc(drive);
+ if (fd_ref[drive] > 1) {
+ rel_fdc();
+ return -EBUSY;
+ }
+ fsync_bdev(bdev);
+ if (fd_motor_on(drive) == 0) {
+ rel_fdc();
+ return -ENODEV;
+ }
+ if (fd_calibrate(drive) == 0) {
+ rel_fdc();
+ return -ENXIO;
+ }
+ floppy_off(drive);
+ rel_fdc();
+ break;
+ case FDFMTTRK:
+ if (param < p->type->tracks * p->type->heads)
+ {
+ get_fdc(drive);
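+ /* fd_seek() seems to return nonzero on success, so the track
+ * is only blanked and flushed when the seek actually worked */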
+ if (fd_seek(drive,param) != 0){
+ memset(p->trackbuf, FD_FILL_BYTE,
+ p->dtype->sects * p->type->sect_mult * 512);
+ non_int_flush_track(drive);
+ }
+ floppy_off(drive);
+ rel_fdc();
+ }
+ else
+ return -EINVAL;
+ break;
+ case FDFMTEND:
+ floppy_off(drive);
+ invalidate_bdev(bdev);
+ break;
+ case FDGETPRM:
+ memset((void *)&getprm, 0, sizeof (getprm));
+ getprm.track=p->type->tracks;
+ getprm.head=p->type->heads;
+ getprm.sect=p->dtype->sects * p->type->sect_mult;
+ getprm.size=p->blocks;
+ if (copy_to_user(argp, &getprm, sizeof(struct floppy_struct)))
+ return -EFAULT;
+ break;
+ case FDSETPRM:
+ case FDDEFPRM:
+ return -EINVAL;
+ case FDFLUSH: /* unconditionally, even if not needed */
+ del_timer (flush_track_timer + drive);
+ non_int_flush_track(drive);
+ break;
+#ifdef RAW_IOCTL
+ case IOCTL_RAW_TRACK:
+ if (copy_to_user(argp, raw_buf, p->type->read_size))
+ return -EFAULT;
+ else
+ return p->type->read_size;
+#endif
+ default:
+ printk(KERN_DEBUG "fd_ioctl: unknown cmd %d for drive %d.",
+ cmd, drive);
+ return -ENOSYS;
+ }
+ return 0;
+}
+
+static int fd_ioctl(struct block_device *bdev, fmode_t mode,
+ unsigned int cmd, unsigned long param)
+{
+ int ret;
+
+ mutex_lock(&amiflop_mutex);
+ ret = fd_locked_ioctl(bdev, mode, cmd, param);
+ mutex_unlock(&amiflop_mutex);
+
+ return ret;
+}
+
+static void fd_probe(int dev)
+{
+ unsigned long code;
+ int type;
+ int drive;
+
+ drive = dev & 3;
+ code = fd_get_drive_id(drive);
+
+ /* get drive type */
+ for (type = 0; type < num_dr_types; type++)
+ if (drive_types[type].code == code)
+ break;
+
+ if (type >= num_dr_types) {
+ printk(KERN_WARNING "fd_probe: unsupported drive type "
+ "%08lx found\n", code);
+ unit[drive].type = &drive_types[num_dr_types-1]; /* FD_NODRIVE */
+ return;
+ }
+
+ unit[drive].type = drive_types + type;
+ unit[drive].track = -1;
+
+ unit[drive].disk = -1;
+ unit[drive].motor = 0;
+ unit[drive].busy = 0;
+ unit[drive].status = -1;
+}
+
+/*
+ * floppy_open checks for aliasing (/dev/fd0 can be the same as
+ * /dev/PS0 etc), and disallows simultaneous access to the same
+ * drive with different device numbers.
+ */
+static int floppy_open(struct block_device *bdev, fmode_t mode)
+{
+ int drive = MINOR(bdev->bd_dev) & 3;
+ int system = (MINOR(bdev->bd_dev) & 4) >> 2;
+ int old_dev;
+ unsigned long flags;
+
+ mutex_lock(&amiflop_mutex);
+ old_dev = fd_device[drive];
+
+ if (fd_ref[drive] && old_dev != system) {
+ mutex_unlock(&amiflop_mutex);
+ return -EBUSY;
+ }
+
+ if (mode & (FMODE_READ|FMODE_WRITE)) {
+ check_disk_change(bdev);
+ if (mode & FMODE_WRITE) {
+ int wrprot;
+
+ get_fdc(drive);
+ fd_select (drive);
+ wrprot = !(ciaa.pra & DSKPROT);
+ fd_deselect (drive);
+ rel_fdc();
+
+ if (wrprot) {
+ mutex_unlock(&amiflop_mutex);
+ return -EROFS;
+ }
+ }
+ }
+
+ local_irq_save(flags);
+ fd_ref[drive]++;
+ fd_device[drive] = system;
+ local_irq_restore(flags);
+
+ unit[drive].dtype=&data_types[system];
+ unit[drive].blocks=unit[drive].type->heads*unit[drive].type->tracks*
+ data_types[system].sects*unit[drive].type->sect_mult;
+ set_capacity(unit[drive].gendisk, unit[drive].blocks);
+
+ printk(KERN_INFO "fd%d: accessing %s-disk with %s-layout\n",drive,
+ unit[drive].type->name, data_types[system].name);
+
+ mutex_unlock(&amiflop_mutex);
+ return 0;
+}
+
+static int floppy_release(struct gendisk *disk, fmode_t mode)
+{
+ struct amiga_floppy_struct *p = disk->private_data;
+ int drive = p - unit;
+
+ mutex_lock(&amiflop_mutex);
+ if (unit[drive].dirty == 1) {
+ del_timer (flush_track_timer + drive);
+ non_int_flush_track (drive);
+ }
+
+ if (!fd_ref[drive]--) {
+ printk(KERN_CRIT "floppy_release with fd_ref == 0");
+ fd_ref[drive] = 0;
+ }
+#ifdef MODULE
+/* the mod_use counter is handled this way */
+ floppy_off (drive | 0x40000000);
+#endif
+ mutex_unlock(&amiflop_mutex);
+ return 0;
+}
+
+/*
+ * check_events is never called from an interrupt, so we can relax a bit
+ * here, sleep etc. Note that floppy-on tries to set current_DOR to point
+ * to the desired drive, but it will probably not survive the sleep if
+ * several floppies are used at the same time: thus the loop.
+ */
+static unsigned amiga_check_events(struct gendisk *disk, unsigned int clearing)
+{
+ struct amiga_floppy_struct *p = disk->private_data;
+ int drive = p - unit;
+ int changed;
+ static int first_time = 1;
+
+ if (first_time)
+ changed = first_time--;
+ else {
+ get_fdc(drive);
+ fd_select (drive);
+ changed = !(ciaa.pra & DSKCHANGE);
+ fd_deselect (drive);
+ rel_fdc();
+ }
+
+ if (changed) {
+ fd_probe(drive);
+ p->track = -1;
+ p->dirty = 0;
+ writepending = 0; /* if this was true before, too bad! */
+ writefromint = 0;
+ return DISK_EVENT_MEDIA_CHANGE;
+ }
+ return 0;
+}
+
+static const struct block_device_operations floppy_fops = {
+ .owner = THIS_MODULE,
+ .open = floppy_open,
+ .release = floppy_release,
+ .ioctl = fd_ioctl,
+ .getgeo = fd_getgeo,
+ .check_events = amiga_check_events,
+};
+
+static int __init fd_probe_drives(void)
+{
+ int drive,drives,nomem;
+
+ printk(KERN_INFO "FD: probing units\nfound ");
+ drives=0;
+ nomem=0;
+ for(drive=0;drive<FD_MAX_UNITS;drive++) {
+ struct gendisk *disk;
+ fd_probe(drive);
+ if (unit[drive].type->code == FD_NODRIVE)
+ continue;
+ disk = alloc_disk(1);
+ if (!disk) {
+ unit[drive].type->code = FD_NODRIVE;
+ continue;
+ }
+ unit[drive].gendisk = disk;
+
+ disk->queue = blk_init_queue(do_fd_request, &amiflop_lock);
+ if (!disk->queue) {
+ unit[drive].type->code = FD_NODRIVE;
+ continue;
+ }
+
+ drives++;
+ if ((unit[drive].trackbuf = kmalloc(FLOPPY_MAX_SECTORS * 512, GFP_KERNEL)) == NULL) {
+ printk("no mem for ");
+ unit[drive].type = &drive_types[num_dr_types - 1]; /* FD_NODRIVE */
+ drives--;
+ nomem = 1;
+ }
+ printk("fd%d ",drive);
+ disk->major = FLOPPY_MAJOR;
+ disk->first_minor = drive;
+ disk->fops = &floppy_fops;
+ sprintf(disk->disk_name, "fd%d", drive);
+ disk->private_data = &unit[drive];
+ set_capacity(disk, 880*2);
+ add_disk(disk);
+ }
+ if ((drives > 0) || (nomem == 0)) {
+ if (drives == 0)
+ printk("no drives");
+ printk("\n");
+ return drives;
+ }
+ printk("\n");
+ return -ENOMEM;
+}
+
+static struct kobject *floppy_find(dev_t dev, int *part, void *data)
+{
+ int drive = *part & 3;
+ if (unit[drive].type->code == FD_NODRIVE)
+ return NULL;
+ *part = 0;
+ return get_disk(unit[drive].gendisk);
+}
+
+static int __init amiga_floppy_probe(struct platform_device *pdev)
+{
+ int i, ret;
+
+ if (register_blkdev(FLOPPY_MAJOR,"fd"))
+ return -EBUSY;
+
+ ret = -ENOMEM;
+ raw_buf = amiga_chip_alloc(RAW_BUF_SIZE, "Floppy");
+ if (!raw_buf) {
+ printk("fd: cannot get chip mem buffer\n");
+ goto out_blkdev;
+ }
+
+ ret = -EBUSY;
+ if (request_irq(IRQ_AMIGA_DSKBLK, fd_block_done, 0, "floppy_dma", NULL)) {
+ printk("fd: cannot get irq for dma\n");
+ goto out_irq;
+ }
+
+ if (request_irq(IRQ_AMIGA_CIAA_TB, ms_isr, 0, "floppy_timer", NULL)) {
+ printk("fd: cannot get irq for timer\n");
+ goto out_irq2;
+ }
+
+ ret = -ENODEV;
+ if (fd_probe_drives() < 1) /* No usable drives */
+ goto out_probe;
+
+ blk_register_region(MKDEV(FLOPPY_MAJOR, 0), 256, THIS_MODULE,
+ floppy_find, NULL, NULL);
+
+ /* initialize variables */
+ init_timer(&motor_on_timer);
+ motor_on_timer.expires = 0;
+ motor_on_timer.data = 0;
+ motor_on_timer.function = motor_on_callback;
+ for (i = 0; i < FD_MAX_UNITS; i++) {
+ init_timer(&motor_off_timer[i]);
+ motor_off_timer[i].expires = 0;
+ motor_off_timer[i].data = i|0x80000000;
+ motor_off_timer[i].function = fd_motor_off;
+ init_timer(&flush_track_timer[i]);
+ flush_track_timer[i].expires = 0;
+ flush_track_timer[i].data = i;
+ flush_track_timer[i].function = flush_track_callback;
+
+ unit[i].track = -1;
+ }
+
+ init_timer(&post_write_timer);
+ post_write_timer.expires = 0;
+ post_write_timer.data = 0;
+ post_write_timer.function = post_write;
+
+ for (i = 0; i < 128; i++)
+ mfmdecode[i]=255;
+ for (i = 0; i < 16; i++)
+ mfmdecode[mfmencode[i]]=i;
+
+ /* make sure that disk DMA is enabled */
+ custom.dmacon = DMAF_SETCLR | DMAF_DISK;
+
+ /* init ms timer */
+ ciaa.crb = 8; /* one-shot, stop */
+ return 0;
+
+out_probe:
+ free_irq(IRQ_AMIGA_CIAA_TB, NULL);
+out_irq2:
+ free_irq(IRQ_AMIGA_DSKBLK, NULL);
+out_irq:
+ amiga_chip_free(raw_buf);
+out_blkdev:
+ unregister_blkdev(FLOPPY_MAJOR,"fd");
+ return ret;
+}
+
+#if 0 /* not safe to unload */
+static int __exit amiga_floppy_remove(struct platform_device *pdev)
+{
+ int i;
+
+ for( i = 0; i < FD_MAX_UNITS; i++) {
+ if (unit[i].type->code != FD_NODRIVE) {
+ struct request_queue *q = unit[i].gendisk->queue;
+ del_gendisk(unit[i].gendisk);
+ put_disk(unit[i].gendisk);
+ kfree(unit[i].trackbuf);
+ if (q)
+ blk_cleanup_queue(q);
+ }
+ }
+ blk_unregister_region(MKDEV(FLOPPY_MAJOR, 0), 256);
+ free_irq(IRQ_AMIGA_CIAA_TB, NULL);
+ free_irq(IRQ_AMIGA_DSKBLK, NULL);
+ custom.dmacon = DMAF_DISK; /* disable DMA */
+ amiga_chip_free(raw_buf);
+ unregister_blkdev(FLOPPY_MAJOR, "fd");
+}
+#endif
+
+static struct platform_driver amiga_floppy_driver = {
+ .driver = {
+ .name = "amiga-floppy",
+ .owner = THIS_MODULE,
+ },
+};
+
+static int __init amiga_floppy_init(void)
+{
+ return platform_driver_probe(&amiga_floppy_driver, amiga_floppy_probe);
+}
+
+module_init(amiga_floppy_init);
+
+#ifndef MODULE
+static int __init amiga_floppy_setup (char *str)
+{
+ int n;
+ if (!MACH_IS_AMIGA)
+ return 0;
+ if (!get_option(&str, &n))
+ return 0;
+ printk (KERN_INFO "amiflop: Setting default df0 to %x\n", n);
+ fd_def_df0 = n;
+ return 1;
+}
+
+__setup("floppy=", amiga_floppy_setup);
+#endif
+
+MODULE_ALIAS("platform:amiga-floppy");
diff --git a/drivers/block/aoe/Makefile b/drivers/block/aoe/Makefile
new file mode 100644
index 00000000..06ea82cd
--- /dev/null
+++ b/drivers/block/aoe/Makefile
@@ -0,0 +1,6 @@
+#
+# Makefile for ATA over Ethernet
+#
+
+obj-$(CONFIG_ATA_OVER_ETH) += aoe.o
+aoe-y := aoeblk.o aoechr.o aoecmd.o aoedev.o aoemain.o aoenet.o
diff --git a/drivers/block/aoe/aoe.h b/drivers/block/aoe/aoe.h
new file mode 100644
index 00000000..db195aba
--- /dev/null
+++ b/drivers/block/aoe/aoe.h
@@ -0,0 +1,203 @@
+/* Copyright (c) 2007 Coraid, Inc. See COPYING for GPL terms. */
+#define VERSION "47"
+#define AOE_MAJOR 152
+#define DEVICE_NAME "aoe"
+
+/* set AOE_PARTITIONS to 1 to use whole-disks only
+ * default is 16, which is 15 partitions plus the whole disk
+ */
+#ifndef AOE_PARTITIONS
+#define AOE_PARTITIONS (16)
+#endif
+
+#define SYSMINOR(aoemajor, aoeminor) ((aoemajor) * NPERSHELF + (aoeminor))
+#define AOEMAJOR(sysminor) ((sysminor) / NPERSHELF)
+#define AOEMINOR(sysminor) ((sysminor) % NPERSHELF)
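+/* e.g. shelf 1, slot 2 maps to sysminor 1 * NPERSHELF + 2 == 18 with the default NPERSHELF of 16 */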
+#define WHITESPACE " \t\v\f\n"
+
+enum {
+ AOECMD_ATA,
+ AOECMD_CFG,
+ AOECMD_VEND_MIN = 0xf0,
+
+ AOEFL_RSP = (1<<3),
+ AOEFL_ERR = (1<<2),
+
+ AOEAFL_EXT = (1<<6),
+ AOEAFL_DEV = (1<<4),
+ AOEAFL_ASYNC = (1<<1),
+ AOEAFL_WRITE = (1<<0),
+
+ AOECCMD_READ = 0,
+ AOECCMD_TEST,
+ AOECCMD_PTEST,
+ AOECCMD_SET,
+ AOECCMD_FSET,
+
+ AOE_HVER = 0x10,
+};
+
+struct aoe_hdr {
+ unsigned char dst[6];
+ unsigned char src[6];
+ __be16 type;
+ unsigned char verfl;
+ unsigned char err;
+ __be16 major;
+ unsigned char minor;
+ unsigned char cmd;
+ __be32 tag;
+};
+
+struct aoe_atahdr {
+ unsigned char aflags;
+ unsigned char errfeat;
+ unsigned char scnt;
+ unsigned char cmdstat;
+ unsigned char lba0;
+ unsigned char lba1;
+ unsigned char lba2;
+ unsigned char lba3;
+ unsigned char lba4;
+ unsigned char lba5;
+ unsigned char res[2];
+};
+
+struct aoe_cfghdr {
+ __be16 bufcnt;
+ __be16 fwver;
+ unsigned char scnt;
+ unsigned char aoeccmd;
+ unsigned char cslen[2];
+};
+
+enum {
+ DEVFL_UP = 1, /* device is installed in system and ready for AoE->ATA commands */
+ DEVFL_TKILL = (1<<1), /* flag for timer to know when to kill self */
+ DEVFL_EXT = (1<<2), /* device accepts lba48 commands */
+ DEVFL_CLOSEWAIT = (1<<3), /* device is waiting for all closes to revalidate */
+ DEVFL_GDALLOC = (1<<4), /* need to alloc gendisk */
+ DEVFL_KICKME = (1<<5), /* slow polling network card catch */
+ DEVFL_NEWSIZE = (1<<6), /* need to update dev size in block layer */
+
+ BUFFL_FAIL = 1,
+};
+
+enum {
+ DEFAULTBCNT = 2 * 512, /* 2 sectors */
+ NPERSHELF = 16, /* number of slots per shelf address */
+ FREETAG = -1,
+ MIN_BUFS = 16,
+ NTARGETS = 8,
+ NAOEIFS = 8,
+ NSKBPOOLMAX = 128,
+
+ TIMERTICK = HZ / 10,
+ MINTIMER = HZ >> 2,
+ MAXTIMER = HZ << 1,
+ HELPWAIT = 20,
+};
+
+struct buf {
+ struct list_head bufs;
+ ulong stime; /* for disk stats */
+ ulong flags;
+ ulong nframesout;
+ ulong resid;
+ ulong bv_resid;
+ ulong bv_off;
+ sector_t sector;
+ struct bio *bio;
+ struct bio_vec *bv;
+};
+
+struct frame {
+ int tag;
+ ulong waited;
+ struct buf *buf;
+ char *bufaddr;
+ ulong bcnt;
+ sector_t lba;
+ struct sk_buff *skb;
+};
+
+struct aoeif {
+ struct net_device *nd;
+ unsigned char lost;
+ unsigned char lostjumbo;
+ ushort maxbcnt;
+};
+
+struct aoetgt {
+ unsigned char addr[6];
+ ushort nframes;
+ struct frame *frames;
+ struct aoeif ifs[NAOEIFS];
+ struct aoeif *ifp; /* current aoeif in use */
+ ushort nout;
+ ushort maxout;
+ u16 lasttag; /* last tag sent */
+ u16 useme;
+ ulong lastwadj; /* last window adjustment */
+ int wpkts, rpkts;
+ int dataref;
+};
+
+struct aoedev {
+ struct aoedev *next;
+ ulong sysminor;
+ ulong aoemajor;
+ u16 aoeminor;
+ u16 flags;
+ u16 nopen; /* (bd_openers isn't available without sleeping) */
+ u16 rttavg; /* round trip average of requests/responses */
+ u16 mintimer;
+ u16 fw_ver; /* version of blade's firmware */
+ struct work_struct work;/* disk create work struct */
+ struct gendisk *gd;
+ struct request_queue *blkq;
+ struct hd_geometry geo;
+ sector_t ssize;
+ struct timer_list timer;
+ spinlock_t lock;
+ struct sk_buff_head sendq;
+ struct sk_buff_head skbpool;
+ mempool_t *bufpool; /* for deadlock-free Buf allocation */
+ struct list_head bufq; /* queue of bios to work on */
+ struct buf *inprocess; /* the one we're currently working on */
+ struct aoetgt *targets[NTARGETS];
+ struct aoetgt **tgt; /* target in use when working */
+ struct aoetgt **htgt; /* target needing rexmit assistance */
+};
+
+
+int aoeblk_init(void);
+void aoeblk_exit(void);
+void aoeblk_gdalloc(void *);
+void aoedisk_rm_sysfs(struct aoedev *d);
+
+int aoechr_init(void);
+void aoechr_exit(void);
+void aoechr_error(char *);
+
+void aoecmd_work(struct aoedev *d);
+void aoecmd_cfg(ushort aoemajor, unsigned char aoeminor);
+void aoecmd_ata_rsp(struct sk_buff *);
+void aoecmd_cfg_rsp(struct sk_buff *);
+void aoecmd_sleepwork(struct work_struct *);
+void aoecmd_cleanslate(struct aoedev *);
+struct sk_buff *aoecmd_ata_id(struct aoedev *);
+
+int aoedev_init(void);
+void aoedev_exit(void);
+struct aoedev *aoedev_by_aoeaddr(int maj, int min);
+struct aoedev *aoedev_by_sysminor_m(ulong sysminor);
+void aoedev_downdev(struct aoedev *d);
+int aoedev_flush(const char __user *str, size_t size);
+
+int aoenet_init(void);
+void aoenet_exit(void);
+void aoenet_xmit(struct sk_buff_head *);
+int is_aoe_netif(struct net_device *ifp);
+int set_aoe_iflist(const char __user *str, size_t size);
+
diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c
new file mode 100644
index 00000000..528f6318
--- /dev/null
+++ b/drivers/block/aoe/aoeblk.c
@@ -0,0 +1,332 @@
+/* Copyright (c) 2007 Coraid, Inc. See COPYING for GPL terms. */
+/*
+ * aoeblk.c
+ * block device routines
+ */
+
+#include <linux/kernel.h>
+#include <linux/hdreg.h>
+#include <linux/blkdev.h>
+#include <linux/backing-dev.h>
+#include <linux/fs.h>
+#include <linux/ioctl.h>
+#include <linux/slab.h>
+#include <linux/ratelimit.h>
+#include <linux/genhd.h>
+#include <linux/netdevice.h>
+#include <linux/mutex.h>
+#include "aoe.h"
+
+static DEFINE_MUTEX(aoeblk_mutex);
+static struct kmem_cache *buf_pool_cache;
+
+static ssize_t aoedisk_show_state(struct device *dev,
+ struct device_attribute *attr, char *page)
+{
+ struct gendisk *disk = dev_to_disk(dev);
+ struct aoedev *d = disk->private_data;
+
+ return snprintf(page, PAGE_SIZE,
+ "%s%s\n",
+ (d->flags & DEVFL_UP) ? "up" : "down",
+ (d->flags & DEVFL_KICKME) ? ",kickme" :
+ (d->nopen && !(d->flags & DEVFL_UP)) ? ",closewait" : "");
+ /* I'd rather see nopen exported so we can ditch closewait */
+}
+static ssize_t aoedisk_show_mac(struct device *dev,
+ struct device_attribute *attr, char *page)
+{
+ struct gendisk *disk = dev_to_disk(dev);
+ struct aoedev *d = disk->private_data;
+ struct aoetgt *t = d->targets[0];
+
+ if (t == NULL)
+ return snprintf(page, PAGE_SIZE, "none\n");
+ return snprintf(page, PAGE_SIZE, "%pm\n", t->addr);
+}
+static ssize_t aoedisk_show_netif(struct device *dev,
+ struct device_attribute *attr, char *page)
+{
+ struct gendisk *disk = dev_to_disk(dev);
+ struct aoedev *d = disk->private_data;
+ struct net_device *nds[8], **nd, **nnd, **ne;
+ struct aoetgt **t, **te;
+ struct aoeif *ifp, *e;
+ char *p;
+
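+ /* collect the distinct net_devices referenced by any target (at most 8 are reported) */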
+ memset(nds, 0, sizeof nds);
+ nd = nds;
+ ne = nd + ARRAY_SIZE(nds);
+ t = d->targets;
+ te = t + NTARGETS;
+ for (; t < te && *t; t++) {
+ ifp = (*t)->ifs;
+ e = ifp + NAOEIFS;
+ for (; ifp < e && ifp->nd; ifp++) {
+ for (nnd = nds; nnd < nd; nnd++)
+ if (*nnd == ifp->nd)
+ break;
+ if (nnd == nd && nd != ne)
+ *nd++ = ifp->nd;
+ }
+ }
+
+ ne = nd;
+ nd = nds;
+ if (*nd == NULL)
+ return snprintf(page, PAGE_SIZE, "none\n");
+ for (p = page; nd < ne; nd++)
+ p += snprintf(p, PAGE_SIZE - (p-page), "%s%s",
+ p == page ? "" : ",", (*nd)->name);
+ p += snprintf(p, PAGE_SIZE - (p-page), "\n");
+ return p-page;
+}
+/* firmware version */
+static ssize_t aoedisk_show_fwver(struct device *dev,
+ struct device_attribute *attr, char *page)
+{
+ struct gendisk *disk = dev_to_disk(dev);
+ struct aoedev *d = disk->private_data;
+
+ return snprintf(page, PAGE_SIZE, "0x%04x\n", (unsigned int) d->fw_ver);
+}
+
+static DEVICE_ATTR(state, S_IRUGO, aoedisk_show_state, NULL);
+static DEVICE_ATTR(mac, S_IRUGO, aoedisk_show_mac, NULL);
+static DEVICE_ATTR(netif, S_IRUGO, aoedisk_show_netif, NULL);
+static struct device_attribute dev_attr_firmware_version = {
+ .attr = { .name = "firmware-version", .mode = S_IRUGO },
+ .show = aoedisk_show_fwver,
+};
+
+static struct attribute *aoe_attrs[] = {
+ &dev_attr_state.attr,
+ &dev_attr_mac.attr,
+ &dev_attr_netif.attr,
+ &dev_attr_firmware_version.attr,
+ NULL,
+};
+
+static const struct attribute_group attr_group = {
+ .attrs = aoe_attrs,
+};
+
+static int
+aoedisk_add_sysfs(struct aoedev *d)
+{
+ return sysfs_create_group(&disk_to_dev(d->gd)->kobj, &attr_group);
+}
+void
+aoedisk_rm_sysfs(struct aoedev *d)
+{
+ sysfs_remove_group(&disk_to_dev(d->gd)->kobj, &attr_group);
+}
+
+static int
+aoeblk_open(struct block_device *bdev, fmode_t mode)
+{
+ struct aoedev *d = bdev->bd_disk->private_data;
+ ulong flags;
+
+ mutex_lock(&aoeblk_mutex);
+ spin_lock_irqsave(&d->lock, flags);
+ if (d->flags & DEVFL_UP) {
+ d->nopen++;
+ spin_unlock_irqrestore(&d->lock, flags);
+ mutex_unlock(&aoeblk_mutex);
+ return 0;
+ }
+ spin_unlock_irqrestore(&d->lock, flags);
+ mutex_unlock(&aoeblk_mutex);
+ return -ENODEV;
+}
+
+static int
+aoeblk_release(struct gendisk *disk, fmode_t mode)
+{
+ struct aoedev *d = disk->private_data;
+ ulong flags;
+
+ spin_lock_irqsave(&d->lock, flags);
+
+ if (--d->nopen == 0) {
+ spin_unlock_irqrestore(&d->lock, flags);
+ aoecmd_cfg(d->aoemajor, d->aoeminor);
+ return 0;
+ }
+ spin_unlock_irqrestore(&d->lock, flags);
+
+ return 0;
+}
+
+static int
+aoeblk_make_request(struct request_queue *q, struct bio *bio)
+{
+ struct sk_buff_head queue;
+ struct aoedev *d;
+ struct buf *buf;
+ ulong flags;
+
+ blk_queue_bounce(q, &bio);
+
+ if (bio == NULL) {
+ printk(KERN_ERR "aoe: bio is NULL\n");
+ BUG();
+ return 0;
+ }
+ d = bio->bi_bdev->bd_disk->private_data;
+ if (d == NULL) {
+ printk(KERN_ERR "aoe: bd_disk->private_data is NULL\n");
+ BUG();
+ bio_endio(bio, -ENXIO);
+ return 0;
+ } else if (bio->bi_io_vec == NULL) {
+ printk(KERN_ERR "aoe: bi_io_vec is NULL\n");
+ BUG();
+ bio_endio(bio, -ENXIO);
+ return 0;
+ }
+ buf = mempool_alloc(d->bufpool, GFP_NOIO);
+ if (buf == NULL) {
+ printk(KERN_INFO "aoe: buf allocation failure\n");
+ bio_endio(bio, -ENOMEM);
+ return 0;
+ }
+ memset(buf, 0, sizeof(*buf));
+ INIT_LIST_HEAD(&buf->bufs);
+ buf->stime = jiffies;
+ buf->bio = bio;
+ buf->resid = bio->bi_size;
+ buf->sector = bio->bi_sector;
+ buf->bv = &bio->bi_io_vec[bio->bi_idx];
+ buf->bv_resid = buf->bv->bv_len;
+ WARN_ON(buf->bv_resid == 0);
+ buf->bv_off = buf->bv->bv_offset;
+
+ spin_lock_irqsave(&d->lock, flags);
+
+ if ((d->flags & DEVFL_UP) == 0) {
+ pr_info_ratelimited("aoe: device %ld.%d is not up\n",
+ d->aoemajor, d->aoeminor);
+ spin_unlock_irqrestore(&d->lock, flags);
+ mempool_free(buf, d->bufpool);
+ bio_endio(bio, -ENXIO);
+ return 0;
+ }
+
+ list_add_tail(&buf->bufs, &d->bufq);
+
+ aoecmd_work(d);
+ __skb_queue_head_init(&queue);
+ skb_queue_splice_init(&d->sendq, &queue);
+
+ spin_unlock_irqrestore(&d->lock, flags);
+ aoenet_xmit(&queue);
+
+ return 0;
+}
+
+static int
+aoeblk_getgeo(struct block_device *bdev, struct hd_geometry *geo)
+{
+ struct aoedev *d = bdev->bd_disk->private_data;
+
+ if ((d->flags & DEVFL_UP) == 0) {
+ printk(KERN_ERR "aoe: disk not up\n");
+ return -ENODEV;
+ }
+
+ geo->cylinders = d->geo.cylinders;
+ geo->heads = d->geo.heads;
+ geo->sectors = d->geo.sectors;
+ return 0;
+}
+
+static const struct block_device_operations aoe_bdops = {
+ .open = aoeblk_open,
+ .release = aoeblk_release,
+ .getgeo = aoeblk_getgeo,
+ .owner = THIS_MODULE,
+};
+
+/* alloc_disk and add_disk can sleep */
+void
+aoeblk_gdalloc(void *vp)
+{
+ struct aoedev *d = vp;
+ struct gendisk *gd;
+ ulong flags;
+
+ gd = alloc_disk(AOE_PARTITIONS);
+ if (gd == NULL) {
+ printk(KERN_ERR
+ "aoe: cannot allocate disk structure for %ld.%d\n",
+ d->aoemajor, d->aoeminor);
+ goto err;
+ }
+
+ d->bufpool = mempool_create_slab_pool(MIN_BUFS, buf_pool_cache);
+ if (d->bufpool == NULL) {
+ printk(KERN_ERR "aoe: cannot allocate bufpool for %ld.%d\n",
+ d->aoemajor, d->aoeminor);
+ goto err_disk;
+ }
+
+ d->blkq = blk_alloc_queue(GFP_KERNEL);
+ if (!d->blkq)
+ goto err_mempool;
+ blk_queue_make_request(d->blkq, aoeblk_make_request);
+ d->blkq->backing_dev_info.name = "aoe";
+ if (bdi_init(&d->blkq->backing_dev_info))
+ goto err_blkq;
+ spin_lock_irqsave(&d->lock, flags);
+ gd->major = AOE_MAJOR;
+ gd->first_minor = d->sysminor * AOE_PARTITIONS;
+ gd->fops = &aoe_bdops;
+ gd->private_data = d;
+ set_capacity(gd, d->ssize);
+ snprintf(gd->disk_name, sizeof gd->disk_name, "etherd/e%ld.%d",
+ d->aoemajor, d->aoeminor);
+
+ gd->queue = d->blkq;
+ d->gd = gd;
+ d->flags &= ~DEVFL_GDALLOC;
+ d->flags |= DEVFL_UP;
+
+ spin_unlock_irqrestore(&d->lock, flags);
+
+ add_disk(gd);
+ aoedisk_add_sysfs(d);
+ return;
+
+err_blkq:
+ blk_cleanup_queue(d->blkq);
+ d->blkq = NULL;
+err_mempool:
+ mempool_destroy(d->bufpool);
+err_disk:
+ put_disk(gd);
+err:
+ spin_lock_irqsave(&d->lock, flags);
+ d->flags &= ~DEVFL_GDALLOC;
+ spin_unlock_irqrestore(&d->lock, flags);
+}
+
+void
+aoeblk_exit(void)
+{
+ kmem_cache_destroy(buf_pool_cache);
+}
+
+int __init
+aoeblk_init(void)
+{
+ buf_pool_cache = kmem_cache_create("aoe_bufs",
+ sizeof(struct buf),
+ 0, 0, NULL);
+ if (buf_pool_cache == NULL)
+ return -ENOMEM;
+
+ return 0;
+}
+
diff --git a/drivers/block/aoe/aoechr.c b/drivers/block/aoe/aoechr.c
new file mode 100644
index 00000000..146296ca
--- /dev/null
+++ b/drivers/block/aoe/aoechr.c
@@ -0,0 +1,314 @@
+/* Copyright (c) 2007 Coraid, Inc. See COPYING for GPL terms. */
+/*
+ * aoechr.c
+ * AoE character device driver
+ */
+
+#include <linux/hdreg.h>
+#include <linux/blkdev.h>
+#include <linux/completion.h>
+#include <linux/delay.h>
+#include <linux/slab.h>
+#include <linux/mutex.h>
+#include <linux/skbuff.h>
+#include "aoe.h"
+
+enum {
+ //MINOR_STAT = 1, (moved to sysfs)
+ MINOR_ERR = 2,
+ MINOR_DISCOVER,
+ MINOR_INTERFACES,
+ MINOR_REVALIDATE,
+ MINOR_FLUSH,
+ MSGSZ = 2048,
+ NMSG = 100, /* message backlog to retain */
+};
+
+struct aoe_chardev {
+ ulong minor;
+ char name[32];
+};
+
+enum { EMFL_VALID = 1 };
+
+struct ErrMsg {
+ short flags;
+ short len;
+ char *msg;
+};
+
+static DEFINE_MUTEX(aoechr_mutex);
+static struct ErrMsg emsgs[NMSG];
+static int emsgs_head_idx, emsgs_tail_idx;
+static struct completion emsgs_comp;
+static spinlock_t emsgs_lock;
+static int nblocked_emsgs_readers;
+static struct class *aoe_class;
+static struct aoe_chardev chardevs[] = {
+ { MINOR_ERR, "err" },
+ { MINOR_DISCOVER, "discover" },
+ { MINOR_INTERFACES, "interfaces" },
+ { MINOR_REVALIDATE, "revalidate" },
+ { MINOR_FLUSH, "flush" },
+};
+
+static int
+discover(void)
+{
+ aoecmd_cfg(0xffff, 0xff);
+ return 0;
+}
+
+static int
+interfaces(const char __user *str, size_t size)
+{
+ if (set_aoe_iflist(str, size)) {
+ printk(KERN_ERR
+ "aoe: could not set interface list: too many interfaces\n");
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static int
+revalidate(const char __user *str, size_t size)
+{
+ int major, minor, n;
+ ulong flags;
+ struct aoedev *d;
+ struct sk_buff *skb;
+ char buf[16];
+
+ if (size >= sizeof buf)
+ return -EINVAL;
+ buf[sizeof buf - 1] = '\0';
+ if (copy_from_user(buf, str, size))
+ return -EFAULT;
+
+ /* should be e%d.%d format */
+ n = sscanf(buf, "e%d.%d", &major, &minor);
+ if (n != 2) {
+ printk(KERN_ERR "aoe: invalid device specification\n");
+ return -EINVAL;
+ }
+ d = aoedev_by_aoeaddr(major, minor);
+ if (!d)
+ return -EINVAL;
+ spin_lock_irqsave(&d->lock, flags);
+ aoecmd_cleanslate(d);
+loop:
+ skb = aoecmd_ata_id(d);
+ spin_unlock_irqrestore(&d->lock, flags);
+ /* try again if we are able to sleep a bit,
+ * otherwise give up this revalidation
+ */
+ if (!skb && !msleep_interruptible(200)) {
+ spin_lock_irqsave(&d->lock, flags);
+ goto loop;
+ }
+ if (skb) {
+ struct sk_buff_head queue;
+ __skb_queue_head_init(&queue);
+ __skb_queue_tail(&queue, skb);
+ aoenet_xmit(&queue);
+ }
+ aoecmd_cfg(major, minor);
+ return 0;
+}
+
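+/*
+ * Queue an error message for readers of the "err" char device.  emsgs[]
+ * is a fixed ring of NMSG slots; if the slot at the tail has not been
+ * read yet the new message is silently dropped.
+ */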
+void
+aoechr_error(char *msg)
+{
+ struct ErrMsg *em;
+ char *mp;
+ ulong flags, n;
+
+ n = strlen(msg);
+
+ spin_lock_irqsave(&emsgs_lock, flags);
+
+ em = emsgs + emsgs_tail_idx;
+ if ((em->flags & EMFL_VALID)) {
+bail: spin_unlock_irqrestore(&emsgs_lock, flags);
+ return;
+ }
+
+ mp = kmalloc(n, GFP_ATOMIC);
+ if (mp == NULL) {
+ printk(KERN_ERR "aoe: allocation failure, len=%ld\n", n);
+ goto bail;
+ }
+
+ memcpy(mp, msg, n);
+ em->msg = mp;
+ em->flags |= EMFL_VALID;
+ em->len = n;
+
+ emsgs_tail_idx++;
+ emsgs_tail_idx %= ARRAY_SIZE(emsgs);
+
+ spin_unlock_irqrestore(&emsgs_lock, flags);
+
+ if (nblocked_emsgs_readers)
+ complete(&emsgs_comp);
+}
+
+static ssize_t
+aoechr_write(struct file *filp, const char __user *buf, size_t cnt, loff_t *offp)
+{
+ int ret = -EINVAL;
+
+ switch ((unsigned long) filp->private_data) {
+ default:
+ printk(KERN_INFO "aoe: can't write to that file.\n");
+ break;
+ case MINOR_DISCOVER:
+ ret = discover();
+ break;
+ case MINOR_INTERFACES:
+ ret = interfaces(buf, cnt);
+ break;
+ case MINOR_REVALIDATE:
+ ret = revalidate(buf, cnt);
+ break;
+ case MINOR_FLUSH:
+ ret = aoedev_flush(buf, cnt);
+ }
+ if (ret == 0)
+ ret = cnt;
+ return ret;
+}
+
+static int
+aoechr_open(struct inode *inode, struct file *filp)
+{
+ int n, i;
+
+ mutex_lock(&aoechr_mutex);
+ n = iminor(inode);
+ filp->private_data = (void *) (unsigned long) n;
+
+ for (i = 0; i < ARRAY_SIZE(chardevs); ++i)
+ if (chardevs[i].minor == n) {
+ mutex_unlock(&aoechr_mutex);
+ return 0;
+ }
+ mutex_unlock(&aoechr_mutex);
+ return -EINVAL;
+}
+
+static int
+aoechr_rel(struct inode *inode, struct file *filp)
+{
+ return 0;
+}
+
+static ssize_t
+aoechr_read(struct file *filp, char __user *buf, size_t cnt, loff_t *off)
+{
+ unsigned long n;
+ char *mp;
+ struct ErrMsg *em;
+ ssize_t len;
+ ulong flags;
+
+ n = (unsigned long) filp->private_data;
+ if (n != MINOR_ERR)
+ return -EFAULT;
+
+ spin_lock_irqsave(&emsgs_lock, flags);
+
+ for (;;) {
+ em = emsgs + emsgs_head_idx;
+ if ((em->flags & EMFL_VALID) != 0)
+ break;
+ if (filp->f_flags & O_NDELAY) {
+ spin_unlock_irqrestore(&emsgs_lock, flags);
+ return -EAGAIN;
+ }
+ nblocked_emsgs_readers++;
+
+ spin_unlock_irqrestore(&emsgs_lock, flags);
+
+ n = wait_for_completion_interruptible(&emsgs_comp);
+
+ spin_lock_irqsave(&emsgs_lock, flags);
+
+ nblocked_emsgs_readers--;
+
+ if (n) {
+ spin_unlock_irqrestore(&emsgs_lock, flags);
+ return -ERESTARTSYS;
+ }
+ }
+ if (em->len > cnt) {
+ spin_unlock_irqrestore(&emsgs_lock, flags);
+ return -EAGAIN;
+ }
+ mp = em->msg;
+ len = em->len;
+ em->msg = NULL;
+ em->flags &= ~EMFL_VALID;
+
+ emsgs_head_idx++;
+ emsgs_head_idx %= ARRAY_SIZE(emsgs);
+
+ spin_unlock_irqrestore(&emsgs_lock, flags);
+
+ n = copy_to_user(buf, mp, len);
+ kfree(mp);
+ return n == 0 ? len : -EFAULT;
+}
+
+static const struct file_operations aoe_fops = {
+ .write = aoechr_write,
+ .read = aoechr_read,
+ .open = aoechr_open,
+ .release = aoechr_rel,
+ .owner = THIS_MODULE,
+ .llseek = noop_llseek,
+};
+
+static char *aoe_devnode(struct device *dev, mode_t *mode)
+{
+ return kasprintf(GFP_KERNEL, "etherd/%s", dev_name(dev));
+}
+
+int __init
+aoechr_init(void)
+{
+ int n, i;
+
+ n = register_chrdev(AOE_MAJOR, "aoechr", &aoe_fops);
+ if (n < 0) {
+ printk(KERN_ERR "aoe: can't register char device\n");
+ return n;
+ }
+ init_completion(&emsgs_comp);
+ spin_lock_init(&emsgs_lock);
+ aoe_class = class_create(THIS_MODULE, "aoe");
+ if (IS_ERR(aoe_class)) {
+ unregister_chrdev(AOE_MAJOR, "aoechr");
+ return PTR_ERR(aoe_class);
+ }
+ aoe_class->devnode = aoe_devnode;
+
+ for (i = 0; i < ARRAY_SIZE(chardevs); ++i)
+ device_create(aoe_class, NULL,
+ MKDEV(AOE_MAJOR, chardevs[i].minor), NULL,
+ chardevs[i].name);
+
+ return 0;
+}
+
+void
+aoechr_exit(void)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(chardevs); ++i)
+ device_destroy(aoe_class, MKDEV(AOE_MAJOR, chardevs[i].minor));
+ class_destroy(aoe_class);
+ unregister_chrdev(AOE_MAJOR, "aoechr");
+}
+
diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c
new file mode 100644
index 00000000..de0435e6
--- /dev/null
+++ b/drivers/block/aoe/aoecmd.c
@@ -0,0 +1,1083 @@
+/* Copyright (c) 2007 Coraid, Inc. See COPYING for GPL terms. */
+/*
+ * aoecmd.c
+ * Filesystem request handling methods
+ */
+
+#include <linux/ata.h>
+#include <linux/slab.h>
+#include <linux/hdreg.h>
+#include <linux/blkdev.h>
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <linux/genhd.h>
+#include <linux/moduleparam.h>
+#include <net/net_namespace.h>
+#include <asm/unaligned.h>
+#include "aoe.h"
+
+static int aoe_deadsecs = 60 * 3;
+module_param(aoe_deadsecs, int, 0644);
+MODULE_PARM_DESC(aoe_deadsecs, "After aoe_deadsecs seconds, give up and fail dev.");
+
+static int aoe_maxout = 16;
+module_param(aoe_maxout, int, 0644);
+MODULE_PARM_DESC(aoe_maxout,
+ "Only aoe_maxout outstanding packets for every MAC on eX.Y.");
+
+static struct sk_buff *
+new_skb(ulong len)
+{
+ struct sk_buff *skb;
+
+ skb = alloc_skb(len, GFP_ATOMIC);
+ if (skb) {
+ skb_reset_mac_header(skb);
+ skb_reset_network_header(skb);
+ skb->protocol = __constant_htons(ETH_P_AOE);
+ }
+ return skb;
+}
+
+static struct frame *
+getframe(struct aoetgt *t, int tag)
+{
+ struct frame *f, *e;
+
+ f = t->frames;
+ e = f + t->nframes;
+ for (; f<e; f++)
+ if (f->tag == tag)
+ return f;
+ return NULL;
+}
+
+/*
+ * Leave the top bit clear so we have tagspace for userland.
+ * The bottom 16 bits are the xmit tick for rexmit/rttavg processing.
+ * This driver reserves tag -1 to mean "unused frame."
+ */
+static int
+newtag(struct aoetgt *t)
+{
+ register ulong n;
+
+ n = jiffies & 0xffff;
+ return n |= (++t->lasttag & 0x7fff) << 16;
+}
+
+static int
+aoehdr_atainit(struct aoedev *d, struct aoetgt *t, struct aoe_hdr *h)
+{
+ u32 host_tag = newtag(t);
+
+ memcpy(h->src, t->ifp->nd->dev_addr, sizeof h->src);
+ memcpy(h->dst, t->addr, sizeof h->dst);
+ h->type = __constant_cpu_to_be16(ETH_P_AOE);
+ h->verfl = AOE_HVER;
+ h->major = cpu_to_be16(d->aoemajor);
+ h->minor = d->aoeminor;
+ h->cmd = AOECMD_ATA;
+ h->tag = cpu_to_be32(host_tag);
+
+ return host_tag;
+}
+
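+/* store the (up to 48-bit) LBA least significant byte first in lba0..lba5 */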
+static inline void
+put_lba(struct aoe_atahdr *ah, sector_t lba)
+{
+ ah->lba0 = lba;
+ ah->lba1 = lba >>= 8;
+ ah->lba2 = lba >>= 8;
+ ah->lba3 = lba >>= 8;
+ ah->lba4 = lba >>= 8;
+ ah->lba5 = lba >>= 8;
+}
+
+static void
+ifrotate(struct aoetgt *t)
+{
+ t->ifp++;
+ if (t->ifp >= &t->ifs[NAOEIFS] || t->ifp->nd == NULL)
+ t->ifp = t->ifs;
+ if (t->ifp->nd == NULL) {
+ printk(KERN_INFO "aoe: no interface to rotate to\n");
+ BUG();
+ }
+}
+
+static void
+skb_pool_put(struct aoedev *d, struct sk_buff *skb)
+{
+ __skb_queue_tail(&d->skbpool, skb);
+}
+
+static struct sk_buff *
+skb_pool_get(struct aoedev *d)
+{
+ struct sk_buff *skb = skb_peek(&d->skbpool);
+
+ if (skb && atomic_read(&skb_shinfo(skb)->dataref) == 1) {
+ __skb_unlink(skb, &d->skbpool);
+ return skb;
+ }
+ if (skb_queue_len(&d->skbpool) < NSKBPOOLMAX &&
+ (skb = new_skb(ETH_ZLEN)))
+ return skb;
+
+ return NULL;
+}
+
+/* freeframe is where we do our load balancing so it's a little hairy. */
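+/*
+ * Strategy: start at the target after the one used last, skip targets that
+ * are at their window limit or currently flagged for rexmit help, and hand
+ * out the first FREETAG frame whose skb is no longer held by the network
+ * layer; otherwise fall back to a fresh skb from the per-device pool.
+ */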
+static struct frame *
+freeframe(struct aoedev *d)
+{
+ struct frame *f, *e, *rf;
+ struct aoetgt **t;
+ struct sk_buff *skb;
+
+ if (d->targets[0] == NULL) { /* shouldn't happen, but I'm paranoid */
+ printk(KERN_ERR "aoe: NULL TARGETS!\n");
+ return NULL;
+ }
+ t = d->tgt;
+ t++;
+ if (t >= &d->targets[NTARGETS] || !*t)
+ t = d->targets;
+ for (;;) {
+ if ((*t)->nout < (*t)->maxout
+ && t != d->htgt
+ && (*t)->ifp->nd) {
+ rf = NULL;
+ f = (*t)->frames;
+ e = f + (*t)->nframes;
+ for (; f < e; f++) {
+ if (f->tag != FREETAG)
+ continue;
+ skb = f->skb;
+ if (!skb
+ && !(f->skb = skb = new_skb(ETH_ZLEN)))
+ continue;
+ if (atomic_read(&skb_shinfo(skb)->dataref)
+ != 1) {
+ if (!rf)
+ rf = f;
+ continue;
+ }
+gotone: skb_shinfo(skb)->nr_frags = skb->data_len = 0;
+ skb_trim(skb, 0);
+ d->tgt = t;
+ ifrotate(*t);
+ return f;
+ }
+ /* Work can be done, but the network layer is
+ holding our precious packets. Try to grab
+ one from the pool. */
+ f = rf;
+ if (f == NULL) { /* more paranoia */
+ printk(KERN_ERR
+ "aoe: freeframe: %s.\n",
+ "unexpected null rf");
+ d->flags |= DEVFL_KICKME;
+ return NULL;
+ }
+ skb = skb_pool_get(d);
+ if (skb) {
+ skb_pool_put(d, f->skb);
+ f->skb = skb;
+ goto gotone;
+ }
+ (*t)->dataref++;
+ if ((*t)->nout == 0)
+ d->flags |= DEVFL_KICKME;
+ }
+ if (t == d->tgt) /* we've looped and found nada */
+ break;
+ t++;
+ if (t >= &d->targets[NTARGETS] || !*t)
+ t = d->targets;
+ }
+ return NULL;
+}
+
+static int
+aoecmd_ata_rw(struct aoedev *d)
+{
+ struct frame *f;
+ struct aoe_hdr *h;
+ struct aoe_atahdr *ah;
+ struct buf *buf;
+ struct bio_vec *bv;
+ struct aoetgt *t;
+ struct sk_buff *skb;
+ ulong bcnt;
+ char writebit, extbit;
+
+ writebit = 0x10;
+ extbit = 0x4;
+
+ f = freeframe(d);
+ if (f == NULL)
+ return 0;
+ t = *d->tgt;
+ buf = d->inprocess;
+ bv = buf->bv;
+ bcnt = t->ifp->maxbcnt;
+ if (bcnt == 0)
+ bcnt = DEFAULTBCNT;
+ if (bcnt > buf->bv_resid)
+ bcnt = buf->bv_resid;
+ /* initialize the headers & frame */
+ skb = f->skb;
+ h = (struct aoe_hdr *) skb_mac_header(skb);
+ ah = (struct aoe_atahdr *) (h+1);
+ skb_put(skb, sizeof *h + sizeof *ah);
+ memset(h, 0, skb->len);
+ f->tag = aoehdr_atainit(d, t, h);
+ t->nout++;
+ f->waited = 0;
+ f->buf = buf;
+ f->bufaddr = page_address(bv->bv_page) + buf->bv_off;
+ f->bcnt = bcnt;
+ f->lba = buf->sector;
+
+ /* set up ata header */
+ ah->scnt = bcnt >> 9;
+ put_lba(ah, buf->sector);
+ if (d->flags & DEVFL_EXT) {
+ ah->aflags |= AOEAFL_EXT;
+ } else {
+ extbit = 0;
+ ah->lba3 &= 0x0f;
+ ah->lba3 |= 0xe0; /* LBA bit + obsolete 0xa0 */
+ }
+ if (bio_data_dir(buf->bio) == WRITE) {
+ skb_fill_page_desc(skb, 0, bv->bv_page, buf->bv_off, bcnt);
+ ah->aflags |= AOEAFL_WRITE;
+ skb->len += bcnt;
+ skb->data_len = bcnt;
+ t->wpkts++;
+ } else {
+ t->rpkts++;
+ writebit = 0;
+ }
+
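+ /*
+  * ATA_CMD_PIO_READ is 0x20; OR-ing in 0x10 gives ATA_CMD_PIO_WRITE
+  * (0x30) and OR-ing in 0x04 selects the LBA48 _EXT opcodes (0x24/0x34),
+  * which is why writebit and extbit can simply be OR-ed in here.
+  */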
+ ah->cmdstat = ATA_CMD_PIO_READ | writebit | extbit;
+
+ /* mark all tracking fields and load out */
+ buf->nframesout += 1;
+ buf->bv_off += bcnt;
+ buf->bv_resid -= bcnt;
+ buf->resid -= bcnt;
+ buf->sector += bcnt >> 9;
+ if (buf->resid == 0) {
+ d->inprocess = NULL;
+ } else if (buf->bv_resid == 0) {
+ buf->bv = ++bv;
+ buf->bv_resid = bv->bv_len;
+ WARN_ON(buf->bv_resid == 0);
+ buf->bv_off = bv->bv_offset;
+ }
+
+ skb->dev = t->ifp->nd;
+ skb = skb_clone(skb, GFP_ATOMIC);
+ if (skb)
+ __skb_queue_tail(&d->sendq, skb);
+ return 1;
+}
+
+/* some callers cannot sleep, and they can call this function,
+ * transmitting the packets later, when interrupts are on
+ */
+static void
+aoecmd_cfg_pkts(ushort aoemajor, unsigned char aoeminor, struct sk_buff_head *queue)
+{
+ struct aoe_hdr *h;
+ struct aoe_cfghdr *ch;
+ struct sk_buff *skb;
+ struct net_device *ifp;
+
+ rcu_read_lock();
+ for_each_netdev_rcu(&init_net, ifp) {
+ dev_hold(ifp);
+ if (!is_aoe_netif(ifp))
+ goto cont;
+
+ skb = new_skb(sizeof *h + sizeof *ch);
+ if (skb == NULL) {
+ printk(KERN_INFO "aoe: skb alloc failure\n");
+ goto cont;
+ }
+ skb_put(skb, sizeof *h + sizeof *ch);
+ skb->dev = ifp;
+ __skb_queue_tail(queue, skb);
+ h = (struct aoe_hdr *) skb_mac_header(skb);
+ memset(h, 0, sizeof *h + sizeof *ch);
+
+ memset(h->dst, 0xff, sizeof h->dst);
+ memcpy(h->src, ifp->dev_addr, sizeof h->src);
+ h->type = __constant_cpu_to_be16(ETH_P_AOE);
+ h->verfl = AOE_HVER;
+ h->major = cpu_to_be16(aoemajor);
+ h->minor = aoeminor;
+ h->cmd = AOECMD_CFG;
+
+cont:
+ dev_put(ifp);
+ }
+ rcu_read_unlock();
+}
+
+static void
+resend(struct aoedev *d, struct aoetgt *t, struct frame *f)
+{
+ struct sk_buff *skb;
+ struct aoe_hdr *h;
+ struct aoe_atahdr *ah;
+ char buf[128];
+ u32 n;
+
+ ifrotate(t);
+ n = newtag(t);
+ skb = f->skb;
+ h = (struct aoe_hdr *) skb_mac_header(skb);
+ ah = (struct aoe_atahdr *) (h+1);
+
+ snprintf(buf, sizeof buf,
+ "%15s e%ld.%d oldtag=%08x@%08lx newtag=%08x s=%pm d=%pm nout=%d\n",
+ "retransmit", d->aoemajor, d->aoeminor, f->tag, jiffies, n,
+ h->src, h->dst, t->nout);
+ aoechr_error(buf);
+
+ f->tag = n;
+ h->tag = cpu_to_be32(n);
+ memcpy(h->dst, t->addr, sizeof h->dst);
+ memcpy(h->src, t->ifp->nd->dev_addr, sizeof h->src);
+
+ switch (ah->cmdstat) {
+ default:
+ break;
+ case ATA_CMD_PIO_READ:
+ case ATA_CMD_PIO_READ_EXT:
+ case ATA_CMD_PIO_WRITE:
+ case ATA_CMD_PIO_WRITE_EXT:
+ put_lba(ah, f->lba);
+
+ n = f->bcnt;
+ if (n > DEFAULTBCNT)
+ n = DEFAULTBCNT;
+ ah->scnt = n >> 9;
+ if (ah->aflags & AOEAFL_WRITE) {
+ skb_fill_page_desc(skb, 0, virt_to_page(f->bufaddr),
+ offset_in_page(f->bufaddr), n);
+ skb->len = sizeof *h + sizeof *ah + n;
+ skb->data_len = n;
+ }
+ }
+ skb->dev = t->ifp->nd;
+ skb = skb_clone(skb, GFP_ATOMIC);
+ if (skb == NULL)
+ return;
+ __skb_queue_tail(&d->sendq, skb);
+}
+
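+/* ticks elapsed since the jiffies value stamped into the tag's low 16 bits */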
+static int
+tsince(int tag)
+{
+ int n;
+
+ n = jiffies & 0xffff;
+ n -= tag & 0xffff;
+ if (n < 0)
+ n += 1<<16;
+ return n;
+}
+
+static struct aoeif *
+getif(struct aoetgt *t, struct net_device *nd)
+{
+ struct aoeif *p, *e;
+
+ p = t->ifs;
+ e = p + NAOEIFS;
+ for (; p < e; p++)
+ if (p->nd == nd)
+ return p;
+ return NULL;
+}
+
+static struct aoeif *
+addif(struct aoetgt *t, struct net_device *nd)
+{
+ struct aoeif *p;
+
+ p = getif(t, NULL);
+ if (!p)
+ return NULL;
+ p->nd = nd;
+ p->maxbcnt = DEFAULTBCNT;
+ p->lost = 0;
+ p->lostjumbo = 0;
+ return p;
+}
+
+static void
+ejectif(struct aoetgt *t, struct aoeif *ifp)
+{
+ struct aoeif *e;
+ ulong n;
+
+ e = t->ifs + NAOEIFS - 1;
+ n = (e - ifp) * sizeof *ifp;
+ memmove(ifp, ifp+1, n);
+ e->nd = NULL;
+}
+
+static int
+sthtith(struct aoedev *d)
+{
+ struct frame *f, *e, *nf;
+ struct sk_buff *skb;
+ struct aoetgt *ht = *d->htgt;
+
+ f = ht->frames;
+ e = f + ht->nframes;
+ for (; f < e; f++) {
+ if (f->tag == FREETAG)
+ continue;
+ nf = freeframe(d);
+ if (!nf)
+ return 0;
+ skb = nf->skb;
+ *nf = *f;
+ f->skb = skb;
+ f->tag = FREETAG;
+ nf->waited = 0;
+ ht->nout--;
+ (*d->tgt)->nout++;
+ resend(d, *d->tgt, nf);
+ }
+ /* he's clean, he's useless. take away his interfaces */
+ memset(ht->ifs, 0, sizeof ht->ifs);
+ d->htgt = NULL;
+ return 1;
+}
+
+static inline unsigned char
+ata_scnt(unsigned char *packet) {
+ struct aoe_hdr *h;
+ struct aoe_atahdr *ah;
+
+ h = (struct aoe_hdr *) packet;
+ ah = (struct aoe_atahdr *) (h+1);
+ return ah->scnt;
+}
+
+static void
+rexmit_timer(ulong vp)
+{
+ struct sk_buff_head queue;
+ struct aoedev *d;
+ struct aoetgt *t, **tt, **te;
+ struct aoeif *ifp;
+ struct frame *f, *e;
+ register long timeout;
+ ulong flags, n;
+
+ d = (struct aoedev *) vp;
+
+ /* timeout is always ~150% of the moving average */
+ timeout = d->rttavg;
+ timeout += timeout >> 1;
+
+ spin_lock_irqsave(&d->lock, flags);
+
+ if (d->flags & DEVFL_TKILL) {
+ spin_unlock_irqrestore(&d->lock, flags);
+ return;
+ }
+ tt = d->targets;
+ te = tt + NTARGETS;
+ for (; tt < te && *tt; tt++) {
+ t = *tt;
+ f = t->frames;
+ e = f + t->nframes;
+ for (; f < e; f++) {
+ if (f->tag == FREETAG
+ || tsince(f->tag) < timeout)
+ continue;
+ n = f->waited += timeout;
+ n /= HZ;
+ if (n > aoe_deadsecs) {
+ /* waited too long. device failure. */
+ aoedev_downdev(d);
+ break;
+ }
+
+ if (n > HELPWAIT /* see if another target can help */
+ && (tt != d->targets || d->targets[1]))
+ d->htgt = tt;
+
+ if (t->nout == t->maxout) {
+ if (t->maxout > 1)
+ t->maxout--;
+ t->lastwadj = jiffies;
+ }
+
+ ifp = getif(t, f->skb->dev);
+ if (ifp && ++ifp->lost > (t->nframes << 1)
+ && (ifp != t->ifs || t->ifs[1].nd)) {
+ ejectif(t, ifp);
+ ifp = NULL;
+ }
+
+ if (ata_scnt(skb_mac_header(f->skb)) > DEFAULTBCNT / 512
+ && ifp && ++ifp->lostjumbo > (t->nframes << 1)
+ && ifp->maxbcnt != DEFAULTBCNT) {
+ printk(KERN_INFO
+ "aoe: e%ld.%d: "
+ "too many lost jumbo on "
+ "%s:%pm - "
+ "falling back to %d frames.\n",
+ d->aoemajor, d->aoeminor,
+ ifp->nd->name, t->addr,
+ DEFAULTBCNT);
+ ifp->maxbcnt = 0;
+ }
+ resend(d, t, f);
+ }
+
+ /* window check */
+ if (t->nout == t->maxout
+ && t->maxout < t->nframes
+ && (jiffies - t->lastwadj)/HZ > 10) {
+ t->maxout++;
+ t->lastwadj = jiffies;
+ }
+ }
+
+ if (!skb_queue_empty(&d->sendq)) {
+ n = d->rttavg <<= 1;
+ if (n > MAXTIMER)
+ d->rttavg = MAXTIMER;
+ }
+
+ if (d->flags & DEVFL_KICKME || d->htgt) {
+ d->flags &= ~DEVFL_KICKME;
+ aoecmd_work(d);
+ }
+
+ __skb_queue_head_init(&queue);
+ skb_queue_splice_init(&d->sendq, &queue);
+
+ d->timer.expires = jiffies + TIMERTICK;
+ add_timer(&d->timer);
+
+ spin_unlock_irqrestore(&d->lock, flags);
+
+ aoenet_xmit(&queue);
+}
+
+/* enters with d->lock held */
+void
+aoecmd_work(struct aoedev *d)
+{
+ struct buf *buf;
+loop:
+ if (d->htgt && !sthtith(d))
+ return;
+ if (d->inprocess == NULL) {
+ if (list_empty(&d->bufq))
+ return;
+ buf = container_of(d->bufq.next, struct buf, bufs);
+ list_del(d->bufq.next);
+ d->inprocess = buf;
+ }
+ if (aoecmd_ata_rw(d))
+ goto loop;
+}
+
+/* this function performs work that has been deferred until sleeping is OK
+ */
+void
+aoecmd_sleepwork(struct work_struct *work)
+{
+ struct aoedev *d = container_of(work, struct aoedev, work);
+
+ if (d->flags & DEVFL_GDALLOC)
+ aoeblk_gdalloc(d);
+
+ if (d->flags & DEVFL_NEWSIZE) {
+ struct block_device *bd;
+ unsigned long flags;
+ u64 ssize;
+
+ ssize = get_capacity(d->gd);
+ bd = bdget_disk(d->gd, 0);
+
+ if (bd) {
+ mutex_lock(&bd->bd_inode->i_mutex);
+ i_size_write(bd->bd_inode, (loff_t)ssize<<9);
+ mutex_unlock(&bd->bd_inode->i_mutex);
+ bdput(bd);
+ }
+ spin_lock_irqsave(&d->lock, flags);
+ d->flags |= DEVFL_UP;
+ d->flags &= ~DEVFL_NEWSIZE;
+ spin_unlock_irqrestore(&d->lock, flags);
+ }
+}
+
+static void
+ataid_complete(struct aoedev *d, struct aoetgt *t, unsigned char *id)
+{
+ u64 ssize;
+ u16 n;
+
+ /* word 83: command set supported */
+ n = get_unaligned_le16(&id[83 << 1]);
+
+ /* word 86: command set/feature enabled */
+ n |= get_unaligned_le16(&id[86 << 1]);
+
+ if (n & (1<<10)) { /* bit 10: LBA 48 */
+ d->flags |= DEVFL_EXT;
+
+ /* word 100: number lba48 sectors */
+ ssize = get_unaligned_le64(&id[100 << 1]);
+
+ /* set as in ide-disk.c:init_idedisk_capacity */
+ d->geo.cylinders = ssize;
+ d->geo.cylinders /= (255 * 63);
+ d->geo.heads = 255;
+ d->geo.sectors = 63;
+ } else {
+ d->flags &= ~DEVFL_EXT;
+
+ /* number lba28 sectors */
+ ssize = get_unaligned_le32(&id[60 << 1]);
+
+ /* NOTE: obsolete in ATA 6 */
+ d->geo.cylinders = get_unaligned_le16(&id[54 << 1]);
+ d->geo.heads = get_unaligned_le16(&id[55 << 1]);
+ d->geo.sectors = get_unaligned_le16(&id[56 << 1]);
+ }
+
+ if (d->ssize != ssize)
+ printk(KERN_INFO
+ "aoe: %pm e%ld.%d v%04x has %llu sectors\n",
+ t->addr,
+ d->aoemajor, d->aoeminor,
+ d->fw_ver, (long long)ssize);
+ d->ssize = ssize;
+ d->geo.start = 0;
+ if (d->flags & (DEVFL_GDALLOC|DEVFL_NEWSIZE))
+ return;
+ if (d->gd != NULL) {
+ set_capacity(d->gd, ssize);
+ d->flags |= DEVFL_NEWSIZE;
+ } else
+ d->flags |= DEVFL_GDALLOC;
+ schedule_work(&d->work);
+}
+
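+/*
+ * rttavg is a Jacobson-style exponentially weighted moving average with
+ * gain 1/4: rttavg += (rtt - rttavg) / 4.  A negative rtt marks a response
+ * that could not be matched to a frame; it is clamped and only nudges
+ * mintimer.
+ */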
+static void
+calc_rttavg(struct aoedev *d, int rtt)
+{
+ register long n;
+
+ n = rtt;
+ if (n < 0) {
+ n = -rtt;
+ if (n < MINTIMER)
+ n = MINTIMER;
+ else if (n > MAXTIMER)
+ n = MAXTIMER;
+ d->mintimer += (n - d->mintimer) >> 1;
+ } else if (n < d->mintimer)
+ n = d->mintimer;
+ else if (n > MAXTIMER)
+ n = MAXTIMER;
+
+ /* g == .25; cf. Congestion Avoidance and Control, Jacobson & Karels; 1988 */
+ n -= d->rttavg;
+ d->rttavg += n >> 2;
+}
+
+static struct aoetgt *
+gettgt(struct aoedev *d, char *addr)
+{
+ struct aoetgt **t, **e;
+
+ t = d->targets;
+ e = t + NTARGETS;
+ for (; t < e && *t; t++)
+ if (memcmp((*t)->addr, addr, sizeof((*t)->addr)) == 0)
+ return *t;
+ return NULL;
+}
+
+static inline void
+diskstats(struct gendisk *disk, struct bio *bio, ulong duration, sector_t sector)
+{
+ unsigned long n_sect = bio->bi_size >> 9;
+ const int rw = bio_data_dir(bio);
+ struct hd_struct *part;
+ int cpu;
+
+ cpu = part_stat_lock();
+ part = disk_map_sector_rcu(disk, sector);
+
+ part_stat_inc(cpu, part, ios[rw]);
+ part_stat_add(cpu, part, ticks[rw], duration);
+ part_stat_add(cpu, part, sectors[rw], n_sect);
+ part_stat_add(cpu, part, io_ticks, duration);
+
+ part_stat_unlock();
+}
+
+void
+aoecmd_ata_rsp(struct sk_buff *skb)
+{
+ struct sk_buff_head queue;
+ struct aoedev *d;
+ struct aoe_hdr *hin, *hout;
+ struct aoe_atahdr *ahin, *ahout;
+ struct frame *f;
+ struct buf *buf;
+ struct aoetgt *t;
+ struct aoeif *ifp;
+ register long n;
+ ulong flags;
+ char ebuf[128];
+ u16 aoemajor;
+
+ hin = (struct aoe_hdr *) skb_mac_header(skb);
+ aoemajor = get_unaligned_be16(&hin->major);
+ d = aoedev_by_aoeaddr(aoemajor, hin->minor);
+ if (d == NULL) {
+ snprintf(ebuf, sizeof ebuf, "aoecmd_ata_rsp: ata response "
+ "for unknown device %d.%d\n",
+ aoemajor, hin->minor);
+ aoechr_error(ebuf);
+ return;
+ }
+
+ spin_lock_irqsave(&d->lock, flags);
+
+ n = get_unaligned_be32(&hin->tag);
+ t = gettgt(d, hin->src);
+ if (t == NULL) {
+ printk(KERN_INFO "aoe: can't find target e%ld.%d:%pm\n",
+ d->aoemajor, d->aoeminor, hin->src);
+ spin_unlock_irqrestore(&d->lock, flags);
+ return;
+ }
+ f = getframe(t, n);
+ if (f == NULL) {
+ calc_rttavg(d, -tsince(n));
+ spin_unlock_irqrestore(&d->lock, flags);
+ snprintf(ebuf, sizeof ebuf,
+ "%15s e%d.%d tag=%08x@%08lx\n",
+ "unexpected rsp",
+ get_unaligned_be16(&hin->major),
+ hin->minor,
+ get_unaligned_be32(&hin->tag),
+ jiffies);
+ aoechr_error(ebuf);
+ return;
+ }
+
+ calc_rttavg(d, tsince(f->tag));
+
+ ahin = (struct aoe_atahdr *) (hin+1);
+ hout = (struct aoe_hdr *) skb_mac_header(f->skb);
+ ahout = (struct aoe_atahdr *) (hout+1);
+ buf = f->buf;
+
+ if (ahin->cmdstat & 0xa9) { /* these bits cleared on success */
+ printk(KERN_ERR
+ "aoe: ata error cmd=%2.2Xh stat=%2.2Xh from e%ld.%d\n",
+ ahout->cmdstat, ahin->cmdstat,
+ d->aoemajor, d->aoeminor);
+ if (buf)
+ buf->flags |= BUFFL_FAIL;
+ } else {
+ if (d->htgt && t == *d->htgt) /* I'll help myself, thank you. */
+ d->htgt = NULL;
+ n = ahout->scnt << 9;
+ switch (ahout->cmdstat) {
+ case ATA_CMD_PIO_READ:
+ case ATA_CMD_PIO_READ_EXT:
+ if (skb->len - sizeof *hin - sizeof *ahin < n) {
+ printk(KERN_ERR
+ "aoe: %s. skb->len=%d need=%ld\n",
+ "runt data size in read", skb->len, n);
+ /* fail frame f? just returning will rexmit. */
+ spin_unlock_irqrestore(&d->lock, flags);
+ return;
+ }
+ memcpy(f->bufaddr, ahin+1, n);
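+			/* fall through to the common per-frame bookkeeping below */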
+ case ATA_CMD_PIO_WRITE:
+ case ATA_CMD_PIO_WRITE_EXT:
+ ifp = getif(t, skb->dev);
+ if (ifp) {
+ ifp->lost = 0;
+ if (n > DEFAULTBCNT)
+ ifp->lostjumbo = 0;
+ }
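+			/*
+			 * More data remains for this frame's buffer: advance
+			 * the LBA and buffer pointer past what was just
+			 * transferred and send the next chunk.
+			 */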
+ if (f->bcnt -= n) {
+ f->lba += n >> 9;
+ f->bufaddr += n;
+ resend(d, t, f);
+ goto xmit;
+ }
+ break;
+ case ATA_CMD_ID_ATA:
+ if (skb->len - sizeof *hin - sizeof *ahin < 512) {
+ printk(KERN_INFO
+ "aoe: runt data size in ataid. skb->len=%d\n",
+ skb->len);
+ spin_unlock_irqrestore(&d->lock, flags);
+ return;
+ }
+ ataid_complete(d, t, (char *) (ahin+1));
+ break;
+ default:
+ printk(KERN_INFO
+ "aoe: unrecognized ata command %2.2Xh for %d.%d\n",
+ ahout->cmdstat,
+ get_unaligned_be16(&hin->major),
+ hin->minor);
+ }
+ }
+
+ if (buf && --buf->nframesout == 0 && buf->resid == 0) {
+ diskstats(d->gd, buf->bio, jiffies - buf->stime, buf->sector);
+ if (buf->flags & BUFFL_FAIL)
+ bio_endio(buf->bio, -EIO);
+ else {
+ bio_flush_dcache_pages(buf->bio);
+ bio_endio(buf->bio, 0);
+ }
+ mempool_free(buf, d->bufpool);
+ }
+
+ f->buf = NULL;
+ f->tag = FREETAG;
+ t->nout--;
+
+ aoecmd_work(d);
+xmit:
+ __skb_queue_head_init(&queue);
+ skb_queue_splice_init(&d->sendq, &queue);
+
+ spin_unlock_irqrestore(&d->lock, flags);
+ aoenet_xmit(&queue);
+}
+
+void
+aoecmd_cfg(ushort aoemajor, unsigned char aoeminor)
+{
+ struct sk_buff_head queue;
+
+ __skb_queue_head_init(&queue);
+ aoecmd_cfg_pkts(aoemajor, aoeminor, &queue);
+ aoenet_xmit(&queue);
+}
+
+struct sk_buff *
+aoecmd_ata_id(struct aoedev *d)
+{
+ struct aoe_hdr *h;
+ struct aoe_atahdr *ah;
+ struct frame *f;
+ struct sk_buff *skb;
+ struct aoetgt *t;
+
+ f = freeframe(d);
+ if (f == NULL)
+ return NULL;
+
+ t = *d->tgt;
+
+ /* initialize the headers & frame */
+ skb = f->skb;
+ h = (struct aoe_hdr *) skb_mac_header(skb);
+ ah = (struct aoe_atahdr *) (h+1);
+ skb_put(skb, sizeof *h + sizeof *ah);
+ memset(h, 0, skb->len);
+ f->tag = aoehdr_atainit(d, t, h);
+ t->nout++;
+ f->waited = 0;
+
+ /* set up ata header */
+ ah->scnt = 1;
+ ah->cmdstat = ATA_CMD_ID_ATA;
+ ah->lba3 = 0xa0;
+
+ skb->dev = t->ifp->nd;
+
+ d->rttavg = MAXTIMER;
+ d->timer.function = rexmit_timer;
+
+ return skb_clone(skb, GFP_ATOMIC);
+}
+
+static struct aoetgt *
+addtgt(struct aoedev *d, char *addr, ulong nframes)
+{
+ struct aoetgt *t, **tt, **te;
+ struct frame *f, *e;
+
+ tt = d->targets;
+ te = tt + NTARGETS;
+ for (; tt < te && *tt; tt++)
+ ;
+
+ if (tt == te) {
+ printk(KERN_INFO
+ "aoe: device addtgt failure; too many targets\n");
+ return NULL;
+ }
+ t = kcalloc(1, sizeof *t, GFP_ATOMIC);
+ f = kcalloc(nframes, sizeof *f, GFP_ATOMIC);
+ if (!t || !f) {
+ kfree(f);
+ kfree(t);
+ printk(KERN_INFO "aoe: cannot allocate memory to add target\n");
+ return NULL;
+ }
+
+ t->nframes = nframes;
+ t->frames = f;
+ e = f + nframes;
+ for (; f < e; f++)
+ f->tag = FREETAG;
+ memcpy(t->addr, addr, sizeof t->addr);
+ t->ifp = t->ifs;
+ t->maxout = t->nframes;
+ return *tt = t;
+}
+
+void
+aoecmd_cfg_rsp(struct sk_buff *skb)
+{
+ struct aoedev *d;
+ struct aoe_hdr *h;
+ struct aoe_cfghdr *ch;
+ struct aoetgt *t;
+ struct aoeif *ifp;
+ ulong flags, sysminor, aoemajor;
+ struct sk_buff *sl;
+ u16 n;
+
+ h = (struct aoe_hdr *) skb_mac_header(skb);
+ ch = (struct aoe_cfghdr *) (h+1);
+
+ /*
+ * Enough people have their dip switches set backwards to
+ * warrant a loud message for this special case.
+ */
+ aoemajor = get_unaligned_be16(&h->major);
+ if (aoemajor == 0xfff) {
+ printk(KERN_ERR "aoe: Warning: shelf address is all ones. "
+ "Check shelf dip switches.\n");
+ return;
+ }
+
+ sysminor = SYSMINOR(aoemajor, h->minor);
+ if (sysminor * AOE_PARTITIONS + AOE_PARTITIONS > MINORMASK) {
+ printk(KERN_INFO "aoe: e%ld.%d: minor number too large\n",
+ aoemajor, (int) h->minor);
+ return;
+ }
+
+ n = be16_to_cpu(ch->bufcnt);
+ if (n > aoe_maxout) /* keep it reasonable */
+ n = aoe_maxout;
+
+ d = aoedev_by_sysminor_m(sysminor);
+ if (d == NULL) {
+ printk(KERN_INFO "aoe: device sysminor_m failure\n");
+ return;
+ }
+
+ spin_lock_irqsave(&d->lock, flags);
+
+ t = gettgt(d, h->src);
+ if (!t) {
+ t = addtgt(d, h->src, n);
+ if (!t) {
+ spin_unlock_irqrestore(&d->lock, flags);
+ return;
+ }
+ }
+ ifp = getif(t, skb->dev);
+ if (!ifp) {
+ ifp = addif(t, skb->dev);
+ if (!ifp) {
+ printk(KERN_INFO
+ "aoe: device addif failure; "
+ "too many interfaces?\n");
+ spin_unlock_irqrestore(&d->lock, flags);
+ return;
+ }
+ }
+ if (ifp->maxbcnt) {
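+		/*
+		 * Size the data payload per frame: MTU minus the AoE and ATA
+		 * headers, rounded down to whole 512-byte sectors and capped
+		 * by the sector count the target advertises.
+		 */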
+ n = ifp->nd->mtu;
+ n -= sizeof (struct aoe_hdr) + sizeof (struct aoe_atahdr);
+ n /= 512;
+ if (n > ch->scnt)
+ n = ch->scnt;
+ n = n ? n * 512 : DEFAULTBCNT;
+ if (n != ifp->maxbcnt) {
+ printk(KERN_INFO
+ "aoe: e%ld.%d: setting %d%s%s:%pm\n",
+ d->aoemajor, d->aoeminor, n,
+ " byte data frames on ", ifp->nd->name,
+ t->addr);
+ ifp->maxbcnt = n;
+ }
+ }
+
+ /* don't change users' perspective */
+ if (d->nopen) {
+ spin_unlock_irqrestore(&d->lock, flags);
+ return;
+ }
+ d->fw_ver = be16_to_cpu(ch->fwver);
+
+ sl = aoecmd_ata_id(d);
+
+ spin_unlock_irqrestore(&d->lock, flags);
+
+ if (sl) {
+ struct sk_buff_head queue;
+ __skb_queue_head_init(&queue);
+ __skb_queue_tail(&queue, sl);
+ aoenet_xmit(&queue);
+ }
+}
+
+void
+aoecmd_cleanslate(struct aoedev *d)
+{
+ struct aoetgt **t, **te;
+ struct aoeif *p, *e;
+
+ d->mintimer = MINTIMER;
+
+ t = d->targets;
+ te = t + NTARGETS;
+ for (; t < te && *t; t++) {
+ (*t)->maxout = (*t)->nframes;
+ p = (*t)->ifs;
+ e = p + NAOEIFS;
+ for (; p < e; p++) {
+ p->lostjumbo = 0;
+ p->lost = 0;
+ p->maxbcnt = DEFAULTBCNT;
+ }
+ }
+}
diff --git a/drivers/block/aoe/aoedev.c b/drivers/block/aoe/aoedev.c
new file mode 100644
index 00000000..6b5110a4
--- /dev/null
+++ b/drivers/block/aoe/aoedev.c
@@ -0,0 +1,277 @@
+/* Copyright (c) 2007 Coraid, Inc. See COPYING for GPL terms. */
+/*
+ * aoedev.c
+ * AoE device utility functions; maintains device list.
+ */
+
+#include <linux/hdreg.h>
+#include <linux/blkdev.h>
+#include <linux/netdevice.h>
+#include <linux/delay.h>
+#include <linux/slab.h>
+#include "aoe.h"
+
+static void dummy_timer(ulong);
+static void aoedev_freedev(struct aoedev *);
+static void freetgt(struct aoedev *d, struct aoetgt *t);
+static void skbpoolfree(struct aoedev *d);
+
+static struct aoedev *devlist;
+static DEFINE_SPINLOCK(devlist_lock);
+
+struct aoedev *
+aoedev_by_aoeaddr(int maj, int min)
+{
+ struct aoedev *d;
+ ulong flags;
+
+ spin_lock_irqsave(&devlist_lock, flags);
+
+ for (d=devlist; d; d=d->next)
+ if (d->aoemajor == maj && d->aoeminor == min)
+ break;
+
+ spin_unlock_irqrestore(&devlist_lock, flags);
+ return d;
+}
+
+static void
+dummy_timer(ulong vp)
+{
+ struct aoedev *d;
+
+ d = (struct aoedev *)vp;
+ if (d->flags & DEVFL_TKILL)
+ return;
+ d->timer.expires = jiffies + HZ;
+ add_timer(&d->timer);
+}
+
+void
+aoedev_downdev(struct aoedev *d)
+{
+ struct aoetgt **t, **te;
+ struct frame *f, *e;
+ struct buf *buf;
+ struct bio *bio;
+
+ t = d->targets;
+ te = t + NTARGETS;
+ for (; t < te && *t; t++) {
+ f = (*t)->frames;
+ e = f + (*t)->nframes;
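+		/*
+		 * Fail any outstanding I/O on every frame; the loop increment
+		 * retires each frame (FREETAG, no buffer) after its body runs.
+		 */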
+ for (; f < e; f->tag = FREETAG, f->buf = NULL, f++) {
+ if (f->tag == FREETAG || f->buf == NULL)
+ continue;
+ buf = f->buf;
+ bio = buf->bio;
+ if (--buf->nframesout == 0
+ && buf != d->inprocess) {
+ mempool_free(buf, d->bufpool);
+ bio_endio(bio, -EIO);
+ }
+ }
+ (*t)->maxout = (*t)->nframes;
+ (*t)->nout = 0;
+ }
+ buf = d->inprocess;
+ if (buf) {
+ bio = buf->bio;
+ mempool_free(buf, d->bufpool);
+ bio_endio(bio, -EIO);
+ }
+ d->inprocess = NULL;
+ d->htgt = NULL;
+
+ while (!list_empty(&d->bufq)) {
+ buf = container_of(d->bufq.next, struct buf, bufs);
+ list_del(d->bufq.next);
+ bio = buf->bio;
+ mempool_free(buf, d->bufpool);
+ bio_endio(bio, -EIO);
+ }
+
+ if (d->gd)
+ set_capacity(d->gd, 0);
+
+ d->flags &= ~DEVFL_UP;
+}
+
+static void
+aoedev_freedev(struct aoedev *d)
+{
+ struct aoetgt **t, **e;
+
+ cancel_work_sync(&d->work);
+ if (d->gd) {
+ aoedisk_rm_sysfs(d);
+ del_gendisk(d->gd);
+ put_disk(d->gd);
+ }
+ t = d->targets;
+ e = t + NTARGETS;
+ for (; t < e && *t; t++)
+ freetgt(d, *t);
+ if (d->bufpool)
+ mempool_destroy(d->bufpool);
+ skbpoolfree(d);
+ blk_cleanup_queue(d->blkq);
+ kfree(d);
+}
+
+int
+aoedev_flush(const char __user *str, size_t cnt)
+{
+ ulong flags;
+ struct aoedev *d, **dd;
+ struct aoedev *rmd = NULL;
+ char buf[16];
+ int all = 0;
+
+ if (cnt >= 3) {
+ if (cnt > sizeof buf)
+ cnt = sizeof buf;
+ if (copy_from_user(buf, str, cnt))
+ return -EFAULT;
+ all = !strncmp(buf, "all", 3);
+ }
+
+ spin_lock_irqsave(&devlist_lock, flags);
+ dd = &devlist;
+ while ((d = *dd)) {
+ spin_lock(&d->lock);
+ if ((!all && (d->flags & DEVFL_UP))
+ || (d->flags & (DEVFL_GDALLOC|DEVFL_NEWSIZE))
+ || d->nopen) {
+ spin_unlock(&d->lock);
+ dd = &d->next;
+ continue;
+ }
+ *dd = d->next;
+ aoedev_downdev(d);
+ d->flags |= DEVFL_TKILL;
+ spin_unlock(&d->lock);
+ d->next = rmd;
+ rmd = d;
+ }
+ spin_unlock_irqrestore(&devlist_lock, flags);
+ while ((d = rmd)) {
+ rmd = d->next;
+ del_timer_sync(&d->timer);
+ aoedev_freedev(d); /* must be able to sleep */
+ }
+ return 0;
+}
+
+/* I'm not really sure that this is a realistic problem, but if the
+network driver goes gonzo let's just leak memory after complaining. */
+static void
+skbfree(struct sk_buff *skb)
+{
+ enum { Sms = 100, Tms = 3*1000};
+ int i = Tms / Sms;
+
+ if (skb == NULL)
+ return;
+ while (atomic_read(&skb_shinfo(skb)->dataref) != 1 && i-- > 0)
+ msleep(Sms);
+ if (i < 0) {
+ printk(KERN_ERR
+ "aoe: %s holds ref: %s\n",
+ skb->dev ? skb->dev->name : "netif",
+ "cannot free skb -- memory leaked.");
+ return;
+ }
+ skb_shinfo(skb)->nr_frags = skb->data_len = 0;
+ skb_trim(skb, 0);
+ dev_kfree_skb(skb);
+}
+
+static void
+skbpoolfree(struct aoedev *d)
+{
+ struct sk_buff *skb, *tmp;
+
+ skb_queue_walk_safe(&d->skbpool, skb, tmp)
+ skbfree(skb);
+
+ __skb_queue_head_init(&d->skbpool);
+}
+
+/* find it or malloc it */
+struct aoedev *
+aoedev_by_sysminor_m(ulong sysminor)
+{
+ struct aoedev *d;
+ ulong flags;
+
+ spin_lock_irqsave(&devlist_lock, flags);
+
+ for (d=devlist; d; d=d->next)
+ if (d->sysminor == sysminor)
+ break;
+ if (d)
+ goto out;
+ d = kcalloc(1, sizeof *d, GFP_ATOMIC);
+ if (!d)
+ goto out;
+ INIT_WORK(&d->work, aoecmd_sleepwork);
+ spin_lock_init(&d->lock);
+ skb_queue_head_init(&d->sendq);
+ skb_queue_head_init(&d->skbpool);
+ init_timer(&d->timer);
+ d->timer.data = (ulong) d;
+ d->timer.function = dummy_timer;
+ d->timer.expires = jiffies + HZ;
+ add_timer(&d->timer);
+ d->bufpool = NULL; /* defer to aoeblk_gdalloc */
+ d->tgt = d->targets;
+ INIT_LIST_HEAD(&d->bufq);
+ d->sysminor = sysminor;
+ d->aoemajor = AOEMAJOR(sysminor);
+ d->aoeminor = AOEMINOR(sysminor);
+ d->mintimer = MINTIMER;
+ d->next = devlist;
+ devlist = d;
+ out:
+ spin_unlock_irqrestore(&devlist_lock, flags);
+ return d;
+}
+
+static void
+freetgt(struct aoedev *d, struct aoetgt *t)
+{
+ struct frame *f, *e;
+
+ f = t->frames;
+ e = f + t->nframes;
+ for (; f < e; f++)
+ skbfree(f->skb);
+ kfree(t->frames);
+ kfree(t);
+}
+
+void
+aoedev_exit(void)
+{
+ struct aoedev *d;
+ ulong flags;
+
+ while ((d = devlist)) {
+ devlist = d->next;
+
+ spin_lock_irqsave(&d->lock, flags);
+ aoedev_downdev(d);
+ d->flags |= DEVFL_TKILL;
+ spin_unlock_irqrestore(&d->lock, flags);
+
+ del_timer_sync(&d->timer);
+ aoedev_freedev(d);
+ }
+}
+
+int __init
+aoedev_init(void)
+{
+ return 0;
+}
diff --git a/drivers/block/aoe/aoemain.c b/drivers/block/aoe/aoemain.c
new file mode 100644
index 00000000..7f83ad90
--- /dev/null
+++ b/drivers/block/aoe/aoemain.c
@@ -0,0 +1,111 @@
+/* Copyright (c) 2007 Coraid, Inc. See COPYING for GPL terms. */
+/*
+ * aoemain.c
+ * Module initialization routines, discover timer
+ */
+
+#include <linux/hdreg.h>
+#include <linux/blkdev.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include "aoe.h"
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Sam Hopkins <sah@coraid.com>");
+MODULE_DESCRIPTION("AoE block/char driver for 2.6.2 and newer 2.6 kernels");
+MODULE_VERSION(VERSION);
+
+enum { TINIT, TRUN, TKILL };
+
+static void
+discover_timer(ulong vp)
+{
+ static struct timer_list t;
+ static volatile ulong die;
+ static spinlock_t lock;
+ ulong flags;
+ enum { DTIMERTICK = HZ * 60 }; /* one minute */
+
+ switch (vp) {
+ case TINIT:
+ init_timer(&t);
+ spin_lock_init(&lock);
+ t.data = TRUN;
+ t.function = discover_timer;
+ die = 0;
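+		/* fall through: arm the timer right after initialisation */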
+ case TRUN:
+ spin_lock_irqsave(&lock, flags);
+ if (!die) {
+ t.expires = jiffies + DTIMERTICK;
+ add_timer(&t);
+ }
+ spin_unlock_irqrestore(&lock, flags);
+
+ aoecmd_cfg(0xffff, 0xff);
+ return;
+ case TKILL:
+ spin_lock_irqsave(&lock, flags);
+ die = 1;
+ spin_unlock_irqrestore(&lock, flags);
+
+ del_timer_sync(&t);
+ default:
+ return;
+ }
+}
+
+static void
+aoe_exit(void)
+{
+ discover_timer(TKILL);
+
+ aoenet_exit();
+ unregister_blkdev(AOE_MAJOR, DEVICE_NAME);
+ aoechr_exit();
+ aoedev_exit();
+ aoeblk_exit(); /* free cache after de-allocating bufs */
+}
+
+static int __init
+aoe_init(void)
+{
+ int ret;
+
+ ret = aoedev_init();
+ if (ret)
+ return ret;
+ ret = aoechr_init();
+ if (ret)
+ goto chr_fail;
+ ret = aoeblk_init();
+ if (ret)
+ goto blk_fail;
+ ret = aoenet_init();
+ if (ret)
+ goto net_fail;
+ ret = register_blkdev(AOE_MAJOR, DEVICE_NAME);
+ if (ret < 0) {
+ printk(KERN_ERR "aoe: can't register major\n");
+ goto blkreg_fail;
+ }
+
+ printk(KERN_INFO "aoe: AoE v%s initialised.\n", VERSION);
+ discover_timer(TINIT);
+ return 0;
+
+ blkreg_fail:
+ aoenet_exit();
+ net_fail:
+ aoeblk_exit();
+ blk_fail:
+ aoechr_exit();
+ chr_fail:
+ aoedev_exit();
+
+ printk(KERN_INFO "aoe: initialisation failure.\n");
+ return ret;
+}
+
+module_init(aoe_init);
+module_exit(aoe_exit);
+
diff --git a/drivers/block/aoe/aoenet.c b/drivers/block/aoe/aoenet.c
new file mode 100644
index 00000000..4d3bc0d4
--- /dev/null
+++ b/drivers/block/aoe/aoenet.c
@@ -0,0 +1,172 @@
+/* Copyright (c) 2007 Coraid, Inc. See COPYING for GPL terms. */
+/*
+ * aoenet.c
+ * Ethernet portion of AoE driver
+ */
+
+#include <linux/gfp.h>
+#include <linux/hdreg.h>
+#include <linux/blkdev.h>
+#include <linux/netdevice.h>
+#include <linux/moduleparam.h>
+#include <net/net_namespace.h>
+#include <asm/unaligned.h>
+#include "aoe.h"
+
+#define NECODES 5
+
+static char *aoe_errlist[] =
+{
+ "no such error",
+ "unrecognized command code",
+ "bad argument parameter",
+ "device unavailable",
+ "config string present",
+ "unsupported version"
+};
+
+enum {
+ IFLISTSZ = 1024,
+};
+
+static char aoe_iflist[IFLISTSZ];
+module_param_string(aoe_iflist, aoe_iflist, IFLISTSZ, 0600);
+MODULE_PARM_DESC(aoe_iflist, "aoe_iflist=\"dev1 [dev2 ...]\"");
+
+#ifndef MODULE
+static int __init aoe_iflist_setup(char *str)
+{
+ strncpy(aoe_iflist, str, IFLISTSZ);
+ aoe_iflist[IFLISTSZ - 1] = '\0';
+ return 1;
+}
+
+__setup("aoe_iflist=", aoe_iflist_setup);
+#endif
+
+int
+is_aoe_netif(struct net_device *ifp)
+{
+ register char *p, *q;
+ register int len;
+
+ if (aoe_iflist[0] == '\0')
+ return 1;
+
+ p = aoe_iflist + strspn(aoe_iflist, WHITESPACE);
+ for (; *p; p = q + strspn(q, WHITESPACE)) {
+ q = p + strcspn(p, WHITESPACE);
+ if (q != p)
+ len = q - p;
+ else
+ len = strlen(p); /* last token in aoe_iflist */
+
+ if (strlen(ifp->name) == len && !strncmp(ifp->name, p, len))
+ return 1;
+ if (q == p)
+ break;
+ }
+
+ return 0;
+}
+
+int
+set_aoe_iflist(const char __user *user_str, size_t size)
+{
+ if (size >= IFLISTSZ)
+ return -EINVAL;
+
+ if (copy_from_user(aoe_iflist, user_str, size)) {
+ printk(KERN_INFO "aoe: copy from user failed\n");
+ return -EFAULT;
+ }
+ aoe_iflist[size] = 0x00;
+ return 0;
+}
+
+void
+aoenet_xmit(struct sk_buff_head *queue)
+{
+ struct sk_buff *skb, *tmp;
+
+ skb_queue_walk_safe(queue, skb, tmp) {
+ __skb_unlink(skb, queue);
+ dev_queue_xmit(skb);
+ }
+}
+
+/*
+ * (1) on receive, skb->len does not include the Ethernet header by default;
+ * skb_push() below restores it so later length checks cover the whole frame.
+ */
+static int
+aoenet_rcv(struct sk_buff *skb, struct net_device *ifp, struct packet_type *pt, struct net_device *orig_dev)
+{
+ struct aoe_hdr *h;
+ u32 n;
+
+ if (dev_net(ifp) != &init_net)
+ goto exit;
+
+ skb = skb_share_check(skb, GFP_ATOMIC);
+ if (skb == NULL)
+ return 0;
+ if (skb_linearize(skb))
+ goto exit;
+ if (!is_aoe_netif(ifp))
+ goto exit;
+ skb_push(skb, ETH_HLEN); /* (1) */
+
+ h = (struct aoe_hdr *) skb_mac_header(skb);
+ n = get_unaligned_be32(&h->tag);
+ if ((h->verfl & AOEFL_RSP) == 0 || (n & 1<<31))
+ goto exit;
+
+ if (h->verfl & AOEFL_ERR) {
+ n = h->err;
+ if (n > NECODES)
+ n = 0;
+ if (net_ratelimit())
+ printk(KERN_ERR
+ "%s%d.%d@%s; ecode=%d '%s'\n",
+ "aoe: error packet from ",
+ get_unaligned_be16(&h->major),
+ h->minor, skb->dev->name,
+ h->err, aoe_errlist[n]);
+ goto exit;
+ }
+
+ switch (h->cmd) {
+ case AOECMD_ATA:
+ aoecmd_ata_rsp(skb);
+ break;
+ case AOECMD_CFG:
+ aoecmd_cfg_rsp(skb);
+ break;
+ default:
+ if (h->cmd >= AOECMD_VEND_MIN)
+ break; /* don't complain about vendor commands */
+ printk(KERN_INFO "aoe: unknown cmd %d\n", h->cmd);
+ }
+exit:
+ dev_kfree_skb(skb);
+ return 0;
+}
+
+static struct packet_type aoe_pt __read_mostly = {
+ .type = __constant_htons(ETH_P_AOE),
+ .func = aoenet_rcv,
+};
+
+int __init
+aoenet_init(void)
+{
+ dev_add_pack(&aoe_pt);
+ return 0;
+}
+
+void
+aoenet_exit(void)
+{
+ dev_remove_pack(&aoe_pt);
+}
+
diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c
new file mode 100644
index 00000000..ede16c64
--- /dev/null
+++ b/drivers/block/ataflop.c
@@ -0,0 +1,2057 @@
+/*
+ * drivers/block/ataflop.c
+ *
+ * Copyright (C) 1993 Greg Harp
+ * Atari Support by Bjoern Brauel, Roman Hodek
+ *
+ * Big cleanup Sep 11..14 1994 Roman Hodek:
+ * - Driver now works interrupt driven
+ * - Support for two drives; should work, but I cannot test that :-(
+ * - Reading is done in whole tracks and buffered to speed up things
+ * - Disk change detection and drive deselecting after motor-off
+ * similar to TOS
+ * - Autodetection of disk format (DD/HD); untested yet, because I
+ * don't have an HD drive :-(
+ *
+ * Fixes Nov 13 1994 Martin Schaller:
+ * - Autodetection works now
+ * - Support for 5 1/4'' disks
+ * - Removed drive type (unknown on atari)
+ * - Do seeks with 8 MHz
+ *
+ * Changes by Andreas Schwab:
+ * - After errors in multiple read mode try again reading single sectors
+ * (Feb 1995):
+ * - Clean up error handling
+ * - Set blk_size for proper size checking
+ * - Initialize track register when testing presence of floppy
+ * - Implement some ioctl's
+ *
+ * Changes by Torsten Lang:
+ * - When probing the floppies we should add the FDCCMDADD_H flag since
+ * the FDC will otherwise wait forever when no disk is inserted...
+ *
+ * ++ Freddi Aschwanden (fa) 20.9.95 fixes for medusa:
+ * - MFPDELAY() after each FDC access -> atari
+ * - more/other disk formats
+ * - DMA to the block buffer directly if we have a 32bit DMA
+ * - for medusa, the step rate is always 3ms
+ * - on medusa, use only cache_push()
+ * Roman:
+ * - Make disk format numbering independent from minors
+ * - Let user set max. supported drive type (speeds up format
+ * detection, saves buffer space)
+ *
+ * Roman 10/15/95:
+ * - implement some more ioctls
+ * - disk formatting
+ *
+ * Andreas 95/12/12:
+ * - increase gap size at start of track for HD/ED disks
+ *
+ * Michael (MSch) 11/07/96:
+ * - implemented FDSETPRM and FDDEFPRM ioctl
+ *
+ * Andreas (97/03/19):
+ * - implemented missing BLK* ioctls
+ *
+ * Things left to do:
+ * - Formatting
+ * - Maybe a better strategy for disk change detection (does anyone
+ * know one?)
+ */
+
+#include <linux/module.h>
+
+#include <linux/fd.h>
+#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/blkdev.h>
+#include <linux/mutex.h>
+
+#include <asm/atafd.h>
+#include <asm/atafdreg.h>
+#include <asm/atariints.h>
+#include <asm/atari_stdma.h>
+#include <asm/atari_stram.h>
+
+#define FD_MAX_UNITS 2
+
+#undef DEBUG
+
+static DEFINE_MUTEX(ataflop_mutex);
+static struct request *fd_request;
+static int fdc_queue;
+
+/* Disk types: DD, HD, ED */
+static struct atari_disk_type {
+ const char *name;
+ unsigned spt; /* sectors per track */
+ unsigned blocks; /* total number of blocks */
+ unsigned fdc_speed; /* fdc_speed setting */
+ unsigned stretch; /* track doubling ? */
+} atari_disk_type[] = {
+ { "d360", 9, 720, 0, 0}, /* 0: 360kB diskette */
+ { "D360", 9, 720, 0, 1}, /* 1: 360kb in 720k or 1.2MB drive */
+ { "D720", 9,1440, 0, 0}, /* 2: 720kb in 720k or 1.2MB drive */
+ { "D820", 10,1640, 0, 0}, /* 3: DD disk with 82 tracks/10 sectors */
+/* formats above are probed for type DD */
+#define MAX_TYPE_DD 3
+ { "h1200",15,2400, 3, 0}, /* 4: 1.2MB diskette */
+ { "H1440",18,2880, 3, 0}, /* 5: 1.4 MB diskette (HD) */
+ { "H1640",20,3280, 3, 0}, /* 6: 1.64MB diskette (fat HD) 82 tr 20 sec */
+/* formats above are probed for types DD and HD */
+#define MAX_TYPE_HD 6
+ { "E2880",36,5760, 3, 0}, /* 7: 2.8 MB diskette (ED) */
+ { "E3280",40,6560, 3, 0}, /* 8: 3.2 MB diskette (fat ED) 82 tr 40 sec */
+/* formats above are probed for types DD, HD and ED */
+#define MAX_TYPE_ED 8
+/* types below are never autoprobed */
+ { "H1680",21,3360, 3, 0}, /* 9: 1.68MB diskette (fat HD) 80 tr 21 sec */
+ { "h410",10,820, 0, 1}, /* 10: 410k diskette 41 tr 10 sec, stretch */
+ { "h1476",18,2952, 3, 0}, /* 11: 1.48MB diskette 82 tr 18 sec */
+ { "H1722",21,3444, 3, 0}, /* 12: 1.72MB diskette 82 tr 21 sec */
+ { "h420",10,840, 0, 1}, /* 13: 420k diskette 42 tr 10 sec, stretch */
+	{ "H830",10,1660, 0, 0},	/* 14: 830k diskette 83 tr 10 sec */
+ { "h1494",18,2952, 3, 0}, /* 15: 1.49MB diskette 83 tr 18 sec */
+ { "H1743",21,3486, 3, 0}, /* 16: 1.74MB diskette 83 tr 21 sec */
+ { "h880",11,1760, 0, 0}, /* 17: 880k diskette 80 tr 11 sec */
+ { "D1040",13,2080, 0, 0}, /* 18: 1.04MB diskette 80 tr 13 sec */
+ { "D1120",14,2240, 0, 0}, /* 19: 1.12MB diskette 80 tr 14 sec */
+ { "h1600",20,3200, 3, 0}, /* 20: 1.60MB diskette 80 tr 20 sec */
+ { "H1760",22,3520, 3, 0}, /* 21: 1.76MB diskette 80 tr 22 sec */
+ { "H1920",24,3840, 3, 0}, /* 22: 1.92MB diskette 80 tr 24 sec */
+ { "E3200",40,6400, 3, 0}, /* 23: 3.2MB diskette 80 tr 40 sec */
+ { "E3520",44,7040, 3, 0}, /* 24: 3.52MB diskette 80 tr 44 sec */
+ { "E3840",48,7680, 3, 0}, /* 25: 3.84MB diskette 80 tr 48 sec */
+ { "H1840",23,3680, 3, 0}, /* 26: 1.84MB diskette 80 tr 23 sec */
+ { "D800",10,1600, 0, 0}, /* 27: 800k diskette 80 tr 10 sec */
+};
+
+static int StartDiskType[] = {
+ MAX_TYPE_DD,
+ MAX_TYPE_HD,
+ MAX_TYPE_ED
+};
+
+#define TYPE_DD 0
+#define TYPE_HD 1
+#define TYPE_ED 2
+
+static int DriveType = TYPE_HD;
+
+static DEFINE_SPINLOCK(ataflop_lock);
+
+/* Array for translating minors into disk formats */
+static struct {
+ int index;
+ unsigned drive_types;
+} minor2disktype[] = {
+ { 0, TYPE_DD }, /* 1: d360 */
+ { 4, TYPE_HD }, /* 2: h1200 */
+ { 1, TYPE_DD }, /* 3: D360 */
+ { 2, TYPE_DD }, /* 4: D720 */
+ { 1, TYPE_DD }, /* 5: h360 = D360 */
+ { 2, TYPE_DD }, /* 6: h720 = D720 */
+ { 5, TYPE_HD }, /* 7: H1440 */
+ { 7, TYPE_ED }, /* 8: E2880 */
+/* some PC formats :-) */
+ { 8, TYPE_ED }, /* 9: E3280 <- was "CompaQ" == E2880 for PC */
+ { 5, TYPE_HD }, /* 10: h1440 = H1440 */
+ { 9, TYPE_HD }, /* 11: H1680 */
+ { 10, TYPE_DD }, /* 12: h410 */
+ { 3, TYPE_DD }, /* 13: H820 <- == D820, 82x10 */
+ { 11, TYPE_HD }, /* 14: h1476 */
+ { 12, TYPE_HD }, /* 15: H1722 */
+ { 13, TYPE_DD }, /* 16: h420 */
+ { 14, TYPE_DD }, /* 17: H830 */
+ { 15, TYPE_HD }, /* 18: h1494 */
+ { 16, TYPE_HD }, /* 19: H1743 */
+ { 17, TYPE_DD }, /* 20: h880 */
+ { 18, TYPE_DD }, /* 21: D1040 */
+ { 19, TYPE_DD }, /* 22: D1120 */
+ { 20, TYPE_HD }, /* 23: h1600 */
+ { 21, TYPE_HD }, /* 24: H1760 */
+ { 22, TYPE_HD }, /* 25: H1920 */
+ { 23, TYPE_ED }, /* 26: E3200 */
+ { 24, TYPE_ED }, /* 27: E3520 */
+ { 25, TYPE_ED }, /* 28: E3840 */
+ { 26, TYPE_HD }, /* 29: H1840 */
+ { 27, TYPE_DD }, /* 30: D800 */
+ { 6, TYPE_HD }, /* 31: H1640 <- was H1600 == h1600 for PC */
+};
+
+#define NUM_DISK_MINORS ARRAY_SIZE(minor2disktype)
+
+/*
+ * Maximum disk size (in kilobytes). This default is used whenever the
+ * current disk size is unknown.
+ */
+#define MAX_DISK_SIZE 3280
+
+/*
+ * MSch: User-provided type information. 'drive' points to
+ * the respective entry of this array. Set by FDSETPRM ioctls.
+ */
+static struct atari_disk_type user_params[FD_MAX_UNITS];
+
+/*
+ * User-provided permanent type information. 'drive' points to
+ * the respective entry of this array. Set by FDDEFPRM ioctls,
+ * restored upon disk change by floppy_revalidate() if valid (as seen by
+ * default_params[].blocks > 0 - a bit in unit[].flags might be used for this?)
+ */
+static struct atari_disk_type default_params[FD_MAX_UNITS];
+
+/* current info on each unit */
+static struct atari_floppy_struct {
+ int connected; /* !=0 : drive is connected */
+ int autoprobe; /* !=0 : do autoprobe */
+
+ struct atari_disk_type *disktype; /* current type of disk */
+
+ int track; /* current head position or -1 if
+ unknown */
+ unsigned int steprate; /* steprate setting */
+ unsigned int wpstat; /* current state of WP signal (for
+ disk change detection) */
+ int flags; /* flags */
+ struct gendisk *disk;
+ int ref;
+ int type;
+} unit[FD_MAX_UNITS];
+
+#define UD unit[drive]
+#define UDT unit[drive].disktype
+#define SUD unit[SelectedDrive]
+#define SUDT unit[SelectedDrive].disktype
+
+
+#define FDC_READ(reg) ({ \
+ /* unsigned long __flags; */ \
+ unsigned short __val; \
+ /* local_irq_save(__flags); */ \
+ dma_wd.dma_mode_status = 0x80 | (reg); \
+ udelay(25); \
+ __val = dma_wd.fdc_acces_seccount; \
+ MFPDELAY(); \
+ /* local_irq_restore(__flags); */ \
+ __val & 0xff; \
+})
+
+#define FDC_WRITE(reg,val) \
+ do { \
+ /* unsigned long __flags; */ \
+ /* local_irq_save(__flags); */ \
+ dma_wd.dma_mode_status = 0x80 | (reg); \
+ udelay(25); \
+ dma_wd.fdc_acces_seccount = (val); \
+ MFPDELAY(); \
+ /* local_irq_restore(__flags); */ \
+ } while(0)
+
+
+/* Buffering variables:
+ * First, there is a DMA buffer in ST-RAM that is used for floppy DMA
+ * operations. Second, a track buffer is used to cache a whole track
+ * of the disk to save read operations. These are two separate buffers
+ * because that allows write operations without clearing the track buffer.
+ */
+
+static int MaxSectors[] = {
+ 11, 22, 44
+};
+static int BufferSize[] = {
+ 15*512, 30*512, 60*512
+};
+
+#define BUFFER_SIZE (BufferSize[DriveType])
+
+unsigned char *DMABuffer; /* buffer for writes */
+static unsigned long PhysDMABuffer; /* physical address */
+
+static int UseTrackbuffer = -1; /* Do track buffering? */
+module_param(UseTrackbuffer, int, 0);
+
+unsigned char *TrackBuffer; /* buffer for reads */
+static unsigned long PhysTrackBuffer; /* physical address */
+static int BufferDrive, BufferSide, BufferTrack;
+static int read_track; /* non-zero if we are reading whole tracks */
+
+#define SECTOR_BUFFER(sec) (TrackBuffer + ((sec)-1)*512)
+#define IS_BUFFERED(drive,side,track) \
+ (BufferDrive == (drive) && BufferSide == (side) && BufferTrack == (track))
+
+/*
+ * These are global variables, as that's the easiest way to give
+ * information to interrupts. They are the data used for the current
+ * request.
+ */
+static int SelectedDrive = 0;
+static int ReqCmd, ReqBlock;
+static int ReqSide, ReqTrack, ReqSector, ReqCnt;
+static int HeadSettleFlag = 0;
+static unsigned char *ReqData, *ReqBuffer;
+static int MotorOn = 0, MotorOffTrys;
+static int IsFormatting = 0, FormatError;
+
+static int UserSteprate[FD_MAX_UNITS] = { -1, -1 };
+module_param_array(UserSteprate, int, NULL, 0);
+
+/* Synchronization of FDC access. */
+static volatile int fdc_busy = 0;
+static DECLARE_WAIT_QUEUE_HEAD(fdc_wait);
+static DECLARE_WAIT_QUEUE_HEAD(format_wait);
+
+static unsigned long changed_floppies = 0xff, fake_change = 0;
+#define CHECK_CHANGE_DELAY HZ/2
+
+#define FD_MOTOR_OFF_DELAY (3*HZ)
+#define FD_MOTOR_OFF_MAXTRY (10*20)
+
+#define FLOPPY_TIMEOUT (6*HZ)
+#define RECALIBRATE_ERRORS 4 /* After this many errors the drive
+ * will be recalibrated. */
+#define MAX_ERRORS 8 /* After this many errors the driver
+ * will give up. */
+
+
+/*
+ * The driver is trying to determine the correct media format
+ * while Probing is set. fd_rwsec_done() clears it after a
+ * successful access.
+ */
+static int Probing = 0;
+
+/* This flag is set when a dummy seek is necessary to make the WP
+ * status bit accessible.
+ */
+static int NeedSeek = 0;
+
+
+#ifdef DEBUG
+#define DPRINT(a) printk a
+#else
+#define DPRINT(a)
+#endif
+
+/***************************** Prototypes *****************************/
+
+static void fd_select_side( int side );
+static void fd_select_drive( int drive );
+static void fd_deselect( void );
+static void fd_motor_off_timer( unsigned long dummy );
+static void check_change( unsigned long dummy );
+static irqreturn_t floppy_irq (int irq, void *dummy);
+static void fd_error( void );
+static int do_format(int drive, int type, struct atari_format_descr *desc);
+static void do_fd_action( int drive );
+static void fd_calibrate( void );
+static void fd_calibrate_done( int status );
+static void fd_seek( void );
+static void fd_seek_done( int status );
+static void fd_rwsec( void );
+static void fd_readtrack_check( unsigned long dummy );
+static void fd_rwsec_done( int status );
+static void fd_rwsec_done1(int status);
+static void fd_writetrack( void );
+static void fd_writetrack_done( int status );
+static void fd_times_out( unsigned long dummy );
+static void finish_fdc( void );
+static void finish_fdc_done( int dummy );
+static void setup_req_params( int drive );
+static void redo_fd_request( void);
+static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode, unsigned int
+ cmd, unsigned long param);
+static void fd_probe( int drive );
+static int fd_test_drive_present( int drive );
+static void config_types( void );
+static int floppy_open(struct block_device *bdev, fmode_t mode);
+static int floppy_release(struct gendisk *disk, fmode_t mode);
+
+/************************* End of Prototypes **************************/
+
+static DEFINE_TIMER(motor_off_timer, fd_motor_off_timer, 0, 0);
+static DEFINE_TIMER(readtrack_timer, fd_readtrack_check, 0, 0);
+static DEFINE_TIMER(timeout_timer, fd_times_out, 0, 0);
+static DEFINE_TIMER(fd_timer, check_change, 0, 0);
+
+static void fd_end_request_cur(int err)
+{
+ if (!__blk_end_request_cur(fd_request, err))
+ fd_request = NULL;
+}
+
+static inline void start_motor_off_timer(void)
+{
+ mod_timer(&motor_off_timer, jiffies + FD_MOTOR_OFF_DELAY);
+ MotorOffTrys = 0;
+}
+
+static inline void start_check_change_timer( void )
+{
+ mod_timer(&fd_timer, jiffies + CHECK_CHANGE_DELAY);
+}
+
+static inline void start_timeout(void)
+{
+ mod_timer(&timeout_timer, jiffies + FLOPPY_TIMEOUT);
+}
+
+static inline void stop_timeout(void)
+{
+ del_timer(&timeout_timer);
+}
+
+/* Select the side to use. */
+
+static void fd_select_side( int side )
+{
+ unsigned long flags;
+
+ /* protect against various other ints mucking around with the PSG */
+ local_irq_save(flags);
+
+ sound_ym.rd_data_reg_sel = 14; /* Select PSG Port A */
+ sound_ym.wd_data = (side == 0) ? sound_ym.rd_data_reg_sel | 0x01 :
+ sound_ym.rd_data_reg_sel & 0xfe;
+
+ local_irq_restore(flags);
+}
+
+
+/* Select a drive, update the FDC's track register and set the correct
+ * clock speed for this disk's type.
+ */
+
+static void fd_select_drive( int drive )
+{
+ unsigned long flags;
+ unsigned char tmp;
+
+ if (drive == SelectedDrive)
+ return;
+
+ /* protect against various other ints mucking around with the PSG */
+ local_irq_save(flags);
+ sound_ym.rd_data_reg_sel = 14; /* Select PSG Port A */
+ tmp = sound_ym.rd_data_reg_sel;
+ sound_ym.wd_data = (tmp | DSKDRVNONE) & ~(drive == 0 ? DSKDRV0 : DSKDRV1);
+ atari_dont_touch_floppy_select = 1;
+ local_irq_restore(flags);
+
+ /* restore track register to saved value */
+ FDC_WRITE( FDCREG_TRACK, UD.track );
+ udelay(25);
+
+ /* select 8/16 MHz */
+ if (UDT)
+ if (ATARIHW_PRESENT(FDCSPEED))
+ dma_wd.fdc_speed = UDT->fdc_speed;
+
+ SelectedDrive = drive;
+}
+
+
+/* Deselect both drives. */
+
+static void fd_deselect( void )
+{
+ unsigned long flags;
+
+ /* protect against various other ints mucking around with the PSG */
+ local_irq_save(flags);
+ atari_dont_touch_floppy_select = 0;
+ sound_ym.rd_data_reg_sel=14; /* Select PSG Port A */
+ sound_ym.wd_data = (sound_ym.rd_data_reg_sel |
+ (MACH_IS_FALCON ? 3 : 7)); /* no drives selected */
+ /* On Falcon, the drive B select line is used on the printer port, so
+ * leave it alone... */
+ SelectedDrive = -1;
+ local_irq_restore(flags);
+}
+
+
+/* This timer function deselects the drives when the FDC switched the
+ * motor off. The deselection cannot happen earlier because the FDC
+ * counts the index signals, which arrive only if one drive is selected.
+ */
+
+static void fd_motor_off_timer( unsigned long dummy )
+{
+ unsigned char status;
+
+ if (SelectedDrive < 0)
+ /* no drive selected, needn't deselect anyone */
+ return;
+
+ if (stdma_islocked())
+ goto retry;
+
+ status = FDC_READ( FDCREG_STATUS );
+
+ if (!(status & 0x80)) {
+ /* motor already turned off by FDC -> deselect drives */
+ MotorOn = 0;
+ fd_deselect();
+ return;
+ }
+ /* not yet off, try again */
+
+ retry:
+ /* Test again later; if tested too often, it seems there is no disk
+ * in the drive and the FDC will leave the motor on forever (or,
+ * at least until a disk is inserted). So we'll test only twice
+ * per second from then on...
+ */
+ mod_timer(&motor_off_timer,
+ jiffies + (MotorOffTrys++ < FD_MOTOR_OFF_MAXTRY ? HZ/20 : HZ/2));
+}
+
+
+/* This function is repeatedly called to detect disk changes (as good
+ * as possible) and keep track of the current state of the write protection.
+ */
+
+static void check_change( unsigned long dummy )
+{
+ static int drive = 0;
+
+ unsigned long flags;
+ unsigned char old_porta;
+ int stat;
+
+ if (++drive > 1 || !UD.connected)
+ drive = 0;
+
+ /* protect against various other ints mucking around with the PSG */
+ local_irq_save(flags);
+
+ if (!stdma_islocked()) {
+ sound_ym.rd_data_reg_sel = 14;
+ old_porta = sound_ym.rd_data_reg_sel;
+ sound_ym.wd_data = (old_porta | DSKDRVNONE) &
+ ~(drive == 0 ? DSKDRV0 : DSKDRV1);
+ stat = !!(FDC_READ( FDCREG_STATUS ) & FDCSTAT_WPROT);
+ sound_ym.wd_data = old_porta;
+
+ if (stat != UD.wpstat) {
+ DPRINT(( "wpstat[%d] = %d\n", drive, stat ));
+ UD.wpstat = stat;
+ set_bit (drive, &changed_floppies);
+ }
+ }
+ local_irq_restore(flags);
+
+ start_check_change_timer();
+}
+
+
+/* Handling of the Head Settling Flag: This flag should be set after each
+ * seek operation, because we don't use seeks with verify.
+ */
+
+static inline void set_head_settle_flag(void)
+{
+ HeadSettleFlag = FDCCMDADD_E;
+}
+
+static inline int get_head_settle_flag(void)
+{
+ int tmp = HeadSettleFlag;
+ HeadSettleFlag = 0;
+ return( tmp );
+}
+
+static inline void copy_buffer(void *from, void *to)
+{
+ ulong *p1 = (ulong *)from, *p2 = (ulong *)to;
+ int cnt;
+
+ for (cnt = 512/4; cnt; cnt--)
+ *p2++ = *p1++;
+}
+
+
+
+
+/* General Interrupt Handling */
+
+static void (*FloppyIRQHandler)( int status ) = NULL;
+
+static irqreturn_t floppy_irq (int irq, void *dummy)
+{
+ unsigned char status;
+ void (*handler)( int );
+
+ handler = xchg(&FloppyIRQHandler, NULL);
+
+ if (handler) {
+ nop();
+ status = FDC_READ( FDCREG_STATUS );
+ DPRINT(("FDC irq, status = %02x handler = %08lx\n",status,(unsigned long)handler));
+ handler( status );
+ }
+ else {
+ DPRINT(("FDC irq, no handler\n"));
+ }
+ return IRQ_HANDLED;
+}
+
+
+/* Error handling: If some error happened, retry some times, then
+ * recalibrate, then try again, and fail after MAX_ERRORS.
+ */
+
+static void fd_error( void )
+{
+ if (IsFormatting) {
+ IsFormatting = 0;
+ FormatError = 1;
+ wake_up( &format_wait );
+ return;
+ }
+
+ if (!fd_request)
+ return;
+
+ fd_request->errors++;
+ if (fd_request->errors >= MAX_ERRORS) {
+ printk(KERN_ERR "fd%d: too many errors.\n", SelectedDrive );
+ fd_end_request_cur(-EIO);
+ }
+ else if (fd_request->errors == RECALIBRATE_ERRORS) {
+ printk(KERN_WARNING "fd%d: recalibrating\n", SelectedDrive );
+ if (SelectedDrive != -1)
+ SUD.track = -1;
+ }
+ redo_fd_request();
+}
+
+
+
+#define SET_IRQ_HANDLER(proc) do { FloppyIRQHandler = (proc); } while(0)
+
+
+/* ---------- Formatting ---------- */
+
+#define FILL(n,val) \
+ do { \
+ memset( p, val, n ); \
+ p += n; \
+ } while(0)
+
+static int do_format(int drive, int type, struct atari_format_descr *desc)
+{
+ unsigned char *p;
+ int sect, nsect;
+ unsigned long flags;
+
+ DPRINT(("do_format( dr=%d tr=%d he=%d offs=%d )\n",
+ drive, desc->track, desc->head, desc->sect_offset ));
+
+ local_irq_save(flags);
+ while( fdc_busy ) sleep_on( &fdc_wait );
+ fdc_busy = 1;
+ stdma_lock(floppy_irq, NULL);
+ atari_turnon_irq( IRQ_MFP_FDC ); /* should be already, just to be sure */
+ local_irq_restore(flags);
+
+ if (type) {
+ if (--type >= NUM_DISK_MINORS ||
+ minor2disktype[type].drive_types > DriveType) {
+ redo_fd_request();
+ return -EINVAL;
+ }
+ type = minor2disktype[type].index;
+ UDT = &atari_disk_type[type];
+ }
+
+ if (!UDT || desc->track >= UDT->blocks/UDT->spt/2 || desc->head >= 2) {
+ redo_fd_request();
+ return -EINVAL;
+ }
+
+ nsect = UDT->spt;
+ p = TrackBuffer;
+ /* The track buffer is used for the raw track data, so its
+ contents become invalid! */
+ BufferDrive = -1;
+ /* stop deselect timer */
+ del_timer( &motor_off_timer );
+
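+	/*
+	 * Build a raw WD177x write-track image: pre-index gap (0x4e), then
+	 * for each sector an ID field (track, head, sector, size code 2 =
+	 * 512 bytes) and a data field filled with 0xe5, each followed by a
+	 * CRC byte (0xf7).
+	 */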
+ FILL( 60 * (nsect / 9), 0x4e );
+ for( sect = 0; sect < nsect; ++sect ) {
+ FILL( 12, 0 );
+ FILL( 3, 0xf5 );
+ *p++ = 0xfe;
+ *p++ = desc->track;
+ *p++ = desc->head;
+ *p++ = (nsect + sect - desc->sect_offset) % nsect + 1;
+ *p++ = 2;
+ *p++ = 0xf7;
+ FILL( 22, 0x4e );
+ FILL( 12, 0 );
+ FILL( 3, 0xf5 );
+ *p++ = 0xfb;
+ FILL( 512, 0xe5 );
+ *p++ = 0xf7;
+ FILL( 40, 0x4e );
+ }
+ FILL( TrackBuffer+BUFFER_SIZE-p, 0x4e );
+
+ IsFormatting = 1;
+ FormatError = 0;
+ ReqTrack = desc->track;
+ ReqSide = desc->head;
+ do_fd_action( drive );
+
+ sleep_on( &format_wait );
+
+ redo_fd_request();
+ return( FormatError ? -EIO : 0 );
+}
+
+
+/* do_fd_action() is the general procedure for a fd request: All
+ * required parameter settings (drive select, side select, track
+ * position) are checked and set if needed. For each of these
+ * parameters and the actual reading or writing exist two functions:
+ * one that starts the setting (or skips it if possible) and one
+ * callback for the "done" interrupt. Each done func calls the next
+ * set function to propagate the request down to fd_rwsec_done().
+ */
+
+static void do_fd_action( int drive )
+{
+ DPRINT(("do_fd_action\n"));
+
+ if (UseTrackbuffer && !IsFormatting) {
+ repeat:
+ if (IS_BUFFERED( drive, ReqSide, ReqTrack )) {
+ if (ReqCmd == READ) {
+ copy_buffer( SECTOR_BUFFER(ReqSector), ReqData );
+ if (++ReqCnt < blk_rq_cur_sectors(fd_request)) {
+ /* read next sector */
+ setup_req_params( drive );
+ goto repeat;
+ }
+ else {
+ /* all sectors finished */
+ fd_end_request_cur(0);
+ redo_fd_request();
+ return;
+ }
+ }
+ else {
+ /* cmd == WRITE, pay attention to track buffer
+ * consistency! */
+ copy_buffer( ReqData, SECTOR_BUFFER(ReqSector) );
+ }
+ }
+ }
+
+ if (SelectedDrive != drive)
+ fd_select_drive( drive );
+
+ if (UD.track == -1)
+ fd_calibrate();
+ else if (UD.track != ReqTrack << UDT->stretch)
+ fd_seek();
+ else if (IsFormatting)
+ fd_writetrack();
+ else
+ fd_rwsec();
+}
+
+
+/* Seek to track 0 if the current track is unknown */
+
+static void fd_calibrate( void )
+{
+ if (SUD.track >= 0) {
+ fd_calibrate_done( 0 );
+ return;
+ }
+
+ if (ATARIHW_PRESENT(FDCSPEED))
+		dma_wd.fdc_speed = 0;	/* always seek with 8 MHz */
+ DPRINT(("fd_calibrate\n"));
+ SET_IRQ_HANDLER( fd_calibrate_done );
+ /* we can't verify, since the speed may be incorrect */
+ FDC_WRITE( FDCREG_CMD, FDCCMD_RESTORE | SUD.steprate );
+
+ NeedSeek = 1;
+ MotorOn = 1;
+ start_timeout();
+ /* wait for IRQ */
+}
+
+
+static void fd_calibrate_done( int status )
+{
+ DPRINT(("fd_calibrate_done()\n"));
+ stop_timeout();
+
+ /* set the correct speed now */
+ if (ATARIHW_PRESENT(FDCSPEED))
+ dma_wd.fdc_speed = SUDT->fdc_speed;
+ if (status & FDCSTAT_RECNF) {
+ printk(KERN_ERR "fd%d: restore failed\n", SelectedDrive );
+ fd_error();
+ }
+ else {
+ SUD.track = 0;
+ fd_seek();
+ }
+}
+
+
+/* Seek the drive to the requested track. The drive must have been
+ * calibrated at some point before this.
+ */
+
+static void fd_seek( void )
+{
+ if (SUD.track == ReqTrack << SUDT->stretch) {
+ fd_seek_done( 0 );
+ return;
+ }
+
+ if (ATARIHW_PRESENT(FDCSPEED)) {
+		dma_wd.fdc_speed = 0;	/* always seek with 8 MHz */
+ MFPDELAY();
+ }
+
+ DPRINT(("fd_seek() to track %d\n",ReqTrack));
+ FDC_WRITE( FDCREG_DATA, ReqTrack << SUDT->stretch);
+ udelay(25);
+ SET_IRQ_HANDLER( fd_seek_done );
+ FDC_WRITE( FDCREG_CMD, FDCCMD_SEEK | SUD.steprate );
+
+ MotorOn = 1;
+ set_head_settle_flag();
+ start_timeout();
+ /* wait for IRQ */
+}
+
+
+static void fd_seek_done( int status )
+{
+ DPRINT(("fd_seek_done()\n"));
+ stop_timeout();
+
+ /* set the correct speed */
+ if (ATARIHW_PRESENT(FDCSPEED))
+ dma_wd.fdc_speed = SUDT->fdc_speed;
+ if (status & FDCSTAT_RECNF) {
+ printk(KERN_ERR "fd%d: seek error (to track %d)\n",
+ SelectedDrive, ReqTrack );
+ /* we don't know exactly which track we are on now! */
+ SUD.track = -1;
+ fd_error();
+ }
+ else {
+ SUD.track = ReqTrack << SUDT->stretch;
+ NeedSeek = 0;
+ if (IsFormatting)
+ fd_writetrack();
+ else
+ fd_rwsec();
+ }
+}
+
+
+/* This does the actual reading/writing after positioning the head
+ * over the correct track.
+ */
+
+static int MultReadInProgress = 0;
+
+
+static void fd_rwsec( void )
+{
+ unsigned long paddr, flags;
+ unsigned int rwflag, old_motoron;
+ unsigned int track;
+
+ DPRINT(("fd_rwsec(), Sec=%d, Access=%c\n",ReqSector, ReqCmd == WRITE ? 'w' : 'r' ));
+ if (ReqCmd == WRITE) {
+ if (ATARIHW_PRESENT(EXTD_DMA)) {
+ paddr = virt_to_phys(ReqData);
+ }
+ else {
+ copy_buffer( ReqData, DMABuffer );
+ paddr = PhysDMABuffer;
+ }
+ dma_cache_maintenance( paddr, 512, 1 );
+ rwflag = 0x100;
+ }
+ else {
+ if (read_track)
+ paddr = PhysTrackBuffer;
+ else
+ paddr = ATARIHW_PRESENT(EXTD_DMA) ?
+ virt_to_phys(ReqData) : PhysDMABuffer;
+ rwflag = 0;
+ }
+
+ fd_select_side( ReqSide );
+
+ /* Start sector of this operation */
+ FDC_WRITE( FDCREG_SECTOR, read_track ? 1 : ReqSector );
+ MFPDELAY();
+ /* Cheat for track if stretch != 0 */
+ if (SUDT->stretch) {
+ track = FDC_READ( FDCREG_TRACK);
+ MFPDELAY();
+ FDC_WRITE( FDCREG_TRACK, track >> SUDT->stretch);
+ }
+ udelay(25);
+
+ /* Setup DMA */
+ local_irq_save(flags);
+ dma_wd.dma_lo = (unsigned char)paddr;
+ MFPDELAY();
+ paddr >>= 8;
+ dma_wd.dma_md = (unsigned char)paddr;
+ MFPDELAY();
+ paddr >>= 8;
+ if (ATARIHW_PRESENT(EXTD_DMA))
+ st_dma_ext_dmahi = (unsigned short)paddr;
+ else
+ dma_wd.dma_hi = (unsigned char)paddr;
+ MFPDELAY();
+ local_irq_restore(flags);
+
+ /* Clear FIFO and switch DMA to correct mode */
+ dma_wd.dma_mode_status = 0x90 | rwflag;
+ MFPDELAY();
+ dma_wd.dma_mode_status = 0x90 | (rwflag ^ 0x100);
+ MFPDELAY();
+ dma_wd.dma_mode_status = 0x90 | rwflag;
+ MFPDELAY();
+
+ /* How many sectors for DMA */
+ dma_wd.fdc_acces_seccount = read_track ? SUDT->spt : 1;
+
+ udelay(25);
+
+ /* Start operation */
+ dma_wd.dma_mode_status = FDCSELREG_STP | rwflag;
+ udelay(25);
+ SET_IRQ_HANDLER( fd_rwsec_done );
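+	/* issue the FDC read/write-sector command; multiple-sector mode
+	 * (FDCCMDADD_M) is used when buffering a whole track */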
+ dma_wd.fdc_acces_seccount =
+ (get_head_settle_flag() |
+ (rwflag ? FDCCMD_WRSEC : (FDCCMD_RDSEC | (read_track ? FDCCMDADD_M : 0))));
+
+ old_motoron = MotorOn;
+ MotorOn = 1;
+ NeedSeek = 1;
+ /* wait for interrupt */
+
+ if (read_track) {
+ /* If reading a whole track, wait about one disk rotation and
+ * then check if all sectors are read. The FDC will even
+ * search for the first non-existent sector and need 1 sec to
+ * recognise that it isn't present :-(
+ */
+ MultReadInProgress = 1;
+ mod_timer(&readtrack_timer,
+ /* 1 rot. + 5 rot.s if motor was off */
+ jiffies + HZ/5 + (old_motoron ? 0 : HZ));
+ }
+ start_timeout();
+}
+
+
+static void fd_readtrack_check( unsigned long dummy )
+{
+ unsigned long flags, addr, addr2;
+
+ local_irq_save(flags);
+
+ if (!MultReadInProgress) {
+ /* This prevents a race condition that could arise if the
+ * interrupt is triggered while the calling of this timer
+ * callback function takes place. The IRQ function then has
+ * already cleared 'MultReadInProgress' when flow of control
+ * gets here.
+ */
+ local_irq_restore(flags);
+ return;
+ }
+
+ /* get the current DMA address */
+ /* ++ f.a. read twice to avoid being fooled by switcher */
+ addr = 0;
+ do {
+ addr2 = addr;
+ addr = dma_wd.dma_lo & 0xff;
+ MFPDELAY();
+ addr |= (dma_wd.dma_md & 0xff) << 8;
+ MFPDELAY();
+ if (ATARIHW_PRESENT( EXTD_DMA ))
+ addr |= (st_dma_ext_dmahi & 0xffff) << 16;
+ else
+ addr |= (dma_wd.dma_hi & 0xff) << 16;
+ MFPDELAY();
+ } while(addr != addr2);
+
+ if (addr >= PhysTrackBuffer + SUDT->spt*512) {
+ /* already read enough data, force an FDC interrupt to stop
+ * the read operation
+ */
+ SET_IRQ_HANDLER( NULL );
+ MultReadInProgress = 0;
+ local_irq_restore(flags);
+ DPRINT(("fd_readtrack_check(): done\n"));
+ FDC_WRITE( FDCREG_CMD, FDCCMD_FORCI );
+ udelay(25);
+
+ /* No error until now -- the FDC would have interrupted
+ * otherwise!
+ */
+ fd_rwsec_done1(0);
+ }
+ else {
+ /* not yet finished, wait another tenth rotation */
+ local_irq_restore(flags);
+ DPRINT(("fd_readtrack_check(): not yet finished\n"));
+ mod_timer(&readtrack_timer, jiffies + HZ/5/10);
+ }
+}
+
+
+static void fd_rwsec_done( int status )
+{
+ DPRINT(("fd_rwsec_done()\n"));
+
+ if (read_track) {
+ del_timer(&readtrack_timer);
+ if (!MultReadInProgress)
+ return;
+ MultReadInProgress = 0;
+ }
+ fd_rwsec_done1(status);
+}
+
+static void fd_rwsec_done1(int status)
+{
+ unsigned int track;
+
+ stop_timeout();
+
+ /* Correct the track if stretch != 0 */
+ if (SUDT->stretch) {
+ track = FDC_READ( FDCREG_TRACK);
+ MFPDELAY();
+ FDC_WRITE( FDCREG_TRACK, track << SUDT->stretch);
+ }
+
+ if (!UseTrackbuffer) {
+ dma_wd.dma_mode_status = 0x90;
+ MFPDELAY();
+ if (!(dma_wd.dma_mode_status & 0x01)) {
+ printk(KERN_ERR "fd%d: DMA error\n", SelectedDrive );
+ goto err_end;
+ }
+ }
+ MFPDELAY();
+
+ if (ReqCmd == WRITE && (status & FDCSTAT_WPROT)) {
+ printk(KERN_NOTICE "fd%d: is write protected\n", SelectedDrive );
+ goto err_end;
+ }
+ if ((status & FDCSTAT_RECNF) &&
+ /* RECNF is no error after a multiple read when the FDC
+ searched for a non-existent sector! */
+ !(read_track && FDC_READ(FDCREG_SECTOR) > SUDT->spt)) {
+ if (Probing) {
+ if (SUDT > atari_disk_type) {
+ if (SUDT[-1].blocks > ReqBlock) {
+ /* try another disk type */
+ SUDT--;
+ set_capacity(unit[SelectedDrive].disk,
+ SUDT->blocks);
+ } else
+ Probing = 0;
+ }
+ else {
+ if (SUD.flags & FTD_MSG)
+ printk(KERN_INFO "fd%d: Auto-detected floppy type %s\n",
+ SelectedDrive, SUDT->name );
+ Probing=0;
+ }
+ } else {
+/* record not found, but not probing. Maybe the stretch setting is wrong? Restart probing */
+ if (SUD.autoprobe) {
+ SUDT = atari_disk_type + StartDiskType[DriveType];
+ set_capacity(unit[SelectedDrive].disk,
+ SUDT->blocks);
+ Probing = 1;
+ }
+ }
+ if (Probing) {
+ if (ATARIHW_PRESENT(FDCSPEED)) {
+ dma_wd.fdc_speed = SUDT->fdc_speed;
+ MFPDELAY();
+ }
+ setup_req_params( SelectedDrive );
+ BufferDrive = -1;
+ do_fd_action( SelectedDrive );
+ return;
+ }
+
+ printk(KERN_ERR "fd%d: sector %d not found (side %d, track %d)\n",
+ SelectedDrive, FDC_READ (FDCREG_SECTOR), ReqSide, ReqTrack );
+ goto err_end;
+ }
+ if (status & FDCSTAT_CRC) {
+ printk(KERN_ERR "fd%d: CRC error (side %d, track %d, sector %d)\n",
+ SelectedDrive, ReqSide, ReqTrack, FDC_READ (FDCREG_SECTOR) );
+ goto err_end;
+ }
+ if (status & FDCSTAT_LOST) {
+ printk(KERN_ERR "fd%d: lost data (side %d, track %d, sector %d)\n",
+ SelectedDrive, ReqSide, ReqTrack, FDC_READ (FDCREG_SECTOR) );
+ goto err_end;
+ }
+
+ Probing = 0;
+
+ if (ReqCmd == READ) {
+ if (!read_track) {
+ void *addr;
+ addr = ATARIHW_PRESENT( EXTD_DMA ) ? ReqData : DMABuffer;
+ dma_cache_maintenance( virt_to_phys(addr), 512, 0 );
+ if (!ATARIHW_PRESENT( EXTD_DMA ))
+ copy_buffer (addr, ReqData);
+ } else {
+ dma_cache_maintenance( PhysTrackBuffer, MaxSectors[DriveType] * 512, 0 );
+ BufferDrive = SelectedDrive;
+ BufferSide = ReqSide;
+ BufferTrack = ReqTrack;
+ copy_buffer (SECTOR_BUFFER (ReqSector), ReqData);
+ }
+ }
+
+ if (++ReqCnt < blk_rq_cur_sectors(fd_request)) {
+ /* read next sector */
+ setup_req_params( SelectedDrive );
+ do_fd_action( SelectedDrive );
+ }
+ else {
+ /* all sectors finished */
+ fd_end_request_cur(0);
+ redo_fd_request();
+ }
+ return;
+
+ err_end:
+ BufferDrive = -1;
+ fd_error();
+}
+
+
+static void fd_writetrack( void )
+{
+ unsigned long paddr, flags;
+ unsigned int track;
+
+ DPRINT(("fd_writetrack() Tr=%d Si=%d\n", ReqTrack, ReqSide ));
+
+ paddr = PhysTrackBuffer;
+ dma_cache_maintenance( paddr, BUFFER_SIZE, 1 );
+
+ fd_select_side( ReqSide );
+
+ /* Cheat for track if stretch != 0 */
+ if (SUDT->stretch) {
+ track = FDC_READ( FDCREG_TRACK);
+ MFPDELAY();
+ FDC_WRITE(FDCREG_TRACK,track >> SUDT->stretch);
+ }
+ udelay(40);
+
+ /* Setup DMA */
+ local_irq_save(flags);
+ dma_wd.dma_lo = (unsigned char)paddr;
+ MFPDELAY();
+ paddr >>= 8;
+ dma_wd.dma_md = (unsigned char)paddr;
+ MFPDELAY();
+ paddr >>= 8;
+ if (ATARIHW_PRESENT( EXTD_DMA ))
+ st_dma_ext_dmahi = (unsigned short)paddr;
+ else
+ dma_wd.dma_hi = (unsigned char)paddr;
+ MFPDELAY();
+ local_irq_restore(flags);
+
+ /* Clear FIFO and switch DMA to correct mode */
+ dma_wd.dma_mode_status = 0x190;
+ MFPDELAY();
+ dma_wd.dma_mode_status = 0x90;
+ MFPDELAY();
+ dma_wd.dma_mode_status = 0x190;
+ MFPDELAY();
+
+ /* How many sectors for DMA */
+ dma_wd.fdc_acces_seccount = BUFFER_SIZE/512;
+ udelay(40);
+
+ /* Start operation */
+ dma_wd.dma_mode_status = FDCSELREG_STP | 0x100;
+ udelay(40);
+ SET_IRQ_HANDLER( fd_writetrack_done );
+ dma_wd.fdc_acces_seccount = FDCCMD_WRTRA | get_head_settle_flag();
+
+ MotorOn = 1;
+ start_timeout();
+ /* wait for interrupt */
+}
+
+
+static void fd_writetrack_done( int status )
+{
+ DPRINT(("fd_writetrack_done()\n"));
+
+ stop_timeout();
+
+ if (status & FDCSTAT_WPROT) {
+ printk(KERN_NOTICE "fd%d: is write protected\n", SelectedDrive );
+ goto err_end;
+ }
+ if (status & FDCSTAT_LOST) {
+ printk(KERN_ERR "fd%d: lost data (side %d, track %d)\n",
+ SelectedDrive, ReqSide, ReqTrack );
+ goto err_end;
+ }
+
+ wake_up( &format_wait );
+ return;
+
+ err_end:
+ fd_error();
+}
+
+static void fd_times_out( unsigned long dummy )
+{
+ atari_disable_irq( IRQ_MFP_FDC );
+ if (!FloppyIRQHandler) goto end; /* int occurred after timer was fired, but
+ * before we came here... */
+
+ SET_IRQ_HANDLER( NULL );
+ /* If the timeout occurred while the readtrack_check timer was
+ * active, we need to cancel it, else bad things will happen */
+ if (UseTrackbuffer)
+ del_timer( &readtrack_timer );
+ FDC_WRITE( FDCREG_CMD, FDCCMD_FORCI );
+ udelay( 25 );
+
+ printk(KERN_ERR "floppy timeout\n" );
+ fd_error();
+ end:
+ atari_enable_irq( IRQ_MFP_FDC );
+}
+
+
+/* The (noop) seek operation here is needed to make the WP bit in the
+ * FDC status register accessible for check_change. If the last disk
+ * operation would have been a RDSEC, this bit would always read as 0
+ * no matter what :-( To save time, the seek goes to the track we're
+ * already on.
+ */
+
+static void finish_fdc( void )
+{
+ if (!NeedSeek) {
+ finish_fdc_done( 0 );
+ }
+ else {
+ DPRINT(("finish_fdc: dummy seek started\n"));
+ FDC_WRITE (FDCREG_DATA, SUD.track);
+ SET_IRQ_HANDLER( finish_fdc_done );
+ FDC_WRITE (FDCREG_CMD, FDCCMD_SEEK);
+ MotorOn = 1;
+ start_timeout();
+ /* we must wait for the IRQ here, because the ST-DMA
+ is released immediately afterwards and the interrupt
+ may be delivered to the wrong driver. */
+ }
+}
+
+
+static void finish_fdc_done( int dummy )
+{
+ unsigned long flags;
+
+ DPRINT(("finish_fdc_done entered\n"));
+ stop_timeout();
+ NeedSeek = 0;
+
+ if (timer_pending(&fd_timer) && time_before(fd_timer.expires, jiffies + 5))
+ /* If the check for a disk change is done too early after this
+ * last seek command, the WP bit still reads wrong :-((
+ */
+ mod_timer(&fd_timer, jiffies + 5);
+ else
+ start_check_change_timer();
+ start_motor_off_timer();
+
+ local_irq_save(flags);
+ stdma_release();
+ fdc_busy = 0;
+ wake_up( &fdc_wait );
+ local_irq_restore(flags);
+
+ DPRINT(("finish_fdc() finished\n"));
+}
+
+/* The detection of disk changes is a dark chapter in Atari history :-(
+ * Because the "Drive ready" signal isn't present in the Atari
+ * hardware, one has to rely on the "Write Protect". This works fine,
+ * as long as no write protected disks are used. TOS solves this
+ * problem by introducing tri-state logic ("maybe changed") and
+ * looking at the serial number in block 0. This isn't possible for
+ * Linux, since the floppy driver can't make assumptions about the
+ * filesystem used on the disk and thus the contents of block 0. I've
+ * chosen the method to always say "The disk was changed" if it is
+ * unsure whether it was. This implies that every open or mount
+ * invalidates the disk buffers if you work with write protected
+ * disks. But at least this is better than working with incorrect data
+ * due to unrecognised disk changes.
+ */
+
+static unsigned int floppy_check_events(struct gendisk *disk,
+ unsigned int clearing)
+{
+ struct atari_floppy_struct *p = disk->private_data;
+ unsigned int drive = p - unit;
+ if (test_bit (drive, &fake_change)) {
+ /* simulated change (e.g. after formatting) */
+ return DISK_EVENT_MEDIA_CHANGE;
+ }
+ if (test_bit (drive, &changed_floppies)) {
+ /* surely changed (the WP signal changed at least once) */
+ return DISK_EVENT_MEDIA_CHANGE;
+ }
+ if (UD.wpstat) {
+ /* WP is on -> could be changed: to be sure, buffers should be
+ * invalidated...
+ */
+ return DISK_EVENT_MEDIA_CHANGE;
+ }
+
+ return 0;
+}
+
+static int floppy_revalidate(struct gendisk *disk)
+{
+ struct atari_floppy_struct *p = disk->private_data;
+ unsigned int drive = p - unit;
+
+ if (test_bit(drive, &changed_floppies) ||
+ test_bit(drive, &fake_change) ||
+ p->disktype == 0) {
+ if (UD.flags & FTD_MSG)
+ printk(KERN_ERR "floppy: clear format %p!\n", UDT);
+ BufferDrive = -1;
+ clear_bit(drive, &fake_change);
+ clear_bit(drive, &changed_floppies);
+ /* MSch: clearing geometry makes sense only for autoprobe
+ formats, for 'permanent user-defined' parameter:
+ restore default_params[] here if flagged valid! */
+ if (default_params[drive].blocks == 0)
+ UDT = NULL;
+ else
+ UDT = &default_params[drive];
+ }
+ return 0;
+}
+
+
+/* This sets up the global variables describing the current request. */
+
+static void setup_req_params( int drive )
+{
+ int block = ReqBlock + ReqCnt;
+
+ ReqTrack = block / UDT->spt;
+ ReqSector = block - ReqTrack * UDT->spt + 1;
+ ReqSide = ReqTrack & 1;
+ ReqTrack >>= 1;
+ ReqData = ReqBuffer + 512 * ReqCnt;
+
+ if (UseTrackbuffer)
+ read_track = (ReqCmd == READ && fd_request->errors == 0);
+ else
+ read_track = 0;
+
+ DPRINT(("Request params: Si=%d Tr=%d Se=%d Data=%08lx\n",ReqSide,
+ ReqTrack, ReqSector, (unsigned long)ReqData ));
+}
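+
+/* A worked example of the mapping above (illustrative values, not taken
+ * from a real request): with UDT->spt == 9 and block == 20, we get
+ * ReqTrack = 20/9 = 2, ReqSector = 20 - 2*9 + 1 = 3, ReqSide = 2 & 1 = 0,
+ * and after the shift ReqTrack = 1, i.e. cylinder 1, side 0, sector 3.
+ */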
+
+/*
+ * Round-robin between our available drives, doing one request from each
+ */
+static struct request *set_next_request(void)
+{
+ struct request_queue *q;
+ int old_pos = fdc_queue;
+ struct request *rq = NULL;
+
+ do {
+ q = unit[fdc_queue].disk->queue;
+ if (++fdc_queue == FD_MAX_UNITS)
+ fdc_queue = 0;
+ if (q) {
+ rq = blk_fetch_request(q);
+ if (rq)
+ break;
+ }
+ } while (fdc_queue != old_pos);
+
+ return rq;
+}
+
+
+static void redo_fd_request(void)
+{
+ int drive, type;
+ struct atari_floppy_struct *floppy;
+
+ DPRINT(("redo_fd_request: fd_request=%p dev=%s fd_request->sector=%ld\n",
+ fd_request, fd_request ? fd_request->rq_disk->disk_name : "",
+ fd_request ? blk_rq_pos(fd_request) : 0 ));
+
+ IsFormatting = 0;
+
+repeat:
+ if (!fd_request) {
+ fd_request = set_next_request();
+ if (!fd_request)
+ goto the_end;
+ }
+
+ floppy = fd_request->rq_disk->private_data;
+ drive = floppy - unit;
+ type = floppy->type;
+
+ if (!UD.connected) {
+ /* drive not connected */
+ printk(KERN_ERR "Unknown Device: fd%d\n", drive );
+ fd_end_request_cur(-EIO);
+ goto repeat;
+ }
+
+ if (type == 0) {
+ if (!UDT) {
+ Probing = 1;
+ UDT = atari_disk_type + StartDiskType[DriveType];
+ set_capacity(floppy->disk, UDT->blocks);
+ UD.autoprobe = 1;
+ }
+ }
+ else {
+ /* user supplied disk type */
+ if (--type >= NUM_DISK_MINORS) {
+ printk(KERN_WARNING "fd%d: invalid disk format", drive );
+ fd_end_request_cur(-EIO);
+ goto repeat;
+ }
+ if (minor2disktype[type].drive_types > DriveType) {
+ printk(KERN_WARNING "fd%d: unsupported disk format", drive );
+ fd_end_request_cur(-EIO);
+ goto repeat;
+ }
+ type = minor2disktype[type].index;
+ UDT = &atari_disk_type[type];
+ set_capacity(floppy->disk, UDT->blocks);
+ UD.autoprobe = 0;
+ }
+
+ if (blk_rq_pos(fd_request) + 1 > UDT->blocks) {
+ fd_end_request_cur(-EIO);
+ goto repeat;
+ }
+
+ /* stop deselect timer */
+ del_timer( &motor_off_timer );
+
+ ReqCnt = 0;
+ ReqCmd = rq_data_dir(fd_request);
+ ReqBlock = blk_rq_pos(fd_request);
+ ReqBuffer = fd_request->buffer;
+ setup_req_params( drive );
+ do_fd_action( drive );
+
+ return;
+
+ the_end:
+ finish_fdc();
+}
+
+
+void do_fd_request(struct request_queue * q)
+{
+ DPRINT(("do_fd_request for pid %d\n",current->pid));
+ while( fdc_busy ) sleep_on( &fdc_wait );
+ fdc_busy = 1;
+ stdma_lock(floppy_irq, NULL);
+
+ atari_disable_irq( IRQ_MFP_FDC );
+ redo_fd_request();
+ atari_enable_irq( IRQ_MFP_FDC );
+}
+
+static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode,
+ unsigned int cmd, unsigned long param)
+{
+ struct gendisk *disk = bdev->bd_disk;
+ struct atari_floppy_struct *floppy = disk->private_data;
+ int drive = floppy - unit;
+ int type = floppy->type;
+ struct atari_format_descr fmt_desc;
+ struct atari_disk_type *dtp;
+ struct floppy_struct getprm;
+ int settype;
+ struct floppy_struct setprm;
+ void __user *argp = (void __user *)param;
+
+ switch (cmd) {
+ case FDGETPRM:
+ if (type) {
+ if (--type >= NUM_DISK_MINORS)
+ return -ENODEV;
+ if (minor2disktype[type].drive_types > DriveType)
+ return -ENODEV;
+ type = minor2disktype[type].index;
+ dtp = &atari_disk_type[type];
+ if (UD.flags & FTD_MSG)
+ printk (KERN_ERR "floppy%d: found dtp %p name %s!\n",
+ drive, dtp, dtp->name);
+ }
+ else {
+ if (!UDT)
+ return -ENXIO;
+ else
+ dtp = UDT;
+ }
+ memset((void *)&getprm, 0, sizeof(getprm));
+ getprm.size = dtp->blocks;
+ getprm.sect = dtp->spt;
+ getprm.head = 2;
+ getprm.track = dtp->blocks/dtp->spt/2;
+ getprm.stretch = dtp->stretch;
+ if (copy_to_user(argp, &getprm, sizeof(getprm)))
+ return -EFAULT;
+ return 0;
+ }
+ switch (cmd) {
+ case FDSETPRM:
+ case FDDEFPRM:
+ /*
+ * MSch 7/96: simple 'set geometry' case: just set the
+ * 'default' device params (minor == 0).
+ * Currently, the drive geometry is cleared after each
+ * disk change and subsequent revalidate()! simple
+ * implementation of FDDEFPRM: save geometry from a
+ * FDDEFPRM call and restore it in floppy_revalidate() !
+ */
+
+ /* get the parameters from user space */
+ if (floppy->ref != 1 && floppy->ref != -1)
+ return -EBUSY;
+ if (copy_from_user(&setprm, argp, sizeof(setprm)))
+ return -EFAULT;
+ /*
+ * first of all: check for floppy change and revalidate,
+ * or the next access will revalidate - and clear UDT :-(
+ */
+
+ if (floppy_check_events(disk, 0))
+ floppy_revalidate(disk);
+
+ if (UD.flags & FTD_MSG)
+ printk (KERN_INFO "floppy%d: setting size %d spt %d str %d!\n",
+ drive, setprm.size, setprm.sect, setprm.stretch);
+
+ /* what if type > 0 here? Overwrite specified entry ? */
+ if (type) {
+ /* refuse to re-set a predefined type for now */
+ redo_fd_request();
+ return -EINVAL;
+ }
+
+ /*
+ * type == 0: first look for a matching entry in the type list,
+ * and set the UD.disktype field to use the predefined entry.
+ * TODO: add user-defined format to head of autoprobe list ?
+ * Useful to include the user-type for future autodetection!
+ */
+
+ for (settype = 0; settype < NUM_DISK_MINORS; settype++) {
+ int setidx = 0;
+ if (minor2disktype[settype].drive_types > DriveType) {
+ /* skip this one, invalid for drive ... */
+ continue;
+ }
+ setidx = minor2disktype[settype].index;
+ dtp = &atari_disk_type[setidx];
+
+ /* found matching entry ?? */
+ if ( dtp->blocks == setprm.size
+ && dtp->spt == setprm.sect
+ && dtp->stretch == setprm.stretch ) {
+ if (UD.flags & FTD_MSG)
+ printk (KERN_INFO "floppy%d: setting %s %p!\n",
+ drive, dtp->name, dtp);
+ UDT = dtp;
+ set_capacity(floppy->disk, UDT->blocks);
+
+ if (cmd == FDDEFPRM) {
+ /* save settings as permanent default type */
+ default_params[drive].name = dtp->name;
+ default_params[drive].spt = dtp->spt;
+ default_params[drive].blocks = dtp->blocks;
+ default_params[drive].fdc_speed = dtp->fdc_speed;
+ default_params[drive].stretch = dtp->stretch;
+ }
+
+ return 0;
+ }
+
+ }
+
+ /* no matching disk type found above - setting user_params */
+
+ if (cmd == FDDEFPRM) {
+ /* set permanent type */
+ dtp = &default_params[drive];
+ } else
+ /* set user type (reset by disk change!) */
+ dtp = &user_params[drive];
+
+ dtp->name = "user format";
+ dtp->blocks = setprm.size;
+ dtp->spt = setprm.sect;
+ if (setprm.sect > 14)
+ dtp->fdc_speed = 3;
+ else
+ dtp->fdc_speed = 0;
+ dtp->stretch = setprm.stretch;
+
+ if (UD.flags & FTD_MSG)
+ printk (KERN_INFO "floppy%d: blk %d spt %d str %d!\n",
+ drive, dtp->blocks, dtp->spt, dtp->stretch);
+
+ /* sanity check */
+ if (setprm.track != dtp->blocks/dtp->spt/2 ||
+ setprm.head != 2) {
+ redo_fd_request();
+ return -EINVAL;
+ }
+
+ UDT = dtp;
+ set_capacity(floppy->disk, UDT->blocks);
+
+ return 0;
+ case FDMSGON:
+ UD.flags |= FTD_MSG;
+ return 0;
+ case FDMSGOFF:
+ UD.flags &= ~FTD_MSG;
+ return 0;
+ case FDSETEMSGTRESH:
+ return -EINVAL;
+ case FDFMTBEG:
+ return 0;
+ case FDFMTTRK:
+ if (floppy->ref != 1 && floppy->ref != -1)
+ return -EBUSY;
+ if (copy_from_user(&fmt_desc, argp, sizeof(fmt_desc)))
+ return -EFAULT;
+ return do_format(drive, type, &fmt_desc);
+ case FDCLRPRM:
+ UDT = NULL;
+ /* MSch: invalidate default_params */
+ default_params[drive].blocks = 0;
+ set_capacity(floppy->disk, MAX_DISK_SIZE * 2);
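+ /* fall through: FDCLRPRM must also invalidate the buffer track */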
+ case FDFMTEND:
+ case FDFLUSH:
+ /* invalidate the buffer track to force a reread */
+ BufferDrive = -1;
+ set_bit(drive, &fake_change);
+ check_disk_change(bdev);
+ return 0;
+ default:
+ return -EINVAL;
+ }
+}
+
+static int fd_ioctl(struct block_device *bdev, fmode_t mode,
+ unsigned int cmd, unsigned long arg)
+{
+ int ret;
+
+ mutex_lock(&ataflop_mutex);
+ ret = fd_locked_ioctl(bdev, mode, cmd, arg);
+ mutex_unlock(&ataflop_mutex);
+
+ return ret;
+}
+
+/* Initialize the 'unit' variable for drive 'drive' */
+
+static void __init fd_probe( int drive )
+{
+ UD.connected = 0;
+ UDT = NULL;
+
+ if (!fd_test_drive_present( drive ))
+ return;
+
+ UD.connected = 1;
+ UD.track = 0;
+ switch( UserSteprate[drive] ) {
+ case 2:
+ UD.steprate = FDCSTEP_2;
+ break;
+ case 3:
+ UD.steprate = FDCSTEP_3;
+ break;
+ case 6:
+ UD.steprate = FDCSTEP_6;
+ break;
+ case 12:
+ UD.steprate = FDCSTEP_12;
+ break;
+ default: /* should be -1 for "not set by user" */
+ if (ATARIHW_PRESENT( FDCSPEED ) || MACH_IS_MEDUSA)
+ UD.steprate = FDCSTEP_3;
+ else
+ UD.steprate = FDCSTEP_6;
+ break;
+ }
+ MotorOn = 1; /* from probe restore operation! */
+}
+
+
+/* This function tests the physical presence of a floppy drive (not
+ * whether a disk is inserted). This is done by issuing a restore
+ * command, waiting max. 2 seconds (that should be enough to move the
+ * head across the whole disk) and looking at the state of the "TR00"
+ * signal. This should now be raised if there is a drive connected
+ * (and there is no hardware failure :-) Otherwise, the drive is
+ * declared absent.
+ */
+
+static int __init fd_test_drive_present( int drive )
+{
+ unsigned long timeout;
+ unsigned char status;
+ int ok;
+
+ if (drive >= (MACH_IS_FALCON ? 1 : 2)) return( 0 );
+ fd_select_drive( drive );
+
+ /* disable interrupt temporarily */
+ atari_turnoff_irq( IRQ_MFP_FDC );
+ FDC_WRITE (FDCREG_TRACK, 0xff00);
+ FDC_WRITE( FDCREG_CMD, FDCCMD_RESTORE | FDCCMDADD_H | FDCSTEP_6 );
+
+ timeout = jiffies + 2*HZ+HZ/2;
+ while (time_before(jiffies, timeout))
+ if (!(st_mfp.par_dt_reg & 0x20))
+ break;
+
+ status = FDC_READ( FDCREG_STATUS );
+ ok = (status & FDCSTAT_TR00) != 0;
+
+ /* force interrupt to abort restore operation (FDC would try
+ * about 50 seconds!) */
+ FDC_WRITE( FDCREG_CMD, FDCCMD_FORCI );
+ udelay(500);
+ status = FDC_READ( FDCREG_STATUS );
+ udelay(20);
+
+ if (ok) {
+ /* dummy seek command to make WP bit accessible */
+ FDC_WRITE( FDCREG_DATA, 0 );
+ FDC_WRITE( FDCREG_CMD, FDCCMD_SEEK );
+ while( st_mfp.par_dt_reg & 0x20 )
+ ;
+ status = FDC_READ( FDCREG_STATUS );
+ }
+
+ atari_turnon_irq( IRQ_MFP_FDC );
+ return( ok );
+}
+
+
+/* Check how many and what kinds of drives are connected. If there are
+ * floppies, additionally start the disk-change and motor-off timers.
+ */
+
+static void __init config_types( void )
+{
+ int drive, cnt = 0;
+
+ /* for probing drives, set the FDC speed to 8 MHz */
+ if (ATARIHW_PRESENT(FDCSPEED))
+ dma_wd.fdc_speed = 0;
+
+ printk(KERN_INFO "Probing floppy drive(s):\n");
+ for( drive = 0; drive < FD_MAX_UNITS; drive++ ) {
+ fd_probe( drive );
+ if (UD.connected) {
+ printk(KERN_INFO "fd%d\n", drive);
+ ++cnt;
+ }
+ }
+
+ if (FDC_READ( FDCREG_STATUS ) & FDCSTAT_BUSY) {
+ /* If FDC is still busy from probing, give it another FORCI
+ * command to abort the operation. If this isn't done, the FDC
+ * will interrupt later and its IRQ line stays low, because
+ * the status register isn't read. And this will block any
+ * interrupts on this IRQ line :-(
+ */
+ FDC_WRITE( FDCREG_CMD, FDCCMD_FORCI );
+ udelay(500);
+ FDC_READ( FDCREG_STATUS );
+ udelay(20);
+ }
+
+ if (cnt > 0) {
+ start_motor_off_timer();
+ if (cnt == 1) fd_select_drive( 0 );
+ start_check_change_timer();
+ }
+}
+
+/*
+ * floppy_open checks for aliasing (/dev/fd0 can be the same as
+ * /dev/PS0 etc), and disallows simultaneous access to the same
+ * drive with different device numbers.
+ */
+
+static int floppy_open(struct block_device *bdev, fmode_t mode)
+{
+ struct atari_floppy_struct *p = bdev->bd_disk->private_data;
+ int type = MINOR(bdev->bd_dev) >> 2;
+
+ DPRINT(("fd_open: type=%d\n",type));
+ if (p->ref && p->type != type)
+ return -EBUSY;
+
+ if (p->ref == -1 || (p->ref && mode & FMODE_EXCL))
+ return -EBUSY;
+
+ if (mode & FMODE_EXCL)
+ p->ref = -1;
+ else
+ p->ref++;
+
+ p->type = type;
+
+ if (mode & FMODE_NDELAY)
+ return 0;
+
+ if (mode & (FMODE_READ|FMODE_WRITE)) {
+ check_disk_change(bdev);
+ if (mode & FMODE_WRITE) {
+ if (p->wpstat) {
+ if (p->ref < 0)
+ p->ref = 0;
+ else
+ p->ref--;
+ return -EROFS;
+ }
+ }
+ }
+ return 0;
+}
+
+static int floppy_unlocked_open(struct block_device *bdev, fmode_t mode)
+{
+ int ret;
+
+ mutex_lock(&ataflop_mutex);
+ ret = floppy_open(bdev, mode);
+ mutex_unlock(&ataflop_mutex);
+
+ return ret;
+}
+
+static int floppy_release(struct gendisk *disk, fmode_t mode)
+{
+ struct atari_floppy_struct *p = disk->private_data;
+ mutex_lock(&ataflop_mutex);
+ if (p->ref < 0)
+ p->ref = 0;
+ else if (!p->ref--) {
+ printk(KERN_ERR "floppy_release with fd_ref == 0");
+ p->ref = 0;
+ }
+ mutex_unlock(&ataflop_mutex);
+ return 0;
+}
+
+static const struct block_device_operations floppy_fops = {
+ .owner = THIS_MODULE,
+ .open = floppy_unlocked_open,
+ .release = floppy_release,
+ .ioctl = fd_ioctl,
+ .check_events = floppy_check_events,
+ .revalidate_disk= floppy_revalidate,
+};
+
+static struct kobject *floppy_find(dev_t dev, int *part, void *data)
+{
+ int drive = *part & 3;
+ int type = *part >> 2;
+ if (drive >= FD_MAX_UNITS || type > NUM_DISK_MINORS)
+ return NULL;
+ *part = 0;
+ return get_disk(unit[drive].disk);
+}
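+
+/* Illustrative decode of the minor split above: minor 6 (binary 110)
+ * selects drive 6 & 3 = 2 with disk type 6 >> 2 = 1, matching the
+ * MINOR(bdev->bd_dev) >> 2 split used in floppy_open().
+ */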
+
+static int __init atari_floppy_init (void)
+{
+ int i;
+
+ if (!MACH_IS_ATARI)
+ /* Amiga, Mac, ... don't have Atari-compatible floppy :-) */
+ return -ENODEV;
+
+ if (register_blkdev(FLOPPY_MAJOR,"fd"))
+ return -EBUSY;
+
+ for (i = 0; i < FD_MAX_UNITS; i++) {
+ unit[i].disk = alloc_disk(1);
+ if (!unit[i].disk)
+ goto Enomem;
+ }
+
+ if (UseTrackbuffer < 0)
+ /* not set by user -> use default: for now, we turn
+ track buffering off for all Medusas, though it
+ could be used with ones that have a counter
+ card. But the test is too hard :-( */
+ UseTrackbuffer = !MACH_IS_MEDUSA;
+
+ /* initialize variables */
+ SelectedDrive = -1;
+ BufferDrive = -1;
+
+ DMABuffer = atari_stram_alloc(BUFFER_SIZE+512, "ataflop");
+ if (!DMABuffer) {
+ printk(KERN_ERR "atari_floppy_init: cannot get dma buffer\n");
+ goto Enomem;
+ }
+ TrackBuffer = DMABuffer + 512;
+ PhysDMABuffer = virt_to_phys(DMABuffer);
+ PhysTrackBuffer = virt_to_phys(TrackBuffer);
+ BufferDrive = BufferSide = BufferTrack = -1;
+
+ for (i = 0; i < FD_MAX_UNITS; i++) {
+ unit[i].track = -1;
+ unit[i].flags = 0;
+ unit[i].disk->major = FLOPPY_MAJOR;
+ unit[i].disk->first_minor = i;
+ sprintf(unit[i].disk->disk_name, "fd%d", i);
+ unit[i].disk->fops = &floppy_fops;
+ unit[i].disk->private_data = &unit[i];
+ unit[i].disk->queue = blk_init_queue(do_fd_request,
+ &ataflop_lock);
+ if (!unit[i].disk->queue)
+ goto Enomem;
+ set_capacity(unit[i].disk, MAX_DISK_SIZE * 2);
+ add_disk(unit[i].disk);
+ }
+
+ blk_register_region(MKDEV(FLOPPY_MAJOR, 0), 256, THIS_MODULE,
+ floppy_find, NULL, NULL);
+
+ printk(KERN_INFO "Atari floppy driver: max. %cD, %strack buffering\n",
+ DriveType == 0 ? 'D' : DriveType == 1 ? 'H' : 'E',
+ UseTrackbuffer ? "" : "no ");
+ config_types();
+
+ return 0;
+Enomem:
+ while (i--) {
+ struct request_queue *q = unit[i].disk->queue;
+
+ put_disk(unit[i].disk);
+ if (q)
+ blk_cleanup_queue(q);
+ }
+
+ unregister_blkdev(FLOPPY_MAJOR, "fd");
+ return -ENOMEM;
+}
+
+#ifndef MODULE
+static int __init atari_floppy_setup(char *str)
+{
+ int ints[3 + FD_MAX_UNITS];
+ int i;
+
+ if (!MACH_IS_ATARI)
+ return 0;
+
+ str = get_options(str, 3 + FD_MAX_UNITS, ints);
+
+ if (ints[0] < 1) {
+ printk(KERN_ERR "ataflop_setup: no arguments!\n" );
+ return 0;
+ }
+ else if (ints[0] > 2+FD_MAX_UNITS) {
+ printk(KERN_ERR "ataflop_setup: too many arguments\n" );
+ }
+
+ if (ints[1] < 0 || ints[1] > 2)
+ printk(KERN_ERR "ataflop_setup: bad drive type\n" );
+ else
+ DriveType = ints[1];
+
+ if (ints[0] >= 2)
+ UseTrackbuffer = (ints[2] > 0);
+
+ for( i = 3; i <= ints[0] && i-3 < FD_MAX_UNITS; ++i ) {
+ if (ints[i] != 2 && ints[i] != 3 && ints[i] != 6 && ints[i] != 12)
+ printk(KERN_ERR "ataflop_setup: bad steprate\n" );
+ else
+ UserSteprate[i-3] = ints[i];
+ }
+ return 1;
+}
+
+__setup("floppy=", atari_floppy_setup);
+#endif
+
+static void __exit atari_floppy_exit(void)
+{
+ int i;
+ blk_unregister_region(MKDEV(FLOPPY_MAJOR, 0), 256);
+ for (i = 0; i < FD_MAX_UNITS; i++) {
+ struct request_queue *q = unit[i].disk->queue;
+
+ del_gendisk(unit[i].disk);
+ put_disk(unit[i].disk);
+ blk_cleanup_queue(q);
+ }
+ unregister_blkdev(FLOPPY_MAJOR, "fd");
+
+ del_timer_sync(&fd_timer);
+ atari_stram_free( DMABuffer );
+}
+
+module_init(atari_floppy_init)
+module_exit(atari_floppy_exit)
+
+MODULE_LICENSE("GPL");
diff --git a/drivers/block/brd.c b/drivers/block/brd.c
new file mode 100644
index 00000000..1539aab0
--- /dev/null
+++ b/drivers/block/brd.c
@@ -0,0 +1,658 @@
+/*
+ * Ram backed block device driver.
+ *
+ * Copyright (C) 2007 Nick Piggin
+ * Copyright (C) 2007 Novell Inc.
+ *
+ * Parts derived from drivers/block/rd.c, and drivers/block/loop.c, copyright
+ * of their respective owners.
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/major.h>
+#include <linux/blkdev.h>
+#include <linux/bio.h>
+#include <linux/highmem.h>
+#include <linux/mutex.h>
+#include <linux/radix-tree.h>
+#include <linux/buffer_head.h> /* invalidate_bh_lrus() */
+#include <linux/slab.h>
+
+#include <asm/uaccess.h>
+
+#define SECTOR_SHIFT 9
+#define PAGE_SECTORS_SHIFT (PAGE_SHIFT - SECTOR_SHIFT)
+#define PAGE_SECTORS (1 << PAGE_SECTORS_SHIFT)
+
+/*
+ * Each block ramdisk device has a radix_tree, brd_pages, that stores the
+ * pages containing the block device's contents. A brd page's ->index is
+ * its offset in PAGE_SIZE units. This is similar to, but in no way connected
+ * with, the kernel's pagecache or buffer cache (which sit above our block
+ * device).
+ */
+struct brd_device {
+ int brd_number;
+
+ struct request_queue *brd_queue;
+ struct gendisk *brd_disk;
+ struct list_head brd_list;
+
+ /*
+ * Backing store of pages and lock to protect it. This is the contents
+ * of the block device.
+ */
+ spinlock_t brd_lock;
+ struct radix_tree_root brd_pages;
+};
+
+/*
+ * Look up and return a brd's page for a given sector.
+ */
+static DEFINE_MUTEX(brd_mutex);
+static struct page *brd_lookup_page(struct brd_device *brd, sector_t sector)
+{
+ pgoff_t idx;
+ struct page *page;
+
+ /*
+ * The page lifetime is protected by the fact that we have opened the
+ * device node -- brd pages will never be deleted under us, so we
+ * don't need any further locking or refcounting.
+ *
+ * This is strictly true for the radix-tree nodes as well (ie. we
+ * don't actually need the rcu_read_lock()), however that is not a
+ * documented feature of the radix-tree API so it is better to be
+ * safe here (we don't have total exclusion from radix tree updates
+ * here, only deletes).
+ */
+ rcu_read_lock();
+ idx = sector >> PAGE_SECTORS_SHIFT; /* sector to page index */
+ page = radix_tree_lookup(&brd->brd_pages, idx);
+ rcu_read_unlock();
+
+ BUG_ON(page && page->index != idx);
+
+ return page;
+}
+
+/*
+ * Look up and return a brd's page for a given sector.
+ * If one does not exist, allocate an empty page, and insert that. Then
+ * return it.
+ */
+static struct page *brd_insert_page(struct brd_device *brd, sector_t sector)
+{
+ pgoff_t idx;
+ struct page *page;
+ gfp_t gfp_flags;
+
+ page = brd_lookup_page(brd, sector);
+ if (page)
+ return page;
+
+ /*
+ * Must use NOIO because we don't want to recurse back into the
+ * block or filesystem layers from page reclaim.
+ *
+ * Cannot support XIP and highmem, because our ->direct_access
+ * routine for XIP must return memory that is always addressable.
+ * If XIP was reworked to use pfns and kmap throughout, this
+ * restriction might be able to be lifted.
+ */
+ gfp_flags = GFP_NOIO | __GFP_ZERO;
+#ifndef CONFIG_BLK_DEV_XIP
+ gfp_flags |= __GFP_HIGHMEM;
+#endif
+ page = alloc_page(gfp_flags);
+ if (!page)
+ return NULL;
+
+ if (radix_tree_preload(GFP_NOIO)) {
+ __free_page(page);
+ return NULL;
+ }
+
+ spin_lock(&brd->brd_lock);
+ idx = sector >> PAGE_SECTORS_SHIFT;
+ if (radix_tree_insert(&brd->brd_pages, idx, page)) {
+ __free_page(page);
+ page = radix_tree_lookup(&brd->brd_pages, idx);
+ BUG_ON(!page);
+ BUG_ON(page->index != idx);
+ } else
+ page->index = idx;
+ spin_unlock(&brd->brd_lock);
+
+ radix_tree_preload_end();
+
+ return page;
+}
+
+static void brd_free_page(struct brd_device *brd, sector_t sector)
+{
+ struct page *page;
+ pgoff_t idx;
+
+ spin_lock(&brd->brd_lock);
+ idx = sector >> PAGE_SECTORS_SHIFT;
+ page = radix_tree_delete(&brd->brd_pages, idx);
+ spin_unlock(&brd->brd_lock);
+ if (page)
+ __free_page(page);
+}
+
+static void brd_zero_page(struct brd_device *brd, sector_t sector)
+{
+ struct page *page;
+
+ page = brd_lookup_page(brd, sector);
+ if (page)
+ clear_highpage(page);
+}
+
+/*
+ * Free all backing store pages and radix tree. This must only be called when
+ * there are no other users of the device.
+ */
+#define FREE_BATCH 16
+static void brd_free_pages(struct brd_device *brd)
+{
+ unsigned long pos = 0;
+ struct page *pages[FREE_BATCH];
+ int nr_pages;
+
+ do {
+ int i;
+
+ nr_pages = radix_tree_gang_lookup(&brd->brd_pages,
+ (void **)pages, pos, FREE_BATCH);
+
+ for (i = 0; i < nr_pages; i++) {
+ void *ret;
+
+ BUG_ON(pages[i]->index < pos);
+ pos = pages[i]->index;
+ ret = radix_tree_delete(&brd->brd_pages, pos);
+ BUG_ON(!ret || ret != pages[i]);
+ __free_page(pages[i]);
+ }
+
+ pos++;
+
+ /*
+ * This assumes radix_tree_gang_lookup always returns as
+ * many pages as possible. If the radix-tree code changes,
+ * so will this have to.
+ */
+ } while (nr_pages == FREE_BATCH);
+}
+
+/*
+ * copy_to_brd_setup must be called before copy_to_brd. It may sleep.
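+ * Since a single bio_vec segment covers at most one page, the region
+ * touched here spans at most two backing pages, which is why only one
+ * extra brd_insert_page() call is needed below.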
+ */
+static int copy_to_brd_setup(struct brd_device *brd, sector_t sector, size_t n)
+{
+ unsigned int offset = (sector & (PAGE_SECTORS-1)) << SECTOR_SHIFT;
+ size_t copy;
+
+ copy = min_t(size_t, n, PAGE_SIZE - offset);
+ if (!brd_insert_page(brd, sector))
+ return -ENOMEM;
+ if (copy < n) {
+ sector += copy >> SECTOR_SHIFT;
+ if (!brd_insert_page(brd, sector))
+ return -ENOMEM;
+ }
+ return 0;
+}
+
+static void discard_from_brd(struct brd_device *brd,
+ sector_t sector, size_t n)
+{
+ while (n >= PAGE_SIZE) {
+ /*
+ * Don't want to actually discard pages here because
+ * re-allocating the pages can result in writeback
+ * deadlocks under heavy load.
+ */
+ if (0)
+ brd_free_page(brd, sector);
+ else
+ brd_zero_page(brd, sector);
+ sector += PAGE_SIZE >> SECTOR_SHIFT;
+ n -= PAGE_SIZE;
+ }
+}
+
+/*
+ * Copy n bytes from src to the brd starting at sector. Does not sleep.
+ */
+static void copy_to_brd(struct brd_device *brd, const void *src,
+ sector_t sector, size_t n)
+{
+ struct page *page;
+ void *dst;
+ unsigned int offset = (sector & (PAGE_SECTORS-1)) << SECTOR_SHIFT;
+ size_t copy;
+
+ copy = min_t(size_t, n, PAGE_SIZE - offset);
+ page = brd_lookup_page(brd, sector);
+ BUG_ON(!page);
+
+ dst = kmap_atomic(page, KM_USER1);
+ memcpy(dst + offset, src, copy);
+ kunmap_atomic(dst, KM_USER1);
+
+ if (copy < n) {
+ src += copy;
+ sector += copy >> SECTOR_SHIFT;
+ copy = n - copy;
+ page = brd_lookup_page(brd, sector);
+ BUG_ON(!page);
+
+ dst = kmap_atomic(page, KM_USER1);
+ memcpy(dst, src, copy);
+ kunmap_atomic(dst, KM_USER1);
+ }
+}
+
+/*
+ * Copy n bytes to dst from the brd starting at sector. Does not sleep.
+ */
+static void copy_from_brd(void *dst, struct brd_device *brd,
+ sector_t sector, size_t n)
+{
+ struct page *page;
+ void *src;
+ unsigned int offset = (sector & (PAGE_SECTORS-1)) << SECTOR_SHIFT;
+ size_t copy;
+
+ copy = min_t(size_t, n, PAGE_SIZE - offset);
+ page = brd_lookup_page(brd, sector);
+ if (page) {
+ src = kmap_atomic(page, KM_USER1);
+ memcpy(dst, src + offset, copy);
+ kunmap_atomic(src, KM_USER1);
+ } else
+ memset(dst, 0, copy);
+
+ if (copy < n) {
+ dst += copy;
+ sector += copy >> SECTOR_SHIFT;
+ copy = n - copy;
+ page = brd_lookup_page(brd, sector);
+ if (page) {
+ src = kmap_atomic(page, KM_USER1);
+ memcpy(dst, src, copy);
+ kunmap_atomic(src, KM_USER1);
+ } else
+ memset(dst, 0, copy);
+ }
+}
+
+/*
+ * Process a single bvec of a bio.
+ */
+static int brd_do_bvec(struct brd_device *brd, struct page *page,
+ unsigned int len, unsigned int off, int rw,
+ sector_t sector)
+{
+ void *mem;
+ int err = 0;
+
+ if (rw != READ) {
+ err = copy_to_brd_setup(brd, sector, len);
+ if (err)
+ goto out;
+ }
+
+ mem = kmap_atomic(page, KM_USER0);
+ if (rw == READ) {
+ copy_from_brd(mem + off, brd, sector, len);
+ flush_dcache_page(page);
+ } else {
+ flush_dcache_page(page);
+ copy_to_brd(brd, mem + off, sector, len);
+ }
+ kunmap_atomic(mem, KM_USER0);
+
+out:
+ return err;
+}
+
+static int brd_make_request(struct request_queue *q, struct bio *bio)
+{
+ struct block_device *bdev = bio->bi_bdev;
+ struct brd_device *brd = bdev->bd_disk->private_data;
+ int rw;
+ struct bio_vec *bvec;
+ sector_t sector;
+ int i;
+ int err = 0;
+
+ sector = bio->bi_sector;
+ if (sector + (bio->bi_size >> SECTOR_SHIFT) >
+ get_capacity(bdev->bd_disk)) {
+ err = -EIO;
+ goto out;
+ }
+
+ if (unlikely(bio->bi_rw & REQ_DISCARD)) {
+ err = 0;
+ discard_from_brd(brd, sector, bio->bi_size);
+ goto out;
+ }
+
+ rw = bio_rw(bio);
+ if (rw == READA)
+ rw = READ;
+
+ bio_for_each_segment(bvec, bio, i) {
+ unsigned int len = bvec->bv_len;
+ err = brd_do_bvec(brd, bvec->bv_page, len,
+ bvec->bv_offset, rw, sector);
+ if (err)
+ break;
+ sector += len >> SECTOR_SHIFT;
+ }
+
+out:
+ bio_endio(bio, err);
+
+ return 0;
+}
+
+#ifdef CONFIG_BLK_DEV_XIP
+static int brd_direct_access(struct block_device *bdev, sector_t sector,
+ void **kaddr, unsigned long *pfn)
+{
+ struct brd_device *brd = bdev->bd_disk->private_data;
+ struct page *page;
+
+ if (!brd)
+ return -ENODEV;
+ if (sector & (PAGE_SECTORS-1))
+ return -EINVAL;
+ if (sector + PAGE_SECTORS > get_capacity(bdev->bd_disk))
+ return -ERANGE;
+ page = brd_insert_page(brd, sector);
+ if (!page)
+ return -ENOMEM;
+ *kaddr = page_address(page);
+ *pfn = page_to_pfn(page);
+
+ return 0;
+}
+#endif
+
+static int brd_ioctl(struct block_device *bdev, fmode_t mode,
+ unsigned int cmd, unsigned long arg)
+{
+ int error;
+ struct brd_device *brd = bdev->bd_disk->private_data;
+
+ if (cmd != BLKFLSBUF)
+ return -ENOTTY;
+
+ /*
+ * ram device BLKFLSBUF has special semantics, we want to actually
+ * release and destroy the ramdisk data.
+ */
+ mutex_lock(&brd_mutex);
+ mutex_lock(&bdev->bd_mutex);
+ error = -EBUSY;
+ if (bdev->bd_openers <= 1) {
+ /*
+ * Invalidate the cache first, so it isn't written
+ * back to the device.
+ *
+ * Another thread might instantiate more buffercache here,
+ * but there is not much we can do to close that race.
+ */
+ invalidate_bh_lrus();
+ truncate_inode_pages(bdev->bd_inode->i_mapping, 0);
+ brd_free_pages(brd);
+ error = 0;
+ }
+ mutex_unlock(&bdev->bd_mutex);
+ mutex_unlock(&brd_mutex);
+
+ return error;
+}
+
+static const struct block_device_operations brd_fops = {
+ .owner = THIS_MODULE,
+ .ioctl = brd_ioctl,
+#ifdef CONFIG_BLK_DEV_XIP
+ .direct_access = brd_direct_access,
+#endif
+};
+
+/*
+ * And now the modules code and kernel interface.
+ */
+static int rd_nr;
+int rd_size = CONFIG_BLK_DEV_RAM_SIZE;
+static int max_part;
+static int part_shift;
+module_param(rd_nr, int, S_IRUGO);
+MODULE_PARM_DESC(rd_nr, "Maximum number of brd devices");
+module_param(rd_size, int, S_IRUGO);
+MODULE_PARM_DESC(rd_size, "Size of each RAM disk in kbytes.");
+module_param(max_part, int, S_IRUGO);
+MODULE_PARM_DESC(max_part, "Maximum number of partitions per RAM disk");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_BLOCKDEV_MAJOR(RAMDISK_MAJOR);
+MODULE_ALIAS("rd");
+
+#ifndef MODULE
+/* Legacy boot options - nonmodular */
+static int __init ramdisk_size(char *str)
+{
+ rd_size = simple_strtol(str, NULL, 0);
+ return 1;
+}
+__setup("ramdisk_size=", ramdisk_size);
+#endif
+
+/*
+ * The device scheme is derived from loop.c. Keep them in synch where possible
+ * (should share code eventually).
+ */
+static LIST_HEAD(brd_devices);
+static DEFINE_MUTEX(brd_devices_mutex);
+
+static struct brd_device *brd_alloc(int i)
+{
+ struct brd_device *brd;
+ struct gendisk *disk;
+
+ brd = kzalloc(sizeof(*brd), GFP_KERNEL);
+ if (!brd)
+ goto out;
+ brd->brd_number = i;
+ spin_lock_init(&brd->brd_lock);
+ INIT_RADIX_TREE(&brd->brd_pages, GFP_ATOMIC);
+
+ brd->brd_queue = blk_alloc_queue(GFP_KERNEL);
+ if (!brd->brd_queue)
+ goto out_free_dev;
+ blk_queue_make_request(brd->brd_queue, brd_make_request);
+ blk_queue_max_hw_sectors(brd->brd_queue, 1024);
+ blk_queue_bounce_limit(brd->brd_queue, BLK_BOUNCE_ANY);
+
+ brd->brd_queue->limits.discard_granularity = PAGE_SIZE;
+ brd->brd_queue->limits.max_discard_sectors = UINT_MAX;
+ brd->brd_queue->limits.discard_zeroes_data = 1;
+ queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, brd->brd_queue);
+
+ disk = brd->brd_disk = alloc_disk(1 << part_shift);
+ if (!disk)
+ goto out_free_queue;
+ disk->major = RAMDISK_MAJOR;
+ disk->first_minor = i << part_shift;
+ disk->fops = &brd_fops;
+ disk->private_data = brd;
+ disk->queue = brd->brd_queue;
+ disk->flags |= GENHD_FL_SUPPRESS_PARTITION_INFO;
+ sprintf(disk->disk_name, "ram%d", i);
+ set_capacity(disk, rd_size * 2);
+
+ return brd;
+
+out_free_queue:
+ blk_cleanup_queue(brd->brd_queue);
+out_free_dev:
+ kfree(brd);
+out:
+ return NULL;
+}
+
+static void brd_free(struct brd_device *brd)
+{
+ put_disk(brd->brd_disk);
+ blk_cleanup_queue(brd->brd_queue);
+ brd_free_pages(brd);
+ kfree(brd);
+}
+
+static struct brd_device *brd_init_one(int i)
+{
+ struct brd_device *brd;
+
+ list_for_each_entry(brd, &brd_devices, brd_list) {
+ if (brd->brd_number == i)
+ goto out;
+ }
+
+ brd = brd_alloc(i);
+ if (brd) {
+ add_disk(brd->brd_disk);
+ list_add_tail(&brd->brd_list, &brd_devices);
+ }
+out:
+ return brd;
+}
+
+static void brd_del_one(struct brd_device *brd)
+{
+ list_del(&brd->brd_list);
+ del_gendisk(brd->brd_disk);
+ brd_free(brd);
+}
+
+static struct kobject *brd_probe(dev_t dev, int *part, void *data)
+{
+ struct brd_device *brd;
+ struct kobject *kobj;
+
+ mutex_lock(&brd_devices_mutex);
+ brd = brd_init_one(MINOR(dev) >> part_shift);
+ kobj = brd ? get_disk(brd->brd_disk) : ERR_PTR(-ENOMEM);
+ mutex_unlock(&brd_devices_mutex);
+
+ *part = 0;
+ return kobj;
+}
+
+static int __init brd_init(void)
+{
+ int i, nr;
+ unsigned long range;
+ struct brd_device *brd, *next;
+
+ /*
+ * The brd module can instantiate the underlying device structure
+ * on demand, provided that the corresponding dev node is accessed.
+ * However, this does not work well with user space tools that don't
+ * know about such a "feature". In order not to break any existing
+ * tools, we do the following:
+ *
+ * (1) if rd_nr is specified, create that many devices upfront, and
+ * this also becomes a hard limit.
+ * (2) if rd_nr is not specified, create CONFIG_BLK_DEV_RAM_COUNT
+ * (default 16) rd devices on module load; the user can create
+ * further brd devices by creating dev nodes themselves and let
+ * the kernel instantiate the actual devices on demand.
+ */
+
+ part_shift = 0;
+ if (max_part > 0) {
+ part_shift = fls(max_part);
+
+ /*
+ * Adjust max_part according to part_shift as it is exported
+ * to user space so that the user can decide the correct minor
+ * number if they want to create more devices.
+ *
+ * Note that -1 is required because partition 0 is reserved
+ * for the whole disk.
+ */
+ max_part = (1UL << part_shift) - 1;
+ }
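+
+ /* Illustrative example: max_part=4 gives part_shift = fls(4) = 3, so
+ * each disk spans 1 << 3 = 8 minors and max_part is re-exported as
+ * (1 << 3) - 1 = 7 usable partitions per disk.
+ */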
+
+ if ((1UL << part_shift) > DISK_MAX_PARTS)
+ return -EINVAL;
+
+ if (rd_nr > 1UL << (MINORBITS - part_shift))
+ return -EINVAL;
+
+ if (rd_nr) {
+ nr = rd_nr;
+ range = rd_nr << part_shift;
+ } else {
+ nr = CONFIG_BLK_DEV_RAM_COUNT;
+ range = 1UL << MINORBITS;
+ }
+
+ if (register_blkdev(RAMDISK_MAJOR, "ramdisk"))
+ return -EIO;
+
+ for (i = 0; i < nr; i++) {
+ brd = brd_alloc(i);
+ if (!brd)
+ goto out_free;
+ list_add_tail(&brd->brd_list, &brd_devices);
+ }
+
+ /* point of no return */
+
+ list_for_each_entry(brd, &brd_devices, brd_list)
+ add_disk(brd->brd_disk);
+
+ blk_register_region(MKDEV(RAMDISK_MAJOR, 0), range,
+ THIS_MODULE, brd_probe, NULL, NULL);
+
+ printk(KERN_INFO "brd: module loaded\n");
+ return 0;
+
+out_free:
+ list_for_each_entry_safe(brd, next, &brd_devices, brd_list) {
+ list_del(&brd->brd_list);
+ brd_free(brd);
+ }
+ unregister_blkdev(RAMDISK_MAJOR, "ramdisk");
+
+ return -ENOMEM;
+}
+
+static void __exit brd_exit(void)
+{
+ unsigned long range;
+ struct brd_device *brd, *next;
+
+ range = rd_nr ? rd_nr << part_shift : 1UL << MINORBITS;
+
+ list_for_each_entry_safe(brd, next, &brd_devices, brd_list)
+ brd_del_one(brd);
+
+ blk_unregister_region(MKDEV(RAMDISK_MAJOR, 0), range);
+ unregister_blkdev(RAMDISK_MAJOR, "ramdisk");
+}
+
+module_init(brd_init);
+module_exit(brd_exit);
+
diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
new file mode 100644
index 00000000..1dab802d
--- /dev/null
+++ b/drivers/block/cciss.c
@@ -0,0 +1,5331 @@
+/*
+ * Disk Array driver for HP Smart Array controllers.
+ * (C) Copyright 2000, 2007 Hewlett-Packard Development Company, L.P.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ * 02111-1307, USA.
+ *
+ * Questions/Comments/Bugfixes to iss_storagedev@hp.com
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/types.h>
+#include <linux/pci.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/delay.h>
+#include <linux/major.h>
+#include <linux/fs.h>
+#include <linux/bio.h>
+#include <linux/blkpg.h>
+#include <linux/timer.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/init.h>
+#include <linux/jiffies.h>
+#include <linux/hdreg.h>
+#include <linux/spinlock.h>
+#include <linux/compat.h>
+#include <linux/mutex.h>
+#include <asm/uaccess.h>
+#include <asm/io.h>
+
+#include <linux/dma-mapping.h>
+#include <linux/blkdev.h>
+#include <linux/genhd.h>
+#include <linux/completion.h>
+#include <scsi/scsi.h>
+#include <scsi/sg.h>
+#include <scsi/scsi_ioctl.h>
+#include <linux/cdrom.h>
+#include <linux/scatterlist.h>
+#include <linux/kthread.h>
+
+#define CCISS_DRIVER_VERSION(maj,min,submin) ((maj<<16)|(min<<8)|(submin))
+#define DRIVER_NAME "HP CISS Driver (v 3.6.26)"
+#define DRIVER_VERSION CCISS_DRIVER_VERSION(3, 6, 26)
+
+/* Embedded module documentation macros - see modules.h */
+MODULE_AUTHOR("Hewlett-Packard Company");
+MODULE_DESCRIPTION("Driver for HP Smart Array Controllers");
+MODULE_SUPPORTED_DEVICE("HP Smart Array Controllers");
+MODULE_VERSION("3.6.26");
+MODULE_LICENSE("GPL");
+static int cciss_tape_cmds = 6;
+module_param(cciss_tape_cmds, int, 0644);
+MODULE_PARM_DESC(cciss_tape_cmds,
+ "number of commands to allocate for tape devices (default: 6)");
+
+static DEFINE_MUTEX(cciss_mutex);
+static struct proc_dir_entry *proc_cciss;
+
+#include "cciss_cmd.h"
+#include "cciss.h"
+#include <linux/cciss_ioctl.h>
+
+/* define the PCI info for the cards we can control */
+static const struct pci_device_id cciss_pci_device_id[] = {
+ {PCI_VENDOR_ID_COMPAQ, PCI_DEVICE_ID_COMPAQ_CISS, 0x0E11, 0x4070},
+ {PCI_VENDOR_ID_COMPAQ, PCI_DEVICE_ID_COMPAQ_CISSB, 0x0E11, 0x4080},
+ {PCI_VENDOR_ID_COMPAQ, PCI_DEVICE_ID_COMPAQ_CISSB, 0x0E11, 0x4082},
+ {PCI_VENDOR_ID_COMPAQ, PCI_DEVICE_ID_COMPAQ_CISSB, 0x0E11, 0x4083},
+ {PCI_VENDOR_ID_COMPAQ, PCI_DEVICE_ID_COMPAQ_CISSC, 0x0E11, 0x4091},
+ {PCI_VENDOR_ID_COMPAQ, PCI_DEVICE_ID_COMPAQ_CISSC, 0x0E11, 0x409A},
+ {PCI_VENDOR_ID_COMPAQ, PCI_DEVICE_ID_COMPAQ_CISSC, 0x0E11, 0x409B},
+ {PCI_VENDOR_ID_COMPAQ, PCI_DEVICE_ID_COMPAQ_CISSC, 0x0E11, 0x409C},
+ {PCI_VENDOR_ID_COMPAQ, PCI_DEVICE_ID_COMPAQ_CISSC, 0x0E11, 0x409D},
+ {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSA, 0x103C, 0x3225},
+ {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSC, 0x103C, 0x3223},
+ {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSC, 0x103C, 0x3234},
+ {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSC, 0x103C, 0x3235},
+ {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSD, 0x103C, 0x3211},
+ {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSD, 0x103C, 0x3212},
+ {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSD, 0x103C, 0x3213},
+ {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSD, 0x103C, 0x3214},
+ {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSD, 0x103C, 0x3215},
+ {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSC, 0x103C, 0x3237},
+ {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSC, 0x103C, 0x323D},
+ {0,}
+};
+
+MODULE_DEVICE_TABLE(pci, cciss_pci_device_id);
+
+/* board_id = Subsystem Device ID & Vendor ID
+ * product = Marketing Name for the board
+ * access = Address of the struct of function pointers
+ */
+static struct board_type products[] = {
+ {0x40700E11, "Smart Array 5300", &SA5_access},
+ {0x40800E11, "Smart Array 5i", &SA5B_access},
+ {0x40820E11, "Smart Array 532", &SA5B_access},
+ {0x40830E11, "Smart Array 5312", &SA5B_access},
+ {0x409A0E11, "Smart Array 641", &SA5_access},
+ {0x409B0E11, "Smart Array 642", &SA5_access},
+ {0x409C0E11, "Smart Array 6400", &SA5_access},
+ {0x409D0E11, "Smart Array 6400 EM", &SA5_access},
+ {0x40910E11, "Smart Array 6i", &SA5_access},
+ {0x3225103C, "Smart Array P600", &SA5_access},
+ {0x3223103C, "Smart Array P800", &SA5_access},
+ {0x3234103C, "Smart Array P400", &SA5_access},
+ {0x3235103C, "Smart Array P400i", &SA5_access},
+ {0x3211103C, "Smart Array E200i", &SA5_access},
+ {0x3212103C, "Smart Array E200", &SA5_access},
+ {0x3213103C, "Smart Array E200i", &SA5_access},
+ {0x3214103C, "Smart Array E200i", &SA5_access},
+ {0x3215103C, "Smart Array E200i", &SA5_access},
+ {0x3237103C, "Smart Array E500", &SA5_access},
+ {0x3223103C, "Smart Array P800", &SA5_access},
+ {0x3234103C, "Smart Array P400", &SA5_access},
+ {0x323D103C, "Smart Array P700m", &SA5_access},
+};
+
+/* How long to wait (in milliseconds) for board to go into simple mode */
+#define MAX_CONFIG_WAIT 30000
+#define MAX_IOCTL_CONFIG_WAIT 1000
+
+/* define how many times we will try a command because of bus resets */
+#define MAX_CMD_RETRIES 3
+
+#define MAX_CTLR 32
+
+/* Originally cciss driver only supports 8 major numbers */
+#define MAX_CTLR_ORIG 8
+
+static ctlr_info_t *hba[MAX_CTLR];
+
+static struct task_struct *cciss_scan_thread;
+static DEFINE_MUTEX(scan_mutex);
+static LIST_HEAD(scan_q);
+
+static void do_cciss_request(struct request_queue *q);
+static irqreturn_t do_cciss_intx(int irq, void *dev_id);
+static irqreturn_t do_cciss_msix_intr(int irq, void *dev_id);
+static int cciss_open(struct block_device *bdev, fmode_t mode);
+static int cciss_unlocked_open(struct block_device *bdev, fmode_t mode);
+static int cciss_release(struct gendisk *disk, fmode_t mode);
+static int do_ioctl(struct block_device *bdev, fmode_t mode,
+ unsigned int cmd, unsigned long arg);
+static int cciss_ioctl(struct block_device *bdev, fmode_t mode,
+ unsigned int cmd, unsigned long arg);
+static int cciss_getgeo(struct block_device *bdev, struct hd_geometry *geo);
+
+static int cciss_revalidate(struct gendisk *disk);
+static int rebuild_lun_table(ctlr_info_t *h, int first_time, int via_ioctl);
+static int deregister_disk(ctlr_info_t *h, int drv_index,
+ int clear_all, int via_ioctl);
+
+static void cciss_read_capacity(ctlr_info_t *h, int logvol,
+ sector_t *total_size, unsigned int *block_size);
+static void cciss_read_capacity_16(ctlr_info_t *h, int logvol,
+ sector_t *total_size, unsigned int *block_size);
+static void cciss_geometry_inquiry(ctlr_info_t *h, int logvol,
+ sector_t total_size,
+ unsigned int block_size, InquiryData_struct *inq_buff,
+ drive_info_struct *drv);
+static void __devinit cciss_interrupt_mode(ctlr_info_t *);
+static void start_io(ctlr_info_t *h);
+static int sendcmd_withirq(ctlr_info_t *h, __u8 cmd, void *buff, size_t size,
+ __u8 page_code, unsigned char scsi3addr[],
+ int cmd_type);
+static int sendcmd_withirq_core(ctlr_info_t *h, CommandList_struct *c,
+ int attempt_retry);
+static int process_sendcmd_error(ctlr_info_t *h, CommandList_struct *c);
+
+static int add_to_scan_list(struct ctlr_info *h);
+static int scan_thread(void *data);
+static int check_for_unit_attention(ctlr_info_t *h, CommandList_struct *c);
+static void cciss_hba_release(struct device *dev);
+static void cciss_device_release(struct device *dev);
+static void cciss_free_gendisk(ctlr_info_t *h, int drv_index);
+static void cciss_free_drive_info(ctlr_info_t *h, int drv_index);
+static inline u32 next_command(ctlr_info_t *h);
+static int __devinit cciss_find_cfg_addrs(struct pci_dev *pdev,
+ void __iomem *vaddr, u32 *cfg_base_addr, u64 *cfg_base_addr_index,
+ u64 *cfg_offset);
+static int __devinit cciss_pci_find_memory_BAR(struct pci_dev *pdev,
+ unsigned long *memory_bar);
+static inline u32 cciss_tag_discard_error_bits(ctlr_info_t *h, u32 tag);
+static __devinit int write_driver_ver_to_cfgtable(
+ CfgTable_struct __iomem *cfgtable);
+
+/* performant mode helper functions */
+static void calc_bucket_map(int *bucket, int num_buckets, int nsgs,
+ int *bucket_map);
+static void cciss_put_controller_into_performant_mode(ctlr_info_t *h);
+
+#ifdef CONFIG_PROC_FS
+static void cciss_procinit(ctlr_info_t *h);
+#else
+static void cciss_procinit(ctlr_info_t *h)
+{
+}
+#endif /* CONFIG_PROC_FS */
+
+#ifdef CONFIG_COMPAT
+static int cciss_compat_ioctl(struct block_device *, fmode_t,
+ unsigned, unsigned long);
+#endif
+
+static const struct block_device_operations cciss_fops = {
+ .owner = THIS_MODULE,
+ .open = cciss_unlocked_open,
+ .release = cciss_release,
+ .ioctl = do_ioctl,
+ .getgeo = cciss_getgeo,
+#ifdef CONFIG_COMPAT
+ .compat_ioctl = cciss_compat_ioctl,
+#endif
+ .revalidate_disk = cciss_revalidate,
+};
+
+/* set_performant_mode: Modify the tag for cciss performant
+ * set bit 0 for pull model, bits 3-1 for block fetch
+ * register number
+ */
+static void set_performant_mode(ctlr_info_t *h, CommandList_struct *c)
+{
+ if (likely(h->transMethod & CFGTBL_Trans_Performant))
+ c->busaddr |= 1 | (h->blockFetchTable[c->Header.SGList] << 1);
+}
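+
+/* Illustrative example (values made up): with c->Header.SGList == 2 and
+ * h->blockFetchTable[2] == 2, busaddr is ORed with 1 | (2 << 1) = 5,
+ * i.e. bit 0 set for the pull model and 010 in bits 3-1 selecting block
+ * fetch register 2.
+ */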
+
+/*
+ * Enqueuing and dequeuing functions for cmdlists.
+ */
+static inline void addQ(struct list_head *list, CommandList_struct *c)
+{
+ list_add_tail(&c->list, list);
+}
+
+static inline void removeQ(CommandList_struct *c)
+{
+ /*
+ * After kexec/dump some commands might still
+ * be in flight, which the firmware will try
+ * to complete. Resetting the firmware doesn't work
+ * with old fw revisions, so we have to mark
+ * them off as 'stale' to prevent the driver from
+ * falling over.
+ */
+ if (WARN_ON(list_empty(&c->list))) {
+ c->cmd_type = CMD_MSG_STALE;
+ return;
+ }
+
+ list_del_init(&c->list);
+}
+
+static void enqueue_cmd_and_start_io(ctlr_info_t *h,
+ CommandList_struct *c)
+{
+ unsigned long flags;
+ set_performant_mode(h, c);
+ spin_lock_irqsave(&h->lock, flags);
+ addQ(&h->reqQ, c);
+ h->Qdepth++;
+ if (h->Qdepth > h->maxQsinceinit)
+ h->maxQsinceinit = h->Qdepth;
+ start_io(h);
+ spin_unlock_irqrestore(&h->lock, flags);
+}
+
+static void cciss_free_sg_chain_blocks(SGDescriptor_struct **cmd_sg_list,
+ int nr_cmds)
+{
+ int i;
+
+ if (!cmd_sg_list)
+ return;
+ for (i = 0; i < nr_cmds; i++) {
+ kfree(cmd_sg_list[i]);
+ cmd_sg_list[i] = NULL;
+ }
+ kfree(cmd_sg_list);
+}
+
+static SGDescriptor_struct **cciss_allocate_sg_chain_blocks(
+ ctlr_info_t *h, int chainsize, int nr_cmds)
+{
+ int j;
+ SGDescriptor_struct **cmd_sg_list;
+
+ if (chainsize <= 0)
+ return NULL;
+
+ cmd_sg_list = kmalloc(sizeof(*cmd_sg_list) * nr_cmds, GFP_KERNEL);
+ if (!cmd_sg_list)
+ return NULL;
+
+ /* Build up chain blocks for each command */
+ for (j = 0; j < nr_cmds; j++) {
+ /* Need a block of chainsized s/g elements. */
+ cmd_sg_list[j] = kmalloc((chainsize *
+ sizeof(*cmd_sg_list[j])), GFP_KERNEL);
+ if (!cmd_sg_list[j]) {
+ dev_err(&h->pdev->dev, "Cannot get memory "
+ "for s/g chains.\n");
+ goto clean;
+ }
+ }
+ return cmd_sg_list;
+clean:
+ cciss_free_sg_chain_blocks(cmd_sg_list, nr_cmds);
+ return NULL;
+}
+
+static void cciss_unmap_sg_chain_block(ctlr_info_t *h, CommandList_struct *c)
+{
+ SGDescriptor_struct *chain_sg;
+ u64bit temp64;
+
+ if (c->Header.SGTotal <= h->max_cmd_sgentries)
+ return;
+
+ chain_sg = &c->SG[h->max_cmd_sgentries - 1];
+ temp64.val32.lower = chain_sg->Addr.lower;
+ temp64.val32.upper = chain_sg->Addr.upper;
+ pci_unmap_single(h->pdev, temp64.val, chain_sg->Len, PCI_DMA_TODEVICE);
+}
+
+static void cciss_map_sg_chain_block(ctlr_info_t *h, CommandList_struct *c,
+ SGDescriptor_struct *chain_block, int len)
+{
+ SGDescriptor_struct *chain_sg;
+ u64bit temp64;
+
+ chain_sg = &c->SG[h->max_cmd_sgentries - 1];
+ chain_sg->Ext = CCISS_SG_CHAIN;
+ chain_sg->Len = len;
+ temp64.val = pci_map_single(h->pdev, chain_block, len,
+ PCI_DMA_TODEVICE);
+ chain_sg->Addr.lower = temp64.val32.lower;
+ chain_sg->Addr.upper = temp64.val32.upper;
+}
+
+#include "cciss_scsi.c" /* For SCSI tape support */
+
+static const char *raid_label[] = { "0", "4", "1(1+0)", "5", "5+1", "ADG",
+ "UNKNOWN"
+};
+#define RAID_UNKNOWN (ARRAY_SIZE(raid_label)-1)
+
+#ifdef CONFIG_PROC_FS
+
+/*
+ * Report information about this controller.
+ */
+#define ENG_GIG 1000000000
+#define ENG_GIG_FACTOR (ENG_GIG/512)
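+/* ENG_GIG_FACTOR = 10^9 / 512 = 1953125, so dividing a 512-byte block
+ * count by it yields the volume size in decimal gigabytes; the remainder
+ * is scaled by 100 below to get two fractional digits.
+ */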
+#define ENGAGE_SCSI "engage scsi"
+
+static void cciss_seq_show_header(struct seq_file *seq)
+{
+ ctlr_info_t *h = seq->private;
+
+ seq_printf(seq, "%s: HP %s Controller\n"
+ "Board ID: 0x%08lx\n"
+ "Firmware Version: %c%c%c%c\n"
+ "IRQ: %d\n"
+ "Logical drives: %d\n"
+ "Current Q depth: %d\n"
+ "Current # commands on controller: %d\n"
+ "Max Q depth since init: %d\n"
+ "Max # commands on controller since init: %d\n"
+ "Max SG entries since init: %d\n",
+ h->devname,
+ h->product_name,
+ (unsigned long)h->board_id,
+ h->firm_ver[0], h->firm_ver[1], h->firm_ver[2],
+ h->firm_ver[3], (unsigned int)h->intr[PERF_MODE_INT],
+ h->num_luns,
+ h->Qdepth, h->commands_outstanding,
+ h->maxQsinceinit, h->max_outstanding, h->maxSG);
+
+#ifdef CONFIG_CISS_SCSI_TAPE
+ cciss_seq_tape_report(seq, h);
+#endif /* CONFIG_CISS_SCSI_TAPE */
+}
+
+static void *cciss_seq_start(struct seq_file *seq, loff_t *pos)
+{
+ ctlr_info_t *h = seq->private;
+ unsigned long flags;
+
+ /* prevent displaying bogus info during configuration
+ * or deconfiguration of a logical volume
+ */
+ spin_lock_irqsave(&h->lock, flags);
+ if (h->busy_configuring) {
+ spin_unlock_irqrestore(&h->lock, flags);
+ return ERR_PTR(-EBUSY);
+ }
+ h->busy_configuring = 1;
+ spin_unlock_irqrestore(&h->lock, flags);
+
+ if (*pos == 0)
+ cciss_seq_show_header(seq);
+
+ return pos;
+}
+
+static int cciss_seq_show(struct seq_file *seq, void *v)
+{
+ sector_t vol_sz, vol_sz_frac;
+ ctlr_info_t *h = seq->private;
+ unsigned ctlr = h->ctlr;
+ loff_t *pos = v;
+ drive_info_struct *drv = h->drv[*pos];
+
+ if (*pos > h->highest_lun)
+ return 0;
+
+ if (drv == NULL) /* it's possible for h->drv[] to have holes. */
+ return 0;
+
+ if (drv->heads == 0)
+ return 0;
+
+ vol_sz = drv->nr_blocks;
+ vol_sz_frac = sector_div(vol_sz, ENG_GIG_FACTOR);
+ vol_sz_frac *= 100;
+ sector_div(vol_sz_frac, ENG_GIG_FACTOR);
+
+ if (drv->raid_level < 0 || drv->raid_level > RAID_UNKNOWN)
+ drv->raid_level = RAID_UNKNOWN;
+ seq_printf(seq, "cciss/c%dd%d:"
+ "\t%4u.%02uGB\tRAID %s\n",
+ ctlr, (int) *pos, (int)vol_sz, (int)vol_sz_frac,
+ raid_label[drv->raid_level]);
+ return 0;
+}
+
+static void *cciss_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+ ctlr_info_t *h = seq->private;
+
+ if (*pos > h->highest_lun)
+ return NULL;
+ *pos += 1;
+
+ return pos;
+}
+
+static void cciss_seq_stop(struct seq_file *seq, void *v)
+{
+ ctlr_info_t *h = seq->private;
+
+ /* Only reset h->busy_configuring if we succeeded in setting
+ * it during cciss_seq_start. */
+ if (v == ERR_PTR(-EBUSY))
+ return;
+
+ h->busy_configuring = 0;
+}
+
+static const struct seq_operations cciss_seq_ops = {
+ .start = cciss_seq_start,
+ .show = cciss_seq_show,
+ .next = cciss_seq_next,
+ .stop = cciss_seq_stop,
+};
+
+static int cciss_seq_open(struct inode *inode, struct file *file)
+{
+ int ret = seq_open(file, &cciss_seq_ops);
+ struct seq_file *seq = file->private_data;
+
+ if (!ret)
+ seq->private = PDE(inode)->data;
+
+ return ret;
+}
+
+static ssize_t
+cciss_proc_write(struct file *file, const char __user *buf,
+ size_t length, loff_t *ppos)
+{
+ int err;
+ char *buffer;
+
+#ifndef CONFIG_CISS_SCSI_TAPE
+ return -EINVAL;
+#endif
+
+ if (!buf || length > PAGE_SIZE - 1)
+ return -EINVAL;
+
+ buffer = (char *)__get_free_page(GFP_KERNEL);
+ if (!buffer)
+ return -ENOMEM;
+
+ err = -EFAULT;
+ if (copy_from_user(buffer, buf, length))
+ goto out;
+ buffer[length] = '\0';
+
+#ifdef CONFIG_CISS_SCSI_TAPE
+ if (strncmp(ENGAGE_SCSI, buffer, sizeof ENGAGE_SCSI - 1) == 0) {
+ struct seq_file *seq = file->private_data;
+ ctlr_info_t *h = seq->private;
+
+ err = cciss_engage_scsi(h);
+ if (err == 0)
+ err = length;
+ } else
+#endif /* CONFIG_CISS_SCSI_TAPE */
+ err = -EINVAL;
+ /* might be nice to have "disengage" too, but it's not
+ safely possible. (only 1 module use count, lock issues.) */
+
+out:
+ free_page((unsigned long)buffer);
+ return err;
+}
+
+static const struct file_operations cciss_proc_fops = {
+ .owner = THIS_MODULE,
+ .open = cciss_seq_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release,
+ .write = cciss_proc_write,
+};
+
+static void __devinit cciss_procinit(ctlr_info_t *h)
+{
+ struct proc_dir_entry *pde;
+
+ if (proc_cciss == NULL)
+ proc_cciss = proc_mkdir("driver/cciss", NULL);
+ if (!proc_cciss)
+ return;
+ pde = proc_create_data(h->devname, S_IWUSR | S_IRUSR | S_IRGRP |
+ S_IROTH, proc_cciss,
+ &cciss_proc_fops, h);
+}
+#endif /* CONFIG_PROC_FS */
+
+#define MAX_PRODUCT_NAME_LEN 19
+
+#define to_hba(n) container_of(n, struct ctlr_info, dev)
+#define to_drv(n) container_of(n, drive_info_struct, dev)
+
+/* List of controllers which cannot be hard reset on kexec with reset_devices */
+static u32 unresettable_controller[] = {
+ 0x324a103C, /* Smart Array P712m */
+ 0x324b103C, /* SmartArray P711m */
+ 0x3223103C, /* Smart Array P800 */
+ 0x3234103C, /* Smart Array P400 */
+ 0x3235103C, /* Smart Array P400i */
+ 0x3211103C, /* Smart Array E200i */
+ 0x3212103C, /* Smart Array E200 */
+ 0x3213103C, /* Smart Array E200i */
+ 0x3214103C, /* Smart Array E200i */
+ 0x3215103C, /* Smart Array E200i */
+ 0x3237103C, /* Smart Array E500 */
+ 0x323D103C, /* Smart Array P700m */
+ 0x409C0E11, /* Smart Array 6400 */
+ 0x409D0E11, /* Smart Array 6400 EM */
+};
+
+/* List of controllers which cannot even be soft reset */
+static u32 soft_unresettable_controller[] = {
+ 0x409C0E11, /* Smart Array 6400 */
+ 0x409D0E11, /* Smart Array 6400 EM */
+};
+
+static int ctlr_is_hard_resettable(u32 board_id)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(unresettable_controller); i++)
+ if (unresettable_controller[i] == board_id)
+ return 0;
+ return 1;
+}
+
+static int ctlr_is_soft_resettable(u32 board_id)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(soft_unresettable_controller); i++)
+ if (soft_unresettable_controller[i] == board_id)
+ return 0;
+ return 1;
+}
+
+static int ctlr_is_resettable(u32 board_id)
+{
+ return ctlr_is_hard_resettable(board_id) ||
+ ctlr_is_soft_resettable(board_id);
+}
+
+static ssize_t host_show_resettable(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct ctlr_info *h = to_hba(dev);
+
+ return snprintf(buf, 20, "%d\n", ctlr_is_resettable(h->board_id));
+}
+static DEVICE_ATTR(resettable, S_IRUGO, host_show_resettable, NULL);
+
+static ssize_t host_store_rescan(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct ctlr_info *h = to_hba(dev);
+
+ add_to_scan_list(h);
+ wake_up_process(cciss_scan_thread);
+ wait_for_completion_interruptible(&h->scan_wait);
+
+ return count;
+}
+static DEVICE_ATTR(rescan, S_IWUSR, NULL, host_store_rescan);
+
+static ssize_t dev_show_unique_id(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ drive_info_struct *drv = to_drv(dev);
+ struct ctlr_info *h = to_hba(drv->dev.parent);
+ __u8 sn[16];
+ unsigned long flags;
+ int ret = 0;
+
+ spin_lock_irqsave(&h->lock, flags);
+ if (h->busy_configuring)
+ ret = -EBUSY;
+ else
+ memcpy(sn, drv->serial_no, sizeof(sn));
+ spin_unlock_irqrestore(&h->lock, flags);
+
+ if (ret)
+ return ret;
+ else
+ return snprintf(buf, 16 * 2 + 2,
+ "%02X%02X%02X%02X%02X%02X%02X%02X"
+ "%02X%02X%02X%02X%02X%02X%02X%02X\n",
+ sn[0], sn[1], sn[2], sn[3],
+ sn[4], sn[5], sn[6], sn[7],
+ sn[8], sn[9], sn[10], sn[11],
+ sn[12], sn[13], sn[14], sn[15]);
+}
+static DEVICE_ATTR(unique_id, S_IRUGO, dev_show_unique_id, NULL);
+
+static ssize_t dev_show_vendor(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ drive_info_struct *drv = to_drv(dev);
+ struct ctlr_info *h = to_hba(drv->dev.parent);
+ char vendor[VENDOR_LEN + 1];
+ unsigned long flags;
+ int ret = 0;
+
+ spin_lock_irqsave(&h->lock, flags);
+ if (h->busy_configuring)
+ ret = -EBUSY;
+ else
+ memcpy(vendor, drv->vendor, VENDOR_LEN + 1);
+ spin_unlock_irqrestore(&h->lock, flags);
+
+ if (ret)
+ return ret;
+ else
+ return snprintf(buf, sizeof(vendor) + 1, "%s\n", drv->vendor);
+}
+static DEVICE_ATTR(vendor, S_IRUGO, dev_show_vendor, NULL);
+
+static ssize_t dev_show_model(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ drive_info_struct *drv = to_drv(dev);
+ struct ctlr_info *h = to_hba(drv->dev.parent);
+ char model[MODEL_LEN + 1];
+ unsigned long flags;
+ int ret = 0;
+
+ spin_lock_irqsave(&h->lock, flags);
+ if (h->busy_configuring)
+ ret = -EBUSY;
+ else
+ memcpy(model, drv->model, MODEL_LEN + 1);
+ spin_unlock_irqrestore(&h->lock, flags);
+
+ if (ret)
+ return ret;
+ else
+ return snprintf(buf, sizeof(model) + 1, "%s\n", drv->model);
+}
+static DEVICE_ATTR(model, S_IRUGO, dev_show_model, NULL);
+
+static ssize_t dev_show_rev(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ drive_info_struct *drv = to_drv(dev);
+ struct ctlr_info *h = to_hba(drv->dev.parent);
+ char rev[REV_LEN + 1];
+ unsigned long flags;
+ int ret = 0;
+
+ spin_lock_irqsave(&h->lock, flags);
+ if (h->busy_configuring)
+ ret = -EBUSY;
+ else
+ memcpy(rev, drv->rev, REV_LEN + 1);
+ spin_unlock_irqrestore(&h->lock, flags);
+
+ if (ret)
+ return ret;
+ else
+ return snprintf(buf, sizeof(rev) + 1, "%s\n", drv->rev);
+}
+static DEVICE_ATTR(rev, S_IRUGO, dev_show_rev, NULL);
+
+static ssize_t cciss_show_lunid(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ drive_info_struct *drv = to_drv(dev);
+ struct ctlr_info *h = to_hba(drv->dev.parent);
+ unsigned long flags;
+ unsigned char lunid[8];
+
+ spin_lock_irqsave(&h->lock, flags);
+ if (h->busy_configuring) {
+ spin_unlock_irqrestore(&h->lock, flags);
+ return -EBUSY;
+ }
+ if (!drv->heads) {
+ spin_unlock_irqrestore(&h->lock, flags);
+ return -ENOTTY;
+ }
+ memcpy(lunid, drv->LunID, sizeof(lunid));
+ spin_unlock_irqrestore(&h->lock, flags);
+ return snprintf(buf, 20, "0x%02x%02x%02x%02x%02x%02x%02x%02x\n",
+ lunid[0], lunid[1], lunid[2], lunid[3],
+ lunid[4], lunid[5], lunid[6], lunid[7]);
+}
+static DEVICE_ATTR(lunid, S_IRUGO, cciss_show_lunid, NULL);
+
+static ssize_t cciss_show_raid_level(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ drive_info_struct *drv = to_drv(dev);
+ struct ctlr_info *h = to_hba(drv->dev.parent);
+ int raid;
+ unsigned long flags;
+
+ spin_lock_irqsave(&h->lock, flags);
+ if (h->busy_configuring) {
+ spin_unlock_irqrestore(&h->lock, flags);
+ return -EBUSY;
+ }
+ raid = drv->raid_level;
+ spin_unlock_irqrestore(&h->lock, flags);
+ if (raid < 0 || raid > RAID_UNKNOWN)
+ raid = RAID_UNKNOWN;
+
+ return snprintf(buf, strlen(raid_label[raid]) + 7, "RAID %s\n",
+ raid_label[raid]);
+}
+static DEVICE_ATTR(raid_level, S_IRUGO, cciss_show_raid_level, NULL);
+
+static ssize_t cciss_show_usage_count(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ drive_info_struct *drv = to_drv(dev);
+ struct ctlr_info *h = to_hba(drv->dev.parent);
+ unsigned long flags;
+ int count;
+
+ spin_lock_irqsave(&h->lock, flags);
+ if (h->busy_configuring) {
+ spin_unlock_irqrestore(&h->lock, flags);
+ return -EBUSY;
+ }
+ count = drv->usage_count;
+ spin_unlock_irqrestore(&h->lock, flags);
+ return snprintf(buf, 20, "%d\n", count);
+}
+static DEVICE_ATTR(usage_count, S_IRUGO, cciss_show_usage_count, NULL);
+
+static struct attribute *cciss_host_attrs[] = {
+ &dev_attr_rescan.attr,
+ &dev_attr_resettable.attr,
+ NULL
+};
+
+static struct attribute_group cciss_host_attr_group = {
+ .attrs = cciss_host_attrs,
+};
+
+static const struct attribute_group *cciss_host_attr_groups[] = {
+ &cciss_host_attr_group,
+ NULL
+};
+
+static struct device_type cciss_host_type = {
+ .name = "cciss_host",
+ .groups = cciss_host_attr_groups,
+ .release = cciss_hba_release,
+};
+
+static struct attribute *cciss_dev_attrs[] = {
+ &dev_attr_unique_id.attr,
+ &dev_attr_model.attr,
+ &dev_attr_vendor.attr,
+ &dev_attr_rev.attr,
+ &dev_attr_lunid.attr,
+ &dev_attr_raid_level.attr,
+ &dev_attr_usage_count.attr,
+ NULL
+};
+
+static struct attribute_group cciss_dev_attr_group = {
+ .attrs = cciss_dev_attrs,
+};
+
+static const struct attribute_group *cciss_dev_attr_groups[] = {
+ &cciss_dev_attr_group,
+ NULL
+};
+
+static struct device_type cciss_dev_type = {
+ .name = "cciss_device",
+ .groups = cciss_dev_attr_groups,
+ .release = cciss_device_release,
+};
+
+static struct bus_type cciss_bus_type = {
+ .name = "cciss",
+};
+
+/*
+ * cciss_hba_release is called when the reference count
+ * of h->dev goes to zero.
+ */
+static void cciss_hba_release(struct device *dev)
+{
+ /*
+ * nothing to do, but need this to avoid a warning
+ * about not having a release handler from lib/kref.c.
+ */
+}
+
+/*
+ * Initialize sysfs entry for each controller. This sets up and registers
+ * the 'cciss#' directory for each individual controller under
+ * /sys/bus/pci/devices/<dev>/.
+ */
+static int cciss_create_hba_sysfs_entry(struct ctlr_info *h)
+{
+ device_initialize(&h->dev);
+ h->dev.type = &cciss_host_type;
+ h->dev.bus = &cciss_bus_type;
+ dev_set_name(&h->dev, "%s", h->devname);
+ h->dev.parent = &h->pdev->dev;
+
+ return device_add(&h->dev);
+}
+
+/*
+ * Remove sysfs entries for an hba.
+ */
+static void cciss_destroy_hba_sysfs_entry(struct ctlr_info *h)
+{
+ device_del(&h->dev);
+ put_device(&h->dev); /* final put. */
+}
+
+/* cciss_device_release is called when the reference count
+ * of h->drv[x]->dev goes to zero.
+ */
+static void cciss_device_release(struct device *dev)
+{
+ drive_info_struct *drv = to_drv(dev);
+ kfree(drv);
+}
+
+/*
+ * Initialize sysfs for each logical drive. This sets up and registers
+ * the 'c#d#' directory for each individual logical drive under
+ * /sys/bus/pci/devices/<dev/ccis#/. We also create a link from
+ * /sys/block/cciss!c#d# to this entry.
+ */
+static long cciss_create_ld_sysfs_entry(struct ctlr_info *h,
+ int drv_index)
+{
+ struct device *dev;
+
+ if (h->drv[drv_index]->device_initialized)
+ return 0;
+
+ dev = &h->drv[drv_index]->dev;
+ device_initialize(dev);
+ dev->type = &cciss_dev_type;
+ dev->bus = &cciss_bus_type;
+ dev_set_name(dev, "c%dd%d", h->ctlr, drv_index);
+ dev->parent = &h->dev;
+ h->drv[drv_index]->device_initialized = 1;
+ return device_add(dev);
+}
+
+/*
+ * Remove sysfs entries for a logical drive.
+ */
+static void cciss_destroy_ld_sysfs_entry(struct ctlr_info *h, int drv_index,
+ int ctlr_exiting)
+{
+ struct device *dev = &h->drv[drv_index]->dev;
+
+ /* special case for c*d0, we only destroy it on controller exit */
+ if (drv_index == 0 && !ctlr_exiting)
+ return;
+
+ device_del(dev);
+ put_device(dev); /* the "final" put. */
+ h->drv[drv_index] = NULL;
+}
+
+/*
+ * For operations that cannot sleep, a command block is allocated at init,
+ * and managed by cmd_alloc() and cmd_free() using a simple bitmap to track
+ * which ones are free or in use.
+ */
+static CommandList_struct *cmd_alloc(ctlr_info_t *h)
+{
+ CommandList_struct *c;
+ int i;
+ u64bit temp64;
+ dma_addr_t cmd_dma_handle, err_dma_handle;
+
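+ /*
+ * Claim a free slot in the preallocated pool: find a clear bit and set
+ * it atomically. If another context raced us to the same slot,
+ * test_and_set_bit() returns nonzero and we rescan.
+ */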
+ do {
+ i = find_first_zero_bit(h->cmd_pool_bits, h->nr_cmds);
+ if (i == h->nr_cmds)
+ return NULL;
+ } while (test_and_set_bit(i & (BITS_PER_LONG - 1),
+ h->cmd_pool_bits + (i / BITS_PER_LONG)) != 0);
+ c = h->cmd_pool + i;
+ memset(c, 0, sizeof(CommandList_struct));
+ cmd_dma_handle = h->cmd_pool_dhandle + i * sizeof(CommandList_struct);
+ c->err_info = h->errinfo_pool + i;
+ memset(c->err_info, 0, sizeof(ErrorInfo_struct));
+ err_dma_handle = h->errinfo_pool_dhandle
+ + i * sizeof(ErrorInfo_struct);
+ h->nr_allocs++;
+
+ c->cmdindex = i;
+
+ INIT_LIST_HEAD(&c->list);
+ c->busaddr = (__u32) cmd_dma_handle;
+ temp64.val = (__u64) err_dma_handle;
+ c->ErrDesc.Addr.lower = temp64.val32.lower;
+ c->ErrDesc.Addr.upper = temp64.val32.upper;
+ c->ErrDesc.Len = sizeof(ErrorInfo_struct);
+
+ c->ctlr = h->ctlr;
+ return c;
+}
+
+/* allocate a command using pci_alloc_consistent, used for ioctls,
+ * etc., not for the main i/o path.
+ */
+static CommandList_struct *cmd_special_alloc(ctlr_info_t *h)
+{
+ CommandList_struct *c;
+ u64bit temp64;
+ dma_addr_t cmd_dma_handle, err_dma_handle;
+
+ c = (CommandList_struct *) pci_alloc_consistent(h->pdev,
+ sizeof(CommandList_struct), &cmd_dma_handle);
+ if (c == NULL)
+ return NULL;
+ memset(c, 0, sizeof(CommandList_struct));
+
+ c->cmdindex = -1;
+
+ c->err_info = (ErrorInfo_struct *)
+ pci_alloc_consistent(h->pdev, sizeof(ErrorInfo_struct),
+ &err_dma_handle);
+
+ if (c->err_info == NULL) {
+ pci_free_consistent(h->pdev,
+ sizeof(CommandList_struct), c, cmd_dma_handle);
+ return NULL;
+ }
+ memset(c->err_info, 0, sizeof(ErrorInfo_struct));
+
+ INIT_LIST_HEAD(&c->list);
+ c->busaddr = (__u32) cmd_dma_handle;
+ temp64.val = (__u64) err_dma_handle;
+ c->ErrDesc.Addr.lower = temp64.val32.lower;
+ c->ErrDesc.Addr.upper = temp64.val32.upper;
+ c->ErrDesc.Len = sizeof(ErrorInfo_struct);
+
+ c->ctlr = h->ctlr;
+ return c;
+}
+
+static void cmd_free(ctlr_info_t *h, CommandList_struct *c)
+{
+ int i;
+
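+ /* Recover the pool index from the command's offset within cmd_pool and
+ * clear its bit in the allocation bitmap.
+ */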
+ i = c - h->cmd_pool;
+ clear_bit(i & (BITS_PER_LONG - 1),
+ h->cmd_pool_bits + (i / BITS_PER_LONG));
+ h->nr_frees++;
+}
+
+static void cmd_special_free(ctlr_info_t *h, CommandList_struct *c)
+{
+ u64bit temp64;
+
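+ /*
+ * c->busaddr doubles as the command tag, and the controller may use its
+ * low-order bits for error flags; mask those off to recover the original
+ * DMA address before freeing the command block.
+ */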
+ temp64.val32.lower = c->ErrDesc.Addr.lower;
+ temp64.val32.upper = c->ErrDesc.Addr.upper;
+ pci_free_consistent(h->pdev, sizeof(ErrorInfo_struct),
+ c->err_info, (dma_addr_t) temp64.val);
+ pci_free_consistent(h->pdev, sizeof(CommandList_struct), c,
+ (dma_addr_t) cciss_tag_discard_error_bits(h, (u32) c->busaddr));
+}
+
+static inline ctlr_info_t *get_host(struct gendisk *disk)
+{
+ return disk->queue->queuedata;
+}
+
+static inline drive_info_struct *get_drv(struct gendisk *disk)
+{
+ return disk->private_data;
+}
+
+/*
+ * Open. Make sure the device is really there.
+ */
+static int cciss_open(struct block_device *bdev, fmode_t mode)
+{
+ ctlr_info_t *h = get_host(bdev->bd_disk);
+ drive_info_struct *drv = get_drv(bdev->bd_disk);
+
+ dev_dbg(&h->pdev->dev, "cciss_open %s\n", bdev->bd_disk->disk_name);
+ if (drv->busy_configuring)
+ return -EBUSY;
+ /*
+ * Root is allowed to open raw volume zero even if it's not configured
+ * so array config can still work. Root is also allowed to open any
+ * volume that has a LUN ID, so it can issue IOCTL to reread the
+ * disk information. I don't think I really like this
+ * but I'm already using way too many device nodes to claim another one
+ * for "raw controller".
+ */
+ if (drv->heads == 0) {
+ if (MINOR(bdev->bd_dev) != 0) { /* not node 0? */
+ /* if not node 0 make sure it is a partition = 0 */
+ if (MINOR(bdev->bd_dev) & 0x0f) {
+ return -ENXIO;
+ /* if it is, make sure we have a LUN ID */
+ } else if (memcmp(drv->LunID, CTLR_LUNID,
+ sizeof(drv->LunID))) {
+ return -ENXIO;
+ }
+ }
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+ }
+ drv->usage_count++;
+ h->usage_count++;
+ return 0;
+}
+
+static int cciss_unlocked_open(struct block_device *bdev, fmode_t mode)
+{
+ int ret;
+
+ mutex_lock(&cciss_mutex);
+ ret = cciss_open(bdev, mode);
+ mutex_unlock(&cciss_mutex);
+
+ return ret;
+}
+
+/*
+ * Close. Sync first.
+ */
+static int cciss_release(struct gendisk *disk, fmode_t mode)
+{
+ ctlr_info_t *h;
+ drive_info_struct *drv;
+
+ mutex_lock(&cciss_mutex);
+ h = get_host(disk);
+ drv = get_drv(disk);
+ dev_dbg(&h->pdev->dev, "cciss_release %s\n", disk->disk_name);
+ drv->usage_count--;
+ h->usage_count--;
+ mutex_unlock(&cciss_mutex);
+ return 0;
+}
+
+static int do_ioctl(struct block_device *bdev, fmode_t mode,
+ unsigned cmd, unsigned long arg)
+{
+ int ret;
+ mutex_lock(&cciss_mutex);
+ ret = cciss_ioctl(bdev, mode, cmd, arg);
+ mutex_unlock(&cciss_mutex);
+ return ret;
+}
+
+#ifdef CONFIG_COMPAT
+
+static int cciss_ioctl32_passthru(struct block_device *bdev, fmode_t mode,
+ unsigned cmd, unsigned long arg);
+static int cciss_ioctl32_big_passthru(struct block_device *bdev, fmode_t mode,
+ unsigned cmd, unsigned long arg);
+
+static int cciss_compat_ioctl(struct block_device *bdev, fmode_t mode,
+ unsigned cmd, unsigned long arg)
+{
+ switch (cmd) {
+ case CCISS_GETPCIINFO:
+ case CCISS_GETINTINFO:
+ case CCISS_SETINTINFO:
+ case CCISS_GETNODENAME:
+ case CCISS_SETNODENAME:
+ case CCISS_GETHEARTBEAT:
+ case CCISS_GETBUSTYPES:
+ case CCISS_GETFIRMVER:
+ case CCISS_GETDRIVVER:
+ case CCISS_REVALIDVOLS:
+ case CCISS_DEREGDISK:
+ case CCISS_REGNEWDISK:
+ case CCISS_REGNEWD:
+ case CCISS_RESCANDISK:
+ case CCISS_GETLUNINFO:
+ return do_ioctl(bdev, mode, cmd, arg);
+
+ case CCISS_PASSTHRU32:
+ return cciss_ioctl32_passthru(bdev, mode, cmd, arg);
+ case CCISS_BIG_PASSTHRU32:
+ return cciss_ioctl32_big_passthru(bdev, mode, cmd, arg);
+
+ default:
+ return -ENOIOCTLCMD;
+ }
+}
+
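+/*
+ * Translate a 32-bit CCISS_PASSTHRU from a compat task: copy the 32-bit
+ * structure in piecewise, rebuild it in the native 64-bit layout in a
+ * compat_alloc_user_space() buffer, and pass that to the normal ioctl path.
+ */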
+static int cciss_ioctl32_passthru(struct block_device *bdev, fmode_t mode,
+ unsigned cmd, unsigned long arg)
+{
+ IOCTL32_Command_struct __user *arg32 =
+ (IOCTL32_Command_struct __user *) arg;
+ IOCTL_Command_struct arg64;
+ IOCTL_Command_struct __user *p = compat_alloc_user_space(sizeof(arg64));
+ int err;
+ u32 cp;
+
+ err = 0;
+ err |=
+ copy_from_user(&arg64.LUN_info, &arg32->LUN_info,
+ sizeof(arg64.LUN_info));
+ err |=
+ copy_from_user(&arg64.Request, &arg32->Request,
+ sizeof(arg64.Request));
+ err |=
+ copy_from_user(&arg64.error_info, &arg32->error_info,
+ sizeof(arg64.error_info));
+ err |= get_user(arg64.buf_size, &arg32->buf_size);
+ err |= get_user(cp, &arg32->buf);
+ arg64.buf = compat_ptr(cp);
+ err |= copy_to_user(p, &arg64, sizeof(arg64));
+
+ if (err)
+ return -EFAULT;
+
+ err = do_ioctl(bdev, mode, CCISS_PASSTHRU, (unsigned long)p);
+ if (err)
+ return err;
+ err |=
+ copy_in_user(&arg32->error_info, &p->error_info,
+ sizeof(arg32->error_info));
+ if (err)
+ return -EFAULT;
+ return err;
+}
+
+static int cciss_ioctl32_big_passthru(struct block_device *bdev, fmode_t mode,
+ unsigned cmd, unsigned long arg)
+{
+ BIG_IOCTL32_Command_struct __user *arg32 =
+ (BIG_IOCTL32_Command_struct __user *) arg;
+ BIG_IOCTL_Command_struct arg64;
+ BIG_IOCTL_Command_struct __user *p =
+ compat_alloc_user_space(sizeof(arg64));
+ int err;
+ u32 cp;
+
+ memset(&arg64, 0, sizeof(arg64));
+ err = 0;
+ err |=
+ copy_from_user(&arg64.LUN_info, &arg32->LUN_info,
+ sizeof(arg64.LUN_info));
+ err |=
+ copy_from_user(&arg64.Request, &arg32->Request,
+ sizeof(arg64.Request));
+ err |=
+ copy_from_user(&arg64.error_info, &arg32->error_info,
+ sizeof(arg64.error_info));
+ err |= get_user(arg64.buf_size, &arg32->buf_size);
+ err |= get_user(arg64.malloc_size, &arg32->malloc_size);
+ err |= get_user(cp, &arg32->buf);
+ arg64.buf = compat_ptr(cp);
+ err |= copy_to_user(p, &arg64, sizeof(arg64));
+
+ if (err)
+ return -EFAULT;
+
+ err = do_ioctl(bdev, mode, CCISS_BIG_PASSTHRU, (unsigned long)p);
+ if (err)
+ return err;
+ err |=
+ copy_in_user(&arg32->error_info, &p->error_info,
+ sizeof(arg32->error_info));
+ if (err)
+ return -EFAULT;
+ return err;
+}
+#endif
+
+static int cciss_getgeo(struct block_device *bdev, struct hd_geometry *geo)
+{
+ drive_info_struct *drv = get_drv(bdev->bd_disk);
+
+ if (!drv->cylinders)
+ return -ENXIO;
+
+ geo->heads = drv->heads;
+ geo->sectors = drv->sectors;
+ geo->cylinders = drv->cylinders;
+ return 0;
+}
+
+static void check_ioctl_unit_attention(ctlr_info_t *h, CommandList_struct *c)
+{
+ if (c->err_info->CommandStatus == CMD_TARGET_STATUS &&
+ c->err_info->ScsiStatus != SAM_STAT_CHECK_CONDITION)
+ (void)check_for_unit_attention(h, c);
+}
+
+static int cciss_getpciinfo(ctlr_info_t *h, void __user *argp)
+{
+ cciss_pci_info_struct pciinfo;
+
+ if (!argp)
+ return -EINVAL;
+ pciinfo.domain = pci_domain_nr(h->pdev->bus);
+ pciinfo.bus = h->pdev->bus->number;
+ pciinfo.dev_fn = h->pdev->devfn;
+ pciinfo.board_id = h->board_id;
+ if (copy_to_user(argp, &pciinfo, sizeof(cciss_pci_info_struct)))
+ return -EFAULT;
+ return 0;
+}
+
+static int cciss_getintinfo(ctlr_info_t *h, void __user *argp)
+{
+ cciss_coalint_struct intinfo;
+
+ if (!argp)
+ return -EINVAL;
+ intinfo.delay = readl(&h->cfgtable->HostWrite.CoalIntDelay);
+ intinfo.count = readl(&h->cfgtable->HostWrite.CoalIntCount);
+ if (copy_to_user
+ (argp, &intinfo, sizeof(cciss_coalint_struct)))
+ return -EFAULT;
+ return 0;
+}
+
+static int cciss_setintinfo(ctlr_info_t *h, void __user *argp)
+{
+ cciss_coalint_struct intinfo;
+ unsigned long flags;
+ int i;
+
+ if (!argp)
+ return -EINVAL;
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+ if (copy_from_user(&intinfo, argp, sizeof(intinfo)))
+ return -EFAULT;
+ if ((intinfo.delay == 0) && (intinfo.count == 0))
+ return -EINVAL;
+ spin_lock_irqsave(&h->lock, flags);
+ /* Update the field, and then ring the doorbell */
+ writel(intinfo.delay, &(h->cfgtable->HostWrite.CoalIntDelay));
+ writel(intinfo.count, &(h->cfgtable->HostWrite.CoalIntCount));
+ writel(CFGTBL_ChangeReq, h->vaddr + SA5_DOORBELL);
+
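+ /* Wait for the controller to acknowledge the change request by
+ * clearing the doorbell bit, polling for up to MAX_IOCTL_CONFIG_WAIT ms.
+ */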
+ for (i = 0; i < MAX_IOCTL_CONFIG_WAIT; i++) {
+ if (!(readl(h->vaddr + SA5_DOORBELL) & CFGTBL_ChangeReq))
+ break;
+ udelay(1000); /* delay and try again */
+ }
+ spin_unlock_irqrestore(&h->lock, flags);
+ if (i >= MAX_IOCTL_CONFIG_WAIT)
+ return -EAGAIN;
+ return 0;
+}
+
+static int cciss_getnodename(ctlr_info_t *h, void __user *argp)
+{
+ NodeName_type NodeName;
+ int i;
+
+ if (!argp)
+ return -EINVAL;
+ for (i = 0; i < 16; i++)
+ NodeName[i] = readb(&h->cfgtable->ServerName[i]);
+ if (copy_to_user(argp, NodeName, sizeof(NodeName_type)))
+ return -EFAULT;
+ return 0;
+}
+
+static int cciss_setnodename(ctlr_info_t *h, void __user *argp)
+{
+ NodeName_type NodeName;
+ unsigned long flags;
+ int i;
+
+ if (!argp)
+ return -EINVAL;
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+ if (copy_from_user(NodeName, argp, sizeof(NodeName_type)))
+ return -EFAULT;
+ spin_lock_irqsave(&h->lock, flags);
+ /* Update the field, and then ring the doorbell */
+ for (i = 0; i < 16; i++)
+ writeb(NodeName[i], &h->cfgtable->ServerName[i]);
+ writel(CFGTBL_ChangeReq, h->vaddr + SA5_DOORBELL);
+ for (i = 0; i < MAX_IOCTL_CONFIG_WAIT; i++) {
+ if (!(readl(h->vaddr + SA5_DOORBELL) & CFGTBL_ChangeReq))
+ break;
+ udelay(1000); /* delay and try again */
+ }
+ spin_unlock_irqrestore(&h->lock, flags);
+ if (i >= MAX_IOCTL_CONFIG_WAIT)
+ return -EAGAIN;
+ return 0;
+}
+
+static int cciss_getheartbeat(ctlr_info_t *h, void __user *argp)
+{
+ Heartbeat_type heartbeat;
+
+ if (!argp)
+ return -EINVAL;
+ heartbeat = readl(&h->cfgtable->HeartBeat);
+ if (copy_to_user(argp, &heartbeat, sizeof(Heartbeat_type)))
+ return -EFAULT;
+ return 0;
+}
+
+static int cciss_getbustypes(ctlr_info_t *h, void __user *argp)
+{
+ BusTypes_type BusTypes;
+
+ if (!argp)
+ return -EINVAL;
+ BusTypes = readl(&h->cfgtable->BusTypes);
+ if (copy_to_user(argp, &BusTypes, sizeof(BusTypes_type)))
+ return -EFAULT;
+ return 0;
+}
+
+static int cciss_getfirmver(ctlr_info_t *h, void __user *argp)
+{
+ FirmwareVer_type firmware;
+
+ if (!argp)
+ return -EINVAL;
+ memcpy(firmware, h->firm_ver, 4);
+
+ if (copy_to_user
+ (argp, firmware, sizeof(FirmwareVer_type)))
+ return -EFAULT;
+ return 0;
+}
+
+static int cciss_getdrivver(ctlr_info_t *h, void __user *argp)
+{
+ DriverVer_type DriverVer = DRIVER_VERSION;
+
+ if (!argp)
+ return -EINVAL;
+ if (copy_to_user(argp, &DriverVer, sizeof(DriverVer_type)))
+ return -EFAULT;
+ return 0;
+}
+
+static int cciss_getluninfo(ctlr_info_t *h,
+ struct gendisk *disk, void __user *argp)
+{
+ LogvolInfo_struct luninfo;
+ drive_info_struct *drv = get_drv(disk);
+
+ if (!argp)
+ return -EINVAL;
+ memcpy(&luninfo.LunID, drv->LunID, sizeof(luninfo.LunID));
+ luninfo.num_opens = drv->usage_count;
+ luninfo.num_parts = 0;
+ if (copy_to_user(argp, &luninfo, sizeof(LogvolInfo_struct)))
+ return -EFAULT;
+ return 0;
+}
+
+static int cciss_passthru(ctlr_info_t *h, void __user *argp)
+{
+ IOCTL_Command_struct iocommand;
+ CommandList_struct *c;
+ char *buff = NULL;
+ u64bit temp64;
+ DECLARE_COMPLETION_ONSTACK(wait);
+
+ if (!argp)
+ return -EINVAL;
+
+ if (!capable(CAP_SYS_RAWIO))
+ return -EPERM;
+
+ if (copy_from_user
+ (&iocommand, argp, sizeof(IOCTL_Command_struct)))
+ return -EFAULT;
+ if ((iocommand.buf_size < 1) &&
+ (iocommand.Request.Type.Direction != XFER_NONE)) {
+ return -EINVAL;
+ }
+ if (iocommand.buf_size > 0) {
+ buff = kmalloc(iocommand.buf_size, GFP_KERNEL);
+ if (buff == NULL)
+ return -EFAULT;
+ }
+ if (iocommand.Request.Type.Direction == XFER_WRITE) {
+ /* Copy the data into the buffer we created */
+ if (copy_from_user(buff, iocommand.buf, iocommand.buf_size)) {
+ kfree(buff);
+ return -EFAULT;
+ }
+ } else {
+ memset(buff, 0, iocommand.buf_size);
+ }
+ c = cmd_special_alloc(h);
+ if (!c) {
+ kfree(buff);
+ return -ENOMEM;
+ }
+ /* Fill in the command type */
+ c->cmd_type = CMD_IOCTL_PEND;
+ /* Fill in Command Header */
+ c->Header.ReplyQueue = 0; /* unused in simple mode */
+ if (iocommand.buf_size > 0) { /* buffer to fill */
+ c->Header.SGList = 1;
+ c->Header.SGTotal = 1;
+ } else { /* no buffers to fill */
+ c->Header.SGList = 0;
+ c->Header.SGTotal = 0;
+ }
+ c->Header.LUN = iocommand.LUN_info;
+ /* use the kernel address of the cmd block for the tag */
+ c->Header.Tag.lower = c->busaddr;
+
+ /* Fill in Request block */
+ c->Request = iocommand.Request;
+
+ /* Fill in the scatter gather information */
+ if (iocommand.buf_size > 0) {
+ temp64.val = pci_map_single(h->pdev, buff,
+ iocommand.buf_size, PCI_DMA_BIDIRECTIONAL);
+ c->SG[0].Addr.lower = temp64.val32.lower;
+ c->SG[0].Addr.upper = temp64.val32.upper;
+ c->SG[0].Len = iocommand.buf_size;
+ c->SG[0].Ext = 0; /* we are not chaining */
+ }
+ c->waiting = &wait;
+
+ enqueue_cmd_and_start_io(h, c);
+ wait_for_completion(&wait);
+
+ /* unlock the buffers from DMA */
+ temp64.val32.lower = c->SG[0].Addr.lower;
+ temp64.val32.upper = c->SG[0].Addr.upper;
+ pci_unmap_single(h->pdev, (dma_addr_t) temp64.val, iocommand.buf_size,
+ PCI_DMA_BIDIRECTIONAL);
+ check_ioctl_unit_attention(h, c);
+
+ /* Copy the error information out */
+ iocommand.error_info = *(c->err_info);
+ if (copy_to_user(argp, &iocommand, sizeof(IOCTL_Command_struct))) {
+ kfree(buff);
+ cmd_special_free(h, c);
+ return -EFAULT;
+ }
+
+ if (iocommand.Request.Type.Direction == XFER_READ) {
+ /* Copy the data out of the buffer we created */
+ if (copy_to_user(iocommand.buf, buff, iocommand.buf_size)) {
+ kfree(buff);
+ cmd_special_free(h, c);
+ return -EFAULT;
+ }
+ }
+ kfree(buff);
+ cmd_special_free(h, c);
+ return 0;
+}
+
+static int cciss_bigpassthru(ctlr_info_t *h, void __user *argp)
+{
+ BIG_IOCTL_Command_struct *ioc;
+ CommandList_struct *c;
+ unsigned char **buff = NULL;
+ int *buff_size = NULL;
+ u64bit temp64;
+ BYTE sg_used = 0;
+ int status = 0;
+ int i;
+ DECLARE_COMPLETION_ONSTACK(wait);
+ __u32 left;
+ __u32 sz;
+ BYTE __user *data_ptr;
+
+ if (!argp)
+ return -EINVAL;
+ if (!capable(CAP_SYS_RAWIO))
+ return -EPERM;
+ ioc = kmalloc(sizeof(*ioc), GFP_KERNEL);
+ if (!ioc) {
+ status = -ENOMEM;
+ goto cleanup1;
+ }
+ if (copy_from_user(ioc, argp, sizeof(*ioc))) {
+ status = -EFAULT;
+ goto cleanup1;
+ }
+ if ((ioc->buf_size < 1) &&
+ (ioc->Request.Type.Direction != XFER_NONE)) {
+ status = -EINVAL;
+ goto cleanup1;
+ }
+ /* Check kmalloc limits using all SGs */
+ if (ioc->malloc_size > MAX_KMALLOC_SIZE) {
+ status = -EINVAL;
+ goto cleanup1;
+ }
+ if (ioc->buf_size > ioc->malloc_size * MAXSGENTRIES) {
+ status = -EINVAL;
+ goto cleanup1;
+ }
+ buff = kzalloc(MAXSGENTRIES * sizeof(char *), GFP_KERNEL);
+ if (!buff) {
+ status = -ENOMEM;
+ goto cleanup1;
+ }
+ buff_size = kmalloc(MAXSGENTRIES * sizeof(int), GFP_KERNEL);
+ if (!buff_size) {
+ status = -ENOMEM;
+ goto cleanup1;
+ }
+ left = ioc->buf_size;
+ data_ptr = ioc->buf;
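+ /* Split the transfer into chunks of at most ioc->malloc_size bytes;
+ * each chunk gets its own kernel buffer and one scatter-gather entry.
+ */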
+ while (left) {
+ sz = (left > ioc->malloc_size) ? ioc->malloc_size : left;
+ buff_size[sg_used] = sz;
+ buff[sg_used] = kmalloc(sz, GFP_KERNEL);
+ if (buff[sg_used] == NULL) {
+ status = -ENOMEM;
+ goto cleanup1;
+ }
+ if (ioc->Request.Type.Direction == XFER_WRITE) {
+ if (copy_from_user(buff[sg_used], data_ptr, sz)) {
+ status = -EFAULT;
+ goto cleanup1;
+ }
+ } else {
+ memset(buff[sg_used], 0, sz);
+ }
+ left -= sz;
+ data_ptr += sz;
+ sg_used++;
+ }
+ c = cmd_special_alloc(h);
+ if (!c) {
+ status = -ENOMEM;
+ goto cleanup1;
+ }
+ c->cmd_type = CMD_IOCTL_PEND;
+ c->Header.ReplyQueue = 0;
+ c->Header.SGList = sg_used;
+ c->Header.SGTotal = sg_used;
+ c->Header.LUN = ioc->LUN_info;
+ c->Header.Tag.lower = c->busaddr;
+
+ c->Request = ioc->Request;
+ for (i = 0; i < sg_used; i++) {
+ temp64.val = pci_map_single(h->pdev, buff[i], buff_size[i],
+ PCI_DMA_BIDIRECTIONAL);
+ c->SG[i].Addr.lower = temp64.val32.lower;
+ c->SG[i].Addr.upper = temp64.val32.upper;
+ c->SG[i].Len = buff_size[i];
+ c->SG[i].Ext = 0; /* we are not chaining */
+ }
+ c->waiting = &wait;
+ enqueue_cmd_and_start_io(h, c);
+ wait_for_completion(&wait);
+ /* unlock the buffers from DMA */
+ for (i = 0; i < sg_used; i++) {
+ temp64.val32.lower = c->SG[i].Addr.lower;
+ temp64.val32.upper = c->SG[i].Addr.upper;
+ pci_unmap_single(h->pdev,
+ (dma_addr_t) temp64.val, buff_size[i],
+ PCI_DMA_BIDIRECTIONAL);
+ }
+ check_ioctl_unit_attention(h, c);
+ /* Copy the error information out */
+ ioc->error_info = *(c->err_info);
+ if (copy_to_user(argp, ioc, sizeof(*ioc))) {
+ cmd_special_free(h, c);
+ status = -EFAULT;
+ goto cleanup1;
+ }
+ if (ioc->Request.Type.Direction == XFER_READ) {
+ /* Copy the data out of the buffer we created */
+ BYTE __user *ptr = ioc->buf;
+ for (i = 0; i < sg_used; i++) {
+ if (copy_to_user(ptr, buff[i], buff_size[i])) {
+ cmd_special_free(h, c);
+ status = -EFAULT;
+ goto cleanup1;
+ }
+ ptr += buff_size[i];
+ }
+ }
+ cmd_special_free(h, c);
+ status = 0;
+cleanup1:
+ if (buff) {
+ for (i = 0; i < sg_used; i++)
+ kfree(buff[i]);
+ kfree(buff);
+ }
+ kfree(buff_size);
+ kfree(ioc);
+ return status;
+}
+
+static int cciss_ioctl(struct block_device *bdev, fmode_t mode,
+ unsigned int cmd, unsigned long arg)
+{
+ struct gendisk *disk = bdev->bd_disk;
+ ctlr_info_t *h = get_host(disk);
+ void __user *argp = (void __user *)arg;
+
+ dev_dbg(&h->pdev->dev, "cciss_ioctl: Called with cmd=%x %lx\n",
+ cmd, arg);
+ switch (cmd) {
+ case CCISS_GETPCIINFO:
+ return cciss_getpciinfo(h, argp);
+ case CCISS_GETINTINFO:
+ return cciss_getintinfo(h, argp);
+ case CCISS_SETINTINFO:
+ return cciss_setintinfo(h, argp);
+ case CCISS_GETNODENAME:
+ return cciss_getnodename(h, argp);
+ case CCISS_SETNODENAME:
+ return cciss_setnodename(h, argp);
+ case CCISS_GETHEARTBEAT:
+ return cciss_getheartbeat(h, argp);
+ case CCISS_GETBUSTYPES:
+ return cciss_getbustypes(h, argp);
+ case CCISS_GETFIRMVER:
+ return cciss_getfirmver(h, argp);
+ case CCISS_GETDRIVVER:
+ return cciss_getdrivver(h, argp);
+ case CCISS_DEREGDISK:
+ case CCISS_REGNEWD:
+ case CCISS_REVALIDVOLS:
+ return rebuild_lun_table(h, 0, 1);
+ case CCISS_GETLUNINFO:
+ return cciss_getluninfo(h, disk, argp);
+ case CCISS_PASSTHRU:
+ return cciss_passthru(h, argp);
+ case CCISS_BIG_PASSTHRU:
+ return cciss_bigpassthru(h, argp);
+
+ /* scsi_cmd_blk_ioctl handles these, below, though some are not */
+ /* very meaningful for cciss. SG_IO is the main one people want. */
+
+ case SG_GET_VERSION_NUM:
+ case SG_SET_TIMEOUT:
+ case SG_GET_TIMEOUT:
+ case SG_GET_RESERVED_SIZE:
+ case SG_SET_RESERVED_SIZE:
+ case SG_EMULATED_HOST:
+ case SG_IO:
+ case SCSI_IOCTL_SEND_COMMAND:
+ return scsi_cmd_blk_ioctl(bdev, mode, cmd, argp);
+
+ /* scsi_cmd_blk_ioctl would normally handle these, below, but */
+ /* they aren't a good fit for cciss, as CD-ROMs are */
+ /* not supported, and we don't have any bus/target/lun */
+ /* which we present to the kernel. */
+
+ case CDROM_SEND_PACKET:
+ case CDROMCLOSETRAY:
+ case CDROMEJECT:
+ case SCSI_IOCTL_GET_IDLUN:
+ case SCSI_IOCTL_GET_BUS_NUMBER:
+ default:
+ return -ENOTTY;
+ }
+}
+
+static void cciss_check_queues(ctlr_info_t *h)
+{
+ int start_queue = h->next_to_run;
+ int i;
+
+ /* check to see if we have maxed out the number of commands that can
+ * be placed on the queue. If so then exit. We do this check here
+ * in case the interrupt we serviced was from an ioctl and did not
+ * free any new commands.
+ */
+ if ((find_first_zero_bit(h->cmd_pool_bits, h->nr_cmds)) == h->nr_cmds)
+ return;
+
+ /* We have room on the queue for more commands. Now we need to queue
+ * them up. We will also keep track of the next queue to run so
+ * that every queue gets a chance to be started first.
+ */
+ for (i = 0; i < h->highest_lun + 1; i++) {
+ int curr_queue = (start_queue + i) % (h->highest_lun + 1);
+ /* make sure the disk has been added and the drive is real
+ * because this can be called from the middle of init_one.
+ */
+ if (!h->drv[curr_queue])
+ continue;
+ if (!(h->drv[curr_queue]->queue) ||
+ !(h->drv[curr_queue]->heads))
+ continue;
+ blk_start_queue(h->gendisk[curr_queue]->queue);
+
+ /* check to see if we have maxed out the number of commands
+ * that can be placed on the queue.
+ */
+ if ((find_first_zero_bit(h->cmd_pool_bits, h->nr_cmds)) == h->nr_cmds) {
+ if (curr_queue == start_queue) {
+ h->next_to_run =
+ (start_queue + 1) % (h->highest_lun + 1);
+ break;
+ } else {
+ h->next_to_run = curr_queue;
+ break;
+ }
+ }
+ }
+}
+
+static void cciss_softirq_done(struct request *rq)
+{
+ CommandList_struct *c = rq->completion_data;
+ ctlr_info_t *h = hba[c->ctlr];
+ SGDescriptor_struct *curr_sg = c->SG;
+ u64bit temp64;
+ unsigned long flags;
+ int i, ddir;
+ int sg_index = 0;
+
+ if (c->Request.Type.Direction == XFER_READ)
+ ddir = PCI_DMA_FROMDEVICE;
+ else
+ ddir = PCI_DMA_TODEVICE;
+
+ /* command did not need to be retried */
+ /* unmap the DMA mapping for all the scatter gather elements */
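+ /* An entry marked CCISS_SG_CHAIN means the remaining descriptors live
+ * in a separate chain block; unmap that block and continue walking it.
+ */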
+ for (i = 0; i < c->Header.SGList; i++) {
+ if (curr_sg[sg_index].Ext == CCISS_SG_CHAIN) {
+ cciss_unmap_sg_chain_block(h, c);
+ /* Point to the next block */
+ curr_sg = h->cmd_sg_list[c->cmdindex];
+ sg_index = 0;
+ }
+ temp64.val32.lower = curr_sg[sg_index].Addr.lower;
+ temp64.val32.upper = curr_sg[sg_index].Addr.upper;
+ pci_unmap_page(h->pdev, temp64.val, curr_sg[sg_index].Len,
+ ddir);
+ ++sg_index;
+ }
+
+ dev_dbg(&h->pdev->dev, "Done with %p\n", rq);
+
+ /* set the residual count for pc requests */
+ if (rq->cmd_type == REQ_TYPE_BLOCK_PC)
+ rq->resid_len = c->err_info->ResidualCnt;
+
+ blk_end_request_all(rq, (rq->errors == 0) ? 0 : -EIO);
+
+ spin_lock_irqsave(&h->lock, flags);
+ cmd_free(h, c);
+ cciss_check_queues(h);
+ spin_unlock_irqrestore(&h->lock, flags);
+}
+
+static inline void log_unit_to_scsi3addr(ctlr_info_t *h,
+ unsigned char scsi3addr[], uint32_t log_unit)
+{
+ memcpy(scsi3addr, h->drv[log_unit]->LunID,
+ sizeof(h->drv[log_unit]->LunID));
+}
+
+/* This function gets the SCSI vendor, model, and revision of a logical drive
+ * via the inquiry page 0. Model, vendor, and rev are set to empty strings if
+ * they cannot be read.
+ */
+static void cciss_get_device_descr(ctlr_info_t *h, int logvol,
+ char *vendor, char *model, char *rev)
+{
+ int rc;
+ InquiryData_struct *inq_buf;
+ unsigned char scsi3addr[8];
+
+ *vendor = '\0';
+ *model = '\0';
+ *rev = '\0';
+
+ inq_buf = kzalloc(sizeof(InquiryData_struct), GFP_KERNEL);
+ if (!inq_buf)
+ return;
+
+ log_unit_to_scsi3addr(h, scsi3addr, logvol);
+ rc = sendcmd_withirq(h, CISS_INQUIRY, inq_buf, sizeof(*inq_buf), 0,
+ scsi3addr, TYPE_CMD);
+ if (rc == IO_OK) {
+ memcpy(vendor, &inq_buf->data_byte[8], VENDOR_LEN);
+ vendor[VENDOR_LEN] = '\0';
+ memcpy(model, &inq_buf->data_byte[16], MODEL_LEN);
+ model[MODEL_LEN] = '\0';
+ memcpy(rev, &inq_buf->data_byte[32], REV_LEN);
+ rev[REV_LEN] = '\0';
+ }
+
+ kfree(inq_buf);
+ return;
+}
+
+/* This function gets the serial number of a logical drive via
+ * inquiry page 0x83. Serial no. is 16 bytes. If the serial
+ * number cannot be had, for whatever reason, 16 bytes of 0xff
+ * are returned instead.
+ */
+static void cciss_get_serial_no(ctlr_info_t *h, int logvol,
+ unsigned char *serial_no, int buflen)
+{
+#define PAGE_83_INQ_BYTES 64
+ int rc;
+ unsigned char *buf;
+ unsigned char scsi3addr[8];
+
+ if (buflen > 16)
+ buflen = 16;
+ memset(serial_no, 0xff, buflen);
+ buf = kzalloc(PAGE_83_INQ_BYTES, GFP_KERNEL);
+ if (!buf)
+ return;
+ memset(serial_no, 0, buflen);
+ log_unit_to_scsi3addr(h, scsi3addr, logvol);
+ rc = sendcmd_withirq(h, CISS_INQUIRY, buf,
+ PAGE_83_INQ_BYTES, 0x83, scsi3addr, TYPE_CMD);
+ if (rc == IO_OK)
+ memcpy(serial_no, &buf[8], buflen);
+ kfree(buf);
+ return;
+}
+
+/*
+ * cciss_add_disk sets up the block device queue for a logical drive
+ */
+static int cciss_add_disk(ctlr_info_t *h, struct gendisk *disk,
+ int drv_index)
+{
+ disk->queue = blk_init_queue(do_cciss_request, &h->lock);
+ if (!disk->queue)
+ goto init_queue_failure;
+ sprintf(disk->disk_name, "cciss/c%dd%d", h->ctlr, drv_index);
+ disk->major = h->major;
+ disk->first_minor = drv_index << NWD_SHIFT;
+ disk->fops = &cciss_fops;
+ if (cciss_create_ld_sysfs_entry(h, drv_index))
+ goto cleanup_queue;
+ disk->private_data = h->drv[drv_index];
+ disk->driverfs_dev = &h->drv[drv_index]->dev;
+
+ /* Set up queue information */
+ blk_queue_bounce_limit(disk->queue, h->pdev->dma_mask);
+
+ /* This is a hardware imposed limit. */
+ blk_queue_max_segments(disk->queue, h->maxsgentries);
+
+ blk_queue_max_hw_sectors(disk->queue, h->cciss_max_sectors);
+
+ blk_queue_softirq_done(disk->queue, cciss_softirq_done);
+
+ disk->queue->queuedata = h;
+
+ blk_queue_logical_block_size(disk->queue,
+ h->drv[drv_index]->block_size);
+
+ /* Make sure all queue data is written out before */
+ /* setting h->drv[drv_index]->queue, as setting this */
+ /* allows the interrupt handler to start the queue */
+ wmb();
+ h->drv[drv_index]->queue = disk->queue;
+ add_disk(disk);
+ return 0;
+
+cleanup_queue:
+ blk_cleanup_queue(disk->queue);
+ disk->queue = NULL;
+init_queue_failure:
+ return -1;
+}
+
+/* This function will check the usage_count of the drive to be updated/added.
+ * If the usage_count is zero and it is a heretofore unknown drive, or
+ * the drive's capacity, geometry, or serial number has changed,
+ * then the drive information will be updated and the disk will be
+ * re-registered with the kernel. If these conditions don't hold,
+ * then it will be left alone for the next reboot. The exception to this
+ * is disk 0 which will always be left registered with the kernel since it
+ * is also the controller node. Any changes to disk 0 will show up on
+ * the next reboot.
+ */
+static void cciss_update_drive_info(ctlr_info_t *h, int drv_index,
+ int first_time, int via_ioctl)
+{
+ struct gendisk *disk;
+ InquiryData_struct *inq_buff = NULL;
+ unsigned int block_size;
+ sector_t total_size;
+ unsigned long flags = 0;
+ int ret = 0;
+ drive_info_struct *drvinfo;
+
+ /* Get information about the disk and modify the driver structure */
+ inq_buff = kmalloc(sizeof(InquiryData_struct), GFP_KERNEL);
+ drvinfo = kzalloc(sizeof(*drvinfo), GFP_KERNEL);
+ if (inq_buff == NULL || drvinfo == NULL)
+ goto mem_msg;
+
+ /* testing to see if 16-byte CDBs are already being used */
+ if (h->cciss_read == CCISS_READ_16) {
+ cciss_read_capacity_16(h, drv_index,
+ &total_size, &block_size);
+
+ } else {
+ cciss_read_capacity(h, drv_index, &total_size, &block_size);
+ /* if read_capacity returns all F's this volume is >2TB */
+ /* in size so we switch to 16-byte CDB's for all */
+ /* read/write ops */
+ if (total_size == 0xFFFFFFFFULL) {
+ cciss_read_capacity_16(h, drv_index,
+ &total_size, &block_size);
+ h->cciss_read = CCISS_READ_16;
+ h->cciss_write = CCISS_WRITE_16;
+ } else {
+ h->cciss_read = CCISS_READ_10;
+ h->cciss_write = CCISS_WRITE_10;
+ }
+ }
+
+ cciss_geometry_inquiry(h, drv_index, total_size, block_size,
+ inq_buff, drvinfo);
+ drvinfo->block_size = block_size;
+ drvinfo->nr_blocks = total_size + 1;
+
+ cciss_get_device_descr(h, drv_index, drvinfo->vendor,
+ drvinfo->model, drvinfo->rev);
+ cciss_get_serial_no(h, drv_index, drvinfo->serial_no,
+ sizeof(drvinfo->serial_no));
+ /* Save the lunid in case we deregister the disk, below. */
+ memcpy(drvinfo->LunID, h->drv[drv_index]->LunID,
+ sizeof(drvinfo->LunID));
+
+ /* Is it the same disk we already know, and nothing's changed? */
+ if (h->drv[drv_index]->raid_level != -1 &&
+ ((memcmp(drvinfo->serial_no,
+ h->drv[drv_index]->serial_no, 16) == 0) &&
+ drvinfo->block_size == h->drv[drv_index]->block_size &&
+ drvinfo->nr_blocks == h->drv[drv_index]->nr_blocks &&
+ drvinfo->heads == h->drv[drv_index]->heads &&
+ drvinfo->sectors == h->drv[drv_index]->sectors &&
+ drvinfo->cylinders == h->drv[drv_index]->cylinders))
+ /* The disk is unchanged, nothing to update */
+ goto freeret;
+
+ /* If we get here it's not the same disk, or something's changed,
+ * so we need to deregister it, and re-register it, if it's not
+ * in use.
+ * If the disk already exists then deregister it before proceeding
+ * (unless it's the first disk (for the controller node).
+ * (unless it's the first disk, for the controller node).
+ if (h->drv[drv_index]->raid_level != -1 && drv_index != 0) {
+ dev_warn(&h->pdev->dev, "disk %d has changed.\n", drv_index);
+ spin_lock_irqsave(&h->lock, flags);
+ h->drv[drv_index]->busy_configuring = 1;
+ spin_unlock_irqrestore(&h->lock, flags);
+
+ /* deregister_disk sets h->drv[drv_index]->queue = NULL
+ * which keeps the interrupt handler from starting
+ * the queue.
+ */
+ ret = deregister_disk(h, drv_index, 0, via_ioctl);
+ }
+
+ /* If the disk is in use return */
+ if (ret)
+ goto freeret;
+
+ /* Save the new information from cciss_geometry_inquiry
+ * and serial number inquiry. If the disk was deregistered
+ * above, then h->drv[drv_index] will be NULL.
+ */
+ if (h->drv[drv_index] == NULL) {
+ drvinfo->device_initialized = 0;
+ h->drv[drv_index] = drvinfo;
+ drvinfo = NULL; /* so it won't be freed below. */
+ } else {
+ /* special case for cxd0 */
+ h->drv[drv_index]->block_size = drvinfo->block_size;
+ h->drv[drv_index]->nr_blocks = drvinfo->nr_blocks;
+ h->drv[drv_index]->heads = drvinfo->heads;
+ h->drv[drv_index]->sectors = drvinfo->sectors;
+ h->drv[drv_index]->cylinders = drvinfo->cylinders;
+ h->drv[drv_index]->raid_level = drvinfo->raid_level;
+ memcpy(h->drv[drv_index]->serial_no, drvinfo->serial_no, 16);
+ memcpy(h->drv[drv_index]->vendor, drvinfo->vendor,
+ VENDOR_LEN + 1);
+ memcpy(h->drv[drv_index]->model, drvinfo->model, MODEL_LEN + 1);
+ memcpy(h->drv[drv_index]->rev, drvinfo->rev, REV_LEN + 1);
+ }
+
+ ++h->num_luns;
+ disk = h->gendisk[drv_index];
+ set_capacity(disk, h->drv[drv_index]->nr_blocks);
+
+ /* If it's not disk 0 (drv_index != 0)
+ * or if it was disk 0, but there was previously
+ * no actual corresponding configured logical drive
+ * (raid_level == -1) then we want to update the
+ * logical drive's information.
+ */
+ if (drv_index || first_time) {
+ if (cciss_add_disk(h, disk, drv_index) != 0) {
+ cciss_free_gendisk(h, drv_index);
+ cciss_free_drive_info(h, drv_index);
+ dev_warn(&h->pdev->dev, "could not update disk %d\n",
+ drv_index);
+ --h->num_luns;
+ }
+ }
+
+freeret:
+ kfree(inq_buff);
+ kfree(drvinfo);
+ return;
+mem_msg:
+ dev_err(&h->pdev->dev, "out of memory\n");
+ goto freeret;
+}
+
+/* This function will find the first index of the controller's drive array
+ * that has a null drv pointer, allocate the drive info struct, and
+ * return that index. This is where new drives will be added.
+ * If the index to be returned is greater than the highest_lun index for
+ * the controller then highest_lun is set to this new index.
+ * If there are no available indexes or if the allocation fails, then -1
+ * is returned. "controller_node" is used to know if this is a real
+ * logical drive, or just the controller node, which determines whether it
+ * counts towards highest_lun.
+ */
+static int cciss_alloc_drive_info(ctlr_info_t *h, int controller_node)
+{
+ int i;
+ drive_info_struct *drv;
+
+ /* Search for an empty slot for our drive info */
+ for (i = 0; i < CISS_MAX_LUN; i++) {
+
+ /* if not cxd0 case, and it's occupied, skip it. */
+ if (h->drv[i] && i != 0)
+ continue;
+ /*
+ * If it's cxd0 case, and drv is alloc'ed already, and a
+ * disk is configured there, skip it.
+ */
+ if (i == 0 && h->drv[i] && h->drv[i]->raid_level != -1)
+ continue;
+
+ /*
+ * We've found an empty slot. Update highest_lun
+ * provided this isn't just the fake cxd0 controller node.
+ */
+ if (i > h->highest_lun && !controller_node)
+ h->highest_lun = i;
+
+ /* If adding a real disk at cxd0, and it's already alloc'ed */
+ if (i == 0 && h->drv[i] != NULL)
+ return i;
+
+ /*
+ * Found an empty slot, not already alloc'ed. Allocate it.
+ * Mark it with raid_level == -1, so we know it's new later on.
+ */
+ drv = kzalloc(sizeof(*drv), GFP_KERNEL);
+ if (!drv)
+ return -1;
+ drv->raid_level = -1; /* so we know it's new */
+ h->drv[i] = drv;
+ return i;
+ }
+ return -1;
+}
+
+static void cciss_free_drive_info(ctlr_info_t *h, int drv_index)
+{
+ kfree(h->drv[drv_index]);
+ h->drv[drv_index] = NULL;
+}
+
+static void cciss_free_gendisk(ctlr_info_t *h, int drv_index)
+{
+ put_disk(h->gendisk[drv_index]);
+ h->gendisk[drv_index] = NULL;
+}
+
+/* cciss_add_gendisk finds a free hba[]->drv structure
+ * and allocates a gendisk if needed, and sets the lunid
+ * in the drvinfo structure. It returns the index into
+ * the ->drv[] array, or -1 if none are free.
+ * "controller_node" indicates whether highest_lun should
+ * count this disk, or if it's only being added to provide
+ * a means to talk to the controller in case no logical
+ * drives have yet been configured.
+ */
+static int cciss_add_gendisk(ctlr_info_t *h, unsigned char lunid[],
+ int controller_node)
+{
+ int drv_index;
+
+ drv_index = cciss_alloc_drive_info(h, controller_node);
+ if (drv_index == -1)
+ return -1;
+
+ /* Check if the gendisk needs to be allocated */
+ if (!h->gendisk[drv_index]) {
+ h->gendisk[drv_index] =
+ alloc_disk(1 << NWD_SHIFT);
+ if (!h->gendisk[drv_index]) {
+ dev_err(&h->pdev->dev,
+ "could not allocate a new disk %d\n",
+ drv_index);
+ goto err_free_drive_info;
+ }
+ }
+ memcpy(h->drv[drv_index]->LunID, lunid,
+ sizeof(h->drv[drv_index]->LunID));
+ if (cciss_create_ld_sysfs_entry(h, drv_index))
+ goto err_free_disk;
+ /* Don't need to mark this busy because nobody */
+ /* else knows about this disk yet to contend */
+ /* for access to it. */
+ h->drv[drv_index]->busy_configuring = 0;
+ wmb();
+ return drv_index;
+
+err_free_disk:
+ cciss_free_gendisk(h, drv_index);
+err_free_drive_info:
+ cciss_free_drive_info(h, drv_index);
+ return -1;
+}
+
+/* This is for the special case of a controller which
+ * has no logical drives. In this case, we still need
+ * to register a disk so the controller can be accessed
+ * by the Array Config Utility.
+ */
+static void cciss_add_controller_node(ctlr_info_t *h)
+{
+ struct gendisk *disk;
+ int drv_index;
+
+ if (h->gendisk[0] != NULL) /* already did this? Then bail. */
+ return;
+
+ drv_index = cciss_add_gendisk(h, CTLR_LUNID, 1);
+ if (drv_index == -1)
+ goto error;
+ h->drv[drv_index]->block_size = 512;
+ h->drv[drv_index]->nr_blocks = 0;
+ h->drv[drv_index]->heads = 0;
+ h->drv[drv_index]->sectors = 0;
+ h->drv[drv_index]->cylinders = 0;
+ h->drv[drv_index]->raid_level = -1;
+ memset(h->drv[drv_index]->serial_no, 0, 16);
+ disk = h->gendisk[drv_index];
+ if (cciss_add_disk(h, disk, drv_index) == 0)
+ return;
+ cciss_free_gendisk(h, drv_index);
+ cciss_free_drive_info(h, drv_index);
+error:
+ dev_warn(&h->pdev->dev, "could not add disk 0.\n");
+ return;
+}
+
+/* This function will add and remove logical drives from the Logical
+ * drive array of the controller and maintain persistency of ordering
+ * so that mount points are preserved until the next reboot. This allows
+ * for the removal of logical drives in the middle of the drive array
+ * without a re-ordering of those drives.
+ * INPUT
+ * h = The controller to perform the operations on
+ */
+static int rebuild_lun_table(ctlr_info_t *h, int first_time,
+ int via_ioctl)
+{
+ int num_luns;
+ ReportLunData_struct *ld_buff = NULL;
+ int return_code;
+ int listlength = 0;
+ int i;
+ int drv_found;
+ int drv_index = 0;
+ unsigned char lunid[8] = CTLR_LUNID;
+ unsigned long flags;
+
+ if (!capable(CAP_SYS_RAWIO))
+ return -EPERM;
+
+ /* Set busy_configuring flag for this operation */
+ spin_lock_irqsave(&h->lock, flags);
+ if (h->busy_configuring) {
+ spin_unlock_irqrestore(&h->lock, flags);
+ return -EBUSY;
+ }
+ h->busy_configuring = 1;
+ spin_unlock_irqrestore(&h->lock, flags);
+
+ ld_buff = kzalloc(sizeof(ReportLunData_struct), GFP_KERNEL);
+ if (ld_buff == NULL)
+ goto mem_msg;
+
+ return_code = sendcmd_withirq(h, CISS_REPORT_LOG, ld_buff,
+ sizeof(ReportLunData_struct),
+ 0, CTLR_LUNID, TYPE_CMD);
+
+ if (return_code == IO_OK)
+ listlength = be32_to_cpu(*(__be32 *) ld_buff->LUNListLength);
+ else { /* reading number of logical volumes failed */
+ dev_warn(&h->pdev->dev,
+ "report logical volume command failed\n");
+ listlength = 0;
+ goto freeret;
+ }
+
+ num_luns = listlength / 8; /* 8 bytes per entry */
+ if (num_luns > CISS_MAX_LUN) {
+ num_luns = CISS_MAX_LUN;
+ dev_warn(&h->pdev->dev, "more luns configured"
+ " on controller than can be handled by"
+ " this driver.\n");
+ }
+
+ if (num_luns == 0)
+ cciss_add_controller_node(h);
+
+ /* Compare controller drive array to driver's drive array
+ * to see if any drives are missing on the controller due
+ * to action of Array Config Utility (user deletes drive)
+ * and deregister logical drives which have disappeared.
+ */
+ for (i = 0; i <= h->highest_lun; i++) {
+ int j;
+ drv_found = 0;
+
+ /* skip holes in the array from already deleted drives */
+ if (h->drv[i] == NULL)
+ continue;
+
+ for (j = 0; j < num_luns; j++) {
+ memcpy(lunid, &ld_buff->LUN[j][0], sizeof(lunid));
+ if (memcmp(h->drv[i]->LunID, lunid,
+ sizeof(lunid)) == 0) {
+ drv_found = 1;
+ break;
+ }
+ }
+ if (!drv_found) {
+ /* Deregister it from the OS, it's gone. */
+ spin_lock_irqsave(&h->lock, flags);
+ h->drv[i]->busy_configuring = 1;
+ spin_unlock_irqrestore(&h->lock, flags);
+ return_code = deregister_disk(h, i, 1, via_ioctl);
+ if (h->drv[i] != NULL)
+ h->drv[i]->busy_configuring = 0;
+ }
+ }
+
+ /* Compare controller drive array to driver's drive array.
+ * Check for updates in the drive information and any new drives
+ * on the controller due to ACU adding logical drives, or changing
+ * a logical drive's size, etc. Reregister any new/changed drives
+ */
+ for (i = 0; i < num_luns; i++) {
+ int j;
+
+ drv_found = 0;
+
+ memcpy(lunid, &ld_buff->LUN[i][0], sizeof(lunid));
+ /* Find if the LUN is already in the drive array
+ * of the driver. If so then update its info
+ * if not in use. If it does not exist then find
+ * the first free index and add it.
+ */
+ for (j = 0; j <= h->highest_lun; j++) {
+ if (h->drv[j] != NULL &&
+ memcmp(h->drv[j]->LunID, lunid,
+ sizeof(h->drv[j]->LunID)) == 0) {
+ drv_index = j;
+ drv_found = 1;
+ break;
+ }
+ }
+
+ /* check if the drive was found already in the array */
+ if (!drv_found) {
+ drv_index = cciss_add_gendisk(h, lunid, 0);
+ if (drv_index == -1)
+ goto freeret;
+ }
+ cciss_update_drive_info(h, drv_index, first_time, via_ioctl);
+ } /* end for */
+
+freeret:
+ kfree(ld_buff);
+ h->busy_configuring = 0;
+ /* We return -1 here to tell the ACU that we have registered/updated
+ * all of the drives that we can and to keep it from calling us
+ * additional times.
+ */
+ return -1;
+mem_msg:
+ dev_err(&h->pdev->dev, "out of memory\n");
+ h->busy_configuring = 0;
+ goto freeret;
+}
+
+static void cciss_clear_drive_info(drive_info_struct *drive_info)
+{
+ /* zero out the disk size info */
+ drive_info->nr_blocks = 0;
+ drive_info->block_size = 0;
+ drive_info->heads = 0;
+ drive_info->sectors = 0;
+ drive_info->cylinders = 0;
+ drive_info->raid_level = -1;
+ memset(drive_info->serial_no, 0, sizeof(drive_info->serial_no));
+ memset(drive_info->model, 0, sizeof(drive_info->model));
+ memset(drive_info->rev, 0, sizeof(drive_info->rev));
+ memset(drive_info->vendor, 0, sizeof(drive_info->vendor));
+ /*
+ * don't clear the LUNID though, we need to remember which
+ * one this one is.
+ */
+}
+
+/* This function will deregister the disk and its queue from the
+ * kernel. It must be called with the controller lock held and the
+ * drv structure's busy_configuring flag set. Its parameters are:
+ *
+ * disk = This is the disk to be deregistered
+ * drv = This is the drive_info_struct associated with the disk to be
+ * deregistered. It contains information about the disk used
+ * by the driver.
+ * clear_all = This flag determines whether or not the disk information
+ * is going to be completely cleared out and the highest_lun
+ * reset. Sometimes we want to clear out information about
+ * the disk in preparation for re-adding it. In this case
+ * the highest_lun should be left unchanged and the LunID
+ * should not be cleared.
+ * via_ioctl
+ * This indicates whether we've reached this path via ioctl.
+ * This affects the maximum usage count allowed for c0d0 to be messed with.
+ * If this path is reached via ioctl(), then the max_usage_count will
+ * be 1, as the process calling ioctl() has got to have the device open.
+ * If we get here via sysfs, then the max usage count will be zero.
+*/
+static int deregister_disk(ctlr_info_t *h, int drv_index,
+ int clear_all, int via_ioctl)
+{
+ int i;
+ struct gendisk *disk;
+ drive_info_struct *drv;
+ int recalculate_highest_lun;
+
+ if (!capable(CAP_SYS_RAWIO))
+ return -EPERM;
+
+ drv = h->drv[drv_index];
+ disk = h->gendisk[drv_index];
+
+ /* make sure logical volume is NOT in use */
+ if (clear_all || (h->gendisk[0] == disk)) {
+ if (drv->usage_count > via_ioctl)
+ return -EBUSY;
+ } else if (drv->usage_count > 0)
+ return -EBUSY;
+
+ recalculate_highest_lun = (drv == h->drv[h->highest_lun]);
+
+ /* invalidate the devices and deregister the disk. If it is disk
+ * zero, do not deregister it but just zero out its values. This
+ * allows us to delete disk zero but keep the controller registered.
+ */
+ if (h->gendisk[0] != disk) {
+ struct request_queue *q = disk->queue;
+ if (disk->flags & GENHD_FL_UP) {
+ cciss_destroy_ld_sysfs_entry(h, drv_index, 0);
+ del_gendisk(disk);
+ }
+ if (q)
+ blk_cleanup_queue(q);
+ /* If clear_all is set then we are deleting the logical
+ * drive, not just refreshing its info. For drives
+ * other than disk 0 we will call put_disk. We do not
+ * do this for disk 0 as we need it to be able to
+ * configure the controller.
+ */
+ if (clear_all) {
+ /* This isn't pretty, but we need to find the
+ * disk in our array and NULL out the pointer.
+ * This is so that we will call alloc_disk if
+ * this index is used again later.
+ */
+ for (i = 0; i < CISS_MAX_LUN; i++) {
+ if (h->gendisk[i] == disk) {
+ h->gendisk[i] = NULL;
+ break;
+ }
+ }
+ put_disk(disk);
+ }
+ } else {
+ set_capacity(disk, 0);
+ cciss_clear_drive_info(drv);
+ }
+
+ --h->num_luns;
+
+ /* if it was the last disk, find the new highest lun */
+ if (clear_all && recalculate_highest_lun) {
+ int newhighest = -1;
+ for (i = 0; i <= h->highest_lun; i++) {
+ /* if the disk has size > 0, it is available */
+ if (h->drv[i] && h->drv[i]->heads)
+ newhighest = i;
+ }
+ h->highest_lun = newhighest;
+ }
+ return 0;
+}
+
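+/*
+ * fill_cmd builds a command block for either a firmware command (TYPE_CMD)
+ * or a controller message (TYPE_MSG), and maps any data buffer as a single
+ * scatter-gather entry.
+ */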
+static int fill_cmd(ctlr_info_t *h, CommandList_struct *c, __u8 cmd, void *buff,
+ size_t size, __u8 page_code, unsigned char *scsi3addr,
+ int cmd_type)
+{
+ u64bit buff_dma_handle;
+ int status = IO_OK;
+
+ c->cmd_type = CMD_IOCTL_PEND;
+ c->Header.ReplyQueue = 0;
+ if (buff != NULL) {
+ c->Header.SGList = 1;
+ c->Header.SGTotal = 1;
+ } else {
+ c->Header.SGList = 0;
+ c->Header.SGTotal = 0;
+ }
+ c->Header.Tag.lower = c->busaddr;
+ memcpy(c->Header.LUN.LunAddrBytes, scsi3addr, 8);
+
+ c->Request.Type.Type = cmd_type;
+ if (cmd_type == TYPE_CMD) {
+ switch (cmd) {
+ case CISS_INQUIRY:
+ /* are we trying to read a vital product page */
+ if (page_code != 0) {
+ c->Request.CDB[1] = 0x01;
+ c->Request.CDB[2] = page_code;
+ }
+ c->Request.CDBLen = 6;
+ c->Request.Type.Attribute = ATTR_SIMPLE;
+ c->Request.Type.Direction = XFER_READ;
+ c->Request.Timeout = 0;
+ c->Request.CDB[0] = CISS_INQUIRY;
+ c->Request.CDB[4] = size & 0xFF;
+ break;
+ case CISS_REPORT_LOG:
+ case CISS_REPORT_PHYS:
+ /* Talking to the controller, so it's a physical command:
+ mode = 00, target = 0. Nothing to write.
+ */
+ c->Request.CDBLen = 12;
+ c->Request.Type.Attribute = ATTR_SIMPLE;
+ c->Request.Type.Direction = XFER_READ;
+ c->Request.Timeout = 0;
+ c->Request.CDB[0] = cmd;
+ c->Request.CDB[6] = (size >> 24) & 0xFF; /* MSB */
+ c->Request.CDB[7] = (size >> 16) & 0xFF;
+ c->Request.CDB[8] = (size >> 8) & 0xFF;
+ c->Request.CDB[9] = size & 0xFF;
+ break;
+
+ case CCISS_READ_CAPACITY:
+ c->Request.CDBLen = 10;
+ c->Request.Type.Attribute = ATTR_SIMPLE;
+ c->Request.Type.Direction = XFER_READ;
+ c->Request.Timeout = 0;
+ c->Request.CDB[0] = cmd;
+ break;
+ case CCISS_READ_CAPACITY_16:
+ c->Request.CDBLen = 16;
+ c->Request.Type.Attribute = ATTR_SIMPLE;
+ c->Request.Type.Direction = XFER_READ;
+ c->Request.Timeout = 0;
+ c->Request.CDB[0] = cmd;
+ c->Request.CDB[1] = 0x10;
+ c->Request.CDB[10] = (size >> 24) & 0xFF;
+ c->Request.CDB[11] = (size >> 16) & 0xFF;
+ c->Request.CDB[12] = (size >> 8) & 0xFF;
+ c->Request.CDB[13] = size & 0xFF;
+ break;
+ case CCISS_CACHE_FLUSH:
+ c->Request.CDBLen = 12;
+ c->Request.Type.Attribute = ATTR_SIMPLE;
+ c->Request.Type.Direction = XFER_WRITE;
+ c->Request.Timeout = 0;
+ c->Request.CDB[0] = BMIC_WRITE;
+ c->Request.CDB[6] = BMIC_CACHE_FLUSH;
+ break;
+ case TEST_UNIT_READY:
+ c->Request.CDBLen = 6;
+ c->Request.Type.Attribute = ATTR_SIMPLE;
+ c->Request.Type.Direction = XFER_NONE;
+ c->Request.Timeout = 0;
+ break;
+ default:
+ dev_warn(&h->pdev->dev, "Unknown Command 0x%02x\n", cmd);
+ return IO_ERROR;
+ }
+ } else if (cmd_type == TYPE_MSG) {
+ switch (cmd) {
+ case CCISS_ABORT_MSG:
+ c->Request.CDBLen = 12;
+ c->Request.Type.Attribute = ATTR_SIMPLE;
+ c->Request.Type.Direction = XFER_WRITE;
+ c->Request.Timeout = 0;
+ c->Request.CDB[0] = cmd; /* abort */
+ c->Request.CDB[1] = 0; /* abort a command */
+ /* buff contains the tag of the command to abort */
+ memcpy(&c->Request.CDB[4], buff, 8);
+ break;
+ case CCISS_RESET_MSG:
+ c->Request.CDBLen = 16;
+ c->Request.Type.Attribute = ATTR_SIMPLE;
+ c->Request.Type.Direction = XFER_NONE;
+ c->Request.Timeout = 0;
+ memset(&c->Request.CDB[0], 0, sizeof(c->Request.CDB));
+ c->Request.CDB[0] = cmd; /* reset */
+ c->Request.CDB[1] = CCISS_RESET_TYPE_TARGET;
+ break;
+ case CCISS_NOOP_MSG:
+ c->Request.CDBLen = 1;
+ c->Request.Type.Attribute = ATTR_SIMPLE;
+ c->Request.Type.Direction = XFER_WRITE;
+ c->Request.Timeout = 0;
+ c->Request.CDB[0] = cmd;
+ break;
+ default:
+ dev_warn(&h->pdev->dev,
+ "unknown message type %d\n", cmd);
+ return IO_ERROR;
+ }
+ } else {
+ dev_warn(&h->pdev->dev, "unknown command type %d\n", cmd_type);
+ return IO_ERROR;
+ }
+ /* Fill in the scatter gather information */
+ if (size > 0) {
+ buff_dma_handle.val = (__u64) pci_map_single(h->pdev,
+ buff, size,
+ PCI_DMA_BIDIRECTIONAL);
+ c->SG[0].Addr.lower = buff_dma_handle.val32.lower;
+ c->SG[0].Addr.upper = buff_dma_handle.val32.upper;
+ c->SG[0].Len = size;
+ c->SG[0].Ext = 0; /* we are not chaining */
+ }
+ return status;
+}
+
+static int __devinit cciss_send_reset(ctlr_info_t *h, unsigned char *scsi3addr,
+ u8 reset_type)
+{
+ CommandList_struct *c;
+ int return_status;
+
+ c = cmd_alloc(h);
+ if (!c)
+ return -ENOMEM;
+ return_status = fill_cmd(h, c, CCISS_RESET_MSG, NULL, 0, 0,
+ CTLR_LUNID, TYPE_MSG);
+ c->Request.CDB[1] = reset_type; /* fill_cmd defaults to target reset */
+ if (return_status != IO_OK) {
+ cmd_special_free(h, c);
+ return return_status;
+ }
+ c->waiting = NULL;
+ enqueue_cmd_and_start_io(h, c);
+ /* Don't wait for completion, the reset won't complete. Don't free
+ * the command either. This is the last command we will send before
+ * re-initializing everything, so it doesn't matter and won't leak.
+ */
+ return 0;
+}
+
+static int check_target_status(ctlr_info_t *h, CommandList_struct *c)
+{
+ switch (c->err_info->ScsiStatus) {
+ case SAM_STAT_GOOD:
+ return IO_OK;
+ case SAM_STAT_CHECK_CONDITION:
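+ /* The low nibble of SenseInfo[2] is the SCSI sense key. */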
+ switch (0xf & c->err_info->SenseInfo[2]) {
+ case 0: return IO_OK; /* no sense */
+ case 1: return IO_OK; /* recovered error */
+ default:
+ if (check_for_unit_attention(h, c))
+ return IO_NEEDS_RETRY;
+ dev_warn(&h->pdev->dev, "cmd 0x%02x "
+ "check condition, sense key = 0x%02x\n",
+ c->Request.CDB[0], c->err_info->SenseInfo[2]);
+ }
+ break;
+ default:
+ dev_warn(&h->pdev->dev, "cmd 0x%02x "
+ "scsi status = 0x%02x\n",
+ c->Request.CDB[0], c->err_info->ScsiStatus);
+ break;
+ }
+ return IO_ERROR;
+}
+
+static int process_sendcmd_error(ctlr_info_t *h, CommandList_struct *c)
+{
+ int return_status = IO_OK;
+
+ if (c->err_info->CommandStatus == CMD_SUCCESS)
+ return IO_OK;
+
+ switch (c->err_info->CommandStatus) {
+ case CMD_TARGET_STATUS:
+ return_status = check_target_status(h, c);
+ break;
+ case CMD_DATA_UNDERRUN:
+ case CMD_DATA_OVERRUN:
+ /* expected for inquiry and report lun commands */
+ break;
+ case CMD_INVALID:
+ dev_warn(&h->pdev->dev, "cmd 0x%02x is "
+ "reported invalid\n", c->Request.CDB[0]);
+ return_status = IO_ERROR;
+ break;
+ case CMD_PROTOCOL_ERR:
+ dev_warn(&h->pdev->dev, "cmd 0x%02x has "
+ "protocol error\n", c->Request.CDB[0]);
+ return_status = IO_ERROR;
+ break;
+ case CMD_HARDWARE_ERR:
+ dev_warn(&h->pdev->dev, "cmd 0x%02x had "
+ " hardware error\n", c->Request.CDB[0]);
+ return_status = IO_ERROR;
+ break;
+ case CMD_CONNECTION_LOST:
+ dev_warn(&h->pdev->dev, "cmd 0x%02x had "
+ "connection lost\n", c->Request.CDB[0]);
+ return_status = IO_ERROR;
+ break;
+ case CMD_ABORTED:
+ dev_warn(&h->pdev->dev, "cmd 0x%02x was "
+ "aborted\n", c->Request.CDB[0]);
+ return_status = IO_ERROR;
+ break;
+ case CMD_ABORT_FAILED:
+ dev_warn(&h->pdev->dev, "cmd 0x%02x reports "
+ "abort failed\n", c->Request.CDB[0]);
+ return_status = IO_ERROR;
+ break;
+ case CMD_UNSOLICITED_ABORT:
+ dev_warn(&h->pdev->dev, "unsolicited abort 0x%02x\n",
+ c->Request.CDB[0]);
+ return_status = IO_NEEDS_RETRY;
+ break;
+ case CMD_UNABORTABLE:
+ dev_warn(&h->pdev->dev, "cmd unabortable\n");
+ return_status = IO_ERROR;
+ break;
+ default:
+ dev_warn(&h->pdev->dev, "cmd 0x%02x returned "
+ "unknown status %x\n", c->Request.CDB[0],
+ c->err_info->CommandStatus);
+ return_status = IO_ERROR;
+ }
+ return return_status;
+}
+
+static int sendcmd_withirq_core(ctlr_info_t *h, CommandList_struct *c,
+ int attempt_retry)
+{
+ DECLARE_COMPLETION_ONSTACK(wait);
+ u64bit buff_dma_handle;
+ int return_status = IO_OK;
+
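+ /* Issue the command and sleep until its completion fires. On a
+ * retryable error, clear the stale error info and resend, up to
+ * MAX_CMD_RETRIES attempts.
+ */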
+resend_cmd2:
+ c->waiting = &wait;
+ enqueue_cmd_and_start_io(h, c);
+
+ wait_for_completion(&wait);
+
+ if (c->err_info->CommandStatus == 0 || !attempt_retry)
+ goto command_done;
+
+ return_status = process_sendcmd_error(h, c);
+
+ if (return_status == IO_NEEDS_RETRY &&
+ c->retry_count < MAX_CMD_RETRIES) {
+ dev_warn(&h->pdev->dev, "retrying 0x%02x\n",
+ c->Request.CDB[0]);
+ c->retry_count++;
+ /* erase the old error information */
+ memset(c->err_info, 0, sizeof(ErrorInfo_struct));
+ return_status = IO_OK;
+ INIT_COMPLETION(wait);
+ goto resend_cmd2;
+ }
+
+command_done:
+ /* unlock the buffers from DMA */
+ buff_dma_handle.val32.lower = c->SG[0].Addr.lower;
+ buff_dma_handle.val32.upper = c->SG[0].Addr.upper;
+ pci_unmap_single(h->pdev, (dma_addr_t) buff_dma_handle.val,
+ c->SG[0].Len, PCI_DMA_BIDIRECTIONAL);
+ return return_status;
+}
+
+static int sendcmd_withirq(ctlr_info_t *h, __u8 cmd, void *buff, size_t size,
+ __u8 page_code, unsigned char scsi3addr[],
+ int cmd_type)
+{
+ CommandList_struct *c;
+ int return_status;
+
+ c = cmd_special_alloc(h);
+ if (!c)
+ return -ENOMEM;
+ return_status = fill_cmd(h, c, cmd, buff, size, page_code,
+ scsi3addr, cmd_type);
+ if (return_status == IO_OK)
+ return_status = sendcmd_withirq_core(h, c, 1);
+
+ cmd_special_free(h, c);
+ return return_status;
+}
+
+static void cciss_geometry_inquiry(ctlr_info_t *h, int logvol,
+ sector_t total_size,
+ unsigned int block_size,
+ InquiryData_struct *inq_buff,
+ drive_info_struct *drv)
+{
+ int return_code;
+ unsigned long t;
+ unsigned char scsi3addr[8];
+
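+ /* Inquiry VPD page 0xC1 is the CISS vendor-specific page that reports
+ * the logical drive's geometry (heads, sectors, cylinders) and RAID level.
+ */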
+ memset(inq_buff, 0, sizeof(InquiryData_struct));
+ log_unit_to_scsi3addr(h, scsi3addr, logvol);
+ return_code = sendcmd_withirq(h, CISS_INQUIRY, inq_buff,
+ sizeof(*inq_buff), 0xC1, scsi3addr, TYPE_CMD);
+ if (return_code == IO_OK) {
+ if (inq_buff->data_byte[8] == 0xFF) {
+ dev_warn(&h->pdev->dev,
+ "reading geometry failed, volume "
+ "does not support reading geometry\n");
+ drv->heads = 255;
+ drv->sectors = 32; /* Sectors per track */
+ drv->cylinders = total_size + 1;
+ drv->raid_level = RAID_UNKNOWN;
+ } else {
+ drv->heads = inq_buff->data_byte[6];
+ drv->sectors = inq_buff->data_byte[7];
+ drv->cylinders = (inq_buff->data_byte[4] & 0xff) << 8;
+ drv->cylinders += inq_buff->data_byte[5];
+ drv->raid_level = inq_buff->data_byte[8];
+ }
+ drv->block_size = block_size;
+ drv->nr_blocks = total_size + 1;
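+	/* Recompute cylinders from the true capacity: sectors-per-cylinder is
+	 * heads * sectors, and we round up so the geometry covers every block.
+	 */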
+ t = drv->heads * drv->sectors;
+ if (t > 1) {
+ sector_t real_size = total_size + 1;
+ unsigned long rem = sector_div(real_size, t);
+ if (rem)
+ real_size++;
+ drv->cylinders = real_size;
+ }
+ } else { /* Get geometry failed */
+ dev_warn(&h->pdev->dev, "reading geometry failed\n");
+ }
+}
+
+static void
+cciss_read_capacity(ctlr_info_t *h, int logvol, sector_t *total_size,
+ unsigned int *block_size)
+{
+ ReadCapdata_struct *buf;
+ int return_code;
+ unsigned char scsi3addr[8];
+
+ buf = kzalloc(sizeof(ReadCapdata_struct), GFP_KERNEL);
+ if (!buf) {
+ dev_warn(&h->pdev->dev, "out of memory\n");
+ return;
+ }
+
+ log_unit_to_scsi3addr(h, scsi3addr, logvol);
+ return_code = sendcmd_withirq(h, CCISS_READ_CAPACITY, buf,
+ sizeof(ReadCapdata_struct), 0, scsi3addr, TYPE_CMD);
+ if (return_code == IO_OK) {
+ *total_size = be32_to_cpu(*(__be32 *) buf->total_size);
+ *block_size = be32_to_cpu(*(__be32 *) buf->block_size);
+ } else { /* read capacity command failed */
+ dev_warn(&h->pdev->dev, "read capacity failed\n");
+ *total_size = 0;
+ *block_size = BLOCK_SIZE;
+ }
+ kfree(buf);
+}
+
+static void cciss_read_capacity_16(ctlr_info_t *h, int logvol,
+ sector_t *total_size, unsigned int *block_size)
+{
+ ReadCapdata_struct_16 *buf;
+ int return_code;
+ unsigned char scsi3addr[8];
+
+ buf = kzalloc(sizeof(ReadCapdata_struct_16), GFP_KERNEL);
+ if (!buf) {
+ dev_warn(&h->pdev->dev, "out of memory\n");
+ return;
+ }
+
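+	/* READ CAPACITY(16) returns a 64-bit block count, needed for volumes
+	 * too large to describe with the 32-bit READ CAPACITY(10) data.
+	 */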
+ log_unit_to_scsi3addr(h, scsi3addr, logvol);
+ return_code = sendcmd_withirq(h, CCISS_READ_CAPACITY_16,
+ buf, sizeof(ReadCapdata_struct_16),
+ 0, scsi3addr, TYPE_CMD);
+ if (return_code == IO_OK) {
+ *total_size = be64_to_cpu(*(__be64 *) buf->total_size);
+ *block_size = be32_to_cpu(*(__be32 *) buf->block_size);
+ } else { /* read capacity command failed */
+ dev_warn(&h->pdev->dev, "read capacity failed\n");
+ *total_size = 0;
+ *block_size = BLOCK_SIZE;
+ }
+ dev_info(&h->pdev->dev, " blocks= %llu block_size= %d\n",
+ (unsigned long long)*total_size+1, *block_size);
+ kfree(buf);
+}
+
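+/* Re-read capacity and geometry for the logical volume backing @disk and
+ * propagate the new size and block size to the block layer. Returns 1 if
+ * the volume can no longer be found on the controller.
+ */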
+static int cciss_revalidate(struct gendisk *disk)
+{
+ ctlr_info_t *h = get_host(disk);
+ drive_info_struct *drv = get_drv(disk);
+ int logvol;
+ int FOUND = 0;
+ unsigned int block_size;
+ sector_t total_size;
+ InquiryData_struct *inq_buff = NULL;
+
+ for (logvol = 0; logvol <= h->highest_lun; logvol++) {
+ if (!h->drv[logvol])
+ continue;
+ if (memcmp(h->drv[logvol]->LunID, drv->LunID,
+ sizeof(drv->LunID)) == 0) {
+ FOUND = 1;
+ break;
+ }
+ }
+
+ if (!FOUND)
+ return 1;
+
+ inq_buff = kmalloc(sizeof(InquiryData_struct), GFP_KERNEL);
+ if (inq_buff == NULL) {
+ dev_warn(&h->pdev->dev, "out of memory\n");
+ return 1;
+ }
+ if (h->cciss_read == CCISS_READ_10) {
+ cciss_read_capacity(h, logvol,
+ &total_size, &block_size);
+ } else {
+ cciss_read_capacity_16(h, logvol,
+ &total_size, &block_size);
+ }
+ cciss_geometry_inquiry(h, logvol, total_size, block_size,
+ inq_buff, drv);
+
+ blk_queue_logical_block_size(drv->queue, drv->block_size);
+ set_capacity(disk, drv->nr_blocks);
+
+ kfree(inq_buff);
+ return 0;
+}
+
+/*
+ * Map (physical) PCI mem into (virtual) kernel space
+ */
+static void __iomem *remap_pci_mem(ulong base, ulong size)
+{
+ ulong page_base = ((ulong) base) & PAGE_MASK;
+ ulong page_offs = ((ulong) base) - page_base;
+ void __iomem *page_remapped = ioremap(page_base, page_offs + size);
+
+ return page_remapped ? (page_remapped + page_offs) : NULL;
+}
+
+/*
+ * Takes jobs off the request queue and sends them to the hardware, then
+ * puts them on the completion queue to wait for completion.
+ */
+static void start_io(ctlr_info_t *h)
+{
+ CommandList_struct *c;
+
+ while (!list_empty(&h->reqQ)) {
+ c = list_entry(h->reqQ.next, CommandList_struct, list);
+ /* can't do anything if fifo is full */
+ if ((h->access.fifo_full(h))) {
+ dev_warn(&h->pdev->dev, "fifo full\n");
+ break;
+ }
+
+ /* Get the first entry from the Request Q */
+ removeQ(c);
+ h->Qdepth--;
+
+		/* Tell the controller to execute the command */
+ h->access.submit_command(h, c);
+
+ /* Put job onto the completed Q */
+ addQ(&h->cmpQ, c);
+ }
+}
+
+/*
+ * Assumes that h->lock is held.
+ * Zeros out the error record and then resends the command
+ * to the controller.
+ */
+static inline void resend_cciss_cmd(ctlr_info_t *h, CommandList_struct *c)
+{
+ /* erase the old error information */
+ memset(c->err_info, 0, sizeof(ErrorInfo_struct));
+
+ /* add it to software queue and then send it to the controller */
+ addQ(&h->reqQ, c);
+ h->Qdepth++;
+ if (h->Qdepth > h->maxQsinceinit)
+ h->maxQsinceinit = h->Qdepth;
+
+ start_io(h);
+}
+
+static inline unsigned int make_status_bytes(unsigned int scsi_status_byte,
+ unsigned int msg_byte, unsigned int host_byte,
+ unsigned int driver_byte)
+{
+ /* inverse of macros in scsi.h */
+ return (scsi_status_byte & 0xff) |
+ ((msg_byte & 0xff) << 8) |
+ ((host_byte & 0xff) << 16) |
+ ((driver_byte & 0xff) << 24);
+}
+
+static inline int evaluate_target_status(ctlr_info_t *h,
+ CommandList_struct *cmd, int *retry_cmd)
+{
+ unsigned char sense_key;
+ unsigned char status_byte, msg_byte, host_byte, driver_byte;
+ int error_value;
+
+ *retry_cmd = 0;
+ /* If we get in here, it means we got "target status", that is, scsi status */
+ status_byte = cmd->err_info->ScsiStatus;
+ driver_byte = DRIVER_OK;
+ msg_byte = cmd->err_info->CommandStatus; /* correct? seems too device specific */
+
+ if (cmd->rq->cmd_type == REQ_TYPE_BLOCK_PC)
+ host_byte = DID_PASSTHROUGH;
+ else
+ host_byte = DID_OK;
+
+ error_value = make_status_bytes(status_byte, msg_byte,
+ host_byte, driver_byte);
+
+ if (cmd->err_info->ScsiStatus != SAM_STAT_CHECK_CONDITION) {
+ if (cmd->rq->cmd_type != REQ_TYPE_BLOCK_PC)
+ dev_warn(&h->pdev->dev, "cmd %p "
+ "has SCSI Status 0x%x\n",
+ cmd, cmd->err_info->ScsiStatus);
+ return error_value;
+ }
+
+ /* check the sense key */
+ sense_key = 0xf & cmd->err_info->SenseInfo[2];
+ /* no status or recovered error */
+ if (((sense_key == 0x0) || (sense_key == 0x1)) &&
+ (cmd->rq->cmd_type != REQ_TYPE_BLOCK_PC))
+ error_value = 0;
+
+ if (check_for_unit_attention(h, cmd)) {
+ *retry_cmd = !(cmd->rq->cmd_type == REQ_TYPE_BLOCK_PC);
+ return 0;
+ }
+
+ /* Not SG_IO or similar? */
+ if (cmd->rq->cmd_type != REQ_TYPE_BLOCK_PC) {
+ if (error_value != 0)
+ dev_warn(&h->pdev->dev, "cmd %p has CHECK CONDITION"
+ " sense key = 0x%x\n", cmd, sense_key);
+ return error_value;
+ }
+
+ /* SG_IO or similar, copy sense data back */
+ if (cmd->rq->sense) {
+ if (cmd->rq->sense_len > cmd->err_info->SenseLen)
+ cmd->rq->sense_len = cmd->err_info->SenseLen;
+ memcpy(cmd->rq->sense, cmd->err_info->SenseInfo,
+ cmd->rq->sense_len);
+ } else
+ cmd->rq->sense_len = 0;
+
+ return error_value;
+}
+
+/* Checks the status of the job and completes the request's buffers for the
+ * completed job. Note that this function does not need
+ * to hold the hba/queue lock.
+ */
+static inline void complete_command(ctlr_info_t *h, CommandList_struct *cmd,
+ int timeout)
+{
+ int retry_cmd = 0;
+ struct request *rq = cmd->rq;
+
+ rq->errors = 0;
+
+ if (timeout)
+ rq->errors = make_status_bytes(0, 0, 0, DRIVER_TIMEOUT);
+
+ if (cmd->err_info->CommandStatus == 0) /* no error has occurred */
+ goto after_error_processing;
+
+ switch (cmd->err_info->CommandStatus) {
+ case CMD_TARGET_STATUS:
+ rq->errors = evaluate_target_status(h, cmd, &retry_cmd);
+ break;
+ case CMD_DATA_UNDERRUN:
+ if (cmd->rq->cmd_type == REQ_TYPE_FS) {
+ dev_warn(&h->pdev->dev, "cmd %p has"
+ " completed with data underrun "
+ "reported\n", cmd);
+ cmd->rq->resid_len = cmd->err_info->ResidualCnt;
+ }
+ break;
+ case CMD_DATA_OVERRUN:
+ if (cmd->rq->cmd_type == REQ_TYPE_FS)
+ dev_warn(&h->pdev->dev, "cciss: cmd %p has"
+ " completed with data overrun "
+ "reported\n", cmd);
+ break;
+ case CMD_INVALID:
+ dev_warn(&h->pdev->dev, "cciss: cmd %p is "
+ "reported invalid\n", cmd);
+ rq->errors = make_status_bytes(SAM_STAT_GOOD,
+ cmd->err_info->CommandStatus, DRIVER_OK,
+ (cmd->rq->cmd_type == REQ_TYPE_BLOCK_PC) ?
+ DID_PASSTHROUGH : DID_ERROR);
+ break;
+ case CMD_PROTOCOL_ERR:
+ dev_warn(&h->pdev->dev, "cciss: cmd %p has "
+ "protocol error\n", cmd);
+ rq->errors = make_status_bytes(SAM_STAT_GOOD,
+ cmd->err_info->CommandStatus, DRIVER_OK,
+ (cmd->rq->cmd_type == REQ_TYPE_BLOCK_PC) ?
+ DID_PASSTHROUGH : DID_ERROR);
+ break;
+ case CMD_HARDWARE_ERR:
+ dev_warn(&h->pdev->dev, "cciss: cmd %p had "
+			"hardware error\n", cmd);
+ rq->errors = make_status_bytes(SAM_STAT_GOOD,
+ cmd->err_info->CommandStatus, DRIVER_OK,
+ (cmd->rq->cmd_type == REQ_TYPE_BLOCK_PC) ?
+ DID_PASSTHROUGH : DID_ERROR);
+ break;
+ case CMD_CONNECTION_LOST:
+ dev_warn(&h->pdev->dev, "cciss: cmd %p had "
+ "connection lost\n", cmd);
+ rq->errors = make_status_bytes(SAM_STAT_GOOD,
+ cmd->err_info->CommandStatus, DRIVER_OK,
+ (cmd->rq->cmd_type == REQ_TYPE_BLOCK_PC) ?
+ DID_PASSTHROUGH : DID_ERROR);
+ break;
+ case CMD_ABORTED:
+ dev_warn(&h->pdev->dev, "cciss: cmd %p was "
+ "aborted\n", cmd);
+ rq->errors = make_status_bytes(SAM_STAT_GOOD,
+ cmd->err_info->CommandStatus, DRIVER_OK,
+ (cmd->rq->cmd_type == REQ_TYPE_BLOCK_PC) ?
+ DID_PASSTHROUGH : DID_ABORT);
+ break;
+ case CMD_ABORT_FAILED:
+ dev_warn(&h->pdev->dev, "cciss: cmd %p reports "
+ "abort failed\n", cmd);
+ rq->errors = make_status_bytes(SAM_STAT_GOOD,
+ cmd->err_info->CommandStatus, DRIVER_OK,
+ (cmd->rq->cmd_type == REQ_TYPE_BLOCK_PC) ?
+ DID_PASSTHROUGH : DID_ERROR);
+ break;
+ case CMD_UNSOLICITED_ABORT:
+ dev_warn(&h->pdev->dev, "cciss%d: unsolicited "
+ "abort %p\n", h->ctlr, cmd);
+ if (cmd->retry_count < MAX_CMD_RETRIES) {
+ retry_cmd = 1;
+ dev_warn(&h->pdev->dev, "retrying %p\n", cmd);
+ cmd->retry_count++;
+ } else
+ dev_warn(&h->pdev->dev,
+ "%p retried too many times\n", cmd);
+ rq->errors = make_status_bytes(SAM_STAT_GOOD,
+ cmd->err_info->CommandStatus, DRIVER_OK,
+ (cmd->rq->cmd_type == REQ_TYPE_BLOCK_PC) ?
+ DID_PASSTHROUGH : DID_ABORT);
+ break;
+ case CMD_TIMEOUT:
+		dev_warn(&h->pdev->dev, "cmd %p timed out\n", cmd);
+ rq->errors = make_status_bytes(SAM_STAT_GOOD,
+ cmd->err_info->CommandStatus, DRIVER_OK,
+ (cmd->rq->cmd_type == REQ_TYPE_BLOCK_PC) ?
+ DID_PASSTHROUGH : DID_ERROR);
+ break;
+ case CMD_UNABORTABLE:
+ dev_warn(&h->pdev->dev, "cmd %p unabortable\n", cmd);
+ rq->errors = make_status_bytes(SAM_STAT_GOOD,
+ cmd->err_info->CommandStatus, DRIVER_OK,
+ cmd->rq->cmd_type == REQ_TYPE_BLOCK_PC ?
+ DID_PASSTHROUGH : DID_ERROR);
+ break;
+ default:
+ dev_warn(&h->pdev->dev, "cmd %p returned "
+ "unknown status %x\n", cmd,
+ cmd->err_info->CommandStatus);
+ rq->errors = make_status_bytes(SAM_STAT_GOOD,
+ cmd->err_info->CommandStatus, DRIVER_OK,
+ (cmd->rq->cmd_type == REQ_TYPE_BLOCK_PC) ?
+ DID_PASSTHROUGH : DID_ERROR);
+ }
+
+after_error_processing:
+
+ /* We need to return this command */
+ if (retry_cmd) {
+ resend_cciss_cmd(h, cmd);
+ return;
+ }
+ cmd->rq->completion_data = cmd;
+ blk_complete_request(cmd->rq);
+}
+
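+/* Command tag layout: bit 4 (DIRECT_LOOKUP_BIT) marks a tag whose command
+ * pool index is encoded in bits 5 and up; the bits below that are reserved
+ * for controller status/fetch-size encoding and are masked off on completion.
+ */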
+static inline u32 cciss_tag_contains_index(u32 tag)
+{
+#define DIRECT_LOOKUP_BIT 0x10
+ return tag & DIRECT_LOOKUP_BIT;
+}
+
+static inline u32 cciss_tag_to_index(u32 tag)
+{
+#define DIRECT_LOOKUP_SHIFT 5
+ return tag >> DIRECT_LOOKUP_SHIFT;
+}
+
+static inline u32 cciss_tag_discard_error_bits(ctlr_info_t *h, u32 tag)
+{
+#define CCISS_PERF_ERROR_BITS ((1 << DIRECT_LOOKUP_SHIFT) - 1)
+#define CCISS_SIMPLE_ERROR_BITS 0x03
+ if (likely(h->transMethod & CFGTBL_Trans_Performant))
+ return tag & ~CCISS_PERF_ERROR_BITS;
+ return tag & ~CCISS_SIMPLE_ERROR_BITS;
+}
+
+static inline void cciss_mark_tag_indexed(u32 *tag)
+{
+ *tag |= DIRECT_LOOKUP_BIT;
+}
+
+static inline void cciss_set_tag_index(u32 *tag, u32 index)
+{
+ *tag |= (index << DIRECT_LOOKUP_SHIFT);
+}
+
+/*
+ * Get a request and submit it to the controller.
+ */
+static void do_cciss_request(struct request_queue *q)
+{
+ ctlr_info_t *h = q->queuedata;
+ CommandList_struct *c;
+ sector_t start_blk;
+ int seg;
+ struct request *creq;
+ u64bit temp64;
+ struct scatterlist *tmp_sg;
+ SGDescriptor_struct *curr_sg;
+ drive_info_struct *drv;
+ int i, dir;
+ int sg_index = 0;
+ int chained = 0;
+
+ queue:
+ creq = blk_peek_request(q);
+ if (!creq)
+ goto startio;
+
+ BUG_ON(creq->nr_phys_segments > h->maxsgentries);
+
+ c = cmd_alloc(h);
+ if (!c)
+ goto full;
+
+ blk_start_request(creq);
+
+ tmp_sg = h->scatter_list[c->cmdindex];
+ spin_unlock_irq(q->queue_lock);
+
+ c->cmd_type = CMD_RWREQ;
+ c->rq = creq;
+
+ /* fill in the request */
+ drv = creq->rq_disk->private_data;
+ c->Header.ReplyQueue = 0; /* unused in simple mode */
+ /* got command from pool, so use the command block index instead */
+ /* for direct lookups. */
+ /* The first 2 bits are reserved for controller error reporting. */
+ cciss_set_tag_index(&c->Header.Tag.lower, c->cmdindex);
+ cciss_mark_tag_indexed(&c->Header.Tag.lower);
+ memcpy(&c->Header.LUN, drv->LunID, sizeof(drv->LunID));
+ c->Request.CDBLen = 10; /* 12 byte commands not in FW yet; */
+ c->Request.Type.Type = TYPE_CMD; /* It is a command. */
+ c->Request.Type.Attribute = ATTR_SIMPLE;
+ c->Request.Type.Direction =
+ (rq_data_dir(creq) == READ) ? XFER_READ : XFER_WRITE;
+ c->Request.Timeout = 0; /* Don't time out */
+ c->Request.CDB[0] =
+ (rq_data_dir(creq) == READ) ? h->cciss_read : h->cciss_write;
+ start_blk = blk_rq_pos(creq);
+ dev_dbg(&h->pdev->dev, "sector =%d nr_sectors=%d\n",
+ (int)blk_rq_pos(creq), (int)blk_rq_sectors(creq));
+ sg_init_table(tmp_sg, h->maxsgentries);
+ seg = blk_rq_map_sg(q, creq, tmp_sg);
+
+ /* get the DMA records for the setup */
+ if (c->Request.Type.Direction == XFER_READ)
+ dir = PCI_DMA_FROMDEVICE;
+ else
+ dir = PCI_DMA_TODEVICE;
+
+ curr_sg = c->SG;
+ sg_index = 0;
+ chained = 0;
+
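+	/* Build the SG descriptor list. If the request needs more entries
+	 * than fit in the command block, spill the remainder into the
+	 * per-command chain block and map it below.
+	 */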
+ for (i = 0; i < seg; i++) {
+ if (((sg_index+1) == (h->max_cmd_sgentries)) &&
+ !chained && ((seg - i) > 1)) {
+ /* Point to next chain block. */
+ curr_sg = h->cmd_sg_list[c->cmdindex];
+ sg_index = 0;
+ chained = 1;
+ }
+ curr_sg[sg_index].Len = tmp_sg[i].length;
+ temp64.val = (__u64) pci_map_page(h->pdev, sg_page(&tmp_sg[i]),
+ tmp_sg[i].offset,
+ tmp_sg[i].length, dir);
+ curr_sg[sg_index].Addr.lower = temp64.val32.lower;
+ curr_sg[sg_index].Addr.upper = temp64.val32.upper;
+ curr_sg[sg_index].Ext = 0; /* we are not chaining */
+ ++sg_index;
+ }
+ if (chained)
+ cciss_map_sg_chain_block(h, c, h->cmd_sg_list[c->cmdindex],
+ (seg - (h->max_cmd_sgentries - 1)) *
+ sizeof(SGDescriptor_struct));
+
+ /* track how many SG entries we are using */
+ if (seg > h->maxSG)
+ h->maxSG = seg;
+
+ dev_dbg(&h->pdev->dev, "Submitting %u sectors in %d segments "
+ "chained[%d]\n",
+ blk_rq_sectors(creq), seg, chained);
+
+ c->Header.SGTotal = seg + chained;
+ if (seg <= h->max_cmd_sgentries)
+ c->Header.SGList = c->Header.SGTotal;
+ else
+ c->Header.SGList = h->max_cmd_sgentries;
+ set_performant_mode(h, c);
+
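+	/* Build the CDB: a 10-byte READ/WRITE when the controller is using
+	 * 32-bit block addressing, otherwise the 16-byte form so the full
+	 * 64-bit LBA and 32-bit transfer length fit.
+	 */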
+ if (likely(creq->cmd_type == REQ_TYPE_FS)) {
+		if (h->cciss_read == CCISS_READ_10) {
+ c->Request.CDB[1] = 0;
+ c->Request.CDB[2] = (start_blk >> 24) & 0xff; /* MSB */
+ c->Request.CDB[3] = (start_blk >> 16) & 0xff;
+ c->Request.CDB[4] = (start_blk >> 8) & 0xff;
+ c->Request.CDB[5] = start_blk & 0xff;
+ c->Request.CDB[6] = 0; /* (sect >> 24) & 0xff; MSB */
+ c->Request.CDB[7] = (blk_rq_sectors(creq) >> 8) & 0xff;
+ c->Request.CDB[8] = blk_rq_sectors(creq) & 0xff;
+ c->Request.CDB[9] = c->Request.CDB[11] = c->Request.CDB[12] = 0;
+ } else {
+ u32 upper32 = upper_32_bits(start_blk);
+
+ c->Request.CDBLen = 16;
+			c->Request.CDB[1] = 0;
+			c->Request.CDB[2] = (upper32 >> 24) & 0xff; /* MSB */
+			c->Request.CDB[3] = (upper32 >> 16) & 0xff;
+			c->Request.CDB[4] = (upper32 >> 8) & 0xff;
+			c->Request.CDB[5] = upper32 & 0xff;
+			c->Request.CDB[6] = (start_blk >> 24) & 0xff;
+			c->Request.CDB[7] = (start_blk >> 16) & 0xff;
+			c->Request.CDB[8] = (start_blk >> 8) & 0xff;
+			c->Request.CDB[9] = start_blk & 0xff;
+			c->Request.CDB[10] = (blk_rq_sectors(creq) >> 24) & 0xff;
+			c->Request.CDB[11] = (blk_rq_sectors(creq) >> 16) & 0xff;
+			c->Request.CDB[12] = (blk_rq_sectors(creq) >> 8) & 0xff;
+			c->Request.CDB[13] = blk_rq_sectors(creq) & 0xff;
+ c->Request.CDB[14] = c->Request.CDB[15] = 0;
+ }
+ } else if (creq->cmd_type == REQ_TYPE_BLOCK_PC) {
+ c->Request.CDBLen = creq->cmd_len;
+ memcpy(c->Request.CDB, creq->cmd, BLK_MAX_CDB);
+ } else {
+ dev_warn(&h->pdev->dev, "bad request type %d\n",
+ creq->cmd_type);
+ BUG();
+ }
+
+ spin_lock_irq(q->queue_lock);
+
+ addQ(&h->reqQ, c);
+ h->Qdepth++;
+ if (h->Qdepth > h->maxQsinceinit)
+ h->maxQsinceinit = h->Qdepth;
+
+ goto queue;
+full:
+ blk_stop_queue(q);
+startio:
+	/* We already hold the driver lock here, so there is no need
+	 * to take it again.
+ */
+ start_io(h);
+}
+
+static inline unsigned long get_next_completion(ctlr_info_t *h)
+{
+ return h->access.command_completed(h);
+}
+
+static inline int interrupt_pending(ctlr_info_t *h)
+{
+ return h->access.intr_pending(h);
+}
+
+static inline long interrupt_not_for_us(ctlr_info_t *h)
+{
+ return ((h->access.intr_pending(h) == 0) ||
+ (h->interrupts_enabled == 0));
+}
+
+static inline int bad_tag(ctlr_info_t *h, u32 tag_index,
+ u32 raw_tag)
+{
+ if (unlikely(tag_index >= h->nr_cmds)) {
+ dev_warn(&h->pdev->dev, "bad tag 0x%08x ignored.\n", raw_tag);
+ return 1;
+ }
+ return 0;
+}
+
+static inline void finish_cmd(ctlr_info_t *h, CommandList_struct *c,
+ u32 raw_tag)
+{
+ removeQ(c);
+ if (likely(c->cmd_type == CMD_RWREQ))
+ complete_command(h, c, 0);
+ else if (c->cmd_type == CMD_IOCTL_PEND)
+ complete(c->waiting);
+#ifdef CONFIG_CISS_SCSI_TAPE
+ else if (c->cmd_type == CMD_SCSI)
+ complete_scsi_command(c, 0, raw_tag);
+#endif
+}
+
+static inline u32 next_command(ctlr_info_t *h)
+{
+ u32 a;
+
+ if (unlikely(!(h->transMethod & CFGTBL_Trans_Performant)))
+ return h->access.command_completed(h);
+
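+	/* The controller flips the low bit of each entry it writes on every
+	 * pass around the reply ring, so an entry whose low bit matches our
+	 * wraparound flag is a fresh completion; anything else means empty.
+	 */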
+ if ((*(h->reply_pool_head) & 1) == (h->reply_pool_wraparound)) {
+ a = *(h->reply_pool_head); /* Next cmd in ring buffer */
+ (h->reply_pool_head)++;
+ h->commands_outstanding--;
+ } else {
+ a = FIFO_EMPTY;
+ }
+ /* Check for wraparound */
+ if (h->reply_pool_head == (h->reply_pool + h->max_commands)) {
+ h->reply_pool_head = h->reply_pool;
+ h->reply_pool_wraparound ^= 1;
+ }
+ return a;
+}
+
+/* process completion of an indexed ("direct lookup") command */
+static inline u32 process_indexed_cmd(ctlr_info_t *h, u32 raw_tag)
+{
+ u32 tag_index;
+ CommandList_struct *c;
+
+ tag_index = cciss_tag_to_index(raw_tag);
+ if (bad_tag(h, tag_index, raw_tag))
+ return next_command(h);
+ c = h->cmd_pool + tag_index;
+ finish_cmd(h, c, raw_tag);
+ return next_command(h);
+}
+
+/* process completion of a non-indexed command */
+static inline u32 process_nonindexed_cmd(ctlr_info_t *h, u32 raw_tag)
+{
+ CommandList_struct *c = NULL;
+ __u32 busaddr_masked, tag_masked;
+
+ tag_masked = cciss_tag_discard_error_bits(h, raw_tag);
+ list_for_each_entry(c, &h->cmpQ, list) {
+ busaddr_masked = cciss_tag_discard_error_bits(h, c->busaddr);
+ if (busaddr_masked == tag_masked) {
+ finish_cmd(h, c, raw_tag);
+ return next_command(h);
+ }
+ }
+ bad_tag(h, h->nr_cmds + 1, raw_tag);
+ return next_command(h);
+}
+
+/* Some controllers, like the P400, will give us one interrupt
+ * after a soft reset, even if we turned interrupts off.
+ * Only need to check for this in the cciss_xxx_discard_completions
+ * functions.
+ */
+static int ignore_bogus_interrupt(ctlr_info_t *h)
+{
+ if (likely(!reset_devices))
+ return 0;
+
+ if (likely(h->interrupts_enabled))
+ return 0;
+
+ dev_info(&h->pdev->dev, "Received interrupt while interrupts disabled "
+ "(known firmware bug.) Ignoring.\n");
+
+ return 1;
+}
+
+static irqreturn_t cciss_intx_discard_completions(int irq, void *dev_id)
+{
+ ctlr_info_t *h = dev_id;
+ unsigned long flags;
+ u32 raw_tag;
+
+ if (ignore_bogus_interrupt(h))
+ return IRQ_NONE;
+
+ if (interrupt_not_for_us(h))
+ return IRQ_NONE;
+ spin_lock_irqsave(&h->lock, flags);
+ while (interrupt_pending(h)) {
+ raw_tag = get_next_completion(h);
+ while (raw_tag != FIFO_EMPTY)
+ raw_tag = next_command(h);
+ }
+ spin_unlock_irqrestore(&h->lock, flags);
+ return IRQ_HANDLED;
+}
+
+static irqreturn_t cciss_msix_discard_completions(int irq, void *dev_id)
+{
+ ctlr_info_t *h = dev_id;
+ unsigned long flags;
+ u32 raw_tag;
+
+ if (ignore_bogus_interrupt(h))
+ return IRQ_NONE;
+
+ spin_lock_irqsave(&h->lock, flags);
+ raw_tag = get_next_completion(h);
+ while (raw_tag != FIFO_EMPTY)
+ raw_tag = next_command(h);
+ spin_unlock_irqrestore(&h->lock, flags);
+ return IRQ_HANDLED;
+}
+
+static irqreturn_t do_cciss_intx(int irq, void *dev_id)
+{
+ ctlr_info_t *h = dev_id;
+ unsigned long flags;
+ u32 raw_tag;
+
+ if (interrupt_not_for_us(h))
+ return IRQ_NONE;
+ spin_lock_irqsave(&h->lock, flags);
+ while (interrupt_pending(h)) {
+ raw_tag = get_next_completion(h);
+ while (raw_tag != FIFO_EMPTY) {
+ if (cciss_tag_contains_index(raw_tag))
+ raw_tag = process_indexed_cmd(h, raw_tag);
+ else
+ raw_tag = process_nonindexed_cmd(h, raw_tag);
+ }
+ }
+ spin_unlock_irqrestore(&h->lock, flags);
+ return IRQ_HANDLED;
+}
+
+/* Add a second interrupt handler for MSI/MSI-X mode. In this mode we never
+ * check the interrupt pending register because it is not set.
+ */
+static irqreturn_t do_cciss_msix_intr(int irq, void *dev_id)
+{
+ ctlr_info_t *h = dev_id;
+ unsigned long flags;
+ u32 raw_tag;
+
+ spin_lock_irqsave(&h->lock, flags);
+ raw_tag = get_next_completion(h);
+ while (raw_tag != FIFO_EMPTY) {
+ if (cciss_tag_contains_index(raw_tag))
+ raw_tag = process_indexed_cmd(h, raw_tag);
+ else
+ raw_tag = process_nonindexed_cmd(h, raw_tag);
+ }
+ spin_unlock_irqrestore(&h->lock, flags);
+ return IRQ_HANDLED;
+}
+
+/**
+ * add_to_scan_list() - add controller to rescan queue
+ * @h: Pointer to the controller.
+ *
+ * Adds the controller to the rescan queue if not already on the queue.
+ *
+ * returns 1 if added to the queue, 0 if skipped (could be on the
+ * queue already, or the controller could be initializing or shutting
+ * down).
+ **/
+static int add_to_scan_list(struct ctlr_info *h)
+{
+ struct ctlr_info *test_h;
+ int found = 0;
+ int ret = 0;
+
+ if (h->busy_initializing)
+ return 0;
+
+ if (!mutex_trylock(&h->busy_shutting_down))
+ return 0;
+
+ mutex_lock(&scan_mutex);
+ list_for_each_entry(test_h, &scan_q, scan_list) {
+ if (test_h == h) {
+ found = 1;
+ break;
+ }
+ }
+ if (!found && !h->busy_scanning) {
+ INIT_COMPLETION(h->scan_wait);
+ list_add_tail(&h->scan_list, &scan_q);
+ ret = 1;
+ }
+ mutex_unlock(&scan_mutex);
+ mutex_unlock(&h->busy_shutting_down);
+
+ return ret;
+}
+
+/**
+ * remove_from_scan_list() - remove controller from rescan queue
+ * @h: Pointer to the controller.
+ *
+ * Removes the controller from the rescan queue if present. Blocks if
+ * the controller is currently conducting a rescan. The controller
+ * can be in one of three states:
+ * 1. Doesn't need a scan
+ * 2. On the scan list, but not scanning yet (we remove it)
+ * 3. Busy scanning (and not on the list). In this case we want to wait for
+ * the scan to complete to make sure the scanning thread for this
+ * controller is completely idle.
+ **/
+static void remove_from_scan_list(struct ctlr_info *h)
+{
+ struct ctlr_info *test_h, *tmp_h;
+
+ mutex_lock(&scan_mutex);
+ list_for_each_entry_safe(test_h, tmp_h, &scan_q, scan_list) {
+ if (test_h == h) { /* state 2. */
+ list_del(&h->scan_list);
+ complete_all(&h->scan_wait);
+ mutex_unlock(&scan_mutex);
+ return;
+ }
+ }
+ if (h->busy_scanning) { /* state 3. */
+ mutex_unlock(&scan_mutex);
+ wait_for_completion(&h->scan_wait);
+ } else { /* state 1, nothing to do. */
+ mutex_unlock(&scan_mutex);
+ }
+}
+
+/**
+ * scan_thread() - kernel thread used to rescan controllers
+ * @data: Ignored.
+ *
+ * A kernel thread used to scan for drive topology changes on
+ * controllers. The thread processes only one controller at a time
+ * using a queue. Controllers are added to the queue using
+ * add_to_scan_list() and removed from the queue either after done
+ * processing or using remove_from_scan_list().
+ *
+ * returns 0.
+ **/
+static int scan_thread(void *data)
+{
+ struct ctlr_info *h;
+
+ while (1) {
+ set_current_state(TASK_INTERRUPTIBLE);
+ schedule();
+ if (kthread_should_stop())
+ break;
+
+ while (1) {
+ mutex_lock(&scan_mutex);
+ if (list_empty(&scan_q)) {
+ mutex_unlock(&scan_mutex);
+ break;
+ }
+
+ h = list_entry(scan_q.next,
+ struct ctlr_info,
+ scan_list);
+ list_del(&h->scan_list);
+ h->busy_scanning = 1;
+ mutex_unlock(&scan_mutex);
+
+ rebuild_lun_table(h, 0, 0);
+ complete_all(&h->scan_wait);
+ mutex_lock(&scan_mutex);
+ h->busy_scanning = 0;
+ mutex_unlock(&scan_mutex);
+ }
+ }
+
+ return 0;
+}
+
+static int check_for_unit_attention(ctlr_info_t *h, CommandList_struct *c)
+{
+ if (c->err_info->SenseInfo[2] != UNIT_ATTENTION)
+ return 0;
+
+ switch (c->err_info->SenseInfo[12]) {
+ case STATE_CHANGED:
+ dev_warn(&h->pdev->dev, "a state change "
+ "detected, command retried\n");
+ return 1;
+ break;
+ case LUN_FAILED:
+ dev_warn(&h->pdev->dev, "LUN failure "
+ "detected, action required\n");
+ return 1;
+ break;
+ case REPORT_LUNS_CHANGED:
+ dev_warn(&h->pdev->dev, "report LUN data changed\n");
+ /*
+ * Here, we could call add_to_scan_list and wake up the scan thread,
+ * except that it's quite likely that we will get more than one
+ * REPORT_LUNS_CHANGED condition in quick succession, which means
+ * that those which occur after the first one will likely happen
+ * *during* the scan_thread's rescan. And the rescan code is not
+ * robust enough to restart in the middle, undoing what it has already
+ * done, and it's not clear that it's even possible to do this, since
+ * part of what it does is notify the block layer, which starts
+		 * doing its own I/O to read partition tables and so on, and the
+		 * driver doesn't have visibility to know what might need undoing.
+		 * In any event, even where possible, it is horribly complicated to
+		 * get right, so we just don't do it for now.
+ *
+ * Note: this REPORT_LUNS_CHANGED condition only occurs on the MSA2012.
+ */
+ return 1;
+ break;
+ case POWER_OR_RESET:
+ dev_warn(&h->pdev->dev,
+ "a power on or device reset detected\n");
+ return 1;
+ break;
+ case UNIT_ATTENTION_CLEARED:
+ dev_warn(&h->pdev->dev,
+ "unit attention cleared by another initiator\n");
+ return 1;
+ break;
+ default:
+ dev_warn(&h->pdev->dev, "unknown unit attention detected\n");
+ return 1;
+ }
+}
+
+/*
+ * We cannot read the config table structure directly; for portability we
+ * must go through the readl/readb I/O accessors.
+ * This is for debug only.
+ */
+static void print_cfg_table(ctlr_info_t *h)
+{
+ int i;
+ char temp_name[17];
+ CfgTable_struct *tb = h->cfgtable;
+
+ dev_dbg(&h->pdev->dev, "Controller Configuration information\n");
+ dev_dbg(&h->pdev->dev, "------------------------------------\n");
+ for (i = 0; i < 4; i++)
+ temp_name[i] = readb(&(tb->Signature[i]));
+ temp_name[4] = '\0';
+ dev_dbg(&h->pdev->dev, " Signature = %s\n", temp_name);
+ dev_dbg(&h->pdev->dev, " Spec Number = %d\n",
+ readl(&(tb->SpecValence)));
+ dev_dbg(&h->pdev->dev, " Transport methods supported = 0x%x\n",
+ readl(&(tb->TransportSupport)));
+ dev_dbg(&h->pdev->dev, " Transport methods active = 0x%x\n",
+ readl(&(tb->TransportActive)));
+ dev_dbg(&h->pdev->dev, " Requested transport Method = 0x%x\n",
+ readl(&(tb->HostWrite.TransportRequest)));
+ dev_dbg(&h->pdev->dev, " Coalesce Interrupt Delay = 0x%x\n",
+ readl(&(tb->HostWrite.CoalIntDelay)));
+ dev_dbg(&h->pdev->dev, " Coalesce Interrupt Count = 0x%x\n",
+ readl(&(tb->HostWrite.CoalIntCount)));
+	dev_dbg(&h->pdev->dev, "   Max outstanding commands = %d\n",
+ readl(&(tb->CmdsOutMax)));
+ dev_dbg(&h->pdev->dev, " Bus Types = 0x%x\n",
+ readl(&(tb->BusTypes)));
+ for (i = 0; i < 16; i++)
+ temp_name[i] = readb(&(tb->ServerName[i]));
+ temp_name[16] = '\0';
+ dev_dbg(&h->pdev->dev, " Server Name = %s\n", temp_name);
+ dev_dbg(&h->pdev->dev, " Heartbeat Counter = 0x%x\n\n\n",
+ readl(&(tb->HeartBeat)));
+}
+
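+/* Translate a config-space BAR offset (relative to PCI_BASE_ADDRESS_0) into
+ * the matching pci_resource index, accounting for 64-bit memory BARs that
+ * occupy two 4-byte config registers.
+ */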
+static int find_PCI_BAR_index(struct pci_dev *pdev, unsigned long pci_bar_addr)
+{
+ int i, offset, mem_type, bar_type;
+ if (pci_bar_addr == PCI_BASE_ADDRESS_0) /* looking for BAR zero? */
+ return 0;
+ offset = 0;
+ for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
+ bar_type = pci_resource_flags(pdev, i) & PCI_BASE_ADDRESS_SPACE;
+ if (bar_type == PCI_BASE_ADDRESS_SPACE_IO)
+ offset += 4;
+ else {
+ mem_type = pci_resource_flags(pdev, i) &
+ PCI_BASE_ADDRESS_MEM_TYPE_MASK;
+ switch (mem_type) {
+ case PCI_BASE_ADDRESS_MEM_TYPE_32:
+ case PCI_BASE_ADDRESS_MEM_TYPE_1M:
+ offset += 4; /* 32 bit */
+ break;
+ case PCI_BASE_ADDRESS_MEM_TYPE_64:
+ offset += 8;
+ break;
+ default: /* reserved in PCI 2.2 */
+ dev_warn(&pdev->dev,
+ "Base address is invalid\n");
+ return -1;
+ break;
+ }
+ }
+ if (offset == pci_bar_addr - PCI_BASE_ADDRESS_0)
+ return i + 1;
+ }
+ return -1;
+}
+
+/* Fill in bucket_map[], given nsgs (the max number of
+ * scatter gather elements supported) and bucket[],
+ * which is an array of 8 integers. The bucket[] array
+ * contains 8 different DMA transfer sizes (in 16
+ * byte increments) which the controller uses to fetch
+ * commands. This function fills in bucket_map[], which
+ * maps a given number of scatter gather elements to one of
+ * the 8 DMA transfer sizes. The point of it is to allow the
+ * controller to only do as much DMA as needed to fetch the
+ * command, with the DMA transfer size encoded in the lower
+ * bits of the command address.
+ */
+static void calc_bucket_map(int bucket[], int num_buckets,
+ int nsgs, int *bucket_map)
+{
+ int i, j, b, size;
+
+ /* even a command with 0 SGs requires 4 blocks */
+#define MINIMUM_TRANSFER_BLOCKS 4
+#define NUM_BUCKETS 8
+ /* Note, bucket_map must have nsgs+1 entries. */
+ for (i = 0; i <= nsgs; i++) {
+ /* Compute size of a command with i SG entries */
+ size = i + MINIMUM_TRANSFER_BLOCKS;
+ b = num_buckets; /* Assume the biggest bucket */
+ /* Find the bucket that is just big enough */
+ for (j = 0; j < 8; j++) {
+ if (bucket[j] >= size) {
+ b = j;
+ break;
+ }
+ }
+ /* for a command with i SG entries, use bucket b. */
+ bucket_map[i] = b;
+ }
+}
+
+static void __devinit cciss_wait_for_mode_change_ack(ctlr_info_t *h)
+{
+ int i;
+
+	/* Under certain very rare conditions, this can take a while.
+ * (e.g.: hot replace a failed 144GB drive in a RAID 5 set right
+ * as we enter this code.) */
+ for (i = 0; i < MAX_CONFIG_WAIT; i++) {
+ if (!(readl(h->vaddr + SA5_DOORBELL) & CFGTBL_ChangeReq))
+ break;
+ usleep_range(10000, 20000);
+ }
+}
+
+static __devinit void cciss_enter_performant_mode(ctlr_info_t *h,
+ u32 use_short_tags)
+{
+ /* This is a bit complicated. There are 8 registers on
+	 * the controller which we write to in order to tell it the 8
+	 * different command sizes that may occur. It's a way of
+ * reducing the DMA done to fetch each command. Encoded into
+ * each command's tag are 3 bits which communicate to the controller
+ * which of the eight sizes that command fits within. The size of
+ * each command depends on how many scatter gather entries there are.
+ * Each SG entry requires 16 bytes. The eight registers are programmed
+ * with the number of 16-byte blocks a command of that size requires.
+ * The smallest command possible requires 5 such 16 byte blocks.
+ * the largest command possible requires MAXSGENTRIES + 4 16-byte
+ * blocks. Note, this only extends to the SG entries contained
+ * within the command block, and does not extend to chained blocks
+ * of SG elements. bft[] contains the eight values we write to
+ * the registers. They are not evenly distributed, but have more
+ * sizes for small commands, and fewer sizes for larger commands.
+ */
+ __u32 trans_offset;
+ int bft[8] = { 5, 6, 8, 10, 12, 20, 28, MAXSGENTRIES + 4};
+ /*
+ * 5 = 1 s/g entry or 4k
+ * 6 = 2 s/g entry or 8k
+ * 8 = 4 s/g entry or 16k
+ * 10 = 6 s/g entry or 24k
+ */
+ unsigned long register_value;
+ BUILD_BUG_ON(28 > MAXSGENTRIES + 4);
+
+ h->reply_pool_wraparound = 1; /* spec: init to 1 */
+
+ /* Controller spec: zero out this buffer. */
+ memset(h->reply_pool, 0, h->max_commands * sizeof(__u64));
+ h->reply_pool_head = h->reply_pool;
+
+ trans_offset = readl(&(h->cfgtable->TransMethodOffset));
+ calc_bucket_map(bft, ARRAY_SIZE(bft), h->maxsgentries,
+ h->blockFetchTable);
+ writel(bft[0], &h->transtable->BlockFetch0);
+ writel(bft[1], &h->transtable->BlockFetch1);
+ writel(bft[2], &h->transtable->BlockFetch2);
+ writel(bft[3], &h->transtable->BlockFetch3);
+ writel(bft[4], &h->transtable->BlockFetch4);
+ writel(bft[5], &h->transtable->BlockFetch5);
+ writel(bft[6], &h->transtable->BlockFetch6);
+ writel(bft[7], &h->transtable->BlockFetch7);
+
+ /* size of controller ring buffer */
+ writel(h->max_commands, &h->transtable->RepQSize);
+ writel(1, &h->transtable->RepQCount);
+ writel(0, &h->transtable->RepQCtrAddrLow32);
+ writel(0, &h->transtable->RepQCtrAddrHigh32);
+ writel(h->reply_pool_dhandle, &h->transtable->RepQAddr0Low32);
+ writel(0, &h->transtable->RepQAddr0High32);
+ writel(CFGTBL_Trans_Performant | use_short_tags,
+ &(h->cfgtable->HostWrite.TransportRequest));
+
+ writel(CFGTBL_ChangeReq, h->vaddr + SA5_DOORBELL);
+ cciss_wait_for_mode_change_ack(h);
+ register_value = readl(&(h->cfgtable->TransportActive));
+ if (!(register_value & CFGTBL_Trans_Performant))
+ dev_warn(&h->pdev->dev, "cciss: unable to get board into"
+ " performant mode\n");
+}
+
+static void __devinit cciss_put_controller_into_performant_mode(ctlr_info_t *h)
+{
+ __u32 trans_support;
+
+ dev_dbg(&h->pdev->dev, "Trying to put board into Performant mode\n");
+ /* Attempt to put controller into performant mode if supported */
+ /* Does board support performant mode? */
+ trans_support = readl(&(h->cfgtable->TransportSupport));
+ if (!(trans_support & PERFORMANT_MODE))
+ return;
+
+ dev_dbg(&h->pdev->dev, "Placing controller into performant mode\n");
+	/* Performant mode demands commands on a 32-byte boundary;
+	 * pci_alloc_consistent aligns on page boundaries already, so we
+	 * just need to check that the command size is divisible by 32.
+ */
+ if ((sizeof(CommandList_struct) % 32) != 0) {
+		dev_warn(&h->pdev->dev,
+			"command size[%d] not divisible by 32, no performant mode\n",
+			(int)sizeof(CommandList_struct));
+ return;
+ }
+
+ /* Performant mode ring buffer and supporting data structures */
+ h->reply_pool = (__u64 *)pci_alloc_consistent(
+ h->pdev, h->max_commands * sizeof(__u64),
+ &(h->reply_pool_dhandle));
+
+ /* Need a block fetch table for performant mode */
+ h->blockFetchTable = kmalloc(((h->maxsgentries+1) *
+ sizeof(__u32)), GFP_KERNEL);
+
+ if ((h->reply_pool == NULL) || (h->blockFetchTable == NULL))
+ goto clean_up;
+
+ cciss_enter_performant_mode(h,
+ trans_support & CFGTBL_Trans_use_short_tags);
+
+ /* Change the access methods to the performant access methods */
+ h->access = SA5_performant_access;
+ h->transMethod = CFGTBL_Trans_Performant;
+
+ return;
+clean_up:
+ kfree(h->blockFetchTable);
+ if (h->reply_pool)
+ pci_free_consistent(h->pdev,
+ h->max_commands * sizeof(__u64),
+ h->reply_pool,
+ h->reply_pool_dhandle);
+ return;
+
+} /* cciss_put_controller_into_performant_mode */
+
+/* If MSI/MSI-X is supported by the kernel we will try to enable it on
+ * controllers that are capable. If not, we use IO-APIC mode.
+ */
+
+static void __devinit cciss_interrupt_mode(ctlr_info_t *h)
+{
+#ifdef CONFIG_PCI_MSI
+ int err;
+ struct msix_entry cciss_msix_entries[4] = { {0, 0}, {0, 1},
+ {0, 2}, {0, 3}
+ };
+
+ /* Some boards advertise MSI but don't really support it */
+ if ((h->board_id == 0x40700E11) || (h->board_id == 0x40800E11) ||
+ (h->board_id == 0x40820E11) || (h->board_id == 0x40830E11))
+ goto default_int_mode;
+
+ if (pci_find_capability(h->pdev, PCI_CAP_ID_MSIX)) {
+ err = pci_enable_msix(h->pdev, cciss_msix_entries, 4);
+ if (!err) {
+ h->intr[0] = cciss_msix_entries[0].vector;
+ h->intr[1] = cciss_msix_entries[1].vector;
+ h->intr[2] = cciss_msix_entries[2].vector;
+ h->intr[3] = cciss_msix_entries[3].vector;
+ h->msix_vector = 1;
+ return;
+ }
+ if (err > 0) {
+ dev_warn(&h->pdev->dev,
+ "only %d MSI-X vectors available\n", err);
+ goto default_int_mode;
+ } else {
+ dev_warn(&h->pdev->dev,
+ "MSI-X init failed %d\n", err);
+ goto default_int_mode;
+ }
+ }
+ if (pci_find_capability(h->pdev, PCI_CAP_ID_MSI)) {
+ if (!pci_enable_msi(h->pdev))
+ h->msi_vector = 1;
+ else
+ dev_warn(&h->pdev->dev, "MSI init failed\n");
+ }
+default_int_mode:
+#endif /* CONFIG_PCI_MSI */
+ /* if we get here we're going to use the default interrupt mode */
+ h->intr[PERF_MODE_INT] = h->pdev->irq;
+ return;
+}
+
+static int __devinit cciss_lookup_board_id(struct pci_dev *pdev, u32 *board_id)
+{
+ int i;
+ u32 subsystem_vendor_id, subsystem_device_id;
+
+ subsystem_vendor_id = pdev->subsystem_vendor;
+ subsystem_device_id = pdev->subsystem_device;
+ *board_id = ((subsystem_device_id << 16) & 0xffff0000) |
+ subsystem_vendor_id;
+
+ for (i = 0; i < ARRAY_SIZE(products); i++)
+ if (*board_id == products[i].board_id)
+ return i;
+ dev_warn(&pdev->dev, "unrecognized board ID: 0x%08x, ignoring.\n",
+ *board_id);
+ return -ENODEV;
+}
+
+static inline bool cciss_board_disabled(ctlr_info_t *h)
+{
+ u16 command;
+
+ (void) pci_read_config_word(h->pdev, PCI_COMMAND, &command);
+ return ((command & PCI_COMMAND_MEMORY) == 0);
+}
+
+static int __devinit cciss_pci_find_memory_BAR(struct pci_dev *pdev,
+ unsigned long *memory_bar)
+{
+ int i;
+
+ for (i = 0; i < DEVICE_COUNT_RESOURCE; i++)
+ if (pci_resource_flags(pdev, i) & IORESOURCE_MEM) {
+ /* addressing mode bits already removed */
+ *memory_bar = pci_resource_start(pdev, i);
+ dev_dbg(&pdev->dev, "memory BAR = %lx\n",
+ *memory_bar);
+ return 0;
+ }
+ dev_warn(&pdev->dev, "no memory BAR found\n");
+ return -ENODEV;
+}
+
+static int __devinit cciss_wait_for_board_state(struct pci_dev *pdev,
+ void __iomem *vaddr, int wait_for_ready)
+#define BOARD_READY 1
+#define BOARD_NOT_READY 0
+{
+ int i, iterations;
+ u32 scratchpad;
+
+ if (wait_for_ready)
+ iterations = CCISS_BOARD_READY_ITERATIONS;
+ else
+ iterations = CCISS_BOARD_NOT_READY_ITERATIONS;
+
+ for (i = 0; i < iterations; i++) {
+ scratchpad = readl(vaddr + SA5_SCRATCHPAD_OFFSET);
+ if (wait_for_ready) {
+ if (scratchpad == CCISS_FIRMWARE_READY)
+ return 0;
+ } else {
+ if (scratchpad != CCISS_FIRMWARE_READY)
+ return 0;
+ }
+ msleep(CCISS_BOARD_READY_POLL_INTERVAL_MSECS);
+ }
+ dev_warn(&pdev->dev, "board not ready, timed out.\n");
+ return -ENODEV;
+}
+
+static int __devinit cciss_find_cfg_addrs(struct pci_dev *pdev,
+ void __iomem *vaddr, u32 *cfg_base_addr, u64 *cfg_base_addr_index,
+ u64 *cfg_offset)
+{
+ *cfg_base_addr = readl(vaddr + SA5_CTCFG_OFFSET);
+ *cfg_offset = readl(vaddr + SA5_CTMEM_OFFSET);
+ *cfg_base_addr &= (u32) 0x0000ffff;
+ *cfg_base_addr_index = find_PCI_BAR_index(pdev, *cfg_base_addr);
+ if (*cfg_base_addr_index == -1) {
+ dev_warn(&pdev->dev, "cannot find cfg_base_addr_index, "
+ "*cfg_base_addr = 0x%08x\n", *cfg_base_addr);
+ return -ENODEV;
+ }
+ return 0;
+}
+
+static int __devinit cciss_find_cfgtables(ctlr_info_t *h)
+{
+ u64 cfg_offset;
+ u32 cfg_base_addr;
+ u64 cfg_base_addr_index;
+ u32 trans_offset;
+ int rc;
+
+ rc = cciss_find_cfg_addrs(h->pdev, h->vaddr, &cfg_base_addr,
+ &cfg_base_addr_index, &cfg_offset);
+ if (rc)
+ return rc;
+ h->cfgtable = remap_pci_mem(pci_resource_start(h->pdev,
+ cfg_base_addr_index) + cfg_offset, sizeof(h->cfgtable));
+ if (!h->cfgtable)
+ return -ENOMEM;
+ rc = write_driver_ver_to_cfgtable(h->cfgtable);
+ if (rc)
+ return rc;
+ /* Find performant mode table. */
+ trans_offset = readl(&h->cfgtable->TransMethodOffset);
+ h->transtable = remap_pci_mem(pci_resource_start(h->pdev,
+ cfg_base_addr_index)+cfg_offset+trans_offset,
+ sizeof(*h->transtable));
+ if (!h->transtable)
+ return -ENOMEM;
+ return 0;
+}
+
+static void __devinit cciss_get_max_perf_mode_cmds(struct ctlr_info *h)
+{
+ h->max_commands = readl(&(h->cfgtable->MaxPerformantModeCommands));
+
+ /* Limit commands in memory limited kdump scenario. */
+ if (reset_devices && h->max_commands > 32)
+ h->max_commands = 32;
+
+ if (h->max_commands < 16) {
+ dev_warn(&h->pdev->dev, "Controller reports "
+ "max supported commands of %d, an obvious lie. "
+ "Using 16. Ensure that firmware is up to date.\n",
+ h->max_commands);
+ h->max_commands = 16;
+ }
+}
+
+/* Interrogate the hardware for some limits:
+ * max commands, max SG elements without chaining, and with chaining,
+ * SG chain block size, etc.
+ */
+static void __devinit cciss_find_board_params(ctlr_info_t *h)
+{
+ cciss_get_max_perf_mode_cmds(h);
+ h->nr_cmds = h->max_commands - 4 - cciss_tape_cmds;
+ h->maxsgentries = readl(&(h->cfgtable->MaxSGElements));
+ /*
+	 * Limit in-command s/g elements to 32 to save DMA-able memory.
+	 * However, the spec says if 0, use 31.
+ */
+ h->max_cmd_sgentries = 31;
+ if (h->maxsgentries > 512) {
+ h->max_cmd_sgentries = 32;
+ h->chainsize = h->maxsgentries - h->max_cmd_sgentries + 1;
+ h->maxsgentries--; /* save one for chain pointer */
+ } else {
+ h->maxsgentries = 31; /* default to traditional values */
+ h->chainsize = 0;
+ }
+}
+
+static inline bool CISS_signature_present(ctlr_info_t *h)
+{
+ if ((readb(&h->cfgtable->Signature[0]) != 'C') ||
+ (readb(&h->cfgtable->Signature[1]) != 'I') ||
+ (readb(&h->cfgtable->Signature[2]) != 'S') ||
+ (readb(&h->cfgtable->Signature[3]) != 'S')) {
+ dev_warn(&h->pdev->dev, "not a valid CISS config table\n");
+ return false;
+ }
+ return true;
+}
+
+/* Need to enable prefetch in the SCSI core for 6400 in x86 */
+static inline void cciss_enable_scsi_prefetch(ctlr_info_t *h)
+{
+#ifdef CONFIG_X86
+ u32 prefetch;
+
+ prefetch = readl(&(h->cfgtable->SCSI_Prefetch));
+ prefetch |= 0x100;
+ writel(prefetch, &(h->cfgtable->SCSI_Prefetch));
+#endif
+}
+
+/* Disable DMA prefetch for the P600. Otherwise an ASIC bug may result
+ * in a prefetch beyond physical memory.
+ */
+static inline void cciss_p600_dma_prefetch_quirk(ctlr_info_t *h)
+{
+ u32 dma_prefetch;
+ __u32 dma_refetch;
+
+ if (h->board_id != 0x3225103C)
+ return;
+ dma_prefetch = readl(h->vaddr + I2O_DMA1_CFG);
+ dma_prefetch |= 0x8000;
+ writel(dma_prefetch, h->vaddr + I2O_DMA1_CFG);
+ pci_read_config_dword(h->pdev, PCI_COMMAND_PARITY, &dma_refetch);
+ dma_refetch |= 0x1;
+ pci_write_config_dword(h->pdev, PCI_COMMAND_PARITY, dma_refetch);
+}
+
+static int __devinit cciss_pci_init(ctlr_info_t *h)
+{
+ int prod_index, err;
+
+ prod_index = cciss_lookup_board_id(h->pdev, &h->board_id);
+ if (prod_index < 0)
+ return -ENODEV;
+ h->product_name = products[prod_index].product_name;
+ h->access = *(products[prod_index].access);
+
+ if (cciss_board_disabled(h)) {
+ dev_warn(&h->pdev->dev, "controller appears to be disabled\n");
+ return -ENODEV;
+ }
+ err = pci_enable_device(h->pdev);
+ if (err) {
+ dev_warn(&h->pdev->dev, "Unable to Enable PCI device\n");
+ return err;
+ }
+
+ err = pci_request_regions(h->pdev, "cciss");
+ if (err) {
+ dev_warn(&h->pdev->dev,
+ "Cannot obtain PCI resources, aborting\n");
+ return err;
+ }
+
+ dev_dbg(&h->pdev->dev, "irq = %x\n", h->pdev->irq);
+ dev_dbg(&h->pdev->dev, "board_id = %x\n", h->board_id);
+
+/* If the kernel supports MSI/MSI-X we will try to enable that functionality,
+ * else we use the IO-APIC interrupt assigned to us by system ROM.
+ */
+ cciss_interrupt_mode(h);
+ err = cciss_pci_find_memory_BAR(h->pdev, &h->paddr);
+ if (err)
+ goto err_out_free_res;
+ h->vaddr = remap_pci_mem(h->paddr, 0x250);
+ if (!h->vaddr) {
+ err = -ENOMEM;
+ goto err_out_free_res;
+ }
+ err = cciss_wait_for_board_state(h->pdev, h->vaddr, BOARD_READY);
+ if (err)
+ goto err_out_free_res;
+ err = cciss_find_cfgtables(h);
+ if (err)
+ goto err_out_free_res;
+ print_cfg_table(h);
+ cciss_find_board_params(h);
+
+ if (!CISS_signature_present(h)) {
+ err = -ENODEV;
+ goto err_out_free_res;
+ }
+ cciss_enable_scsi_prefetch(h);
+ cciss_p600_dma_prefetch_quirk(h);
+ cciss_put_controller_into_performant_mode(h);
+ return 0;
+
+err_out_free_res:
+ /*
+ * Deliberately omit pci_disable_device(): it does something nasty to
+ * Smart Array controllers that pci_enable_device does not undo
+ */
+ if (h->transtable)
+ iounmap(h->transtable);
+ if (h->cfgtable)
+ iounmap(h->cfgtable);
+ if (h->vaddr)
+ iounmap(h->vaddr);
+ pci_release_regions(h->pdev);
+ return err;
+}
+
+/* Function to find the first free pointer into our hba[] array
+ * Returns -1 if no free entries are left.
+ */
+static int alloc_cciss_hba(struct pci_dev *pdev)
+{
+ int i;
+
+ for (i = 0; i < MAX_CTLR; i++) {
+ if (!hba[i]) {
+ ctlr_info_t *h;
+
+ h = kzalloc(sizeof(ctlr_info_t), GFP_KERNEL);
+ if (!h)
+ goto Enomem;
+ hba[i] = h;
+ return i;
+ }
+ }
+ dev_warn(&pdev->dev, "This driver supports a maximum"
+ " of %d controllers.\n", MAX_CTLR);
+ return -1;
+Enomem:
+ dev_warn(&pdev->dev, "out of memory.\n");
+ return -1;
+}
+
+static void free_hba(ctlr_info_t *h)
+{
+ int i;
+
+ hba[h->ctlr] = NULL;
+ for (i = 0; i < h->highest_lun + 1; i++)
+ if (h->gendisk[i] != NULL)
+ put_disk(h->gendisk[i]);
+ kfree(h);
+}
+
+/* Send a message CDB to the firmware. */
+static __devinit int cciss_message(struct pci_dev *pdev, unsigned char opcode, unsigned char type)
+{
+ typedef struct {
+ CommandListHeader_struct CommandHeader;
+ RequestBlock_struct Request;
+ ErrDescriptor_struct ErrorDescriptor;
+ } Command;
+ static const size_t cmd_sz = sizeof(Command) + sizeof(ErrorInfo_struct);
+ Command *cmd;
+ dma_addr_t paddr64;
+ uint32_t paddr32, tag;
+ void __iomem *vaddr;
+ int i, err;
+
+ vaddr = ioremap_nocache(pci_resource_start(pdev, 0), pci_resource_len(pdev, 0));
+ if (vaddr == NULL)
+ return -ENOMEM;
+
+ /* The Inbound Post Queue only accepts 32-bit physical addresses for the
+ CCISS commands, so they must be allocated from the lower 4GiB of
+ memory. */
+ err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
+ if (err) {
+ iounmap(vaddr);
+ return -ENOMEM;
+ }
+
+ cmd = pci_alloc_consistent(pdev, cmd_sz, &paddr64);
+ if (cmd == NULL) {
+ iounmap(vaddr);
+ return -ENOMEM;
+ }
+
+ /* This must fit, because of the 32-bit consistent DMA mask. Also,
+ although there's no guarantee, we assume that the address is at
+ least 4-byte aligned (most likely, it's page-aligned). */
+ paddr32 = paddr64;
+
+ cmd->CommandHeader.ReplyQueue = 0;
+ cmd->CommandHeader.SGList = 0;
+ cmd->CommandHeader.SGTotal = 0;
+ cmd->CommandHeader.Tag.lower = paddr32;
+ cmd->CommandHeader.Tag.upper = 0;
+ memset(&cmd->CommandHeader.LUN.LunAddrBytes, 0, 8);
+
+ cmd->Request.CDBLen = 16;
+ cmd->Request.Type.Type = TYPE_MSG;
+ cmd->Request.Type.Attribute = ATTR_HEADOFQUEUE;
+ cmd->Request.Type.Direction = XFER_NONE;
+ cmd->Request.Timeout = 0; /* Don't time out */
+ cmd->Request.CDB[0] = opcode;
+ cmd->Request.CDB[1] = type;
+ memset(&cmd->Request.CDB[2], 0, 14); /* the rest of the CDB is reserved */
+
+ cmd->ErrorDescriptor.Addr.lower = paddr32 + sizeof(Command);
+ cmd->ErrorDescriptor.Addr.upper = 0;
+ cmd->ErrorDescriptor.Len = sizeof(ErrorInfo_struct);
+
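+	/* Post the command's physical address to the inbound queue, then poll
+	 * the outbound reply port for our tag; the controller sets bit 1 of
+	 * the returned tag if the command failed.
+	 */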
+ writel(paddr32, vaddr + SA5_REQUEST_PORT_OFFSET);
+
+ for (i = 0; i < 10; i++) {
+ tag = readl(vaddr + SA5_REPLY_PORT_OFFSET);
+ if ((tag & ~3) == paddr32)
+ break;
+ msleep(CCISS_POST_RESET_NOOP_TIMEOUT_MSECS);
+ }
+
+ iounmap(vaddr);
+
+ /* we leak the DMA buffer here ... no choice since the controller could
+ still complete the command. */
+ if (i == 10) {
+ dev_err(&pdev->dev,
+ "controller message %02x:%02x timed out\n",
+ opcode, type);
+ return -ETIMEDOUT;
+ }
+
+ pci_free_consistent(pdev, cmd_sz, cmd, paddr64);
+
+ if (tag & 2) {
+ dev_err(&pdev->dev, "controller message %02x:%02x failed\n",
+ opcode, type);
+ return -EIO;
+ }
+
+ dev_info(&pdev->dev, "controller message %02x:%02x succeeded\n",
+ opcode, type);
+ return 0;
+}
+
+#define cciss_noop(p) cciss_message(p, 3, 0)
+
+static int cciss_controller_hard_reset(struct pci_dev *pdev,
+ void * __iomem vaddr, u32 use_doorbell)
+{
+ u16 pmcsr;
+ int pos;
+
+ if (use_doorbell) {
+ /* For everything after the P600, the PCI power state method
+ * of resetting the controller doesn't work, so we have this
+ * other way using the doorbell register.
+ */
+ dev_info(&pdev->dev, "using doorbell to reset controller\n");
+ writel(use_doorbell, vaddr + SA5_DOORBELL);
+ } else { /* Try to do it the PCI power state way */
+
+ /* Quoting from the Open CISS Specification: "The Power
+ * Management Control/Status Register (CSR) controls the power
+ * state of the device. The normal operating state is D0,
+ * CSR=00h. The software off state is D3, CSR=03h. To reset
+ * the controller, place the interface device in D3 then to D0,
+ * this causes a secondary PCI reset which will reset the
+ * controller." */
+
+ pos = pci_find_capability(pdev, PCI_CAP_ID_PM);
+ if (pos == 0) {
+ dev_err(&pdev->dev,
+ "cciss_controller_hard_reset: "
+ "PCI PM not supported\n");
+ return -ENODEV;
+ }
+ dev_info(&pdev->dev, "using PCI PM to reset controller\n");
+ /* enter the D3hot power management state */
+ pci_read_config_word(pdev, pos + PCI_PM_CTRL, &pmcsr);
+ pmcsr &= ~PCI_PM_CTRL_STATE_MASK;
+ pmcsr |= PCI_D3hot;
+ pci_write_config_word(pdev, pos + PCI_PM_CTRL, pmcsr);
+
+ msleep(500);
+
+ /* enter the D0 power management state */
+ pmcsr &= ~PCI_PM_CTRL_STATE_MASK;
+ pmcsr |= PCI_D0;
+ pci_write_config_word(pdev, pos + PCI_PM_CTRL, pmcsr);
+
+ /*
+ * The P600 requires a small delay when changing states.
+ * Otherwise we may think the board did not reset and we bail.
+		 * This is for kdump only and is particular to the P600.
+ */
+ msleep(500);
+ }
+ return 0;
+}
+
+static __devinit void init_driver_version(char *driver_version, int len)
+{
+ memset(driver_version, 0, len);
+ strncpy(driver_version, "cciss " DRIVER_NAME, len - 1);
+}
+
+static __devinit int write_driver_ver_to_cfgtable(
+ CfgTable_struct __iomem *cfgtable)
+{
+ char *driver_version;
+ int i, size = sizeof(cfgtable->driver_version);
+
+ driver_version = kmalloc(size, GFP_KERNEL);
+ if (!driver_version)
+ return -ENOMEM;
+
+ init_driver_version(driver_version, size);
+ for (i = 0; i < size; i++)
+ writeb(driver_version[i], &cfgtable->driver_version[i]);
+ kfree(driver_version);
+ return 0;
+}
+
+static __devinit void read_driver_ver_from_cfgtable(
+ CfgTable_struct __iomem *cfgtable, unsigned char *driver_ver)
+{
+ int i;
+
+ for (i = 0; i < sizeof(cfgtable->driver_version); i++)
+ driver_ver[i] = readb(&cfgtable->driver_version[i]);
+}
+
+static __devinit int controller_reset_failed(
+ CfgTable_struct __iomem *cfgtable)
+{
+
+ char *driver_ver, *old_driver_ver;
+ int rc, size = sizeof(cfgtable->driver_version);
+
+ old_driver_ver = kmalloc(2 * size, GFP_KERNEL);
+ if (!old_driver_ver)
+ return -ENOMEM;
+ driver_ver = old_driver_ver + size;
+
+ /* After a reset, the 32 bytes of "driver version" in the cfgtable
+ * should have been changed, otherwise we know the reset failed.
+ */
+ init_driver_version(old_driver_ver, size);
+ read_driver_ver_from_cfgtable(cfgtable, driver_ver);
+ rc = !memcmp(driver_ver, old_driver_ver, size);
+ kfree(old_driver_ver);
+ return rc;
+}
+
+/* This does a hard reset of the controller using PCI power management
+ * states or using the doorbell register. */
+static __devinit int cciss_kdump_hard_reset_controller(struct pci_dev *pdev)
+{
+ u64 cfg_offset;
+ u32 cfg_base_addr;
+ u64 cfg_base_addr_index;
+ void __iomem *vaddr;
+ unsigned long paddr;
+ u32 misc_fw_support;
+ int rc;
+ CfgTable_struct __iomem *cfgtable;
+ u32 use_doorbell;
+ u32 board_id;
+ u16 command_register;
+
+	/* For controllers as old as the P600, this is very nearly
+ * the same thing as
+ *
+ * pci_save_state(pci_dev);
+ * pci_set_power_state(pci_dev, PCI_D3hot);
+ * pci_set_power_state(pci_dev, PCI_D0);
+ * pci_restore_state(pci_dev);
+ *
+ * For controllers newer than the P600, the pci power state
+ * method of resetting doesn't work so we have another way
+ * using the doorbell register.
+ */
+
+ /* Exclude 640x boards. These are two pci devices in one slot
+ * which share a battery backed cache module. One controls the
+ * cache, the other accesses the cache through the one that controls
+ * it. If we reset the one controlling the cache, the other will
+ * likely not be happy. Just forbid resetting this conjoined mess.
+ */
+ cciss_lookup_board_id(pdev, &board_id);
+ if (!ctlr_is_resettable(board_id)) {
+ dev_warn(&pdev->dev, "Cannot reset Smart Array 640x "
+ "due to shared cache module.");
+ return -ENODEV;
+ }
+
+ /* if controller is soft- but not hard resettable... */
+ if (!ctlr_is_hard_resettable(board_id))
+ return -ENOTSUPP; /* try soft reset later. */
+
+ /* Save the PCI command register */
+ pci_read_config_word(pdev, 4, &command_register);
+ /* Turn the board off. This is so that later pci_restore_state()
+ * won't turn the board on before the rest of config space is ready.
+ */
+ pci_disable_device(pdev);
+ pci_save_state(pdev);
+
+ /* find the first memory BAR, so we can find the cfg table */
+ rc = cciss_pci_find_memory_BAR(pdev, &paddr);
+ if (rc)
+ return rc;
+ vaddr = remap_pci_mem(paddr, 0x250);
+ if (!vaddr)
+ return -ENOMEM;
+
+ /* find cfgtable in order to check if reset via doorbell is supported */
+ rc = cciss_find_cfg_addrs(pdev, vaddr, &cfg_base_addr,
+ &cfg_base_addr_index, &cfg_offset);
+ if (rc)
+ goto unmap_vaddr;
+ cfgtable = remap_pci_mem(pci_resource_start(pdev,
+ cfg_base_addr_index) + cfg_offset, sizeof(*cfgtable));
+ if (!cfgtable) {
+ rc = -ENOMEM;
+ goto unmap_vaddr;
+ }
+ rc = write_driver_ver_to_cfgtable(cfgtable);
+ if (rc)
+ goto unmap_vaddr;
+
+ /* If reset via doorbell register is supported, use that.
+ * There are two such methods. Favor the newest method.
+ */
+ misc_fw_support = readl(&cfgtable->misc_fw_support);
+ use_doorbell = misc_fw_support & MISC_FW_DOORBELL_RESET2;
+ if (use_doorbell) {
+ use_doorbell = DOORBELL_CTLR_RESET2;
+ } else {
+ use_doorbell = misc_fw_support & MISC_FW_DOORBELL_RESET;
+ if (use_doorbell) {
+ dev_warn(&pdev->dev, "Controller claims that "
+ "'Bit 2 doorbell reset' is "
+ "supported, but not 'bit 5 doorbell reset'. "
+ "Firmware update is recommended.\n");
+ rc = -ENOTSUPP; /* use the soft reset */
+ goto unmap_cfgtable;
+ }
+ }
+
+ rc = cciss_controller_hard_reset(pdev, vaddr, use_doorbell);
+ if (rc)
+ goto unmap_cfgtable;
+ pci_restore_state(pdev);
+ rc = pci_enable_device(pdev);
+ if (rc) {
+ dev_warn(&pdev->dev, "failed to enable device.\n");
+ goto unmap_cfgtable;
+ }
+ pci_write_config_word(pdev, 4, command_register);
+
+ /* Some devices (notably the HP Smart Array 5i Controller)
+ need a little pause here */
+ msleep(CCISS_POST_RESET_PAUSE_MSECS);
+
+ /* Wait for board to become not ready, then ready. */
+ dev_info(&pdev->dev, "Waiting for board to reset.\n");
+ rc = cciss_wait_for_board_state(pdev, vaddr, BOARD_NOT_READY);
+ if (rc) {
+ dev_warn(&pdev->dev, "Failed waiting for board to hard reset."
+ " Will try soft reset.\n");
+ rc = -ENOTSUPP; /* Not expected, but try soft reset later */
+ goto unmap_cfgtable;
+ }
+ rc = cciss_wait_for_board_state(pdev, vaddr, BOARD_READY);
+ if (rc) {
+ dev_warn(&pdev->dev,
+ "failed waiting for board to become ready "
+ "after hard reset\n");
+ goto unmap_cfgtable;
+ }
+
+ rc = controller_reset_failed(vaddr);
+ if (rc < 0)
+ goto unmap_cfgtable;
+ if (rc) {
+ dev_warn(&pdev->dev, "Unable to successfully hard reset "
+ "controller. Will try soft reset.\n");
+ rc = -ENOTSUPP; /* Not expected, but try soft reset later */
+ } else {
+ dev_info(&pdev->dev, "Board ready after hard reset.\n");
+ }
+
+unmap_cfgtable:
+ iounmap(cfgtable);
+
+unmap_vaddr:
+ iounmap(vaddr);
+ return rc;
+}
+
+static __devinit int cciss_init_reset_devices(struct pci_dev *pdev)
+{
+ int rc, i;
+
+ if (!reset_devices)
+ return 0;
+
+ /* Reset the controller with a PCI power-cycle or via doorbell */
+ rc = cciss_kdump_hard_reset_controller(pdev);
+
+ /* -ENOTSUPP here means we cannot reset the controller
+ * but it's already (and still) up and running in
+ * "performant mode". Or, it might be 640x, which can't reset
+	 * due to concerns about the shared BBWC between the 6402/6404 pair.
+ */
+ if (rc == -ENOTSUPP)
+ return rc; /* just try to do the kdump anyhow. */
+ if (rc)
+ return -ENODEV;
+
+ /* Now try to get the controller to respond to a no-op */
+ dev_warn(&pdev->dev, "Waiting for controller to respond to no-op\n");
+ for (i = 0; i < CCISS_POST_RESET_NOOP_RETRIES; i++) {
+ if (cciss_noop(pdev) == 0)
+ break;
+ else
+ dev_warn(&pdev->dev, "no-op failed%s\n",
+ (i < CCISS_POST_RESET_NOOP_RETRIES - 1 ?
+ "; re-trying" : ""));
+ msleep(CCISS_POST_RESET_NOOP_INTERVAL_MSECS);
+ }
+ return 0;
+}
+
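+/* cmd_pool_bits is a bitmap tracking which slots of the command and
+ * error-info pools are in use; the pools themselves are shared with the
+ * controller, so they are allocated with pci_alloc_consistent().
+ */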
+static __devinit int cciss_allocate_cmd_pool(ctlr_info_t *h)
+{
+ h->cmd_pool_bits = kmalloc(
+ DIV_ROUND_UP(h->nr_cmds, BITS_PER_LONG) *
+ sizeof(unsigned long), GFP_KERNEL);
+ h->cmd_pool = pci_alloc_consistent(h->pdev,
+ h->nr_cmds * sizeof(CommandList_struct),
+ &(h->cmd_pool_dhandle));
+ h->errinfo_pool = pci_alloc_consistent(h->pdev,
+ h->nr_cmds * sizeof(ErrorInfo_struct),
+ &(h->errinfo_pool_dhandle));
+ if ((h->cmd_pool_bits == NULL)
+ || (h->cmd_pool == NULL)
+ || (h->errinfo_pool == NULL)) {
+ dev_err(&h->pdev->dev, "out of memory");
+ return -ENOMEM;
+ }
+ return 0;
+}
+
+static __devinit int cciss_allocate_scatterlists(ctlr_info_t *h)
+{
+ int i;
+
+ /* zero it, so that on free we need not know how many were alloc'ed */
+ h->scatter_list = kzalloc(h->max_commands *
+ sizeof(struct scatterlist *), GFP_KERNEL);
+ if (!h->scatter_list)
+ return -ENOMEM;
+
+ for (i = 0; i < h->nr_cmds; i++) {
+ h->scatter_list[i] = kmalloc(sizeof(struct scatterlist) *
+ h->maxsgentries, GFP_KERNEL);
+ if (h->scatter_list[i] == NULL) {
+ dev_err(&h->pdev->dev, "could not allocate "
+ "s/g lists\n");
+ return -ENOMEM;
+ }
+ }
+ return 0;
+}
+
+static void cciss_free_scatterlists(ctlr_info_t *h)
+{
+ int i;
+
+ if (h->scatter_list) {
+ for (i = 0; i < h->nr_cmds; i++)
+ kfree(h->scatter_list[i]);
+ kfree(h->scatter_list);
+ }
+}
+
+static void cciss_free_cmd_pool(ctlr_info_t *h)
+{
+ kfree(h->cmd_pool_bits);
+ if (h->cmd_pool)
+ pci_free_consistent(h->pdev,
+ h->nr_cmds * sizeof(CommandList_struct),
+ h->cmd_pool, h->cmd_pool_dhandle);
+ if (h->errinfo_pool)
+ pci_free_consistent(h->pdev,
+ h->nr_cmds * sizeof(ErrorInfo_struct),
+ h->errinfo_pool, h->errinfo_pool_dhandle);
+}
+
+static int cciss_request_irq(ctlr_info_t *h,
+ irqreturn_t (*msixhandler)(int, void *),
+ irqreturn_t (*intxhandler)(int, void *))
+{
+ if (h->msix_vector || h->msi_vector) {
+ if (!request_irq(h->intr[PERF_MODE_INT], msixhandler,
+ IRQF_DISABLED, h->devname, h))
+ return 0;
+ dev_err(&h->pdev->dev, "Unable to get msi irq %d"
+ " for %s\n", h->intr[PERF_MODE_INT],
+ h->devname);
+ return -1;
+ }
+
+ if (!request_irq(h->intr[PERF_MODE_INT], intxhandler,
+ IRQF_DISABLED, h->devname, h))
+ return 0;
+ dev_err(&h->pdev->dev, "Unable to get irq %d for %s\n",
+ h->intr[PERF_MODE_INT], h->devname);
+ return -1;
+}
+
+static int __devinit cciss_kdump_soft_reset(ctlr_info_t *h)
+{
+ if (cciss_send_reset(h, CTLR_LUNID, CCISS_RESET_TYPE_CONTROLLER)) {
+ dev_warn(&h->pdev->dev, "Resetting array controller failed.\n");
+ return -EIO;
+ }
+
+ dev_info(&h->pdev->dev, "Waiting for board to soft reset.\n");
+ if (cciss_wait_for_board_state(h->pdev, h->vaddr, BOARD_NOT_READY)) {
+ dev_warn(&h->pdev->dev, "Soft reset had no effect.\n");
+ return -1;
+ }
+
+ dev_info(&h->pdev->dev, "Board reset, awaiting READY status.\n");
+ if (cciss_wait_for_board_state(h->pdev, h->vaddr, BOARD_READY)) {
+ dev_warn(&h->pdev->dev, "Board failed to become ready "
+ "after soft reset.\n");
+ return -1;
+ }
+
+ return 0;
+}
+
+static void cciss_undo_allocations_after_kdump_soft_reset(ctlr_info_t *h)
+{
+ int ctlr = h->ctlr;
+
+ free_irq(h->intr[PERF_MODE_INT], h);
+#ifdef CONFIG_PCI_MSI
+ if (h->msix_vector)
+ pci_disable_msix(h->pdev);
+ else if (h->msi_vector)
+ pci_disable_msi(h->pdev);
+#endif /* CONFIG_PCI_MSI */
+ cciss_free_sg_chain_blocks(h->cmd_sg_list, h->nr_cmds);
+ cciss_free_scatterlists(h);
+ cciss_free_cmd_pool(h);
+ kfree(h->blockFetchTable);
+ if (h->reply_pool)
+ pci_free_consistent(h->pdev, h->max_commands * sizeof(__u64),
+ h->reply_pool, h->reply_pool_dhandle);
+ if (h->transtable)
+ iounmap(h->transtable);
+ if (h->cfgtable)
+ iounmap(h->cfgtable);
+ if (h->vaddr)
+ iounmap(h->vaddr);
+ unregister_blkdev(h->major, h->devname);
+ cciss_destroy_hba_sysfs_entry(h);
+ pci_release_regions(h->pdev);
+ kfree(h);
+ hba[ctlr] = NULL;
+}
+
+/*
+ * This is it. Find all the controllers and register them. I really hate
+ * stealing all these major device numbers.
+ * returns the number of block devices registered.
+ */
+static int __devinit cciss_init_one(struct pci_dev *pdev,
+ const struct pci_device_id *ent)
+{
+ int i;
+ int j = 0;
+ int rc;
+ int try_soft_reset = 0;
+ int dac, return_code;
+ InquiryData_struct *inq_buff;
+ ctlr_info_t *h;
+ unsigned long flags;
+
+ rc = cciss_init_reset_devices(pdev);
+ if (rc) {
+ if (rc != -ENOTSUPP)
+ return rc;
+ /* If the reset fails in a particular way (it has no way to do
+ * a proper hard reset, so returns -ENOTSUPP) we can try to do
+ * a soft reset once we get the controller configured up to the
+ * point that it can accept a command.
+ */
+ try_soft_reset = 1;
+ rc = 0;
+ }
+
+reinit_after_soft_reset:
+
+ i = alloc_cciss_hba(pdev);
+ if (i < 0)
+ return -1;
+
+ h = hba[i];
+ h->pdev = pdev;
+ h->busy_initializing = 1;
+ INIT_LIST_HEAD(&h->cmpQ);
+ INIT_LIST_HEAD(&h->reqQ);
+ mutex_init(&h->busy_shutting_down);
+
+ if (cciss_pci_init(h) != 0)
+ goto clean_no_release_regions;
+
+ sprintf(h->devname, "cciss%d", i);
+ h->ctlr = i;
+
+ if (cciss_tape_cmds < 2)
+ cciss_tape_cmds = 2;
+ if (cciss_tape_cmds > 16)
+ cciss_tape_cmds = 16;
+
+ init_completion(&h->scan_wait);
+
+ if (cciss_create_hba_sysfs_entry(h))
+ goto clean0;
+
+ /* configure PCI DMA stuff */
+ if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(64)))
+ dac = 1;
+ else if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(32)))
+ dac = 0;
+ else {
+ dev_err(&h->pdev->dev, "no suitable DMA available\n");
+ goto clean1;
+ }
+
+ /*
+ * register with the major number, or get a dynamic major number
+ * by passing 0 as argument. This is done to support more than
+ * 8 controllers.
+ */
+ if (i < MAX_CTLR_ORIG)
+ h->major = COMPAQ_CISS_MAJOR + i;
+ rc = register_blkdev(h->major, h->devname);
+ if (rc == -EBUSY || rc == -EINVAL) {
+ dev_err(&h->pdev->dev,
+ "Unable to get major number %d for %s "
+ "on hba %d\n", h->major, h->devname, i);
+ goto clean1;
+ } else {
+ if (i >= MAX_CTLR_ORIG)
+ h->major = rc;
+ }
+
+ /* make sure the board interrupts are off */
+ h->access.set_intr_mask(h, CCISS_INTR_OFF);
+ rc = cciss_request_irq(h, do_cciss_msix_intr, do_cciss_intx);
+ if (rc)
+ goto clean2;
+
+ dev_info(&h->pdev->dev, "%s: <0x%x> at PCI %s IRQ %d%s using DAC\n",
+ h->devname, pdev->device, pci_name(pdev),
+ h->intr[PERF_MODE_INT], dac ? "" : " not");
+
+ if (cciss_allocate_cmd_pool(h))
+ goto clean4;
+
+ if (cciss_allocate_scatterlists(h))
+ goto clean4;
+
+ h->cmd_sg_list = cciss_allocate_sg_chain_blocks(h,
+ h->chainsize, h->nr_cmds);
+ if (!h->cmd_sg_list && h->chainsize > 0)
+ goto clean4;
+
+ spin_lock_init(&h->lock);
+
+ /* Initialize the pdev driver private data.
+ have it point to h. */
+ pci_set_drvdata(pdev, h);
+ /* command and error info recs zeroed out before
+ they are used */
+ memset(h->cmd_pool_bits, 0,
+ DIV_ROUND_UP(h->nr_cmds, BITS_PER_LONG)
+ * sizeof(unsigned long));
+
+ h->num_luns = 0;
+ h->highest_lun = -1;
+ for (j = 0; j < CISS_MAX_LUN; j++) {
+ h->drv[j] = NULL;
+ h->gendisk[j] = NULL;
+ }
+
+ /* At this point, the controller is ready to take commands.
+ * Now, if reset_devices and the hard reset didn't work, try
+ * the soft reset and see if that works.
+ */
+ if (try_soft_reset) {
+
+ /* This is kind of gross. We may or may not get a completion
+ * from the soft reset command, and if we do, then the value
+ * from the fifo may or may not be valid. So, we wait 10 secs
+ * after the reset throwing away any completions we get during
+ * that time. Unregister the interrupt handler and register
+ * fake ones to scoop up any residual completions.
+ */
+ spin_lock_irqsave(&h->lock, flags);
+ h->access.set_intr_mask(h, CCISS_INTR_OFF);
+ spin_unlock_irqrestore(&h->lock, flags);
+ free_irq(h->intr[PERF_MODE_INT], h);
+ rc = cciss_request_irq(h, cciss_msix_discard_completions,
+ cciss_intx_discard_completions);
+ if (rc) {
+ dev_warn(&h->pdev->dev, "Failed to request_irq after "
+ "soft reset.\n");
+ goto clean4;
+ }
+
+ rc = cciss_kdump_soft_reset(h);
+ if (rc) {
+ dev_warn(&h->pdev->dev, "Soft reset failed.\n");
+ goto clean4;
+ }
+
+ dev_info(&h->pdev->dev, "Board READY.\n");
+ dev_info(&h->pdev->dev,
+ "Waiting for stale completions to drain.\n");
+ h->access.set_intr_mask(h, CCISS_INTR_ON);
+ msleep(10000);
+ h->access.set_intr_mask(h, CCISS_INTR_OFF);
+
+ rc = controller_reset_failed(h->cfgtable);
+ if (rc)
+ dev_info(&h->pdev->dev,
+ "Soft reset appears to have failed.\n");
+
+ /* since the controller's reset, we have to go back and re-init
+ * everything. Easiest to just forget what we've done and do it
+ * all over again.
+ */
+ cciss_undo_allocations_after_kdump_soft_reset(h);
+ try_soft_reset = 0;
+ if (rc)
+ /* don't go to clean4, we already unallocated */
+ return -ENODEV;
+
+ goto reinit_after_soft_reset;
+ }
+
+ cciss_scsi_setup(h);
+
+ /* Turn the interrupts on so we can service requests */
+ h->access.set_intr_mask(h, CCISS_INTR_ON);
+
+ /* Get the firmware version */
+ inq_buff = kzalloc(sizeof(InquiryData_struct), GFP_KERNEL);
+ if (inq_buff == NULL) {
+ dev_err(&h->pdev->dev, "out of memory\n");
+ goto clean4;
+ }
+
+ return_code = sendcmd_withirq(h, CISS_INQUIRY, inq_buff,
+ sizeof(InquiryData_struct), 0, CTLR_LUNID, TYPE_CMD);
+ if (return_code == IO_OK) {
+ h->firm_ver[0] = inq_buff->data_byte[32];
+ h->firm_ver[1] = inq_buff->data_byte[33];
+ h->firm_ver[2] = inq_buff->data_byte[34];
+ h->firm_ver[3] = inq_buff->data_byte[35];
+ } else { /* send command failed */
+ dev_warn(&h->pdev->dev, "unable to determine firmware"
+ " version of controller\n");
+ }
+ kfree(inq_buff);
+
+ cciss_procinit(h);
+
+ h->cciss_max_sectors = 8192;
+
+ rebuild_lun_table(h, 1, 0);
+ h->busy_initializing = 0;
+ return 1;
+
+clean4:
+ cciss_free_cmd_pool(h);
+ cciss_free_scatterlists(h);
+ cciss_free_sg_chain_blocks(h->cmd_sg_list, h->nr_cmds);
+ free_irq(h->intr[PERF_MODE_INT], h);
+clean2:
+ unregister_blkdev(h->major, h->devname);
+clean1:
+ cciss_destroy_hba_sysfs_entry(h);
+clean0:
+ pci_release_regions(pdev);
+clean_no_release_regions:
+ h->busy_initializing = 0;
+
+ /*
+ * Deliberately omit pci_disable_device(): it does something nasty to
+ * Smart Array controllers that pci_enable_device does not undo
+ */
+ pci_set_drvdata(pdev, NULL);
+ free_hba(h);
+ return -1;
+}
+
+static void cciss_shutdown(struct pci_dev *pdev)
+{
+ ctlr_info_t *h;
+ char *flush_buf;
+ int return_code;
+
+ h = pci_get_drvdata(pdev);
+ flush_buf = kzalloc(4, GFP_KERNEL);
+ if (!flush_buf) {
+ dev_warn(&h->pdev->dev, "cache not flushed, out of memory.\n");
+ return;
+ }
+ /* write all data in the battery backed cache to disk */
+ memset(flush_buf, 0, 4);
+ return_code = sendcmd_withirq(h, CCISS_CACHE_FLUSH, flush_buf,
+ 4, 0, CTLR_LUNID, TYPE_CMD);
+ kfree(flush_buf);
+ if (return_code != IO_OK)
+ dev_warn(&h->pdev->dev, "Error flushing cache\n");
+ h->access.set_intr_mask(h, CCISS_INTR_OFF);
+ free_irq(h->intr[PERF_MODE_INT], h);
+}
+
+static void __devexit cciss_remove_one(struct pci_dev *pdev)
+{
+ ctlr_info_t *h;
+ int i, j;
+
+ if (pci_get_drvdata(pdev) == NULL) {
+ dev_err(&pdev->dev, "Unable to remove device\n");
+ return;
+ }
+
+ h = pci_get_drvdata(pdev);
+ i = h->ctlr;
+ if (hba[i] == NULL) {
+ dev_err(&pdev->dev, "device appears to already be removed\n");
+ return;
+ }
+
+ mutex_lock(&h->busy_shutting_down);
+
+ remove_from_scan_list(h);
+ remove_proc_entry(h->devname, proc_cciss);
+ unregister_blkdev(h->major, h->devname);
+
+ /* remove it from the disk list */
+ for (j = 0; j < CISS_MAX_LUN; j++) {
+ struct gendisk *disk = h->gendisk[j];
+ if (disk) {
+ struct request_queue *q = disk->queue;
+
+ if (disk->flags & GENHD_FL_UP) {
+ cciss_destroy_ld_sysfs_entry(h, j, 1);
+ del_gendisk(disk);
+ }
+ if (q)
+ blk_cleanup_queue(q);
+ }
+ }
+
+#ifdef CONFIG_CISS_SCSI_TAPE
+ cciss_unregister_scsi(h); /* unhook from SCSI subsystem */
+#endif
+
+ cciss_shutdown(pdev);
+
+#ifdef CONFIG_PCI_MSI
+ if (h->msix_vector)
+ pci_disable_msix(h->pdev);
+ else if (h->msi_vector)
+ pci_disable_msi(h->pdev);
+#endif /* CONFIG_PCI_MSI */
+
+ iounmap(h->transtable);
+ iounmap(h->cfgtable);
+ iounmap(h->vaddr);
+
+ cciss_free_cmd_pool(h);
+ /* Free up sg elements */
+ for (j = 0; j < h->nr_cmds; j++)
+ kfree(h->scatter_list[j]);
+ kfree(h->scatter_list);
+ cciss_free_sg_chain_blocks(h->cmd_sg_list, h->nr_cmds);
+ kfree(h->blockFetchTable);
+ if (h->reply_pool)
+ pci_free_consistent(h->pdev, h->max_commands * sizeof(__u64),
+ h->reply_pool, h->reply_pool_dhandle);
+ /*
+ * Deliberately omit pci_disable_device(): it does something nasty to
+ * Smart Array controllers that pci_enable_device does not undo
+ */
+ pci_release_regions(pdev);
+ pci_set_drvdata(pdev, NULL);
+ cciss_destroy_hba_sysfs_entry(h);
+ mutex_unlock(&h->busy_shutting_down);
+ free_hba(h);
+}
+
+static struct pci_driver cciss_pci_driver = {
+ .name = "cciss",
+ .probe = cciss_init_one,
+ .remove = __devexit_p(cciss_remove_one),
+ .id_table = cciss_pci_device_id, /* id_table */
+ .shutdown = cciss_shutdown,
+};
+
+/*
+ * This is it. Register the PCI driver information for the cards we control;
+ * the OS will call our registered routines when it finds one of our cards.
+ */
+static int __init cciss_init(void)
+{
+ int err;
+
+ /*
+ * The hardware requires that commands are aligned on a 64-bit
+ * boundary. Given that we use pci_alloc_consistent() to allocate an
+ * array of them, the size must be a multiple of 8 bytes.
+ */
+ BUILD_BUG_ON(sizeof(CommandList_struct) % COMMANDLIST_ALIGNMENT);
+ printk(KERN_INFO DRIVER_NAME "\n");
+
+ err = bus_register(&cciss_bus_type);
+ if (err)
+ return err;
+
+ /* Start the scan thread */
+ cciss_scan_thread = kthread_run(scan_thread, NULL, "cciss_scan");
+ if (IS_ERR(cciss_scan_thread)) {
+ err = PTR_ERR(cciss_scan_thread);
+ goto err_bus_unregister;
+ }
+
+ /* Register for our PCI devices */
+ err = pci_register_driver(&cciss_pci_driver);
+ if (err)
+ goto err_thread_stop;
+
+ return err;
+
+err_thread_stop:
+ kthread_stop(cciss_scan_thread);
+err_bus_unregister:
+ bus_unregister(&cciss_bus_type);
+
+ return err;
+}
+
+static void __exit cciss_cleanup(void)
+{
+ int i;
+
+ pci_unregister_driver(&cciss_pci_driver);
+ /* double check that all controller entries have been removed */
+ for (i = 0; i < MAX_CTLR; i++) {
+ if (hba[i] != NULL) {
+ dev_warn(&hba[i]->pdev->dev,
+ "had to remove controller\n");
+ cciss_remove_one(hba[i]->pdev);
+ }
+ }
+ kthread_stop(cciss_scan_thread);
+ if (proc_cciss)
+ remove_proc_entry("driver/cciss", NULL);
+ bus_unregister(&cciss_bus_type);
+}
+
+module_init(cciss_init);
+module_exit(cciss_cleanup);
diff --git a/drivers/block/cciss.h b/drivers/block/cciss.h
new file mode 100644
index 00000000..c049548e
--- /dev/null
+++ b/drivers/block/cciss.h
@@ -0,0 +1,434 @@
+#ifndef CCISS_H
+#define CCISS_H
+
+#include <linux/genhd.h>
+#include <linux/mutex.h>
+
+#include "cciss_cmd.h"
+
+
+#define NWD_SHIFT 4
+#define MAX_PART (1 << NWD_SHIFT)
+
+#define IO_OK 0
+#define IO_ERROR 1
+#define IO_NEEDS_RETRY 3
+
+#define VENDOR_LEN 8
+#define MODEL_LEN 16
+#define REV_LEN 4
+
+struct ctlr_info;
+typedef struct ctlr_info ctlr_info_t;
+
+struct access_method {
+ void (*submit_command)(ctlr_info_t *h, CommandList_struct *c);
+ void (*set_intr_mask)(ctlr_info_t *h, unsigned long val);
+ unsigned long (*fifo_full)(ctlr_info_t *h);
+ bool (*intr_pending)(ctlr_info_t *h);
+ unsigned long (*command_completed)(ctlr_info_t *h);
+};
+typedef struct _drive_info_struct
+{
+ unsigned char LunID[8];
+ int usage_count;
+ struct request_queue *queue;
+ sector_t nr_blocks;
+ int block_size;
+ int heads;
+ int sectors;
+ int cylinders;
+ int raid_level; /* set to -1 to indicate that
+ * the drive is not in use/configured
+ */
+ int busy_configuring; /* This is set when a drive is being removed
+ * to prevent it from being opened or its
+ * queue from being started.
+ */
+ struct device dev;
+ __u8 serial_no[16]; /* from inquiry page 0x83,
+ * not necc. null terminated.
+ */
+ char vendor[VENDOR_LEN + 1]; /* SCSI vendor string */
+ char model[MODEL_LEN + 1]; /* SCSI model string */
+ char rev[REV_LEN + 1]; /* SCSI revision string */
+ char device_initialized; /* indicates whether dev is initialized */
+} drive_info_struct;
+
+struct ctlr_info
+{
+ int ctlr;
+ char devname[8];
+ char *product_name;
+ char firm_ver[4]; /* Firmware version */
+ struct pci_dev *pdev;
+ __u32 board_id;
+ void __iomem *vaddr;
+ unsigned long paddr;
+ int nr_cmds; /* Number of commands allowed on this controller */
+ CfgTable_struct __iomem *cfgtable;
+ int interrupts_enabled;
+ int major;
+ int max_commands;
+ int commands_outstanding;
+ int max_outstanding; /* Debug */
+ int num_luns;
+ int highest_lun;
+ int usage_count; /* number of opens on all minor devices */
+ /* Need space for temp sg list
+ * number of scatter/gathers supported
+ * number of scatter/gathers in chained block
+ */
+ struct scatterlist **scatter_list;
+ int maxsgentries;
+ int chainsize;
+ int max_cmd_sgentries;
+ SGDescriptor_struct **cmd_sg_list;
+
+# define PERF_MODE_INT 0
+# define DOORBELL_INT 1
+# define SIMPLE_MODE_INT 2
+# define MEMQ_MODE_INT 3
+ unsigned int intr[4];
+ unsigned int msix_vector;
+ unsigned int msi_vector;
+ int cciss_max_sectors;
+ BYTE cciss_read;
+ BYTE cciss_write;
+ BYTE cciss_read_capacity;
+
+ /* information about each logical volume */
+ drive_info_struct *drv[CISS_MAX_LUN];
+
+ struct access_method access;
+
+ /* queue and queue Info */
+ struct list_head reqQ;
+ struct list_head cmpQ;
+ unsigned int Qdepth;
+ unsigned int maxQsinceinit;
+ unsigned int maxSG;
+ spinlock_t lock;
+
+ /* pointers to command and error info pool */
+ CommandList_struct *cmd_pool;
+ dma_addr_t cmd_pool_dhandle;
+ ErrorInfo_struct *errinfo_pool;
+ dma_addr_t errinfo_pool_dhandle;
+ unsigned long *cmd_pool_bits;
+ int nr_allocs;
+ int nr_frees;
+ int busy_configuring;
+ int busy_initializing;
+ int busy_scanning;
+ struct mutex busy_shutting_down;
+
+ /* This element holds the zero based queue number of the last
+ * queue to be started. It is used for fairness.
+ */
+ int next_to_run;
+
+ /* Disk structures we need to pass back */
+ struct gendisk *gendisk[CISS_MAX_LUN];
+#ifdef CONFIG_CISS_SCSI_TAPE
+ struct cciss_scsi_adapter_data_t *scsi_ctlr;
+#endif
+ unsigned char alive;
+ struct list_head scan_list;
+ struct completion scan_wait;
+ struct device dev;
+ /*
+ * Performant mode tables.
+ */
+ u32 trans_support;
+ u32 trans_offset;
+ struct TransTable_struct *transtable;
+ unsigned long transMethod;
+
+ /*
+ * Performant mode completion buffer
+ */
+ u64 *reply_pool;
+ dma_addr_t reply_pool_dhandle;
+ u64 *reply_pool_head;
+ size_t reply_pool_size;
+ unsigned char reply_pool_wraparound;
+ u32 *blockFetchTable;
+};
+
+/* Defining the different access_methods
+ *
+ * Memory mapped FIFO interface (SMART 53xx cards)
+ */
+#define SA5_DOORBELL 0x20
+#define SA5_REQUEST_PORT_OFFSET 0x40
+#define SA5_REPLY_INTR_MASK_OFFSET 0x34
+#define SA5_REPLY_PORT_OFFSET 0x44
+#define SA5_INTR_STATUS 0x30
+#define SA5_SCRATCHPAD_OFFSET 0xB0
+
+#define SA5_CTCFG_OFFSET 0xB4
+#define SA5_CTMEM_OFFSET 0xB8
+
+#define SA5_INTR_OFF 0x08
+#define SA5B_INTR_OFF 0x04
+#define SA5_INTR_PENDING 0x08
+#define SA5B_INTR_PENDING 0x04
+#define FIFO_EMPTY 0xffffffff
+#define CCISS_FIRMWARE_READY 0xffff0000 /* value in scratchpad register */
+/* Perf. mode flags */
+#define SA5_PERF_INTR_PENDING 0x04
+#define SA5_PERF_INTR_OFF 0x05
+#define SA5_OUTDB_STATUS_PERF_BIT 0x01
+#define SA5_OUTDB_CLEAR_PERF_BIT 0x01
+#define SA5_OUTDB_CLEAR 0xA0
+#define SA5_OUTDB_CLEAR_PERF_BIT 0x01
+#define SA5_OUTDB_STATUS 0x9C
+
+
+#define CISS_ERROR_BIT 0x02
+
+#define CCISS_INTR_ON 1
+#define CCISS_INTR_OFF 0
+
+
+/* CCISS_BOARD_READY_WAIT_SECS is how long to wait for a board
+ * to become ready, in seconds, before giving up on it.
+ * CCISS_BOARD_READY_POLL_INTERVAL_MSECS is how long to wait
+ * between polling the board to see if it is ready, in
+ * milliseconds. CCISS_BOARD_READY_ITERATIONS is derived from
+ * the above.
+ */
+#define CCISS_BOARD_READY_WAIT_SECS (120)
+#define CCISS_BOARD_NOT_READY_WAIT_SECS (100)
+#define CCISS_BOARD_READY_POLL_INTERVAL_MSECS (100)
+#define CCISS_BOARD_READY_ITERATIONS \
+ ((CCISS_BOARD_READY_WAIT_SECS * 1000) / \
+ CCISS_BOARD_READY_POLL_INTERVAL_MSECS)
+#define CCISS_BOARD_NOT_READY_ITERATIONS \
+ ((CCISS_BOARD_NOT_READY_WAIT_SECS * 1000) / \
+ CCISS_BOARD_READY_POLL_INTERVAL_MSECS)
+#define CCISS_POST_RESET_PAUSE_MSECS (3000)
+#define CCISS_POST_RESET_NOOP_INTERVAL_MSECS (4000)
+#define CCISS_POST_RESET_NOOP_RETRIES (12)
+#define CCISS_POST_RESET_NOOP_TIMEOUT_MSECS (10000)
+
+/*
+ Send the command to the hardware
+*/
+static void SA5_submit_command( ctlr_info_t *h, CommandList_struct *c)
+{
+#ifdef CCISS_DEBUG
+ printk(KERN_WARNING "cciss%d: Sending %08x - down to controller\n",
+ h->ctlr, c->busaddr);
+#endif /* CCISS_DEBUG */
+ writel(c->busaddr, h->vaddr + SA5_REQUEST_PORT_OFFSET);
+ readl(h->vaddr + SA5_SCRATCHPAD_OFFSET);
+ h->commands_outstanding++;
+ if ( h->commands_outstanding > h->max_outstanding)
+ h->max_outstanding = h->commands_outstanding;
+}
+
+/*
+ * This card is the opposite of the other cards.
+ * 0 turns interrupts on...
+ * 0x08 turns them off...
+ */
+static void SA5_intr_mask(ctlr_info_t *h, unsigned long val)
+{
+ if (val)
+ { /* Turn interrupts on */
+ h->interrupts_enabled = 1;
+ writel(0, h->vaddr + SA5_REPLY_INTR_MASK_OFFSET);
+ (void) readl(h->vaddr + SA5_REPLY_INTR_MASK_OFFSET);
+ } else /* Turn them off */
+ {
+ h->interrupts_enabled = 0;
+ writel( SA5_INTR_OFF,
+ h->vaddr + SA5_REPLY_INTR_MASK_OFFSET);
+ (void) readl(h->vaddr + SA5_REPLY_INTR_MASK_OFFSET);
+ }
+}
+/*
+ * This card is the opposite of the other cards.
+ * 0 turns interrupts on...
+ * 0x04 turns them off...
+ */
+static void SA5B_intr_mask(ctlr_info_t *h, unsigned long val)
+{
+ if (val)
+ { /* Turn interrupts on */
+ h->interrupts_enabled = 1;
+ writel(0, h->vaddr + SA5_REPLY_INTR_MASK_OFFSET);
+ (void) readl(h->vaddr + SA5_REPLY_INTR_MASK_OFFSET);
+ } else /* Turn them off */
+ {
+ h->interrupts_enabled = 0;
+ writel( SA5B_INTR_OFF,
+ h->vaddr + SA5_REPLY_INTR_MASK_OFFSET);
+ (void) readl(h->vaddr + SA5_REPLY_INTR_MASK_OFFSET);
+ }
+}
+
+/* Performant mode intr_mask */
+static void SA5_performant_intr_mask(ctlr_info_t *h, unsigned long val)
+{
+ if (val) { /* turn on interrupts */
+ h->interrupts_enabled = 1;
+ writel(0, h->vaddr + SA5_REPLY_INTR_MASK_OFFSET);
+ (void) readl(h->vaddr + SA5_REPLY_INTR_MASK_OFFSET);
+ } else {
+ h->interrupts_enabled = 0;
+ writel(SA5_PERF_INTR_OFF,
+ h->vaddr + SA5_REPLY_INTR_MASK_OFFSET);
+ (void) readl(h->vaddr + SA5_REPLY_INTR_MASK_OFFSET);
+ }
+}
+
+/*
+ * Returns true if fifo is full.
+ *
+ */
+static unsigned long SA5_fifo_full(ctlr_info_t *h)
+{
+ if( h->commands_outstanding >= h->max_commands)
+ return(1);
+ else
+ return(0);
+
+}
+/*
+ * returns value read from hardware.
+ * returns FIFO_EMPTY if there is nothing to read
+ */
+static unsigned long SA5_completed(ctlr_info_t *h)
+{
+ unsigned long register_value
+ = readl(h->vaddr + SA5_REPLY_PORT_OFFSET);
+ if(register_value != FIFO_EMPTY)
+ {
+ h->commands_outstanding--;
+#ifdef CCISS_DEBUG
+ printk("cciss: Read %lx back from board\n", register_value);
+#endif /* CCISS_DEBUG */
+ }
+#ifdef CCISS_DEBUG
+ else
+ {
+ printk("cciss: FIFO Empty read\n");
+ }
+#endif
+ return ( register_value);
+
+}
+
+/* Performant mode command completed */
+static unsigned long SA5_performant_completed(ctlr_info_t *h)
+{
+ unsigned long register_value = FIFO_EMPTY;
+
+ /* flush the controller write of the reply queue by reading
+ * outbound doorbell status register.
+ */
+ register_value = readl(h->vaddr + SA5_OUTDB_STATUS);
+ /* msi auto clears the interrupt pending bit. */
+ if (!(h->msi_vector || h->msix_vector)) {
+ writel(SA5_OUTDB_CLEAR_PERF_BIT, h->vaddr + SA5_OUTDB_CLEAR);
+ /* Do a read in order to flush the write to the controller
+ * (as per spec.)
+ */
+ register_value = readl(h->vaddr + SA5_OUTDB_STATUS);
+ }
+
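+ /* Reply ring consumption: the low bit of each posted tag is a cycle
+ * bit; an entry is treated as new only when that bit matches
+ * reply_pool_wraparound, which is toggled below each time
+ * reply_pool_head wraps back to the start of the ring.
+ */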
+ if ((*(h->reply_pool_head) & 1) == (h->reply_pool_wraparound)) {
+ register_value = *(h->reply_pool_head);
+ (h->reply_pool_head)++;
+ h->commands_outstanding--;
+ } else {
+ register_value = FIFO_EMPTY;
+ }
+ /* Check for wraparound */
+ if (h->reply_pool_head == (h->reply_pool + h->max_commands)) {
+ h->reply_pool_head = h->reply_pool;
+ h->reply_pool_wraparound ^= 1;
+ }
+
+ return register_value;
+}
+/*
+ * Returns true if an interrupt is pending.
+ */
+static bool SA5_intr_pending(ctlr_info_t *h)
+{
+ unsigned long register_value =
+ readl(h->vaddr + SA5_INTR_STATUS);
+#ifdef CCISS_DEBUG
+ printk("cciss: intr_pending %lx\n", register_value);
+#endif /* CCISS_DEBUG */
+ if( register_value & SA5_INTR_PENDING)
+ return 1;
+ return 0 ;
+}
+
+/*
+ * Returns true if an interrupt is pending.
+ */
+static bool SA5B_intr_pending(ctlr_info_t *h)
+{
+ unsigned long register_value =
+ readl(h->vaddr + SA5_INTR_STATUS);
+#ifdef CCISS_DEBUG
+ printk("cciss: intr_pending %lx\n", register_value);
+#endif /* CCISS_DEBUG */
+ if( register_value & SA5B_INTR_PENDING)
+ return 1;
+ return 0 ;
+}
+
+static bool SA5_performant_intr_pending(ctlr_info_t *h)
+{
+ unsigned long register_value = readl(h->vaddr + SA5_INTR_STATUS);
+
+ if (!register_value)
+ return false;
+
+ if (h->msi_vector || h->msix_vector)
+ return true;
+
+ /* Read outbound doorbell to flush */
+ register_value = readl(h->vaddr + SA5_OUTDB_STATUS);
+ return register_value & SA5_OUTDB_STATUS_PERF_BIT;
+}
+
+static struct access_method SA5_access = {
+ SA5_submit_command,
+ SA5_intr_mask,
+ SA5_fifo_full,
+ SA5_intr_pending,
+ SA5_completed,
+};
+
+static struct access_method SA5B_access = {
+ SA5_submit_command,
+ SA5B_intr_mask,
+ SA5_fifo_full,
+ SA5B_intr_pending,
+ SA5_completed,
+};
+
+static struct access_method SA5_performant_access = {
+ SA5_submit_command,
+ SA5_performant_intr_mask,
+ SA5_fifo_full,
+ SA5_performant_intr_pending,
+ SA5_performant_completed,
+};
+
+struct board_type {
+ __u32 board_id;
+ char *product_name;
+ struct access_method *access;
+ int nr_cmds; /* Max cmds this kind of ctlr can handle. */
+};
+
+#endif /* CCISS_H */
diff --git a/drivers/block/cciss_cmd.h b/drivers/block/cciss_cmd.h
new file mode 100644
index 00000000..d9be6b4d
--- /dev/null
+++ b/drivers/block/cciss_cmd.h
@@ -0,0 +1,269 @@
+#ifndef CCISS_CMD_H
+#define CCISS_CMD_H
+
+#include <linux/cciss_defs.h>
+
+/* DEFINES */
+#define CISS_VERSION "1.00"
+
+/* general boundary definitions */
+#define MAXSGENTRIES 32
+#define CCISS_SG_CHAIN 0x80000000
+#define MAXREPLYQS 256
+
+/* Unit Attentions ASC's as defined for the MSA2012sa */
+#define POWER_OR_RESET 0x29
+#define STATE_CHANGED 0x2a
+#define UNIT_ATTENTION_CLEARED 0x2f
+#define LUN_FAILED 0x3e
+#define REPORT_LUNS_CHANGED 0x3f
+
+/* Unit Attentions ASCQ's as defined for the MSA2012sa */
+
+ /* These ASCQ's defined for ASC = POWER_OR_RESET */
+#define POWER_ON_RESET 0x00
+#define POWER_ON_REBOOT 0x01
+#define SCSI_BUS_RESET 0x02
+#define MSA_TARGET_RESET 0x03
+#define CONTROLLER_FAILOVER 0x04
+#define TRANSCEIVER_SE 0x05
+#define TRANSCEIVER_LVD 0x06
+
+ /* These ASCQ's defined for ASC = STATE_CHANGED */
+#define RESERVATION_PREEMPTED 0x03
+#define ASYM_ACCESS_CHANGED 0x06
+#define LUN_CAPACITY_CHANGED 0x09
+
+/* config space register offsets */
+#define CFG_VENDORID 0x00
+#define CFG_DEVICEID 0x02
+#define CFG_I2OBAR 0x10
+#define CFG_MEM1BAR 0x14
+
+/* i2o space register offsets */
+#define I2O_IBDB_SET 0x20
+#define I2O_IBDB_CLEAR 0x70
+#define I2O_INT_STATUS 0x30
+#define I2O_INT_MASK 0x34
+#define I2O_IBPOST_Q 0x40
+#define I2O_OBPOST_Q 0x44
+#define I2O_DMA1_CFG 0x214
+
+/* Configuration Table */
+#define CFGTBL_ChangeReq 0x00000001l
+#define CFGTBL_AccCmds 0x00000001l
+#define DOORBELL_CTLR_RESET 0x00000004l
+#define DOORBELL_CTLR_RESET2 0x00000020l
+
+#define CFGTBL_Trans_Simple 0x00000002l
+#define CFGTBL_Trans_Performant 0x00000004l
+#define CFGTBL_Trans_use_short_tags 0x20000000l
+
+#define CFGTBL_BusType_Ultra2 0x00000001l
+#define CFGTBL_BusType_Ultra3 0x00000002l
+#define CFGTBL_BusType_Fibre1G 0x00000100l
+#define CFGTBL_BusType_Fibre2G 0x00000200l
+typedef struct _vals32
+{
+ __u32 lower;
+ __u32 upper;
+} vals32;
+
+typedef union _u64bit
+{
+ vals32 val32;
+ __u64 val;
+} u64bit;
+
+/* Type defs used in the following structs */
+#define QWORD vals32
+
+/* STRUCTURES */
+#define CISS_MAX_PHYS_LUN 1024
+/* SCSI-3 Commands */
+
+#pragma pack(1)
+
+#define CISS_INQUIRY 0x12
+/* Data returned */
+typedef struct _InquiryData_struct
+{
+ BYTE data_byte[36];
+} InquiryData_struct;
+
+#define CISS_REPORT_LOG 0xc2 /* Report Logical LUNs */
+#define CISS_REPORT_PHYS 0xc3 /* Report Physical LUNs */
+/* Data returned */
+typedef struct _ReportLUNdata_struct
+{
+ BYTE LUNListLength[4];
+ DWORD reserved;
+ BYTE LUN[CISS_MAX_LUN][8];
+} ReportLunData_struct;
+
+#define CCISS_READ_CAPACITY 0x25 /* Read Capacity */
+typedef struct _ReadCapdata_struct
+{
+ BYTE total_size[4]; /* Total size in blocks */
+ BYTE block_size[4]; /* Size of blocks in bytes */
+} ReadCapdata_struct;
+
+#define CCISS_READ_CAPACITY_16 0x9e /* Read Capacity 16 */
+
+/* service action to differentiate a 16 byte read capacity from
+ other commands that use the 0x9e SCSI op code */
+
+#define CCISS_READ_CAPACITY_16_SERVICE_ACT 0x10
+
+typedef struct _ReadCapdata_struct_16
+{
+ BYTE total_size[8]; /* Total size in blocks */
+ BYTE block_size[4]; /* Size of blocks in bytes */
+ BYTE prot_en:1; /* protection enable bit */
+ BYTE rto_en:1; /* reference tag own enable bit */
+ BYTE reserved:6; /* reserved bits */
+ BYTE reserved2[18]; /* reserved bytes per spec */
+} ReadCapdata_struct_16;
+
+/* Define the supported read/write commands for cciss based controllers */
+
+#define CCISS_READ_10 0x28 /* Read(10) */
+#define CCISS_WRITE_10 0x2a /* Write(10) */
+#define CCISS_READ_16 0x88 /* Read(16) */
+#define CCISS_WRITE_16 0x8a /* Write(16) */
+
+/* Define the CDB lengths supported by cciss based controllers */
+
+#define CDB_LEN10 10
+#define CDB_LEN16 16
+
+/* BMIC commands */
+#define BMIC_READ 0x26
+#define BMIC_WRITE 0x27
+#define BMIC_CACHE_FLUSH 0xc2
+#define CCISS_CACHE_FLUSH 0x01 /* C2 was already being used by CCISS */
+
+#define CCISS_ABORT_MSG 0x00
+#define CCISS_RESET_MSG 0x01
+#define CCISS_RESET_TYPE_CONTROLLER 0x00
+#define CCISS_RESET_TYPE_BUS 0x01
+#define CCISS_RESET_TYPE_TARGET 0x03
+#define CCISS_RESET_TYPE_LUN 0x04
+#define CCISS_NOOP_MSG 0x03
+
+/* Command List Structure */
+#define CTLR_LUNID "\0\0\0\0\0\0\0\0"
+
+typedef struct _CommandListHeader_struct {
+ BYTE ReplyQueue;
+ BYTE SGList;
+ HWORD SGTotal;
+ QWORD Tag;
+ LUNAddr_struct LUN;
+} CommandListHeader_struct;
+typedef struct _ErrDescriptor_struct {
+ QWORD Addr;
+ DWORD Len;
+} ErrDescriptor_struct;
+typedef struct _SGDescriptor_struct {
+ QWORD Addr;
+ DWORD Len;
+ DWORD Ext;
+} SGDescriptor_struct;
+
+/* Command types */
+#define CMD_RWREQ 0x00
+#define CMD_IOCTL_PEND 0x01
+#define CMD_SCSI 0x03
+#define CMD_MSG_DONE 0x04
+#define CMD_MSG_TIMEOUT 0x05
+#define CMD_MSG_STALE 0xff
+
+/* This structure needs to be divisible by COMMANDLIST_ALIGNMENT
+ * because low bits of the address are used to indicate
+ * whether the tag contains an index or an address. PAD_32 and
+ * PAD_64 can be adjusted independently as needed for 32-bit
+ * and 64-bit systems.
+ */
+#define COMMANDLIST_ALIGNMENT (32)
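+/* IS_64_BIT evaluates to 1 when sizeof(long) is 8 and to 0 when it is 4 */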
+#define IS_64_BIT ((sizeof(long) - 4)/4)
+#define IS_32_BIT (!IS_64_BIT)
+#define PAD_32 (0)
+#define PAD_64 (4)
+#define PADSIZE (IS_32_BIT * PAD_32 + IS_64_BIT * PAD_64)
+#define DIRECT_LOOKUP_BIT 0x10
+#define DIRECT_LOOKUP_SHIFT 5
+
+typedef struct _CommandList_struct {
+ CommandListHeader_struct Header;
+ RequestBlock_struct Request;
+ ErrDescriptor_struct ErrDesc;
+ SGDescriptor_struct SG[MAXSGENTRIES];
+ /* information associated with the command */
+ __u32 busaddr; /* physical address of this record */
+ ErrorInfo_struct * err_info; /* pointer to the allocated mem */
+ int ctlr;
+ int cmd_type;
+ long cmdindex;
+ struct list_head list;
+ struct request * rq;
+ struct completion *waiting;
+ int retry_count;
+ void * scsi_cmd;
+ char pad[PADSIZE];
+} CommandList_struct;
+
+/* Configuration Table Structure */
+typedef struct _HostWrite_struct {
+ DWORD TransportRequest;
+ DWORD Reserved;
+ DWORD CoalIntDelay;
+ DWORD CoalIntCount;
+} HostWrite_struct;
+
+typedef struct _CfgTable_struct {
+ BYTE Signature[4];
+ DWORD SpecValence;
+#define SIMPLE_MODE 0x02
+#define PERFORMANT_MODE 0x04
+#define MEMQ_MODE 0x08
+ DWORD TransportSupport;
+ DWORD TransportActive;
+ HostWrite_struct HostWrite;
+ DWORD CmdsOutMax;
+ DWORD BusTypes;
+ DWORD TransMethodOffset;
+ BYTE ServerName[16];
+ DWORD HeartBeat;
+ DWORD SCSI_Prefetch;
+ DWORD MaxSGElements;
+ DWORD MaxLogicalUnits;
+ DWORD MaxPhysicalDrives;
+ DWORD MaxPhysicalDrivesPerLogicalUnit;
+ DWORD MaxPerformantModeCommands;
+ u8 reserved[0x78 - 0x58];
+ u32 misc_fw_support; /* offset 0x78 */
+#define MISC_FW_DOORBELL_RESET (0x02)
+#define MISC_FW_DOORBELL_RESET2 (0x10)
+ u8 driver_version[32];
+} CfgTable_struct;
+
+struct TransTable_struct {
+ u32 BlockFetch0;
+ u32 BlockFetch1;
+ u32 BlockFetch2;
+ u32 BlockFetch3;
+ u32 BlockFetch4;
+ u32 BlockFetch5;
+ u32 BlockFetch6;
+ u32 BlockFetch7;
+ u32 RepQSize;
+ u32 RepQCount;
+ u32 RepQCtrAddrLow32;
+ u32 RepQCtrAddrHigh32;
+ u32 RepQAddr0Low32;
+ u32 RepQAddr0High32;
+};
+
+#pragma pack()
+#endif /* CCISS_CMD_H */
diff --git a/drivers/block/cciss_scsi.c b/drivers/block/cciss_scsi.c
new file mode 100644
index 00000000..a552cab8
--- /dev/null
+++ b/drivers/block/cciss_scsi.c
@@ -0,0 +1,1725 @@
+/*
+ * Disk Array driver for HP Smart Array controllers, SCSI Tape module.
+ * (C) Copyright 2001, 2007 Hewlett-Packard Development Company, L.P.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 300, Boston, MA
+ * 02111-1307, USA.
+ *
+ * Questions/Comments/Bugfixes to iss_storagedev@hp.com
+ *
+ * Author: Stephen M. Cameron
+ */
+#ifdef CONFIG_CISS_SCSI_TAPE
+
+/* Here we have code to present the driver as a scsi driver
+ as it is simultaneously presented as a block driver. The
+ reason for doing this is to allow access to SCSI tape drives
+ through the array controller. Note in particular, neither
+ physical nor logical disks are presented through the scsi layer. */
+
+#include <linux/timer.h>
+#include <linux/completion.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+
+#include <asm/atomic.h>
+
+#include <scsi/scsi_cmnd.h>
+#include <scsi/scsi_device.h>
+#include <scsi/scsi_host.h>
+
+#include "cciss_scsi.h"
+
+#define CCISS_ABORT_MSG 0x00
+#define CCISS_RESET_MSG 0x01
+
+static int fill_cmd(ctlr_info_t *h, CommandList_struct *c, __u8 cmd, void *buff,
+ size_t size,
+ __u8 page_code, unsigned char *scsi3addr,
+ int cmd_type);
+
+static CommandList_struct *cmd_alloc(ctlr_info_t *h);
+static CommandList_struct *cmd_special_alloc(ctlr_info_t *h);
+static void cmd_free(ctlr_info_t *h, CommandList_struct *c);
+static void cmd_special_free(ctlr_info_t *h, CommandList_struct *c);
+
+static int cciss_scsi_proc_info(
+ struct Scsi_Host *sh,
+ char *buffer, /* data buffer */
+ char **start, /* where data in buffer starts */
+ off_t offset, /* offset from start of imaginary file */
+ int length, /* length of data in buffer */
+ int func); /* 0 == read, 1 == write */
+
+static int cciss_scsi_queue_command (struct Scsi_Host *h,
+ struct scsi_cmnd *cmd);
+static int cciss_eh_device_reset_handler(struct scsi_cmnd *);
+static int cciss_eh_abort_handler(struct scsi_cmnd *);
+
+static struct cciss_scsi_hba_t ccissscsi[MAX_CTLR] = {
+ { .name = "cciss0", .ndevices = 0 },
+ { .name = "cciss1", .ndevices = 0 },
+ { .name = "cciss2", .ndevices = 0 },
+ { .name = "cciss3", .ndevices = 0 },
+ { .name = "cciss4", .ndevices = 0 },
+ { .name = "cciss5", .ndevices = 0 },
+ { .name = "cciss6", .ndevices = 0 },
+ { .name = "cciss7", .ndevices = 0 },
+};
+
+static struct scsi_host_template cciss_driver_template = {
+ .module = THIS_MODULE,
+ .name = "cciss",
+ .proc_name = "cciss",
+ .proc_info = cciss_scsi_proc_info,
+ .queuecommand = cciss_scsi_queue_command,
+ .this_id = 7,
+ .cmd_per_lun = 1,
+ .use_clustering = DISABLE_CLUSTERING,
+ /* Can't have eh_bus_reset_handler or eh_host_reset_handler for cciss */
+ .eh_device_reset_handler= cciss_eh_device_reset_handler,
+ .eh_abort_handler = cciss_eh_abort_handler,
+};
+
+#pragma pack(1)
+
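+/* Padding keeps sizeof(struct cciss_scsi_cmd_stack_elem_t) a multiple of
+ * COMMANDLIST_ALIGNMENT; see the BUILD_BUG_ON in scsi_cmd_stack_setup().
+ */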
+#define SCSI_PAD_32 8
+#define SCSI_PAD_64 8
+
+struct cciss_scsi_cmd_stack_elem_t {
+ CommandList_struct cmd;
+ ErrorInfo_struct Err;
+ __u32 busaddr;
+ int cmdindex;
+ u8 pad[IS_32_BIT * SCSI_PAD_32 + IS_64_BIT * SCSI_PAD_64];
+};
+
+#pragma pack()
+
+#pragma pack(1)
+struct cciss_scsi_cmd_stack_t {
+ struct cciss_scsi_cmd_stack_elem_t *pool;
+ struct cciss_scsi_cmd_stack_elem_t **elem;
+ dma_addr_t cmd_pool_handle;
+ int top;
+ int nelems;
+};
+#pragma pack()
+
+struct cciss_scsi_adapter_data_t {
+ struct Scsi_Host *scsi_host;
+ struct cciss_scsi_cmd_stack_t cmd_stack;
+ SGDescriptor_struct **cmd_sg_list;
+ int registered;
+ spinlock_t lock; // to protect ccissscsi[ctlr];
+};
+
+#define CPQ_TAPE_LOCK(h, flags) spin_lock_irqsave( \
+ &h->scsi_ctlr->lock, flags);
+#define CPQ_TAPE_UNLOCK(h, flags) spin_unlock_irqrestore( \
+ &h->scsi_ctlr->lock, flags);
+
+static CommandList_struct *
+scsi_cmd_alloc(ctlr_info_t *h)
+{
+ /* assume only one process in here at a time, locking done by caller. */
+ /* use h->lock */
+ /* might be better to rewrite how we allocate scsi commands in a way that */
+ /* needs no locking at all. */
+
+ /* take the top memory chunk off the stack and return it, if any. */
+ struct cciss_scsi_cmd_stack_elem_t *c;
+ struct cciss_scsi_adapter_data_t *sa;
+ struct cciss_scsi_cmd_stack_t *stk;
+ u64bit temp64;
+
+ sa = h->scsi_ctlr;
+ stk = &sa->cmd_stack;
+
+ if (stk->top < 0)
+ return NULL;
+ c = stk->elem[stk->top];
+ /* memset(c, 0, sizeof(*c)); */
+ memset(&c->cmd, 0, sizeof(c->cmd));
+ memset(&c->Err, 0, sizeof(c->Err));
+ /* set physical addr of cmd and addr of scsi parameters */
+ c->cmd.busaddr = c->busaddr;
+ c->cmd.cmdindex = c->cmdindex;
+ /* (__u32) (stk->cmd_pool_handle +
+ (sizeof(struct cciss_scsi_cmd_stack_elem_t)*stk->top)); */
+
+ temp64.val = (__u64) (c->busaddr + sizeof(CommandList_struct));
+ /* (__u64) (stk->cmd_pool_handle +
+ (sizeof(struct cciss_scsi_cmd_stack_elem_t)*stk->top) +
+ sizeof(CommandList_struct)); */
+ stk->top--;
+ c->cmd.ErrDesc.Addr.lower = temp64.val32.lower;
+ c->cmd.ErrDesc.Addr.upper = temp64.val32.upper;
+ c->cmd.ErrDesc.Len = sizeof(ErrorInfo_struct);
+
+ c->cmd.ctlr = h->ctlr;
+ c->cmd.err_info = &c->Err;
+
+ return (CommandList_struct *) c;
+}
+
+static void
+scsi_cmd_free(ctlr_info_t *h, CommandList_struct *c)
+{
+ /* assume only one process in here at a time, locking done by caller. */
+ /* use h->lock */
+ /* drop the free memory chunk on top of the stack. */
+
+ struct cciss_scsi_adapter_data_t *sa;
+ struct cciss_scsi_cmd_stack_t *stk;
+
+ sa = h->scsi_ctlr;
+ stk = &sa->cmd_stack;
+ stk->top++;
+ if (stk->top >= stk->nelems) {
+ dev_err(&h->pdev->dev,
+ "scsi_cmd_free called too many times.\n");
+ BUG();
+ }
+ stk->elem[stk->top] = (struct cciss_scsi_cmd_stack_elem_t *) c;
+}
+
+static int
+scsi_cmd_stack_setup(ctlr_info_t *h, struct cciss_scsi_adapter_data_t *sa)
+{
+ int i;
+ struct cciss_scsi_cmd_stack_t *stk;
+ size_t size;
+
+ stk = &sa->cmd_stack;
+ stk->nelems = cciss_tape_cmds + 2;
+ sa->cmd_sg_list = cciss_allocate_sg_chain_blocks(h,
+ h->chainsize, stk->nelems);
+ if (!sa->cmd_sg_list && h->chainsize > 0)
+ return -ENOMEM;
+
+ size = sizeof(struct cciss_scsi_cmd_stack_elem_t) * stk->nelems;
+
+ /* Check alignment, see cciss_cmd.h near CommandList_struct def. */
+ BUILD_BUG_ON((sizeof(*stk->pool) % COMMANDLIST_ALIGNMENT) != 0);
+ /* pci_alloc_consistent guarantees 32-bit DMA address will be used */
+ stk->pool = (struct cciss_scsi_cmd_stack_elem_t *)
+ pci_alloc_consistent(h->pdev, size, &stk->cmd_pool_handle);
+
+ if (stk->pool == NULL) {
+ cciss_free_sg_chain_blocks(sa->cmd_sg_list, stk->nelems);
+ sa->cmd_sg_list = NULL;
+ return -ENOMEM;
+ }
+ stk->elem = kmalloc(sizeof(stk->elem[0]) * stk->nelems, GFP_KERNEL);
+ if (!stk->elem) {
+ pci_free_consistent(h->pdev, size, stk->pool,
+ stk->cmd_pool_handle);
+ return -1;
+ }
+ for (i = 0; i < stk->nelems; i++) {
+ stk->elem[i] = &stk->pool[i];
+ stk->elem[i]->busaddr = (__u32) (stk->cmd_pool_handle +
+ (sizeof(struct cciss_scsi_cmd_stack_elem_t) * i));
+ stk->elem[i]->cmdindex = i;
+ }
+ stk->top = stk->nelems-1;
+ return 0;
+}
+
+static void
+scsi_cmd_stack_free(ctlr_info_t *h)
+{
+ struct cciss_scsi_adapter_data_t *sa;
+ struct cciss_scsi_cmd_stack_t *stk;
+ size_t size;
+
+ sa = h->scsi_ctlr;
+ stk = &sa->cmd_stack;
+ if (stk->top != stk->nelems-1) {
+ dev_warn(&h->pdev->dev,
+ "bug: %d scsi commands are still outstanding.\n",
+ stk->nelems - stk->top);
+ }
+ size = sizeof(struct cciss_scsi_cmd_stack_elem_t) * stk->nelems;
+
+ pci_free_consistent(h->pdev, size, stk->pool, stk->cmd_pool_handle);
+ stk->pool = NULL;
+ cciss_free_sg_chain_blocks(sa->cmd_sg_list, stk->nelems);
+ kfree(stk->elem);
+ stk->elem = NULL;
+}
+
+#if 0
+static int xmargin=8;
+static int amargin=60;
+
+static void
+print_bytes (unsigned char *c, int len, int hex, int ascii)
+{
+
+ int i;
+ unsigned char *x;
+
+ if (hex)
+ {
+ x = c;
+ for (i=0;i<len;i++)
+ {
+ if ((i % xmargin) == 0 && i>0) printk("\n");
+ if ((i % xmargin) == 0) printk("0x%04x:", i);
+ printk(" %02x", *x);
+ x++;
+ }
+ printk("\n");
+ }
+ if (ascii)
+ {
+ x = c;
+ for (i=0;i<len;i++)
+ {
+ if ((i % amargin) == 0 && i>0) printk("\n");
+ if ((i % amargin) == 0) printk("0x%04x:", i);
+ if (*x > 26 && *x < 128) printk("%c", *x);
+ else printk(".");
+ x++;
+ }
+ printk("\n");
+ }
+}
+
+static void
+print_cmd(CommandList_struct *cp)
+{
+ printk("queue:%d\n", cp->Header.ReplyQueue);
+ printk("sglist:%d\n", cp->Header.SGList);
+ printk("sgtot:%d\n", cp->Header.SGTotal);
+ printk("Tag:0x%08x/0x%08x\n", cp->Header.Tag.upper,
+ cp->Header.Tag.lower);
+ printk("LUN:0x%02x%02x%02x%02x%02x%02x%02x%02x\n",
+ cp->Header.LUN.LunAddrBytes[0],
+ cp->Header.LUN.LunAddrBytes[1],
+ cp->Header.LUN.LunAddrBytes[2],
+ cp->Header.LUN.LunAddrBytes[3],
+ cp->Header.LUN.LunAddrBytes[4],
+ cp->Header.LUN.LunAddrBytes[5],
+ cp->Header.LUN.LunAddrBytes[6],
+ cp->Header.LUN.LunAddrBytes[7]);
+ printk("CDBLen:%d\n", cp->Request.CDBLen);
+ printk("Type:%d\n",cp->Request.Type.Type);
+ printk("Attr:%d\n",cp->Request.Type.Attribute);
+ printk(" Dir:%d\n",cp->Request.Type.Direction);
+ printk("Timeout:%d\n",cp->Request.Timeout);
+ printk( "CDB: %02x %02x %02x %02x %02x %02x %02x %02x"
+ " %02x %02x %02x %02x %02x %02x %02x %02x\n",
+ cp->Request.CDB[0], cp->Request.CDB[1],
+ cp->Request.CDB[2], cp->Request.CDB[3],
+ cp->Request.CDB[4], cp->Request.CDB[5],
+ cp->Request.CDB[6], cp->Request.CDB[7],
+ cp->Request.CDB[8], cp->Request.CDB[9],
+ cp->Request.CDB[10], cp->Request.CDB[11],
+ cp->Request.CDB[12], cp->Request.CDB[13],
+ cp->Request.CDB[14], cp->Request.CDB[15]),
+ printk("edesc.Addr: 0x%08x/0%08x, Len = %d\n",
+ cp->ErrDesc.Addr.upper, cp->ErrDesc.Addr.lower,
+ cp->ErrDesc.Len);
+ printk("sgs..........Errorinfo:\n");
+ printk("scsistatus:%d\n", cp->err_info->ScsiStatus);
+ printk("senselen:%d\n", cp->err_info->SenseLen);
+ printk("cmd status:%d\n", cp->err_info->CommandStatus);
+ printk("resid cnt:%d\n", cp->err_info->ResidualCnt);
+ printk("offense size:%d\n", cp->err_info->MoreErrInfo.Invalid_Cmd.offense_size);
+ printk("offense byte:%d\n", cp->err_info->MoreErrInfo.Invalid_Cmd.offense_num);
+ printk("offense value:%d\n", cp->err_info->MoreErrInfo.Invalid_Cmd.offense_value);
+
+}
+
+#endif
+
+static int
+find_bus_target_lun(ctlr_info_t *h, int *bus, int *target, int *lun)
+{
+ /* finds an unused bus, target, lun for a new device */
+ /* assumes h->scsi_ctlr->lock is held */
+ int i, found=0;
+ unsigned char target_taken[CCISS_MAX_SCSI_DEVS_PER_HBA];
+
+ memset(&target_taken[0], 0, CCISS_MAX_SCSI_DEVS_PER_HBA);
+
+ target_taken[SELF_SCSI_ID] = 1;
+ for (i = 0; i < ccissscsi[h->ctlr].ndevices; i++)
+ target_taken[ccissscsi[h->ctlr].dev[i].target] = 1;
+
+ for (i = 0; i < CCISS_MAX_SCSI_DEVS_PER_HBA; i++) {
+ if (!target_taken[i]) {
+ *bus = 0; *target=i; *lun = 0; found=1;
+ break;
+ }
+ }
+ return (!found);
+}
+struct scsi2map {
+ char scsi3addr[8];
+ int bus, target, lun;
+};
+
+static int
+cciss_scsi_add_entry(ctlr_info_t *h, int hostno,
+ struct cciss_scsi_dev_t *device,
+ struct scsi2map *added, int *nadded)
+{
+ /* assumes h->scsi_ctlr->lock is held */
+ int n = ccissscsi[h->ctlr].ndevices;
+ struct cciss_scsi_dev_t *sd;
+ int i, bus, target, lun;
+ unsigned char addr1[8], addr2[8];
+
+ if (n >= CCISS_MAX_SCSI_DEVS_PER_HBA) {
+ dev_warn(&h->pdev->dev, "Too many devices, "
+ "some will be inaccessible.\n");
+ return -1;
+ }
+
+ bus = target = -1;
+ lun = 0;
+ /* Is this device a non-zero lun of a multi-lun device */
+ /* byte 4 of the 8-byte LUN addr will contain the logical unit no. */
+ if (device->scsi3addr[4] != 0) {
+ /* Search through our list and find the device which */
+ /* has the same 8 byte LUN address, excepting byte 4. */
+ /* Assign the same bus and target for this new LUN. */
+ /* Use the logical unit number from the firmware. */
+ memcpy(addr1, device->scsi3addr, 8);
+ addr1[4] = 0;
+ for (i = 0; i < n; i++) {
+ sd = &ccissscsi[h->ctlr].dev[i];
+ memcpy(addr2, sd->scsi3addr, 8);
+ addr2[4] = 0;
+ /* differ only in byte 4? */
+ if (memcmp(addr1, addr2, 8) == 0) {
+ bus = sd->bus;
+ target = sd->target;
+ lun = device->scsi3addr[4];
+ break;
+ }
+ }
+ }
+
+ sd = &ccissscsi[h->ctlr].dev[n];
+ if (lun == 0) {
+ if (find_bus_target_lun(h,
+ &sd->bus, &sd->target, &sd->lun) != 0)
+ return -1;
+ } else {
+ sd->bus = bus;
+ sd->target = target;
+ sd->lun = lun;
+ }
+ added[*nadded].bus = sd->bus;
+ added[*nadded].target = sd->target;
+ added[*nadded].lun = sd->lun;
+ (*nadded)++;
+
+ memcpy(sd->scsi3addr, device->scsi3addr, 8);
+ memcpy(sd->vendor, device->vendor, sizeof(sd->vendor));
+ memcpy(sd->revision, device->revision, sizeof(sd->revision));
+ memcpy(sd->device_id, device->device_id, sizeof(sd->device_id));
+ sd->devtype = device->devtype;
+
+ ccissscsi[h->ctlr].ndevices++;
+
+ /* initially, (before registering with scsi layer) we don't
+ know our hostno and we don't want to print anything first
+ time anyway (the scsi layer's inquiries will show that info) */
+ if (hostno != -1)
+ dev_info(&h->pdev->dev, "%s device c%db%dt%dl%d added.\n",
+ scsi_device_type(sd->devtype), hostno,
+ sd->bus, sd->target, sd->lun);
+ return 0;
+}
+
+static void
+cciss_scsi_remove_entry(ctlr_info_t *h, int hostno, int entry,
+ struct scsi2map *removed, int *nremoved)
+{
+ /* assumes h->scsi_ctlr->lock is held */
+ int i;
+ struct cciss_scsi_dev_t sd;
+
+ if (entry < 0 || entry >= CCISS_MAX_SCSI_DEVS_PER_HBA) return;
+ sd = ccissscsi[h->ctlr].dev[entry];
+ removed[*nremoved].bus = sd.bus;
+ removed[*nremoved].target = sd.target;
+ removed[*nremoved].lun = sd.lun;
+ (*nremoved)++;
+ for (i = entry; i < ccissscsi[h->ctlr].ndevices-1; i++)
+ ccissscsi[h->ctlr].dev[i] = ccissscsi[h->ctlr].dev[i+1];
+ ccissscsi[h->ctlr].ndevices--;
+ dev_info(&h->pdev->dev, "%s device c%db%dt%dl%d removed.\n",
+ scsi_device_type(sd.devtype), hostno,
+ sd.bus, sd.target, sd.lun);
+}
+
+
+#define SCSI3ADDR_EQ(a,b) ( \
+ (a)[7] == (b)[7] && \
+ (a)[6] == (b)[6] && \
+ (a)[5] == (b)[5] && \
+ (a)[4] == (b)[4] && \
+ (a)[3] == (b)[3] && \
+ (a)[2] == (b)[2] && \
+ (a)[1] == (b)[1] && \
+ (a)[0] == (b)[0])
+
+static void fixup_botched_add(ctlr_info_t *h, char *scsi3addr)
+{
+ /* called when scsi_add_device fails in order to re-adjust */
+ /* ccissscsi[] to match the mid layer's view. */
+ unsigned long flags;
+ int i, j;
+ CPQ_TAPE_LOCK(h, flags);
+ for (i = 0; i < ccissscsi[h->ctlr].ndevices; i++) {
+ if (memcmp(scsi3addr,
+ ccissscsi[h->ctlr].dev[i].scsi3addr, 8) == 0) {
+ for (j = i; j < ccissscsi[h->ctlr].ndevices-1; j++)
+ ccissscsi[h->ctlr].dev[j] =
+ ccissscsi[h->ctlr].dev[j+1];
+ ccissscsi[h->ctlr].ndevices--;
+ break;
+ }
+ }
+ CPQ_TAPE_UNLOCK(h, flags);
+}
+
+static int device_is_the_same(struct cciss_scsi_dev_t *dev1,
+ struct cciss_scsi_dev_t *dev2)
+{
+ return dev1->devtype == dev2->devtype &&
+ memcmp(dev1->scsi3addr, dev2->scsi3addr,
+ sizeof(dev1->scsi3addr)) == 0 &&
+ memcmp(dev1->device_id, dev2->device_id,
+ sizeof(dev1->device_id)) == 0 &&
+ memcmp(dev1->vendor, dev2->vendor,
+ sizeof(dev1->vendor)) == 0 &&
+ memcmp(dev1->model, dev2->model,
+ sizeof(dev1->model)) == 0 &&
+ memcmp(dev1->revision, dev2->revision,
+ sizeof(dev1->revision)) == 0;
+}
+
+static int
+adjust_cciss_scsi_table(ctlr_info_t *h, int hostno,
+ struct cciss_scsi_dev_t sd[], int nsds)
+{
+ /* sd contains scsi3 addresses and devtypes, but
+ bus, target and lun are not filled in. This function
+ takes what's in sd to be the current and adjusts
+ ccissscsi[] to be in line with what's in sd. */
+
+ int i,j, found, changes=0;
+ struct cciss_scsi_dev_t *csd;
+ unsigned long flags;
+ struct scsi2map *added, *removed;
+ int nadded, nremoved;
+ struct Scsi_Host *sh = NULL;
+
+ added = kzalloc(sizeof(*added) * CCISS_MAX_SCSI_DEVS_PER_HBA,
+ GFP_KERNEL);
+ removed = kzalloc(sizeof(*removed) * CCISS_MAX_SCSI_DEVS_PER_HBA,
+ GFP_KERNEL);
+
+ if (!added || !removed) {
+ dev_warn(&h->pdev->dev,
+ "Out of memory in adjust_cciss_scsi_table\n");
+ goto free_and_out;
+ }
+
+ CPQ_TAPE_LOCK(h, flags);
+
+ if (hostno != -1) /* if it's not the first time... */
+ sh = h->scsi_ctlr->scsi_host;
+
+ /* find any devices in ccissscsi[] that are not in
+ sd[] and remove them from ccissscsi[] */
+
+ i = 0;
+ nremoved = 0;
+ nadded = 0;
+ while (i < ccissscsi[h->ctlr].ndevices) {
+ csd = &ccissscsi[h->ctlr].dev[i];
+ found=0;
+ for (j=0;j<nsds;j++) {
+ if (SCSI3ADDR_EQ(sd[j].scsi3addr,
+ csd->scsi3addr)) {
+ if (device_is_the_same(&sd[j], csd))
+ found=2;
+ else
+ found=1;
+ break;
+ }
+ }
+
+ if (found == 0) { /* device no longer present. */
+ changes++;
+ cciss_scsi_remove_entry(h, hostno, i,
+ removed, &nremoved);
+ /* remove ^^^, hence i not incremented */
+ } else if (found == 1) { /* device is different in some way */
+ changes++;
+ dev_info(&h->pdev->dev,
+ "device c%db%dt%dl%d has changed.\n",
+ hostno, csd->bus, csd->target, csd->lun);
+ cciss_scsi_remove_entry(h, hostno, i,
+ removed, &nremoved);
+ /* remove ^^^, hence i not incremented */
+ if (cciss_scsi_add_entry(h, hostno, &sd[j],
+ added, &nadded) != 0)
+ /* we just removed one, so add can't fail. */
+ BUG();
+ csd->devtype = sd[j].devtype;
+ memcpy(csd->device_id, sd[j].device_id,
+ sizeof(csd->device_id));
+ memcpy(csd->vendor, sd[j].vendor,
+ sizeof(csd->vendor));
+ memcpy(csd->model, sd[j].model,
+ sizeof(csd->model));
+ memcpy(csd->revision, sd[j].revision,
+ sizeof(csd->revision));
+ } else /* device is same as it ever was, */
+ i++; /* so just move along. */
+ }
+
+ /* Now, make sure every device listed in sd[] is also
+ listed in ccissscsi[], adding them if they aren't found */
+
+ for (i=0;i<nsds;i++) {
+ found=0;
+ for (j = 0; j < ccissscsi[h->ctlr].ndevices; j++) {
+ csd = &ccissscsi[h->ctlr].dev[j];
+ if (SCSI3ADDR_EQ(sd[i].scsi3addr,
+ csd->scsi3addr)) {
+ if (device_is_the_same(&sd[i], csd))
+ found=2; /* found device */
+ else
+ found=1; /* found a bug. */
+ break;
+ }
+ }
+ if (!found) {
+ changes++;
+ if (cciss_scsi_add_entry(h, hostno, &sd[i],
+ added, &nadded) != 0)
+ break;
+ } else if (found == 1) {
+ /* should never happen... */
+ changes++;
+ dev_warn(&h->pdev->dev,
+ "device unexpectedly changed\n");
+ /* but if it does happen, we just ignore that device */
+ }
+ }
+ CPQ_TAPE_UNLOCK(h, flags);
+
+ /* Don't notify scsi mid layer of any changes the first time through */
+ /* (or if there are no changes); scsi_scan_host will do it later the */
+ /* first time through. */
+ if (hostno == -1 || !changes)
+ goto free_and_out;
+
+ /* Notify scsi mid layer of any removed devices */
+ for (i = 0; i < nremoved; i++) {
+ struct scsi_device *sdev =
+ scsi_device_lookup(sh, removed[i].bus,
+ removed[i].target, removed[i].lun);
+ if (sdev != NULL) {
+ scsi_remove_device(sdev);
+ scsi_device_put(sdev);
+ } else {
+ /* We don't expect to get here. */
+ /* future cmds to this device will get selection */
+ /* timeout as if the device was gone. */
+ dev_warn(&h->pdev->dev, "didn't find "
+ "c%db%dt%dl%d\n for removal.",
+ hostno, removed[i].bus,
+ removed[i].target, removed[i].lun);
+ }
+ }
+
+ /* Notify scsi mid layer of any added devices */
+ for (i = 0; i < nadded; i++) {
+ int rc;
+ rc = scsi_add_device(sh, added[i].bus,
+ added[i].target, added[i].lun);
+ if (rc == 0)
+ continue;
+ dev_warn(&h->pdev->dev, "scsi_add_device "
+ "c%db%dt%dl%d failed, device not added.\n",
+ hostno, added[i].bus, added[i].target, added[i].lun);
+ /* now we have to remove it from ccissscsi, */
+ /* since it didn't get added to scsi mid layer */
+ fixup_botched_add(h, added[i].scsi3addr);
+ }
+
+free_and_out:
+ kfree(added);
+ kfree(removed);
+ return 0;
+}
+
+static int
+lookup_scsi3addr(ctlr_info_t *h, int bus, int target, int lun, char *scsi3addr)
+{
+ int i;
+ struct cciss_scsi_dev_t *sd;
+ unsigned long flags;
+
+ CPQ_TAPE_LOCK(h, flags);
+ for (i = 0; i < ccissscsi[h->ctlr].ndevices; i++) {
+ sd = &ccissscsi[h->ctlr].dev[i];
+ if (sd->bus == bus &&
+ sd->target == target &&
+ sd->lun == lun) {
+ memcpy(scsi3addr, &sd->scsi3addr[0], 8);
+ CPQ_TAPE_UNLOCK(h, flags);
+ return 0;
+ }
+ }
+ CPQ_TAPE_UNLOCK(h, flags);
+ return -1;
+}
+
+static void
+cciss_scsi_setup(ctlr_info_t *h)
+{
+ struct cciss_scsi_adapter_data_t * shba;
+
+ ccissscsi[h->ctlr].ndevices = 0;
+ shba = (struct cciss_scsi_adapter_data_t *)
+ kmalloc(sizeof(*shba), GFP_KERNEL);
+ if (shba == NULL)
+ return;
+ shba->scsi_host = NULL;
+ spin_lock_init(&shba->lock);
+ shba->registered = 0;
+ if (scsi_cmd_stack_setup(h, shba) != 0) {
+ kfree(shba);
+ shba = NULL;
+ }
+ h->scsi_ctlr = shba;
+ return;
+}
+
+static void complete_scsi_command(CommandList_struct *c, int timeout,
+ __u32 tag)
+{
+ struct scsi_cmnd *cmd;
+ ctlr_info_t *h;
+ ErrorInfo_struct *ei;
+
+ ei = c->err_info;
+
+ /* First, see if it was a message rather than a command */
+ if (c->Request.Type.Type == TYPE_MSG) {
+ c->cmd_type = CMD_MSG_DONE;
+ return;
+ }
+
+ cmd = (struct scsi_cmnd *) c->scsi_cmd;
+ h = hba[c->ctlr];
+
+ scsi_dma_unmap(cmd);
+ if (c->Header.SGTotal > h->max_cmd_sgentries)
+ cciss_unmap_sg_chain_block(h, c);
+
+ cmd->result = (DID_OK << 16); /* host byte */
+ cmd->result |= (COMMAND_COMPLETE << 8); /* msg byte */
+ /* cmd->result |= (GOOD < 1); */ /* status byte */
+
+ cmd->result |= (ei->ScsiStatus);
+ /* printk("Scsistatus is 0x%02x\n", ei->ScsiStatus); */
+
+ /* copy the sense data whether we need to or not. */
+
+ memcpy(cmd->sense_buffer, ei->SenseInfo,
+ ei->SenseLen > SCSI_SENSE_BUFFERSIZE ?
+ SCSI_SENSE_BUFFERSIZE :
+ ei->SenseLen);
+ scsi_set_resid(cmd, ei->ResidualCnt);
+
+ if(ei->CommandStatus != 0)
+ { /* an error has occurred */
+ switch(ei->CommandStatus)
+ {
+ case CMD_TARGET_STATUS:
+ /* Pass it up to the upper layers... */
+ if( ei->ScsiStatus)
+ {
+#if 0
+ printk(KERN_WARNING "cciss: cmd %p "
+ "has SCSI Status = %x\n",
+ c, ei->ScsiStatus);
+#endif
+ cmd->result |= (ei->ScsiStatus << 1);
+ }
+ else { /* scsi status is zero??? How??? */
+
+ /* Ordinarily, this case should never happen, but there is a bug
+ in some released firmware revisions that allows it to happen
+ if, for example, a 4100 backplane loses power and the tape
+ drive is in it. We assume that it's a fatal error of some
+ kind because we can't show that it wasn't. We will make it
+ look like selection timeout since that is the most common
+ reason for this to occur, and it's severe enough. */
+
+ cmd->result = DID_NO_CONNECT << 16;
+ }
+ break;
+ case CMD_DATA_UNDERRUN: /* let mid layer handle it. */
+ break;
+ case CMD_DATA_OVERRUN:
+ dev_warn(&h->pdev->dev, "%p has"
+ " completed with data overrun "
+ "reported\n", c);
+ break;
+ case CMD_INVALID: {
+ /* print_bytes(c, sizeof(*c), 1, 0);
+ print_cmd(c); */
+ /* We get CMD_INVALID if you address a non-existent tape drive instead
+ of a selection timeout (no response). You will see this if you yank
+ out a tape drive, then try to access it. This is kind of a shame
+ because it means that any other CMD_INVALID (e.g. driver bug) will
+ get interpreted as a missing target. */
+ cmd->result = DID_NO_CONNECT << 16;
+ }
+ break;
+ case CMD_PROTOCOL_ERR:
+ dev_warn(&h->pdev->dev,
+ "%p has protocol error\n", c);
+ break;
+ case CMD_HARDWARE_ERR:
+ cmd->result = DID_ERROR << 16;
+ dev_warn(&h->pdev->dev,
+ "%p had hardware error\n", c);
+ break;
+ case CMD_CONNECTION_LOST:
+ cmd->result = DID_ERROR << 16;
+ dev_warn(&h->pdev->dev,
+ "%p had connection lost\n", c);
+ break;
+ case CMD_ABORTED:
+ cmd->result = DID_ABORT << 16;
+ dev_warn(&h->pdev->dev, "%p was aborted\n", c);
+ break;
+ case CMD_ABORT_FAILED:
+ cmd->result = DID_ERROR << 16;
+ dev_warn(&h->pdev->dev,
+ "%p reports abort failed\n", c);
+ break;
+ case CMD_UNSOLICITED_ABORT:
+ cmd->result = DID_ABORT << 16;
+ dev_warn(&h->pdev->dev, "%p aborted due to an "
+ "unsolicited abort\n", c);
+ break;
+ case CMD_TIMEOUT:
+ cmd->result = DID_TIME_OUT << 16;
+ dev_warn(&h->pdev->dev, "%p timedout\n", c);
+ break;
+ case CMD_UNABORTABLE:
+ cmd->result = DID_ERROR << 16;
+ dev_warn(&h->pdev->dev, "c %p command "
+ "unabortable\n", c);
+ break;
+ default:
+ cmd->result = DID_ERROR << 16;
+ dev_warn(&h->pdev->dev,
+ "%p returned unknown status %x\n", c,
+ ei->CommandStatus);
+ }
+ }
+ cmd->scsi_done(cmd);
+ scsi_cmd_free(h, c);
+}
+
+static int
+cciss_scsi_detect(ctlr_info_t *h)
+{
+ struct Scsi_Host *sh;
+ int error;
+
+ sh = scsi_host_alloc(&cciss_driver_template, sizeof(struct ctlr_info *));
+ if (sh == NULL)
+ goto fail;
+ sh->io_port = 0; // good enough? FIXME,
+ sh->n_io_port = 0; // I don't think we use these two...
+ sh->this_id = SELF_SCSI_ID;
+ sh->can_queue = cciss_tape_cmds;
+ sh->sg_tablesize = h->maxsgentries;
+ sh->max_cmd_len = MAX_COMMAND_SIZE;
+ sh->max_sectors = h->cciss_max_sectors;
+
+ ((struct cciss_scsi_adapter_data_t *)
+ h->scsi_ctlr)->scsi_host = sh;
+ sh->hostdata[0] = (unsigned long) h;
+ sh->irq = h->intr[SIMPLE_MODE_INT];
+ sh->unique_id = sh->irq;
+ error = scsi_add_host(sh, &h->pdev->dev);
+ if (error)
+ goto fail_host_put;
+ scsi_scan_host(sh);
+ return 1;
+
+ fail_host_put:
+ scsi_host_put(sh);
+ fail:
+ return 0;
+}
+
+static void
+cciss_unmap_one(struct pci_dev *pdev,
+ CommandList_struct *c,
+ size_t buflen,
+ int data_direction)
+{
+ u64bit addr64;
+
+ addr64.val32.lower = c->SG[0].Addr.lower;
+ addr64.val32.upper = c->SG[0].Addr.upper;
+ pci_unmap_single(pdev, (dma_addr_t) addr64.val, buflen, data_direction);
+}
+
+static void
+cciss_map_one(struct pci_dev *pdev,
+ CommandList_struct *c,
+ unsigned char *buf,
+ size_t buflen,
+ int data_direction)
+{
+ __u64 addr64;
+
+ addr64 = (__u64) pci_map_single(pdev, buf, buflen, data_direction);
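+ /* Split the 64-bit bus address into the 32-bit lower/upper halves
+ * that the controller's SG descriptor expects. */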
+ c->SG[0].Addr.lower =
+ (__u32) (addr64 & (__u64) 0x00000000FFFFFFFF);
+ c->SG[0].Addr.upper =
+ (__u32) ((addr64 >> 32) & (__u64) 0x00000000FFFFFFFF);
+ c->SG[0].Len = buflen;
+ c->Header.SGList = (__u8) 1; /* no. SGs contig in this cmd */
+ c->Header.SGTotal = (__u16) 1; /* total sgs in this cmd list */
+}
+
+static int
+cciss_scsi_do_simple_cmd(ctlr_info_t *h,
+ CommandList_struct *c,
+ unsigned char *scsi3addr,
+ unsigned char *cdb,
+ unsigned char cdblen,
+ unsigned char *buf, int bufsize,
+ int direction)
+{
+ DECLARE_COMPLETION_ONSTACK(wait);
+
+ c->cmd_type = CMD_IOCTL_PEND; /* treat this like an ioctl */
+ c->scsi_cmd = NULL;
+ c->Header.ReplyQueue = 0; /* unused in simple mode */
+ memcpy(&c->Header.LUN, scsi3addr, sizeof(c->Header.LUN));
+ c->Header.Tag.lower = c->busaddr; /* Use k. address of cmd as tag */
+ // Fill in the request block...
+
+ /* printk("Using scsi3addr 0x%02x%0x2%0x2%0x2%0x2%0x2%0x2%0x2\n",
+ scsi3addr[0], scsi3addr[1], scsi3addr[2], scsi3addr[3],
+ scsi3addr[4], scsi3addr[5], scsi3addr[6], scsi3addr[7]); */
+
+ memset(c->Request.CDB, 0, sizeof(c->Request.CDB));
+ memcpy(c->Request.CDB, cdb, cdblen);
+ c->Request.Timeout = 0;
+ c->Request.CDBLen = cdblen;
+ c->Request.Type.Type = TYPE_CMD;
+ c->Request.Type.Attribute = ATTR_SIMPLE;
+ c->Request.Type.Direction = direction;
+
+ /* Fill in the SG list and do dma mapping */
+ cciss_map_one(h->pdev, c, (unsigned char *) buf,
+ bufsize, DMA_FROM_DEVICE);
+
+ c->waiting = &wait;
+ enqueue_cmd_and_start_io(h, c);
+ wait_for_completion(&wait);
+
+ /* undo the dma mapping */
+ cciss_unmap_one(h->pdev, c, bufsize, DMA_FROM_DEVICE);
+ return(0);
+}
+
+static void
+cciss_scsi_interpret_error(ctlr_info_t *h, CommandList_struct *c)
+{
+ ErrorInfo_struct *ei;
+
+ ei = c->err_info;
+ switch(ei->CommandStatus)
+ {
+ case CMD_TARGET_STATUS:
+ dev_warn(&h->pdev->dev,
+ "cmd %p has completed with errors\n", c);
+ dev_warn(&h->pdev->dev,
+ "cmd %p has SCSI Status = %x\n",
+ c, ei->ScsiStatus);
+ if (ei->ScsiStatus == 0)
+ dev_warn(&h->pdev->dev,
+ "SCSI status is abnormally zero. "
+ "(probably indicates selection timeout "
+ "reported incorrectly due to a known "
+ "firmware bug, circa July, 2001.)\n");
+ break;
+ case CMD_DATA_UNDERRUN: /* let mid layer handle it. */
+ dev_info(&h->pdev->dev, "UNDERRUN\n");
+ break;
+ case CMD_DATA_OVERRUN:
+ dev_warn(&h->pdev->dev, "%p has"
+ " completed with data overrun "
+ "reported\n", c);
+ break;
+ case CMD_INVALID: {
+ /* controller unfortunately reports SCSI passthru's */
+ /* to non-existent targets as invalid commands. */
+ dev_warn(&h->pdev->dev,
+ "%p is reported invalid (probably means "
+ "target device no longer present)\n", c);
+ /* print_bytes((unsigned char *) c, sizeof(*c), 1, 0);
+ print_cmd(c); */
+ }
+ break;
+ case CMD_PROTOCOL_ERR:
+ dev_warn(&h->pdev->dev, "%p has protocol error\n", c);
+ break;
+ case CMD_HARDWARE_ERR:
+ /* cmd->result = DID_ERROR << 16; */
+ dev_warn(&h->pdev->dev, "%p had hardware error\n", c);
+ break;
+ case CMD_CONNECTION_LOST:
+ dev_warn(&h->pdev->dev, "%p had connection lost\n", c);
+ break;
+ case CMD_ABORTED:
+ dev_warn(&h->pdev->dev, "%p was aborted\n", c);
+ break;
+ case CMD_ABORT_FAILED:
+ dev_warn(&h->pdev->dev,
+ "%p reports abort failed\n", c);
+ break;
+ case CMD_UNSOLICITED_ABORT:
+ dev_warn(&h->pdev->dev,
+ "%p aborted due to an unsolicited abort\n", c);
+ break;
+ case CMD_TIMEOUT:
+ dev_warn(&h->pdev->dev, "%p timedout\n", c);
+ break;
+ case CMD_UNABORTABLE:
+ dev_warn(&h->pdev->dev,
+ "%p unabortable\n", c);
+ break;
+ default:
+ dev_warn(&h->pdev->dev,
+ "%p returned unknown status %x\n",
+ c, ei->CommandStatus);
+ }
+}
+
+static int
+cciss_scsi_do_inquiry(ctlr_info_t *h, unsigned char *scsi3addr,
+ unsigned char page, unsigned char *buf,
+ unsigned char bufsize)
+{
+ int rc;
+ CommandList_struct *c;
+ char cdb[6];
+ ErrorInfo_struct *ei;
+ unsigned long flags;
+
+ spin_lock_irqsave(&h->lock, flags);
+ c = scsi_cmd_alloc(h);
+ spin_unlock_irqrestore(&h->lock, flags);
+
+ if (c == NULL) { /* trouble... */
+ printk("cmd_alloc returned NULL!\n");
+ return -1;
+ }
+
+ ei = c->err_info;
+
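+ /* Build a standard 6-byte INQUIRY CDB: when a VPD page is
+ * requested (page != 0) the EVPD bit in byte 1 is set and the
+ * page code goes in byte 2; byte 4 carries the allocation length. */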
+ cdb[0] = CISS_INQUIRY;
+ cdb[1] = (page != 0);
+ cdb[2] = page;
+ cdb[3] = 0;
+ cdb[4] = bufsize;
+ cdb[5] = 0;
+ rc = cciss_scsi_do_simple_cmd(h, c, scsi3addr, cdb,
+ 6, buf, bufsize, XFER_READ);
+
+ if (rc != 0) return rc; /* something went wrong */
+
+ if (ei->CommandStatus != 0 &&
+ ei->CommandStatus != CMD_DATA_UNDERRUN) {
+ cciss_scsi_interpret_error(h, c);
+ rc = -1;
+ }
+ spin_lock_irqsave(&h->lock, flags);
+ scsi_cmd_free(h, c);
+ spin_unlock_irqrestore(&h->lock, flags);
+ return rc;
+}
+
+/* Get the device id from inquiry page 0x83 */
+static int cciss_scsi_get_device_id(ctlr_info_t *h, unsigned char *scsi3addr,
+ unsigned char *device_id, int buflen)
+{
+ int rc;
+ unsigned char *buf;
+
+ if (buflen > 16)
+ buflen = 16;
+ buf = kzalloc(64, GFP_KERNEL);
+ if (!buf)
+ return -1;
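+ /* Fetch VPD page 0x83; the identifier is taken from byte 8 onward
+ * of the response (the start of the first designator), copying at
+ * most 16 bytes into device_id[]. */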
+ rc = cciss_scsi_do_inquiry(h, scsi3addr, 0x83, buf, 64);
+ if (rc == 0)
+ memcpy(device_id, &buf[8], buflen);
+ kfree(buf);
+ return rc != 0;
+}
+
+static int
+cciss_scsi_do_report_phys_luns(ctlr_info_t *h,
+ ReportLunData_struct *buf, int bufsize)
+{
+ int rc;
+ CommandList_struct *c;
+ unsigned char cdb[12];
+ unsigned char scsi3addr[8];
+ ErrorInfo_struct *ei;
+ unsigned long flags;
+
+ spin_lock_irqsave(&h->lock, flags);
+ c = scsi_cmd_alloc(h);
+ spin_unlock_irqrestore(&h->lock, flags);
+ if (c == NULL) { /* trouble... */
+ printk("cmd_alloc returned NULL!\n");
+ return -1;
+ }
+
+ memset(&scsi3addr[0], 0, 8); /* address the controller */
+ cdb[0] = CISS_REPORT_PHYS;
+ cdb[1] = 0;
+ cdb[2] = 0;
+ cdb[3] = 0;
+ cdb[4] = 0;
+ cdb[5] = 0;
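+ /* The allocation length occupies CDB bytes 6-9, stored big-endian
+ * (most significant byte first). */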
+ cdb[6] = (bufsize >> 24) & 0xFF; //MSB
+ cdb[7] = (bufsize >> 16) & 0xFF;
+ cdb[8] = (bufsize >> 8) & 0xFF;
+ cdb[9] = bufsize & 0xFF;
+ cdb[10] = 0;
+ cdb[11] = 0;
+
+ rc = cciss_scsi_do_simple_cmd(h, c, scsi3addr,
+ cdb, 12,
+ (unsigned char *) buf,
+ bufsize, XFER_READ);
+
+ if (rc != 0) return rc; /* something went wrong */
+
+ ei = c->err_info;
+ if (ei->CommandStatus != 0 &&
+ ei->CommandStatus != CMD_DATA_UNDERRUN) {
+ cciss_scsi_interpret_error(h, c);
+ rc = -1;
+ }
+ spin_lock_irqsave(&h->lock, flags);
+ scsi_cmd_free(h, c);
+ spin_unlock_irqrestore(&h->lock, flags);
+ return rc;
+}
+
+static void
+cciss_update_non_disk_devices(ctlr_info_t *h, int hostno)
+{
+ /* The idea here is that we could get notified from /proc
+ that some devices have changed, so we do a report
+ physical luns cmd, and adjust our list of devices
+ accordingly. (We can't rely on the scsi mid-layer just
+ doing inquiries, because the "busses" that the scsi
+ mid-layer probes are totally fabricated by this driver,
+ so new devices wouldn't show up.)
+
+ The scsi3addrs of devices won't change so long as the
+ adapter is not reset. That means we can rescan and
+ tell which devices we already know about, vs. new
+ devices, vs. disappearing devices.
+
+ Also, if you yank out a tape drive, then put a disk
+ in its place (say, a configured volume from another
+ array controller, for instance), _don't_ poke this driver
+ (so it thinks it's still a tape), but _do_ poke the scsi
+ mid layer, so it does an inquiry... the scsi mid layer
+ will see the physical disk. This would be bad. Need to
+ think about how to prevent that. One idea would be to
+ snoop all scsi responses and if an inquiry response comes
+ back that reports a disk, chuck it and return selection
+ timeout instead and adjust our table... Not sure I like
+ that though.
+
+ */
+#define OBDR_TAPE_INQ_SIZE 49
+#define OBDR_TAPE_SIG "$DR-10"
+ ReportLunData_struct *ld_buff;
+ unsigned char *inq_buff;
+ unsigned char scsi3addr[8];
+ __u32 num_luns=0;
+ unsigned char *ch;
+ struct cciss_scsi_dev_t *currentsd, *this_device;
+ int ncurrent=0;
+ int reportlunsize = sizeof(*ld_buff) + CISS_MAX_PHYS_LUN * 8;
+ int i;
+
+ ld_buff = kzalloc(reportlunsize, GFP_KERNEL);
+ inq_buff = kmalloc(OBDR_TAPE_INQ_SIZE, GFP_KERNEL);
+ currentsd = kzalloc(sizeof(*currentsd) *
+ (CCISS_MAX_SCSI_DEVS_PER_HBA+1), GFP_KERNEL);
+ if (ld_buff == NULL || inq_buff == NULL || currentsd == NULL) {
+ printk(KERN_ERR "cciss: out of memory\n");
+ goto out;
+ }
+ this_device = &currentsd[CCISS_MAX_SCSI_DEVS_PER_HBA];
+ if (cciss_scsi_do_report_phys_luns(h, ld_buff, reportlunsize) == 0) {
+ ch = &ld_buff->LUNListLength[0];
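+ /* LUNListLength is a big-endian byte count; each physical LUN
+ * entry is 8 bytes, so dividing by 8 yields the number of LUNs. */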
+ num_luns = ((ch[0]<<24) | (ch[1]<<16) | (ch[2]<<8) | ch[3]) / 8;
+ if (num_luns > CISS_MAX_PHYS_LUN) {
+ printk(KERN_WARNING
+ "cciss: Maximum physical LUNs (%d) exceeded. "
+ "%d LUNs ignored.\n", CISS_MAX_PHYS_LUN,
+ num_luns - CISS_MAX_PHYS_LUN);
+ num_luns = CISS_MAX_PHYS_LUN;
+ }
+ }
+ else {
+ printk(KERN_ERR "cciss: Report physical LUNs failed.\n");
+ goto out;
+ }
+
+
+ /* adjust our table of devices */
+ for (i = 0; i < num_luns; i++) {
+ /* for each physical lun, do an inquiry */
+ if (ld_buff->LUN[i][3] & 0xC0) continue;
+ memset(inq_buff, 0, OBDR_TAPE_INQ_SIZE);
+ memcpy(&scsi3addr[0], &ld_buff->LUN[i][0], 8);
+
+ if (cciss_scsi_do_inquiry(h, scsi3addr, 0, inq_buff,
+ (unsigned char) OBDR_TAPE_INQ_SIZE) != 0)
+ /* Inquiry failed (msg printed already) */
+ continue; /* so we will skip this device. */
+
+ this_device->devtype = (inq_buff[0] & 0x1f);
+ this_device->bus = -1;
+ this_device->target = -1;
+ this_device->lun = -1;
+ memcpy(this_device->scsi3addr, scsi3addr, 8);
+ memcpy(this_device->vendor, &inq_buff[8],
+ sizeof(this_device->vendor));
+ memcpy(this_device->model, &inq_buff[16],
+ sizeof(this_device->model));
+ memcpy(this_device->revision, &inq_buff[32],
+ sizeof(this_device->revision));
+ memset(this_device->device_id, 0,
+ sizeof(this_device->device_id));
+ cciss_scsi_get_device_id(h, scsi3addr,
+ this_device->device_id, sizeof(this_device->device_id));
+
+ switch (this_device->devtype)
+ {
+ case 0x05: /* CD-ROM */ {
+
+ /* We don't *really* support actual CD-ROM devices,
+ * just this "One Button Disaster Recovery" tape drive
+ * which temporarily pretends to be a CD-ROM drive.
+ * So we check that the device is really an OBDR tape
+ * device by checking for "$DR-10" in bytes 43-48 of
+ * the inquiry data.
+ */
+ char obdr_sig[7];
+
+ strncpy(obdr_sig, &inq_buff[43], 6);
+ obdr_sig[6] = '\0';
+ if (strncmp(obdr_sig, OBDR_TAPE_SIG, 6) != 0)
+ /* Not OBDR device, ignore it. */
+ break;
+ }
+ /* fall through . . . */
+ case 0x01: /* sequential access, (tape) */
+ case 0x08: /* medium changer */
+ if (ncurrent >= CCISS_MAX_SCSI_DEVS_PER_HBA) {
+ printk(KERN_INFO "cciss%d: %s ignored, "
+ "too many devices.\n", h->ctlr,
+ scsi_device_type(this_device->devtype));
+ break;
+ }
+ currentsd[ncurrent] = *this_device;
+ ncurrent++;
+ break;
+ default:
+ break;
+ }
+ }
+
+ adjust_cciss_scsi_table(h, hostno, currentsd, ncurrent);
+out:
+ kfree(inq_buff);
+ kfree(ld_buff);
+ kfree(currentsd);
+ return;
+}
+
+static int
+is_keyword(char *ptr, int len, char *verb) // Thanks to ncr53c8xx.c
+{
+ int verb_len = strlen(verb);
+ if (len >= verb_len && !memcmp(verb,ptr,verb_len))
+ return verb_len;
+ else
+ return 0;
+}
+
+static int
+cciss_scsi_user_command(ctlr_info_t *h, int hostno, char *buffer, int length)
+{
+ int arg_len;
+
+ if ((arg_len = is_keyword(buffer, length, "rescan")) != 0)
+ cciss_update_non_disk_devices(h, hostno);
+ else
+ return -EINVAL;
+ return length;
+}
+
+
+static int
+cciss_scsi_proc_info(struct Scsi_Host *sh,
+ char *buffer, /* data buffer */
+ char **start, /* where data in buffer starts */
+ off_t offset, /* offset from start of imaginary file */
+ int length, /* length of data in buffer */
+ int func) /* 0 == read, 1 == write */
+{
+
+ int buflen, datalen;
+ ctlr_info_t *h;
+ int i;
+
+ h = (ctlr_info_t *) sh->hostdata[0];
+ if (h == NULL) /* This really shouldn't ever happen. */
+ return -EINVAL;
+
+ if (func == 0) { /* User is reading from /proc/scsi/ciss*?/?* */
+ buflen = sprintf(buffer, "cciss%d: SCSI host: %d\n",
+ h->ctlr, sh->host_no);
+
+ /* this information is needed by apps to know which cciss
+ device corresponds to which scsi host number without
+ having to open a scsi target device node. The device
+ information is not a duplicate of /proc/scsi/scsi because
+ the two may be out of sync due to scsi hotplug; rather,
+ this info lets an app figure out how to get them
+ back in sync. */
+
+ for (i = 0; i < ccissscsi[h->ctlr].ndevices; i++) {
+ struct cciss_scsi_dev_t *sd =
+ &ccissscsi[h->ctlr].dev[i];
+ buflen += sprintf(&buffer[buflen], "c%db%dt%dl%d %02d "
+ "0x%02x%02x%02x%02x%02x%02x%02x%02x\n",
+ sh->host_no, sd->bus, sd->target, sd->lun,
+ sd->devtype,
+ sd->scsi3addr[0], sd->scsi3addr[1],
+ sd->scsi3addr[2], sd->scsi3addr[3],
+ sd->scsi3addr[4], sd->scsi3addr[5],
+ sd->scsi3addr[6], sd->scsi3addr[7]);
+ }
+ datalen = buflen - offset;
+ if (datalen < 0) { /* they're reading past EOF. */
+ datalen = 0;
+ *start = buffer+buflen;
+ } else
+ *start = buffer + offset;
+ return(datalen);
+ } else /* User is writing to /proc/scsi/cciss*?/?* ... */
+ return cciss_scsi_user_command(h, sh->host_no,
+ buffer, length);
+}
+
+/* cciss_scatter_gather takes a struct scsi_cmnd, (cmd), and does the pci
+ dma mapping and fills in the scatter gather entries of the
+ cciss command, c. */
+
+static void cciss_scatter_gather(ctlr_info_t *h, CommandList_struct *c,
+ struct scsi_cmnd *cmd)
+{
+ unsigned int len;
+ struct scatterlist *sg;
+ __u64 addr64;
+ int request_nsgs, i, chained, sg_index;
+ struct cciss_scsi_adapter_data_t *sa = h->scsi_ctlr;
+ SGDescriptor_struct *curr_sg;
+
+ BUG_ON(scsi_sg_count(cmd) > h->maxsgentries);
+
+ chained = 0;
+ sg_index = 0;
+ curr_sg = c->SG;
+ request_nsgs = scsi_dma_map(cmd);
+ if (request_nsgs) {
+ scsi_for_each_sg(cmd, sg, request_nsgs, i) {
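+ /* Once the last slot in the command's embedded SG array is
+ * reached and more entries remain, switch to this command's
+ * pre-allocated chain block and continue filling from index 0. */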
+ if (sg_index + 1 == h->max_cmd_sgentries &&
+ !chained && request_nsgs - i > 1) {
+ chained = 1;
+ sg_index = 0;
+ curr_sg = sa->cmd_sg_list[c->cmdindex];
+ }
+ addr64 = (__u64) sg_dma_address(sg);
+ len = sg_dma_len(sg);
+ curr_sg[sg_index].Addr.lower =
+ (__u32) (addr64 & 0x0FFFFFFFFULL);
+ curr_sg[sg_index].Addr.upper =
+ (__u32) ((addr64 >> 32) & 0x0FFFFFFFFULL);
+ curr_sg[sg_index].Len = len;
+ curr_sg[sg_index].Ext = 0;
+ ++sg_index;
+ }
+ if (chained)
+ cciss_map_sg_chain_block(h, c,
+ sa->cmd_sg_list[c->cmdindex],
+ (request_nsgs - (h->max_cmd_sgentries - 1)) *
+ sizeof(SGDescriptor_struct));
+ }
+ /* track how many SG entries we are using */
+ if (request_nsgs > h->maxSG)
+ h->maxSG = request_nsgs;
+ c->Header.SGTotal = (u16) request_nsgs + chained;
+ if (request_nsgs > h->max_cmd_sgentries)
+ c->Header.SGList = h->max_cmd_sgentries;
+ else
+ c->Header.SGList = c->Header.SGTotal;
+ return;
+}
+
+
+static int
+cciss_scsi_queue_command_lck(struct scsi_cmnd *cmd, void (*done)(struct scsi_cmnd *))
+{
+ ctlr_info_t *h;
+ int rc;
+ unsigned char scsi3addr[8];
+ CommandList_struct *c;
+ unsigned long flags;
+
+ // Get the ptr to our adapter structure (hba[i]) out of cmd->host.
+ // We violate cmd->host privacy here. (Is there another way?)
+ h = (ctlr_info_t *) cmd->device->host->hostdata[0];
+
+ rc = lookup_scsi3addr(h, cmd->device->channel, cmd->device->id,
+ cmd->device->lun, scsi3addr);
+ if (rc != 0) {
+ /* the scsi nexus does not match any that we presented... */
+ /* pretend to mid layer that we got selection timeout */
+ cmd->result = DID_NO_CONNECT << 16;
+ done(cmd);
+ /* we might want to think about registering controller itself
+ as a processor device on the bus so sg binds to it. */
+ return 0;
+ }
+
+ /* Ok, we have a reasonable scsi nexus, so send the cmd down, and
+ see what the device thinks of it. */
+
+ spin_lock_irqsave(&h->lock, flags);
+ c = scsi_cmd_alloc(h);
+ spin_unlock_irqrestore(&h->lock, flags);
+ if (c == NULL) { /* trouble... */
+ dev_warn(&h->pdev->dev, "scsi_cmd_alloc returned NULL!\n");
+ /* FIXME: next 3 lines are -> BAD! <- */
+ cmd->result = DID_NO_CONNECT << 16;
+ done(cmd);
+ return 0;
+ }
+
+ // Fill in the command list header
+
+ cmd->scsi_done = done; // save this for use by completion code
+
+ /* save c in case we have to abort it */
+ cmd->host_scribble = (unsigned char *) c;
+
+ c->cmd_type = CMD_SCSI;
+ c->scsi_cmd = cmd;
+ c->Header.ReplyQueue = 0; /* unused in simple mode */
+ memcpy(&c->Header.LUN.LunAddrBytes[0], &scsi3addr[0], 8);
+ c->Header.Tag.lower = c->busaddr; /* Use k. address of cmd as tag */
+
+ // Fill in the request block...
+
+ c->Request.Timeout = 0;
+ memset(c->Request.CDB, 0, sizeof(c->Request.CDB));
+ BUG_ON(cmd->cmd_len > sizeof(c->Request.CDB));
+ c->Request.CDBLen = cmd->cmd_len;
+ memcpy(c->Request.CDB, cmd->cmnd, cmd->cmd_len);
+ c->Request.Type.Type = TYPE_CMD;
+ c->Request.Type.Attribute = ATTR_SIMPLE;
+ switch(cmd->sc_data_direction)
+ {
+ case DMA_TO_DEVICE:
+ c->Request.Type.Direction = XFER_WRITE;
+ break;
+ case DMA_FROM_DEVICE:
+ c->Request.Type.Direction = XFER_READ;
+ break;
+ case DMA_NONE:
+ c->Request.Type.Direction = XFER_NONE;
+ break;
+ case DMA_BIDIRECTIONAL:
+ // This can happen if a buggy application does a scsi passthru
+ // and sets both inlen and outlen to non-zero. ( see
+ // ../scsi/scsi_ioctl.c:scsi_ioctl_send_command() )
+
+ c->Request.Type.Direction = XFER_RSVD;
+ // This is technically wrong, and cciss controllers should
+ // reject it with CMD_INVALID, which is the most correct
+ // response, but non-fibre backends appear to let it
+ // slide by, and give the same results as if this field
+ // were set correctly. Either way is acceptable for
+ // our purposes here.
+
+ break;
+
+ default:
+ dev_warn(&h->pdev->dev, "unknown data direction: %d\n",
+ cmd->sc_data_direction);
+ BUG();
+ break;
+ }
+ cciss_scatter_gather(h, c, cmd);
+ enqueue_cmd_and_start_io(h, c);
+ /* the cmd'll come back via intr handler in complete_scsi_command() */
+ return 0;
+}
+
+static DEF_SCSI_QCMD(cciss_scsi_queue_command)
+
+static void cciss_unregister_scsi(ctlr_info_t *h)
+{
+ struct cciss_scsi_adapter_data_t *sa;
+ struct cciss_scsi_cmd_stack_t *stk;
+ unsigned long flags;
+
+ /* we are being forcibly unloaded, and may not refuse. */
+
+ spin_lock_irqsave(&h->lock, flags);
+ sa = h->scsi_ctlr;
+ stk = &sa->cmd_stack;
+
+ /* if we weren't ever actually registered, don't unregister */
+ if (sa->registered) {
+ spin_unlock_irqrestore(&h->lock, flags);
+ scsi_remove_host(sa->scsi_host);
+ scsi_host_put(sa->scsi_host);
+ spin_lock_irqsave(&h->lock, flags);
+ }
+
+ /* set scsi_host to NULL so our detect routine will
+ find us on register */
+ sa->scsi_host = NULL;
+ spin_unlock_irqrestore(&h->lock, flags);
+ scsi_cmd_stack_free(h);
+ kfree(sa);
+}
+
+static int cciss_engage_scsi(ctlr_info_t *h)
+{
+ struct cciss_scsi_adapter_data_t *sa;
+ struct cciss_scsi_cmd_stack_t *stk;
+ unsigned long flags;
+
+ spin_lock_irqsave(&h->lock, flags);
+ sa = h->scsi_ctlr;
+ stk = &sa->cmd_stack;
+
+ if (sa->registered) {
+ dev_info(&h->pdev->dev, "SCSI subsystem already engaged.\n");
+ spin_unlock_irqrestore(&h->lock, flags);
+ return -ENXIO;
+ }
+ sa->registered = 1;
+ spin_unlock_irqrestore(&h->lock, flags);
+ cciss_update_non_disk_devices(h, -1);
+ cciss_scsi_detect(h);
+ return 0;
+}
+
+static void
+cciss_seq_tape_report(struct seq_file *seq, ctlr_info_t *h)
+{
+ unsigned long flags;
+
+ CPQ_TAPE_LOCK(h, flags);
+ seq_printf(seq,
+ "Sequential access devices: %d\n\n",
+ ccissscsi[h->ctlr].ndevices);
+ CPQ_TAPE_UNLOCK(h, flags);
+}
+
+static int wait_for_device_to_become_ready(ctlr_info_t *h,
+ unsigned char lunaddr[])
+{
+ int rc;
+ int count = 0;
+ int waittime = HZ;
+ CommandList_struct *c;
+
+ c = cmd_alloc(h);
+ if (!c) {
+ dev_warn(&h->pdev->dev, "out of memory in "
+ "wait_for_device_to_become_ready.\n");
+ return IO_ERROR;
+ }
+
+ /* Send test unit ready until device ready, or give up. */
+ while (count < 20) {
+
+ /* Wait for a bit. do this first, because if we send
+ * the TUR right away, the reset will just abort it.
+ */
+ schedule_timeout_uninterruptible(waittime);
+ count++;
+
+ /* Increase wait time with each try, up to a point. */
+ if (waittime < (HZ * 30))
+ waittime = waittime * 2;
+
+ /* Send the Test Unit Ready */
+ rc = fill_cmd(h, c, TEST_UNIT_READY, NULL, 0, 0,
+ lunaddr, TYPE_CMD);
+ if (rc == 0)
+ rc = sendcmd_withirq_core(h, c, 0);
+
+ (void) process_sendcmd_error(h, c);
+
+ if (rc != 0)
+ goto retry_tur;
+
+ if (c->err_info->CommandStatus == CMD_SUCCESS)
+ break;
+
+ if (c->err_info->CommandStatus == CMD_TARGET_STATUS &&
+ c->err_info->ScsiStatus == SAM_STAT_CHECK_CONDITION) {
+ if (c->err_info->SenseInfo[2] == NO_SENSE)
+ break;
+ if (c->err_info->SenseInfo[2] == UNIT_ATTENTION) {
+ unsigned char asc;
+ asc = c->err_info->SenseInfo[12];
+ check_for_unit_attention(h, c);
+ if (asc == POWER_OR_RESET)
+ break;
+ }
+ }
+retry_tur:
+ dev_warn(&h->pdev->dev, "Waiting %d secs "
+ "for device to become ready.\n",
+ waittime / HZ);
+ rc = 1; /* device not ready. */
+ }
+
+ if (rc)
+ dev_warn(&h->pdev->dev, "giving up on device.\n");
+ else
+ dev_warn(&h->pdev->dev, "device is ready.\n");
+
+ cmd_free(h, c);
+ return rc;
+}
+
+/* Need at least one of these error handlers to keep ../scsi/hosts.c from
+ * complaining. Doing a host- or bus-reset can't do anything good here.
+ * Despite what it might say in scsi_error.c, there may well be commands
+ * on the controller, as the cciss driver registers twice, once as a block
+ * device for the logical drives, and once as a scsi device, for any tape
+ * drives. So we know there are no commands out on the tape drives, but we
+ * don't know there are no commands on the controller, and it is likely
+ * that there are, as the cciss block device is most commonly used
+ * as a boot device (embedded controller on HP/Compaq systems).
+*/
+
+static int cciss_eh_device_reset_handler(struct scsi_cmnd *scsicmd)
+{
+ int rc;
+ CommandList_struct *cmd_in_trouble;
+ unsigned char lunaddr[8];
+ ctlr_info_t *h;
+
+ /* find the controller to which the command to be aborted was sent */
+ h = (ctlr_info_t *) scsicmd->device->host->hostdata[0];
+ if (h == NULL) /* paranoia */
+ return FAILED;
+ dev_warn(&h->pdev->dev, "resetting tape drive or medium changer.\n");
+ /* find the command that's giving us trouble */
+ cmd_in_trouble = (CommandList_struct *) scsicmd->host_scribble;
+ if (cmd_in_trouble == NULL) /* paranoia */
+ return FAILED;
+ memcpy(lunaddr, &cmd_in_trouble->Header.LUN.LunAddrBytes[0], 8);
+ /* send a reset to the SCSI LUN which the command was sent to */
+ rc = sendcmd_withirq(h, CCISS_RESET_MSG, NULL, 0, 0, lunaddr,
+ TYPE_MSG);
+ if (rc == 0 && wait_for_device_to_become_ready(h, lunaddr) == 0)
+ return SUCCESS;
+ dev_warn(&h->pdev->dev, "resetting device failed.\n");
+ return FAILED;
+}
+
+static int cciss_eh_abort_handler(struct scsi_cmnd *scsicmd)
+{
+ int rc;
+ CommandList_struct *cmd_to_abort;
+ unsigned char lunaddr[8];
+ ctlr_info_t *h;
+
+ /* find the controller to which the command to be aborted was sent */
+ h = (ctlr_info_t *) scsicmd->device->host->hostdata[0];
+ if (h == NULL) /* paranoia */
+ return FAILED;
+ dev_warn(&h->pdev->dev, "aborting tardy SCSI cmd\n");
+
+ /* find the command to be aborted */
+ cmd_to_abort = (CommandList_struct *) scsicmd->host_scribble;
+ if (cmd_to_abort == NULL) /* paranoia */
+ return FAILED;
+ memcpy(lunaddr, &cmd_to_abort->Header.LUN.LunAddrBytes[0], 8);
+ rc = sendcmd_withirq(h, CCISS_ABORT_MSG, &cmd_to_abort->Header.Tag,
+ 0, 0, lunaddr, TYPE_MSG);
+ if (rc == 0)
+ return SUCCESS;
+ return FAILED;
+
+}
+
+#else /* no CONFIG_CISS_SCSI_TAPE */
+
+/* If no tape support, then these become defined out of existence */
+
+#define cciss_scsi_setup(cntl_num)
+
+#endif /* CONFIG_CISS_SCSI_TAPE */
diff --git a/drivers/block/cciss_scsi.h b/drivers/block/cciss_scsi.h
new file mode 100644
index 00000000..e71d9867
--- /dev/null
+++ b/drivers/block/cciss_scsi.h
@@ -0,0 +1,79 @@
+/*
+ * Disk Array driver for HP Smart Array controllers, SCSI Tape module.
+ * (C) Copyright 2001, 2007 Hewlett-Packard Development Company, L.P.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 300, Boston, MA
+ * 02111-1307, USA.
+ *
+ * Questions/Comments/Bugfixes to iss_storagedev@hp.com
+ *
+ */
+#ifdef CONFIG_CISS_SCSI_TAPE
+#ifndef _CCISS_SCSI_H_
+#define _CCISS_SCSI_H_
+
+#include <scsi/scsicam.h> /* possibly irrelevant, since we don't show disks */
+
+ /* the scsi id of the adapter... */
+#define SELF_SCSI_ID 15
+ /* 15 is somewhat arbitrary, since the scsi-2 bus
+ that's presented by the driver to the OS is
+ fabricated. The "real" scsi-3 bus the
+ hardware presents is fabricated too.
+ The actual, honest-to-goodness physical
+ bus that the devices are attached to is not
+ addressable natively, and may in fact turn
+ out not to be scsi at all. */
+
+
+/*
+
+If the upper scsi layer tries to track how many commands we have
+outstanding, it will be operating under the misapprehension that it is
+the only one sending us requests. We also have the block interface,
+which is where most requests must surely come from, so the upper layer's
+notion of how many requests we have outstanding will be wrong most or
+all of the time.
+
+Note, the normal SCSI mid-layer error handling doesn't work well
+for this driver because 1) it takes the io_request_lock before
+calling error handlers and uses a local variable to store flags,
+so the io_request_lock cannot be released and interrupts enabled
+inside the error handlers, and 2) the error handlers cannot poll
+for command completion because they might get commands from the
+block half of the driver completing, and would not know what to do
+with them. That's what we get for making a hybrid scsi/block
+driver, I suppose.
+
+*/
+
+struct cciss_scsi_dev_t {
+ int devtype;
+ int bus, target, lun; /* as presented to the OS */
+ unsigned char scsi3addr[8]; /* as presented to the HW */
+ unsigned char device_id[16]; /* from inquiry pg. 0x83 */
+ unsigned char vendor[8]; /* bytes 8-15 of inquiry data */
+ unsigned char model[16]; /* bytes 16-31 of inquiry data */
+ unsigned char revision[4]; /* bytes 32-35 of inquiry data */
+};
+
+struct cciss_scsi_hba_t {
+ char *name;
+ int ndevices;
+#define CCISS_MAX_SCSI_DEVS_PER_HBA 16
+ struct cciss_scsi_dev_t dev[CCISS_MAX_SCSI_DEVS_PER_HBA];
+};
+
+#endif /* _CCISS_SCSI_H_ */
+#endif /* CONFIG_CISS_SCSI_TAPE */
diff --git a/drivers/block/cpqarray.c b/drivers/block/cpqarray.c
new file mode 100644
index 00000000..b2fceb53
--- /dev/null
+++ b/drivers/block/cpqarray.c
@@ -0,0 +1,1821 @@
+/*
+ * Disk Array driver for Compaq SMART2 Controllers
+ * Copyright 1998 Compaq Computer Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Questions/Comments/Bugfixes to iss_storagedev@hp.com
+ *
+ */
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/pci.h>
+#include <linux/bio.h>
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/delay.h>
+#include <linux/major.h>
+#include <linux/fs.h>
+#include <linux/blkpg.h>
+#include <linux/timer.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/init.h>
+#include <linux/hdreg.h>
+#include <linux/mutex.h>
+#include <linux/spinlock.h>
+#include <linux/blkdev.h>
+#include <linux/genhd.h>
+#include <linux/scatterlist.h>
+#include <asm/uaccess.h>
+#include <asm/io.h>
+
+
+#define SMART2_DRIVER_VERSION(maj,min,submin) ((maj<<16)|(min<<8)|(submin))
+
+#define DRIVER_NAME "Compaq SMART2 Driver (v 2.6.0)"
+#define DRIVER_VERSION SMART2_DRIVER_VERSION(2,6,0)
+
+/* Embedded module documentation macros - see modules.h */
+/* Original author Chris Frantz - Compaq Computer Corporation */
+MODULE_AUTHOR("Compaq Computer Corporation");
+MODULE_DESCRIPTION("Driver for Compaq Smart2 Array Controllers version 2.6.0");
+MODULE_LICENSE("GPL");
+
+#include "cpqarray.h"
+#include "ida_cmd.h"
+#include "smart1,2.h"
+#include "ida_ioctl.h"
+
+#define READ_AHEAD 128
+#define NR_CMDS 128 /* This could probably go as high as ~400 */
+
+#define MAX_CTLR 8
+#define CTLR_SHIFT 8
+
+#define CPQARRAY_DMA_MASK 0xFFFFFFFF /* 32 bit DMA */
+
+static DEFINE_MUTEX(cpqarray_mutex);
+static int nr_ctlr;
+static ctlr_info_t *hba[MAX_CTLR];
+
+static int eisa[8];
+
+#define NR_PRODUCTS ARRAY_SIZE(products)
+
+/* board_id = Subsystem Device ID & Vendor ID
+ * product = Marketing Name for the board
+ * access = Address of the struct of function pointers
+ */
+static struct board_type products[] = {
+ { 0x0040110E, "IDA", &smart1_access },
+ { 0x0140110E, "IDA-2", &smart1_access },
+ { 0x1040110E, "IAES", &smart1_access },
+ { 0x2040110E, "SMART", &smart1_access },
+ { 0x3040110E, "SMART-2/E", &smart2e_access },
+ { 0x40300E11, "SMART-2/P", &smart2_access },
+ { 0x40310E11, "SMART-2SL", &smart2_access },
+ { 0x40320E11, "Smart Array 3200", &smart2_access },
+ { 0x40330E11, "Smart Array 3100ES", &smart2_access },
+ { 0x40340E11, "Smart Array 221", &smart2_access },
+ { 0x40400E11, "Integrated Array", &smart4_access },
+ { 0x40480E11, "Compaq Raid LC2", &smart4_access },
+ { 0x40500E11, "Smart Array 4200", &smart4_access },
+ { 0x40510E11, "Smart Array 4250ES", &smart4_access },
+ { 0x40580E11, "Smart Array 431", &smart4_access },
+};
+
+/* define the PCI info for the PCI cards this driver can control */
+static const struct pci_device_id cpqarray_pci_device_id[] =
+{
+ { PCI_VENDOR_ID_DEC, PCI_DEVICE_ID_COMPAQ_42XX,
+ 0x0E11, 0x4058, 0, 0, 0}, /* SA431 */
+ { PCI_VENDOR_ID_DEC, PCI_DEVICE_ID_COMPAQ_42XX,
+ 0x0E11, 0x4051, 0, 0, 0}, /* SA4250ES */
+ { PCI_VENDOR_ID_DEC, PCI_DEVICE_ID_COMPAQ_42XX,
+ 0x0E11, 0x4050, 0, 0, 0}, /* SA4200 */
+ { PCI_VENDOR_ID_NCR, PCI_DEVICE_ID_NCR_53C1510,
+ 0x0E11, 0x4048, 0, 0, 0}, /* LC2 */
+ { PCI_VENDOR_ID_NCR, PCI_DEVICE_ID_NCR_53C1510,
+ 0x0E11, 0x4040, 0, 0, 0}, /* Integrated Array */
+ { PCI_VENDOR_ID_COMPAQ, PCI_DEVICE_ID_COMPAQ_SMART2P,
+ 0x0E11, 0x4034, 0, 0, 0}, /* SA 221 */
+ { PCI_VENDOR_ID_COMPAQ, PCI_DEVICE_ID_COMPAQ_SMART2P,
+ 0x0E11, 0x4033, 0, 0, 0}, /* SA 3100ES*/
+ { PCI_VENDOR_ID_COMPAQ, PCI_DEVICE_ID_COMPAQ_SMART2P,
+ 0x0E11, 0x4032, 0, 0, 0}, /* SA 3200*/
+ { PCI_VENDOR_ID_COMPAQ, PCI_DEVICE_ID_COMPAQ_SMART2P,
+ 0x0E11, 0x4031, 0, 0, 0}, /* SA 2SL*/
+ { PCI_VENDOR_ID_COMPAQ, PCI_DEVICE_ID_COMPAQ_SMART2P,
+ 0x0E11, 0x4030, 0, 0, 0}, /* SA 2P */
+ { 0 }
+};
+
+MODULE_DEVICE_TABLE(pci, cpqarray_pci_device_id);
+
+static struct gendisk *ida_gendisk[MAX_CTLR][NWD];
+
+/* Debug... */
+#define DBG(s) do { s } while(0)
+/* Debug (general info)... */
+#define DBGINFO(s) do { } while(0)
+/* Debug Paranoid... */
+#define DBGP(s) do { } while(0)
+/* Debug Extra Paranoid... */
+#define DBGPX(s) do { } while(0)
+
+static int cpqarray_pci_init(ctlr_info_t *c, struct pci_dev *pdev);
+static void __iomem *remap_pci_mem(ulong base, ulong size);
+static int cpqarray_eisa_detect(void);
+static int pollcomplete(int ctlr);
+static void getgeometry(int ctlr);
+static void start_fwbk(int ctlr);
+
+static cmdlist_t * cmd_alloc(ctlr_info_t *h, int get_from_pool);
+static void cmd_free(ctlr_info_t *h, cmdlist_t *c, int got_from_pool);
+
+static void free_hba(int i);
+static int alloc_cpqarray_hba(void);
+
+static int sendcmd(
+ __u8 cmd,
+ int ctlr,
+ void *buff,
+ size_t size,
+ unsigned int blk,
+ unsigned int blkcnt,
+ unsigned int log_unit );
+
+static int ida_unlocked_open(struct block_device *bdev, fmode_t mode);
+static int ida_release(struct gendisk *disk, fmode_t mode);
+static int ida_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, unsigned long arg);
+static int ida_getgeo(struct block_device *bdev, struct hd_geometry *geo);
+static int ida_ctlr_ioctl(ctlr_info_t *h, int dsk, ida_ioctl_t *io);
+
+static void do_ida_request(struct request_queue *q);
+static void start_io(ctlr_info_t *h);
+
+static inline void addQ(cmdlist_t **Qptr, cmdlist_t *c);
+static inline cmdlist_t *removeQ(cmdlist_t **Qptr, cmdlist_t *c);
+static inline void complete_command(cmdlist_t *cmd, int timeout);
+
+static irqreturn_t do_ida_intr(int irq, void *dev_id);
+static void ida_timer(unsigned long tdata);
+static int ida_revalidate(struct gendisk *disk);
+static int revalidate_allvol(ctlr_info_t *host);
+static int cpqarray_register_ctlr(int ctlr, struct pci_dev *pdev);
+
+#ifdef CONFIG_PROC_FS
+static void ida_procinit(int i);
+#else
+static void ida_procinit(int i) {}
+#endif
+
+static inline drv_info_t *get_drv(struct gendisk *disk)
+{
+ return disk->private_data;
+}
+
+static inline ctlr_info_t *get_host(struct gendisk *disk)
+{
+ return disk->queue->queuedata;
+}
+
+
+static const struct block_device_operations ida_fops = {
+ .owner = THIS_MODULE,
+ .open = ida_unlocked_open,
+ .release = ida_release,
+ .ioctl = ida_ioctl,
+ .getgeo = ida_getgeo,
+ .revalidate_disk= ida_revalidate,
+};
+
+
+#ifdef CONFIG_PROC_FS
+
+static struct proc_dir_entry *proc_array;
+static const struct file_operations ida_proc_fops;
+
+/*
+ * Get us a file in /proc/array that says something about each controller.
+ * Create /proc/array if it doesn't exist yet.
+ */
+static void __init ida_procinit(int i)
+{
+ if (proc_array == NULL) {
+ proc_array = proc_mkdir("driver/cpqarray", NULL);
+ if (!proc_array) return;
+ }
+
+ proc_create_data(hba[i]->devname, 0, proc_array, &ida_proc_fops, hba[i]);
+}
+
+/*
+ * Report information about this controller.
+ */
+static int ida_proc_show(struct seq_file *m, void *v)
+{
+ int i, ctlr;
+ ctlr_info_t *h = (ctlr_info_t*)m->private;
+ drv_info_t *drv;
+#ifdef CPQ_PROC_PRINT_QUEUES
+ cmdlist_t *c;
+ unsigned long flags;
+#endif
+
+ ctlr = h->ctlr;
+ seq_printf(m, "%s: Compaq %s Controller\n"
+ " Board ID: 0x%08lx\n"
+ " Firmware Revision: %c%c%c%c\n"
+ " Controller Sig: 0x%08lx\n"
+ " Memory Address: 0x%08lx\n"
+ " I/O Port: 0x%04x\n"
+ " IRQ: %d\n"
+ " Logical drives: %d\n"
+ " Physical drives: %d\n\n"
+ " Current Q depth: %d\n"
+ " Max Q depth since init: %d\n\n",
+ h->devname,
+ h->product_name,
+ (unsigned long)h->board_id,
+ h->firm_rev[0], h->firm_rev[1], h->firm_rev[2], h->firm_rev[3],
+ (unsigned long)h->ctlr_sig, (unsigned long)h->vaddr,
+ (unsigned int) h->io_mem_addr, (unsigned int)h->intr,
+ h->log_drives, h->phys_drives,
+ h->Qdepth, h->maxQsinceinit);
+
+ seq_puts(m, "Logical Drive Info:\n");
+
+ for(i=0; i<h->log_drives; i++) {
+ drv = &h->drv[i];
+ seq_printf(m, "ida/c%dd%d: blksz=%d nr_blks=%d\n",
+ ctlr, i, drv->blk_size, drv->nr_blks);
+ }
+
+#ifdef CPQ_PROC_PRINT_QUEUES
+ spin_lock_irqsave(IDA_LOCK(h->ctlr), flags);
+ seq_puts(m, "\nCurrent Queues:\n");
+
+ c = h->reqQ;
+ seq_printf(m, "reqQ = %p", c);
+ if (c) c=c->next;
+ while(c && c != h->reqQ) {
+ seq_printf(m, "->%p", c);
+ c=c->next;
+ }
+
+ c = h->cmpQ;
+ seq_printf(m, "\ncmpQ = %p", c);
+ if (c) c=c->next;
+ while(c && c != h->cmpQ) {
+ seq_printf(m, "->%p", c);
+ c=c->next;
+ }
+
+ seq_putc(m, '\n');
+ spin_unlock_irqrestore(IDA_LOCK(h->ctlr), flags);
+#endif
+ seq_printf(m, "nr_allocs = %d\nnr_frees = %d\n",
+ h->nr_allocs, h->nr_frees);
+ return 0;
+}
+
+static int ida_proc_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, ida_proc_show, PDE(inode)->data);
+}
+
+static const struct file_operations ida_proc_fops = {
+ .owner = THIS_MODULE,
+ .open = ida_proc_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+#endif /* CONFIG_PROC_FS */
+
+module_param_array(eisa, int, NULL, 0);
+
+static void release_io_mem(ctlr_info_t *c)
+{
+ /* if IO mem was not protected do nothing */
+ if( c->io_mem_addr == 0)
+ return;
+ release_region(c->io_mem_addr, c->io_mem_length);
+ c->io_mem_addr = 0;
+ c->io_mem_length = 0;
+}
+
+static void __devexit cpqarray_remove_one(int i)
+{
+ int j;
+ char buff[4];
+
+ /* sendcmd will turn off interrupts and send the flush
+ * to write all data in the battery-backed cache to the disks.
+ * No data is returned, but we don't want to send NULL to sendcmd. */
+ if( sendcmd(FLUSH_CACHE, i, buff, 4, 0, 0, 0))
+ {
+ printk(KERN_WARNING "Unable to flush cache on controller %d\n",
+ i);
+ }
+ free_irq(hba[i]->intr, hba[i]);
+ iounmap(hba[i]->vaddr);
+ unregister_blkdev(COMPAQ_SMART2_MAJOR+i, hba[i]->devname);
+ del_timer(&hba[i]->timer);
+ remove_proc_entry(hba[i]->devname, proc_array);
+ pci_free_consistent(hba[i]->pci_dev,
+ NR_CMDS * sizeof(cmdlist_t), (hba[i]->cmd_pool),
+ hba[i]->cmd_pool_dhandle);
+ kfree(hba[i]->cmd_pool_bits);
+ for(j = 0; j < NWD; j++) {
+ if (ida_gendisk[i][j]->flags & GENHD_FL_UP)
+ del_gendisk(ida_gendisk[i][j]);
+ put_disk(ida_gendisk[i][j]);
+ }
+ blk_cleanup_queue(hba[i]->queue);
+ release_io_mem(hba[i]);
+ free_hba(i);
+}
+
+static void __devexit cpqarray_remove_one_pci (struct pci_dev *pdev)
+{
+ int i;
+ ctlr_info_t *tmp_ptr;
+
+ if (pci_get_drvdata(pdev) == NULL) {
+ printk( KERN_ERR "cpqarray: Unable to remove device \n");
+ return;
+ }
+
+ tmp_ptr = pci_get_drvdata(pdev);
+ i = tmp_ptr->ctlr;
+ if (hba[i] == NULL) {
+ printk(KERN_ERR "cpqarray: controller %d appears to have"
+ "already been removed \n", i);
+ return;
+ }
+ pci_set_drvdata(pdev, NULL);
+
+ cpqarray_remove_one(i);
+}
+
+/* removing an instance that was not removed automatically..
+ * must be an eisa card.
+ */
+static void __devexit cpqarray_remove_one_eisa (int i)
+{
+ if (hba[i] == NULL) {
+ printk(KERN_ERR "cpqarray: controller %d appears to have"
+ "already been removed \n", i);
+ return;
+ }
+ cpqarray_remove_one(i);
+}
+
+/* pdev is NULL for eisa */
+static int __devinit cpqarray_register_ctlr( int i, struct pci_dev *pdev)
+{
+ struct request_queue *q;
+ int j;
+
+ /*
+ * register block devices
+ * Find disks and fill in structs
+ * Get an interrupt, set the Q depth and get into /proc
+ */
+
+ /* If this is successful it should ensure that we are the only */
+ /* instance of the driver */
+ if (register_blkdev(COMPAQ_SMART2_MAJOR+i, hba[i]->devname)) {
+ goto Enomem4;
+ }
+ hba[i]->access.set_intr_mask(hba[i], 0);
+ if (request_irq(hba[i]->intr, do_ida_intr,
+ IRQF_DISABLED|IRQF_SHARED, hba[i]->devname, hba[i]))
+ {
+ printk(KERN_ERR "cpqarray: Unable to get irq %d for %s\n",
+ hba[i]->intr, hba[i]->devname);
+ goto Enomem3;
+ }
+
+ for (j=0; j<NWD; j++) {
+ ida_gendisk[i][j] = alloc_disk(1 << NWD_SHIFT);
+ if (!ida_gendisk[i][j])
+ goto Enomem2;
+ }
+
+ hba[i]->cmd_pool = pci_alloc_consistent(
+ hba[i]->pci_dev, NR_CMDS * sizeof(cmdlist_t),
+ &(hba[i]->cmd_pool_dhandle));
+ hba[i]->cmd_pool_bits = kcalloc(
+ DIV_ROUND_UP(NR_CMDS, BITS_PER_LONG), sizeof(unsigned long),
+ GFP_KERNEL);
+
+ if (!hba[i]->cmd_pool_bits || !hba[i]->cmd_pool)
+ goto Enomem1;
+
+ memset(hba[i]->cmd_pool, 0, NR_CMDS * sizeof(cmdlist_t));
+ printk(KERN_INFO "cpqarray: Finding drives on %s",
+ hba[i]->devname);
+
+ spin_lock_init(&hba[i]->lock);
+ q = blk_init_queue(do_ida_request, &hba[i]->lock);
+ if (!q)
+ goto Enomem1;
+
+ hba[i]->queue = q;
+ q->queuedata = hba[i];
+
+ getgeometry(i);
+ start_fwbk(i);
+
+ ida_procinit(i);
+
+ if (pdev)
+ blk_queue_bounce_limit(q, hba[i]->pci_dev->dma_mask);
+
+ /* This is a hardware imposed limit. */
+ blk_queue_max_segments(q, SG_MAX);
+
+ init_timer(&hba[i]->timer);
+ hba[i]->timer.expires = jiffies + IDA_TIMER;
+ hba[i]->timer.data = (unsigned long)hba[i];
+ hba[i]->timer.function = ida_timer;
+ add_timer(&hba[i]->timer);
+
+ /* Enable IRQ now that spinlock and rate limit timer are set up */
+ hba[i]->access.set_intr_mask(hba[i], FIFO_NOT_EMPTY);
+
+ for(j=0; j<NWD; j++) {
+ struct gendisk *disk = ida_gendisk[i][j];
+ drv_info_t *drv = &hba[i]->drv[j];
+ sprintf(disk->disk_name, "ida/c%dd%d", i, j);
+ disk->major = COMPAQ_SMART2_MAJOR + i;
+ disk->first_minor = j<<NWD_SHIFT;
+ disk->fops = &ida_fops;
+ if (j && !drv->nr_blks)
+ continue;
+ blk_queue_logical_block_size(hba[i]->queue, drv->blk_size);
+ set_capacity(disk, drv->nr_blks);
+ disk->queue = hba[i]->queue;
+ disk->private_data = drv;
+ add_disk(disk);
+ }
+
+ /* done ! */
+ return(i);
+
+Enomem1:
+ nr_ctlr = i;
+ kfree(hba[i]->cmd_pool_bits);
+ if (hba[i]->cmd_pool)
+ pci_free_consistent(hba[i]->pci_dev, NR_CMDS*sizeof(cmdlist_t),
+ hba[i]->cmd_pool, hba[i]->cmd_pool_dhandle);
+Enomem2:
+ while (j--) {
+ put_disk(ida_gendisk[i][j]);
+ ida_gendisk[i][j] = NULL;
+ }
+ free_irq(hba[i]->intr, hba[i]);
+Enomem3:
+ unregister_blkdev(COMPAQ_SMART2_MAJOR+i, hba[i]->devname);
+Enomem4:
+ if (pdev)
+ pci_set_drvdata(pdev, NULL);
+ release_io_mem(hba[i]);
+ free_hba(i);
+
+ printk( KERN_ERR "cpqarray: out of memory");
+
+ return -1;
+}
+
+static int __devinit cpqarray_init_one( struct pci_dev *pdev,
+ const struct pci_device_id *ent)
+{
+ int i;
+
+ printk(KERN_DEBUG "cpqarray: Device 0x%x has been found at"
+ " bus %d dev %d func %d\n",
+ pdev->device, pdev->bus->number, PCI_SLOT(pdev->devfn),
+ PCI_FUNC(pdev->devfn));
+ i = alloc_cpqarray_hba();
+ if( i < 0 )
+ return (-1);
+ memset(hba[i], 0, sizeof(ctlr_info_t));
+ sprintf(hba[i]->devname, "ida%d", i);
+ hba[i]->ctlr = i;
+ /* Initialize the pdev driver private data */
+ pci_set_drvdata(pdev, hba[i]);
+
+ if (cpqarray_pci_init(hba[i], pdev) != 0) {
+ pci_set_drvdata(pdev, NULL);
+ release_io_mem(hba[i]);
+ free_hba(i);
+ return -1;
+ }
+
+ return (cpqarray_register_ctlr(i, pdev));
+}
+
+static struct pci_driver cpqarray_pci_driver = {
+ .name = "cpqarray",
+ .probe = cpqarray_init_one,
+ .remove = __devexit_p(cpqarray_remove_one_pci),
+ .id_table = cpqarray_pci_device_id,
+};
+
+/*
+ * This is it. Find all the controllers and register them.
+ * returns the number of block devices registered.
+ */
+static int __init cpqarray_init(void)
+{
+ int num_cntlrs_reg = 0;
+ int i;
+ int rc = 0;
+
+ /* detect controllers */
+ printk(DRIVER_NAME "\n");
+
+ rc = pci_register_driver(&cpqarray_pci_driver);
+ if (rc)
+ return rc;
+ cpqarray_eisa_detect();
+
+ for (i=0; i < MAX_CTLR; i++) {
+ if (hba[i] != NULL)
+ num_cntlrs_reg++;
+ }
+
+ if (num_cntlrs_reg)
+ return 0;
+ else {
+ pci_unregister_driver(&cpqarray_pci_driver);
+ return -ENODEV;
+ }
+}
+
+/* Function to find the first free pointer into our hba[] array */
+/* Returns -1 if no free entries are left. */
+static int alloc_cpqarray_hba(void)
+{
+ int i;
+
+ for(i=0; i< MAX_CTLR; i++) {
+ if (hba[i] == NULL) {
+ hba[i] = kmalloc(sizeof(ctlr_info_t), GFP_KERNEL);
+ if(hba[i]==NULL) {
+ printk(KERN_ERR "cpqarray: out of memory.\n");
+ return (-1);
+ }
+ return (i);
+ }
+ }
+ printk(KERN_WARNING "cpqarray: This driver supports a maximum"
+ " of 8 controllers.\n");
+ return(-1);
+}
+
+static void free_hba(int i)
+{
+ kfree(hba[i]);
+ hba[i]=NULL;
+}
+
+/*
+ * Find the IO address of the controller, its IRQ and so forth. Fill
+ * in some basic stuff into the ctlr_info_t structure.
+ */
+static int cpqarray_pci_init(ctlr_info_t *c, struct pci_dev *pdev)
+{
+ ushort vendor_id, device_id, command;
+ unchar cache_line_size, latency_timer;
+ unchar irq, revision;
+ unsigned long addr[6];
+ __u32 board_id;
+
+ int i;
+
+ c->pci_dev = pdev;
+ pci_set_master(pdev);
+ if (pci_enable_device(pdev)) {
+ printk(KERN_ERR "cpqarray: Unable to Enable PCI device\n");
+ return -1;
+ }
+ vendor_id = pdev->vendor;
+ device_id = pdev->device;
+ irq = pdev->irq;
+
+ for(i=0; i<6; i++)
+ addr[i] = pci_resource_start(pdev, i);
+
+ if (pci_set_dma_mask(pdev, CPQARRAY_DMA_MASK) != 0)
+ {
+ printk(KERN_ERR "cpqarray: Unable to set DMA mask\n");
+ return -1;
+ }
+
+ pci_read_config_word(pdev, PCI_COMMAND, &command);
+ pci_read_config_byte(pdev, PCI_CLASS_REVISION, &revision);
+ pci_read_config_byte(pdev, PCI_CACHE_LINE_SIZE, &cache_line_size);
+ pci_read_config_byte(pdev, PCI_LATENCY_TIMER, &latency_timer);
+
+ pci_read_config_dword(pdev, 0x2c, &board_id);
+
+ /* check to see if controller has been disabled */
+ if(!(command & 0x02)) {
+ printk(KERN_WARNING
+ "cpqarray: controller appears to be disabled\n");
+ return(-1);
+ }
+
+DBGINFO(
+ printk("vendor_id = %x\n", vendor_id);
+ printk("device_id = %x\n", device_id);
+ printk("command = %x\n", command);
+ for(i=0; i<6; i++)
+ printk("addr[%d] = %lx\n", i, addr[i]);
+ printk("revision = %x\n", revision);
+ printk("irq = %x\n", irq);
+ printk("cache_line_size = %x\n", cache_line_size);
+ printk("latency_timer = %x\n", latency_timer);
+ printk("board_id = %x\n", board_id);
+);
+
+ c->intr = irq;
+
+ for(i=0; i<6; i++) {
+ if (pci_resource_flags(pdev, i) & PCI_BASE_ADDRESS_SPACE_IO)
+ { /* IO space */
+ c->io_mem_addr = addr[i];
+ c->io_mem_length = pci_resource_end(pdev, i)
+ - pci_resource_start(pdev, i) + 1;
+ if(!request_region( c->io_mem_addr, c->io_mem_length,
+ "cpqarray"))
+ {
+ printk( KERN_WARNING "cpqarray I/O memory range already in use addr %lx length = %ld\n", c->io_mem_addr, c->io_mem_length);
+ c->io_mem_addr = 0;
+ c->io_mem_length = 0;
+ }
+ break;
+ }
+ }
+
+ c->paddr = 0;
+ for(i=0; i<6; i++)
+ if (!(pci_resource_flags(pdev, i) &
+ PCI_BASE_ADDRESS_SPACE_IO)) {
+ c->paddr = pci_resource_start (pdev, i);
+ break;
+ }
+ if (!c->paddr)
+ return -1;
+ c->vaddr = remap_pci_mem(c->paddr, 128);
+ if (!c->vaddr)
+ return -1;
+ c->board_id = board_id;
+
+ for(i=0; i<NR_PRODUCTS; i++) {
+ if (board_id == products[i].board_id) {
+ c->product_name = products[i].product_name;
+ c->access = *(products[i].access);
+ break;
+ }
+ }
+ if (i == NR_PRODUCTS) {
+ printk(KERN_WARNING "cpqarray: Sorry, I don't know how"
+ " to access the SMART Array controller %08lx\n",
+ (unsigned long)board_id);
+ return -1;
+ }
+
+ return 0;
+}
+
+/*
+ * Map (physical) PCI mem into (virtual) kernel space
+ */
+static void __iomem *remap_pci_mem(ulong base, ulong size)
+{
+ ulong page_base = ((ulong) base) & PAGE_MASK;
+ ulong page_offs = ((ulong) base) - page_base;
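+ /* ioremap() the page-aligned base, then add the offset back into
+ * the returned virtual address. */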
+ void __iomem *page_remapped = ioremap(page_base, page_offs+size);
+
+ return (page_remapped ? (page_remapped + page_offs) : NULL);
+}
+
+#ifndef MODULE
+/*
+ * Config string is a comma separated set of i/o addresses of EISA cards.
+ */
+static int cpqarray_setup(char *str)
+{
+ int i, ints[9];
+
+ (void)get_options(str, ARRAY_SIZE(ints), ints);
+
+ for(i=0; i<ints[0] && i<8; i++)
+ eisa[i] = ints[i+1];
+ return 1;
+}
+
+__setup("smart2=", cpqarray_setup);
+
+#endif
+
+/*
+ * Find an EISA controller's signature. Set up an hba if we find it.
+ */
+static int __devinit cpqarray_eisa_detect(void)
+{
+ int i=0, j;
+ __u32 board_id;
+ int intr;
+ int ctlr;
+ int num_ctlr = 0;
+
+ while(i<8 && eisa[i]) {
+ ctlr = alloc_cpqarray_hba();
+ if(ctlr == -1)
+ break;
+ board_id = inl(eisa[i]+0xC80);
+ for(j=0; j < NR_PRODUCTS; j++)
+ if (board_id == products[j].board_id)
+ break;
+
+ if (j == NR_PRODUCTS) {
+ printk(KERN_WARNING "cpqarray: Sorry, I don't know how"
+ " to access the SMART Array controller %08lx\n", (unsigned long)board_id);
+ continue;
+ }
+
+ memset(hba[ctlr], 0, sizeof(ctlr_info_t));
+ hba[ctlr]->io_mem_addr = eisa[i];
+ hba[ctlr]->io_mem_length = 0x7FF;
+ if(!request_region(hba[ctlr]->io_mem_addr,
+ hba[ctlr]->io_mem_length,
+ "cpqarray"))
+ {
+ printk(KERN_WARNING "cpqarray: I/O range already in "
+ "use addr = %lx length = %ld\n",
+ hba[ctlr]->io_mem_addr,
+ hba[ctlr]->io_mem_length);
+ free_hba(ctlr);
+ continue;
+ }
+
+ /*
+ * Read the config register to find our interrupt
+ */
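+ /* The upper nibble selects the IRQ: bit 0 -> IRQ 11,
+ * bit 1 -> IRQ 10, bit 2 -> IRQ 14, bit 3 -> IRQ 15. */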
+ intr = inb(eisa[i]+0xCC0) >> 4;
+ if (intr & 1) intr = 11;
+ else if (intr & 2) intr = 10;
+ else if (intr & 4) intr = 14;
+ else if (intr & 8) intr = 15;
+
+ hba[ctlr]->intr = intr;
+ sprintf(hba[ctlr]->devname, "ida%d", nr_ctlr);
+ hba[ctlr]->product_name = products[j].product_name;
+ hba[ctlr]->access = *(products[j].access);
+ hba[ctlr]->ctlr = ctlr;
+ hba[ctlr]->board_id = board_id;
+ hba[ctlr]->pci_dev = NULL; /* not PCI */
+
+DBGINFO(
+ printk("i = %d, j = %d\n", i, j);
+ printk("irq = %x\n", intr);
+ printk("product name = %s\n", products[j].product_name);
+ printk("board_id = %x\n", board_id);
+);
+
+ num_ctlr++;
+ i++;
+
+ if (cpqarray_register_ctlr(ctlr, NULL) == -1)
+ printk(KERN_WARNING
+ "cpqarray: Can't register EISA controller %d\n",
+ ctlr);
+
+ }
+
+ return num_ctlr;
+}
+
+/*
+ * Open. Make sure the device is really there.
+ */
+static int ida_open(struct block_device *bdev, fmode_t mode)
+{
+ drv_info_t *drv = get_drv(bdev->bd_disk);
+ ctlr_info_t *host = get_host(bdev->bd_disk);
+
+ DBGINFO(printk("ida_open %s\n", bdev->bd_disk->disk_name));
+ /*
+ * Root is allowed to open raw volume zero even if it's not configured
+ * so array config can still work. I don't think I really like this,
+ * but I'm already using way to many device nodes to claim another one
+ * for "raw controller".
+ */
+ if (!drv->nr_blks) {
+ if (!capable(CAP_SYS_RAWIO))
+ return -ENXIO;
+ if (!capable(CAP_SYS_ADMIN) && drv != host->drv)
+ return -ENXIO;
+ }
+ host->usage_count++;
+ return 0;
+}
+
+static int ida_unlocked_open(struct block_device *bdev, fmode_t mode)
+{
+ int ret;
+
+ mutex_lock(&cpqarray_mutex);
+ ret = ida_open(bdev, mode);
+ mutex_unlock(&cpqarray_mutex);
+
+ return ret;
+}
+
+/*
+ * Close. Sync first.
+ */
+static int ida_release(struct gendisk *disk, fmode_t mode)
+{
+ ctlr_info_t *host;
+
+ mutex_lock(&cpqarray_mutex);
+ host = get_host(disk);
+ host->usage_count--;
+ mutex_unlock(&cpqarray_mutex);
+
+ return 0;
+}
+
+/*
+ * Enqueuing and dequeuing functions for cmdlists.
+ */
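+/* The request and completion queues are circular doubly-linked lists;
+ * an empty queue is represented by a NULL head pointer. */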
+static inline void addQ(cmdlist_t **Qptr, cmdlist_t *c)
+{
+ if (*Qptr == NULL) {
+ *Qptr = c;
+ c->next = c->prev = c;
+ } else {
+ c->prev = (*Qptr)->prev;
+ c->next = (*Qptr);
+ (*Qptr)->prev->next = c;
+ (*Qptr)->prev = c;
+ }
+}
+
+static inline cmdlist_t *removeQ(cmdlist_t **Qptr, cmdlist_t *c)
+{
+ if (c && c->next != c) {
+ if (*Qptr == c) *Qptr = c->next;
+ c->prev->next = c->next;
+ c->next->prev = c->prev;
+ } else {
+ *Qptr = NULL;
+ }
+ return c;
+}
+
+/*
+ * Get a request and submit it to the controller.
+ * This routine needs to grab all the requests it possibly can from the
+ * req Q and submit them. Interrupts are off (and need to be off) when you
+ * are in here (either via the dummy do_ida_request functions or by being
+ * called from the interrupt handler
+ */
+static void do_ida_request(struct request_queue *q)
+{
+ ctlr_info_t *h = q->queuedata;
+ cmdlist_t *c;
+ struct request *creq;
+ struct scatterlist tmp_sg[SG_MAX];
+ int i, dir, seg;
+
+queue_next:
+ creq = blk_peek_request(q);
+ if (!creq)
+ goto startio;
+
+ BUG_ON(creq->nr_phys_segments > SG_MAX);
+
+ if ((c = cmd_alloc(h,1)) == NULL)
+ goto startio;
+
+ blk_start_request(creq);
+
+ c->ctlr = h->ctlr;
+ c->hdr.unit = (drv_info_t *)(creq->rq_disk->private_data) - h->drv;
+ c->hdr.size = sizeof(rblk_t) >> 2;
+ c->size += sizeof(rblk_t);
+
+ c->req.hdr.blk = blk_rq_pos(creq);
+ c->rq = creq;
+DBGPX(
+ printk("sector=%d, nr_sectors=%u\n",
+ blk_rq_pos(creq), blk_rq_sectors(creq));
+);
+ sg_init_table(tmp_sg, SG_MAX);
+ seg = blk_rq_map_sg(q, creq, tmp_sg);
+
+ /* Now do all the DMA Mappings */
+ if (rq_data_dir(creq) == READ)
+ dir = PCI_DMA_FROMDEVICE;
+ else
+ dir = PCI_DMA_TODEVICE;
+ for( i=0; i < seg; i++)
+ {
+ c->req.sg[i].size = tmp_sg[i].length;
+ c->req.sg[i].addr = (__u32) pci_map_page(h->pci_dev,
+ sg_page(&tmp_sg[i]),
+ tmp_sg[i].offset,
+ tmp_sg[i].length, dir);
+ }
+DBGPX( printk("Submitting %u sectors in %d segments\n", blk_rq_sectors(creq), seg); );
+ c->req.hdr.sg_cnt = seg;
+ c->req.hdr.blk_cnt = blk_rq_sectors(creq);
+ c->req.hdr.cmd = (rq_data_dir(creq) == READ) ? IDA_READ : IDA_WRITE;
+ c->type = CMD_RWREQ;
+
+ /* Put the request on the tail of the request queue */
+ addQ(&h->reqQ, c);
+ h->Qdepth++;
+ if (h->Qdepth > h->maxQsinceinit)
+ h->maxQsinceinit = h->Qdepth;
+
+ goto queue_next;
+
+startio:
+ start_io(h);
+}
+
+/*
+ * start_io submits everything on a controller's request queue
+ * and moves it to the completion queue.
+ *
+ * Interrupts had better be off if you're in here
+ */
+static void start_io(ctlr_info_t *h)
+{
+ cmdlist_t *c;
+
+ while((c = h->reqQ) != NULL) {
+ /* Can't do anything if we're busy */
+ if (h->access.fifo_full(h) == 0)
+ return;
+
+ /* Get the first entry from the request Q */
+ removeQ(&h->reqQ, c);
+ h->Qdepth--;
+
+ /* Tell the controller to do our bidding */
+ h->access.submit_command(h, c);
+
+ /* Get onto the completion Q */
+ addQ(&h->cmpQ, c);
+ }
+}
+
+/*
+ * Mark all buffers that cmd was responsible for
+ */
+static inline void complete_command(cmdlist_t *cmd, int timeout)
+{
+ struct request *rq = cmd->rq;
+ int error = 0;
+ int i, ddir;
+
+ if (cmd->req.hdr.rcode & RCODE_NONFATAL &&
+ (hba[cmd->ctlr]->misc_tflags & MISC_NONFATAL_WARN) == 0) {
+ printk(KERN_NOTICE "Non Fatal error on ida/c%dd%d\n",
+ cmd->ctlr, cmd->hdr.unit);
+ hba[cmd->ctlr]->misc_tflags |= MISC_NONFATAL_WARN;
+ }
+ if (cmd->req.hdr.rcode & RCODE_FATAL) {
+ printk(KERN_WARNING "Fatal error on ida/c%dd%d\n",
+ cmd->ctlr, cmd->hdr.unit);
+ error = -EIO;
+ }
+ if (cmd->req.hdr.rcode & RCODE_INVREQ) {
+ printk(KERN_WARNING "Invalid request on ida/c%dd%d = (cmd=%x sect=%d cnt=%d sg=%d ret=%x)\n",
+ cmd->ctlr, cmd->hdr.unit, cmd->req.hdr.cmd,
+ cmd->req.hdr.blk, cmd->req.hdr.blk_cnt,
+ cmd->req.hdr.sg_cnt, cmd->req.hdr.rcode);
+ error = -EIO;
+ }
+ if (timeout)
+ error = -EIO;
+ /* unmap the DMA mapping for all the scatter gather elements */
+ if (cmd->req.hdr.cmd == IDA_READ)
+ ddir = PCI_DMA_FROMDEVICE;
+ else
+ ddir = PCI_DMA_TODEVICE;
+ for(i=0; i<cmd->req.hdr.sg_cnt; i++)
+ pci_unmap_page(hba[cmd->ctlr]->pci_dev, cmd->req.sg[i].addr,
+ cmd->req.sg[i].size, ddir);
+
+ DBGPX(printk("Done with %p\n", rq););
+ __blk_end_request_all(rq, error);
+}
+
+/*
+ * The controller will interrupt us upon completion of commands.
+ * Find the command on the completion queue, remove it, tell the OS and
+ * try to queue up more IO
+ */
+static irqreturn_t do_ida_intr(int irq, void *dev_id)
+{
+ ctlr_info_t *h = dev_id;
+ cmdlist_t *c;
+ unsigned long istat;
+ unsigned long flags;
+ __u32 a,a1;
+
+ istat = h->access.intr_pending(h);
+ /* Is this interrupt for us? */
+ if (istat == 0)
+ return IRQ_NONE;
+
+ /*
+ * If there are completed commands in the completion queue,
+ * we had better do something about it.
+ */
+ spin_lock_irqsave(IDA_LOCK(h->ctlr), flags);
+ if (istat & FIFO_NOT_EMPTY) {
+ while((a = h->access.command_completed(h))) {
+ a1 = a; a &= ~3;
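+			/* The low two bits of the completion tag carry status;
+			 * mask them off to recover the command's bus address. */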
+ if ((c = h->cmpQ) == NULL)
+ {
+ printk(KERN_WARNING "cpqarray: Completion of %08lx ignored\n", (unsigned long)a1);
+ continue;
+ }
+ while(c->busaddr != a) {
+ c = c->next;
+ if (c == h->cmpQ)
+ break;
+ }
+ /*
+ * If we've found the command, take it off the
+ * completion Q and free it
+ */
+ if (c->busaddr == a) {
+ removeQ(&h->cmpQ, c);
+				/* Check for an invalid command: the
+				 * controller returns a command error,
+				 * but rcode = 0.
+				 */
+
+ if((a1 & 0x03) && (c->req.hdr.rcode == 0))
+ {
+ c->req.hdr.rcode = RCODE_INVREQ;
+ }
+ if (c->type == CMD_RWREQ) {
+ complete_command(c, 0);
+ cmd_free(h, c, 1);
+ } else if (c->type == CMD_IOCTL_PEND) {
+ c->type = CMD_IOCTL_DONE;
+ }
+ continue;
+ }
+ }
+ }
+
+ /*
+ * See if we can queue up some more IO
+ */
+ do_ida_request(h->queue);
+ spin_unlock_irqrestore(IDA_LOCK(h->ctlr), flags);
+ return IRQ_HANDLED;
+}
+
+/*
+ * This timer was for timing out requests that hadn't completed after
+ * IDA_TIMEOUT.  That wasn't such a good idea.  This timer is used to
+ * reset a flags structure so we don't flood the user with
+ * "Non-Fatal error" messages.
+ */
+static void ida_timer(unsigned long tdata)
+{
+ ctlr_info_t *h = (ctlr_info_t*)tdata;
+
+ h->timer.expires = jiffies + IDA_TIMER;
+ add_timer(&h->timer);
+ h->misc_tflags = 0;
+}
+
+static int ida_getgeo(struct block_device *bdev, struct hd_geometry *geo)
+{
+ drv_info_t *drv = get_drv(bdev->bd_disk);
+
+ if (drv->cylinders) {
+ geo->heads = drv->heads;
+ geo->sectors = drv->sectors;
+ geo->cylinders = drv->cylinders;
+ } else {
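+		/* No cylinder count reported: assume the usual 255-head,
+		 * 63-sector translation and derive cylinders from the
+		 * block count. */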
+ geo->heads = 0xff;
+ geo->sectors = 0x3f;
+ geo->cylinders = drv->nr_blks / (0xff*0x3f);
+ }
+
+ return 0;
+}
+
+/*
+ * ida_ioctl does some miscellaneous stuff like reporting drive geometry,
+ * setting readahead and submitting commands from userspace to the controller.
+ */
+static int ida_locked_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, unsigned long arg)
+{
+ drv_info_t *drv = get_drv(bdev->bd_disk);
+ ctlr_info_t *host = get_host(bdev->bd_disk);
+ int error;
+ ida_ioctl_t __user *io = (ida_ioctl_t __user *)arg;
+ ida_ioctl_t *my_io;
+
+ switch(cmd) {
+ case IDAGETDRVINFO:
+ if (copy_to_user(&io->c.drv, drv, sizeof(drv_info_t)))
+ return -EFAULT;
+ return 0;
+ case IDAPASSTHRU:
+ if (!capable(CAP_SYS_RAWIO))
+ return -EPERM;
+ my_io = kmalloc(sizeof(ida_ioctl_t), GFP_KERNEL);
+ if (!my_io)
+ return -ENOMEM;
+ error = -EFAULT;
+ if (copy_from_user(my_io, io, sizeof(*my_io)))
+ goto out_passthru;
+ error = ida_ctlr_ioctl(host, drv - host->drv, my_io);
+ if (error)
+ goto out_passthru;
+ error = -EFAULT;
+ if (copy_to_user(io, my_io, sizeof(*my_io)))
+ goto out_passthru;
+ error = 0;
+out_passthru:
+ kfree(my_io);
+ return error;
+ case IDAGETCTLRSIG:
+ if (!arg) return -EINVAL;
+ if (put_user(host->ctlr_sig, (int __user *)arg))
+ return -EFAULT;
+ return 0;
+ case IDAREVALIDATEVOLS:
+ if (MINOR(bdev->bd_dev) != 0)
+ return -ENXIO;
+ return revalidate_allvol(host);
+ case IDADRIVERVERSION:
+ if (!arg) return -EINVAL;
+ if (put_user(DRIVER_VERSION, (unsigned long __user *)arg))
+ return -EFAULT;
+ return 0;
+ case IDAGETPCIINFO:
+ {
+
+ ida_pci_info_struct pciinfo;
+
+ if (!arg) return -EINVAL;
+ pciinfo.bus = host->pci_dev->bus->number;
+ pciinfo.dev_fn = host->pci_dev->devfn;
+ pciinfo.board_id = host->board_id;
+ if(copy_to_user((void __user *) arg, &pciinfo,
+ sizeof( ida_pci_info_struct)))
+ return -EFAULT;
+ return(0);
+ }
+
+ default:
+ return -EINVAL;
+ }
+
+}
+
+static int ida_ioctl(struct block_device *bdev, fmode_t mode,
+ unsigned int cmd, unsigned long param)
+{
+ int ret;
+
+ mutex_lock(&cpqarray_mutex);
+ ret = ida_locked_ioctl(bdev, mode, cmd, param);
+ mutex_unlock(&cpqarray_mutex);
+
+ return ret;
+}
+
+/*
+ * ida_ctlr_ioctl is for passing commands to the controller from userspace.
+ * The command block (io) has already been copied to kernel space for us,
+ * however, any elements in the sglist need to be copied to kernel space
+ * or copied back to userspace.
+ *
+ * Only root may perform a controller passthru command; however, I'm not doing
+ * any serious sanity checking on the arguments.  Doing an IDA_WRITE_MEDIA and
+ * putting a 64M buffer in the sglist is probably a *bad* idea.
+ */
+static int ida_ctlr_ioctl(ctlr_info_t *h, int dsk, ida_ioctl_t *io)
+{
+ int ctlr = h->ctlr;
+ cmdlist_t *c;
+ void *p = NULL;
+ unsigned long flags;
+ int error;
+
+ if ((c = cmd_alloc(h, 0)) == NULL)
+ return -ENOMEM;
+ c->ctlr = ctlr;
+ c->hdr.unit = (io->unit & UNITVALID) ? (io->unit & ~UNITVALID) : dsk;
+ c->hdr.size = sizeof(rblk_t) >> 2;
+ c->size += sizeof(rblk_t);
+
+ c->req.hdr.cmd = io->cmd;
+ c->req.hdr.blk = io->blk;
+ c->req.hdr.blk_cnt = io->blk_cnt;
+ c->type = CMD_IOCTL_PEND;
+
+ /* Pre submit processing */
+ switch(io->cmd) {
+ case PASSTHRU_A:
+ p = memdup_user(io->sg[0].addr, io->sg[0].size);
+ if (IS_ERR(p)) {
+ error = PTR_ERR(p);
+ cmd_free(h, c, 0);
+ return error;
+ }
+ c->req.hdr.blk = pci_map_single(h->pci_dev, &(io->c),
+ sizeof(ida_ioctl_t),
+ PCI_DMA_BIDIRECTIONAL);
+ c->req.sg[0].size = io->sg[0].size;
+ c->req.sg[0].addr = pci_map_single(h->pci_dev, p,
+ c->req.sg[0].size, PCI_DMA_BIDIRECTIONAL);
+ c->req.hdr.sg_cnt = 1;
+ break;
+ case IDA_READ:
+ case READ_FLASH_ROM:
+ case SENSE_CONTROLLER_PERFORMANCE:
+ p = kmalloc(io->sg[0].size, GFP_KERNEL);
+ if (!p)
+ {
+ error = -ENOMEM;
+ cmd_free(h, c, 0);
+ return(error);
+ }
+
+ c->req.sg[0].size = io->sg[0].size;
+ c->req.sg[0].addr = pci_map_single(h->pci_dev, p,
+ c->req.sg[0].size, PCI_DMA_BIDIRECTIONAL);
+ c->req.hdr.sg_cnt = 1;
+ break;
+ case IDA_WRITE:
+ case IDA_WRITE_MEDIA:
+ case DIAG_PASS_THRU:
+ case COLLECT_BUFFER:
+ case WRITE_FLASH_ROM:
+ p = memdup_user(io->sg[0].addr, io->sg[0].size);
+ if (IS_ERR(p)) {
+ error = PTR_ERR(p);
+ cmd_free(h, c, 0);
+ return error;
+ }
+ c->req.sg[0].size = io->sg[0].size;
+ c->req.sg[0].addr = pci_map_single(h->pci_dev, p,
+ c->req.sg[0].size, PCI_DMA_BIDIRECTIONAL);
+ c->req.hdr.sg_cnt = 1;
+ break;
+ default:
+ c->req.sg[0].size = sizeof(io->c);
+ c->req.sg[0].addr = pci_map_single(h->pci_dev,&io->c,
+ c->req.sg[0].size, PCI_DMA_BIDIRECTIONAL);
+ c->req.hdr.sg_cnt = 1;
+ }
+
+ /* Put the request on the tail of the request queue */
+ spin_lock_irqsave(IDA_LOCK(ctlr), flags);
+ addQ(&h->reqQ, c);
+ h->Qdepth++;
+ start_io(h);
+ spin_unlock_irqrestore(IDA_LOCK(ctlr), flags);
+
+ /* Wait for completion */
+ while(c->type != CMD_IOCTL_DONE)
+ schedule();
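+	/* This busy-waits in TASK_RUNNING; do_ida_intr() flips c->type to
+	 * CMD_IOCTL_DONE once the controller completes the command. */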
+
+ /* Unmap the DMA */
+ pci_unmap_single(h->pci_dev, c->req.sg[0].addr, c->req.sg[0].size,
+ PCI_DMA_BIDIRECTIONAL);
+ /* Post submit processing */
+ switch(io->cmd) {
+ case PASSTHRU_A:
+ pci_unmap_single(h->pci_dev, c->req.hdr.blk,
+ sizeof(ida_ioctl_t),
+ PCI_DMA_BIDIRECTIONAL);
+ case IDA_READ:
+ case DIAG_PASS_THRU:
+ case SENSE_CONTROLLER_PERFORMANCE:
+ case READ_FLASH_ROM:
+ if (copy_to_user(io->sg[0].addr, p, io->sg[0].size)) {
+ kfree(p);
+ return -EFAULT;
+ }
+ /* fall through and free p */
+ case IDA_WRITE:
+ case IDA_WRITE_MEDIA:
+ case COLLECT_BUFFER:
+ case WRITE_FLASH_ROM:
+ kfree(p);
+ break;
+ default:;
+ /* Nothing to do */
+ }
+
+ io->rcode = c->req.hdr.rcode;
+ cmd_free(h, c, 0);
+ return(0);
+}
+
+/*
+ * Commands are pre-allocated in a large block. Here we use a simple bitmap
+ * scheme to suballocate them to the driver.  Operations that are not time
+ * critical (and can wait for pci_alloc_consistent and possibly sleep) can
+ * pass 0 as the get_from_pool argument to get a freshly allocated command.
+ */
+static cmdlist_t * cmd_alloc(ctlr_info_t *h, int get_from_pool)
+{
+ cmdlist_t * c;
+ int i;
+ dma_addr_t cmd_dhandle;
+
+ if (!get_from_pool) {
+ c = (cmdlist_t*)pci_alloc_consistent(h->pci_dev,
+ sizeof(cmdlist_t), &cmd_dhandle);
+ if(c==NULL)
+ return NULL;
+ } else {
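+		/* Scan the per-controller command-pool bitmap for a free slot;
+		 * retry if another context claims the same bit between the
+		 * scan and the test_and_set_bit(). */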
+ do {
+ i = find_first_zero_bit(h->cmd_pool_bits, NR_CMDS);
+ if (i == NR_CMDS)
+ return NULL;
+ } while(test_and_set_bit(i&(BITS_PER_LONG-1), h->cmd_pool_bits+(i/BITS_PER_LONG)) != 0);
+ c = h->cmd_pool + i;
+ cmd_dhandle = h->cmd_pool_dhandle + i*sizeof(cmdlist_t);
+ h->nr_allocs++;
+ }
+
+ memset(c, 0, sizeof(cmdlist_t));
+ c->busaddr = cmd_dhandle;
+ return c;
+}
+
+static void cmd_free(ctlr_info_t *h, cmdlist_t *c, int got_from_pool)
+{
+ int i;
+
+ if (!got_from_pool) {
+ pci_free_consistent(h->pci_dev, sizeof(cmdlist_t), c,
+ c->busaddr);
+ } else {
+ i = c - h->cmd_pool;
+ clear_bit(i&(BITS_PER_LONG-1), h->cmd_pool_bits+(i/BITS_PER_LONG));
+ h->nr_frees++;
+ }
+}
+
+/***********************************************************************
+ name: sendcmd
+ Send a command to an IDA using the memory mapped FIFO interface
+ and wait for it to complete.
+ This routine should only be called at init time.
+***********************************************************************/
+static int sendcmd(
+ __u8 cmd,
+ int ctlr,
+ void *buff,
+ size_t size,
+ unsigned int blk,
+ unsigned int blkcnt,
+ unsigned int log_unit )
+{
+ cmdlist_t *c;
+ int complete;
+ unsigned long temp;
+ unsigned long i;
+ ctlr_info_t *info_p = hba[ctlr];
+
+ c = cmd_alloc(info_p, 1);
+ if(!c)
+ return IO_ERROR;
+ c->ctlr = ctlr;
+ c->hdr.unit = log_unit;
+ c->hdr.prio = 0;
+ c->hdr.size = sizeof(rblk_t) >> 2;
+ c->size += sizeof(rblk_t);
+
+ /* The request information. */
+ c->req.hdr.next = 0;
+ c->req.hdr.rcode = 0;
+ c->req.bp = 0;
+ c->req.hdr.sg_cnt = 1;
+ c->req.hdr.reserved = 0;
+
+ if (size == 0)
+ c->req.sg[0].size = 512;
+ else
+ c->req.sg[0].size = size;
+
+ c->req.hdr.blk = blk;
+ c->req.hdr.blk_cnt = blkcnt;
+ c->req.hdr.cmd = (unsigned char) cmd;
+ c->req.sg[0].addr = (__u32) pci_map_single(info_p->pci_dev,
+ buff, c->req.sg[0].size, PCI_DMA_BIDIRECTIONAL);
+ /*
+ * Disable interrupt
+ */
+ info_p->access.set_intr_mask(info_p, 0);
+ /* Make sure there is room in the command FIFO */
+ /* Actually it should be completely empty at this time. */
+ for (i = 200000; i > 0; i--) {
+ temp = info_p->access.fifo_full(info_p);
+ if (temp != 0) {
+ break;
+ }
+ udelay(10);
+DBG(
+ printk(KERN_WARNING "cpqarray ida%d: idaSendPciCmd FIFO full,"
+ " waiting!\n", ctlr);
+);
+ }
+ /*
+ * Send the cmd
+ */
+ info_p->access.submit_command(info_p, c);
+ complete = pollcomplete(ctlr);
+
+ pci_unmap_single(info_p->pci_dev, (dma_addr_t) c->req.sg[0].addr,
+ c->req.sg[0].size, PCI_DMA_BIDIRECTIONAL);
+ if (complete != 1) {
+ if (complete != c->busaddr) {
+ printk( KERN_WARNING
+ "cpqarray ida%d: idaSendPciCmd "
+ "Invalid command list address returned! (%08lx)\n",
+ ctlr, (unsigned long)complete);
+ cmd_free(info_p, c, 1);
+ return (IO_ERROR);
+ }
+ } else {
+ printk( KERN_WARNING
+			"cpqarray ida%d: idaSendPciCmd Timed out, "
+ "No command list address returned!\n",
+ ctlr);
+ cmd_free(info_p, c, 1);
+ return (IO_ERROR);
+ }
+
+ if (c->req.hdr.rcode & 0x00FE) {
+ if (!(c->req.hdr.rcode & BIG_PROBLEM)) {
+ printk( KERN_WARNING
+ "cpqarray ida%d: idaSendPciCmd, error: "
+ "Controller failed at init time "
+ "cmd: 0x%x, return code = 0x%x\n",
+ ctlr, c->req.hdr.cmd, c->req.hdr.rcode);
+
+ cmd_free(info_p, c, 1);
+ return (IO_ERROR);
+ }
+ }
+ cmd_free(info_p, c, 1);
+ return (IO_OK);
+}
+
+/*
+ * revalidate_allvol is for online array config utilities. After a
+ * utility reconfigures the drives in the array, it can use this function
+ * (through an ioctl) to make the driver zap any previous disk structs for
+ * that controller and get new ones.
+ *
+ * Right now I'm using the getgeometry() function to do this, but this
+ * function should probably be finer grained and allow you to revalidate one
+ * particular logical volume (instead of all of them on a particular
+ * controller).
+ */
+static int revalidate_allvol(ctlr_info_t *host)
+{
+ int ctlr = host->ctlr;
+ int i;
+ unsigned long flags;
+
+ spin_lock_irqsave(IDA_LOCK(ctlr), flags);
+ if (host->usage_count > 1) {
+ spin_unlock_irqrestore(IDA_LOCK(ctlr), flags);
+ printk(KERN_WARNING "cpqarray: Device busy for volume"
+ " revalidation (usage=%d)\n", host->usage_count);
+ return -EBUSY;
+ }
+ host->usage_count++;
+ spin_unlock_irqrestore(IDA_LOCK(ctlr), flags);
+
+ /*
+ * Set the partition and block size structures for all volumes
+ * on this controller to zero. We will reread all of this data
+ */
+ set_capacity(ida_gendisk[ctlr][0], 0);
+ for (i = 1; i < NWD; i++) {
+ struct gendisk *disk = ida_gendisk[ctlr][i];
+ if (disk->flags & GENHD_FL_UP)
+ del_gendisk(disk);
+ }
+ memset(host->drv, 0, sizeof(drv_info_t)*NWD);
+
+ /*
+ * Tell the array controller not to give us any interrupts while
+ * we check the new geometry. Then turn interrupts back on when
+ * we're done.
+ */
+ host->access.set_intr_mask(host, 0);
+ getgeometry(ctlr);
+ host->access.set_intr_mask(host, FIFO_NOT_EMPTY);
+
+ for(i=0; i<NWD; i++) {
+ struct gendisk *disk = ida_gendisk[ctlr][i];
+ drv_info_t *drv = &host->drv[i];
+ if (i && !drv->nr_blks)
+ continue;
+ blk_queue_logical_block_size(host->queue, drv->blk_size);
+ set_capacity(disk, drv->nr_blks);
+ disk->queue = host->queue;
+ disk->private_data = drv;
+ if (i)
+ add_disk(disk);
+ }
+
+ host->usage_count--;
+ return 0;
+}
+
+static int ida_revalidate(struct gendisk *disk)
+{
+ drv_info_t *drv = disk->private_data;
+ set_capacity(disk, drv->nr_blks);
+ return 0;
+}
+
+/********************************************************************
+ name: pollcomplete
+ Wait polling for a command to complete.
+ The memory mapped FIFO is polled for the completion.
+ Used only at init time, interrupts disabled.
+ ********************************************************************/
+static int pollcomplete(int ctlr)
+{
+ int done;
+ int i;
+
+ /* Wait (up to 2 seconds) for a command to complete */
+
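+	/* 200000 polls with udelay(10) between them gives the ~2 second budget. */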
+ for (i = 200000; i > 0; i--) {
+ done = hba[ctlr]->access.command_completed(hba[ctlr]);
+ if (done == 0) {
+ udelay(10); /* a short fixed delay */
+ } else
+ return (done);
+ }
+ /* Invalid address to tell caller we ran out of time */
+ return 1;
+}
+/*****************************************************************
+ start_fwbk
+ Starts the controller firmware's background processing.
+ Currently only the Integrated Raid controller needs this done.
+ If the PCI mem address registers are written to after this,
+ data corruption may occur
+*****************************************************************/
+static void start_fwbk(int ctlr)
+{
+ id_ctlr_t *id_ctlr_buf;
+ int ret_code;
+
+ if( (hba[ctlr]->board_id != 0x40400E11)
+ && (hba[ctlr]->board_id != 0x40480E11) )
+
+	/* Not an Integrated Raid, so there is nothing for us to do */
+ return;
+ printk(KERN_DEBUG "cpqarray: Starting firmware's background"
+ " processing\n");
+	/* The command does not return anything, but sendcmd() needs a
+	   buffer */
+ id_ctlr_buf = kmalloc(sizeof(id_ctlr_t), GFP_KERNEL);
+ if(id_ctlr_buf==NULL)
+ {
+ printk(KERN_WARNING "cpqarray: Out of memory. "
+ "Unable to start background processing.\n");
+ return;
+ }
+ ret_code = sendcmd(RESUME_BACKGROUND_ACTIVITY, ctlr,
+ id_ctlr_buf, 0, 0, 0, 0);
+ if(ret_code != IO_OK)
+ printk(KERN_WARNING "cpqarray: Unable to start"
+ " background processing\n");
+
+ kfree(id_ctlr_buf);
+}
+/*****************************************************************
+ getgeometry
+ Get ida logical volume geometry from the controller
+ This is a large bit of code which once existed in two flavors.
+ It is used only at init time.
+*****************************************************************/
+static void getgeometry(int ctlr)
+{
+ id_log_drv_t *id_ldrive;
+ id_ctlr_t *id_ctlr_buf;
+ sense_log_drv_stat_t *id_lstatus_buf;
+ config_t *sense_config_buf;
+ unsigned int log_unit, log_index;
+ int ret_code, size;
+ drv_info_t *drv;
+ ctlr_info_t *info_p = hba[ctlr];
+ int i;
+
+ info_p->log_drv_map = 0;
+
+ id_ldrive = kzalloc(sizeof(id_log_drv_t), GFP_KERNEL);
+ if (!id_ldrive) {
+ printk( KERN_ERR "cpqarray: out of memory.\n");
+ goto err_0;
+ }
+
+ id_ctlr_buf = kzalloc(sizeof(id_ctlr_t), GFP_KERNEL);
+ if (!id_ctlr_buf) {
+ printk( KERN_ERR "cpqarray: out of memory.\n");
+ goto err_1;
+ }
+
+ id_lstatus_buf = kzalloc(sizeof(sense_log_drv_stat_t), GFP_KERNEL);
+ if (!id_lstatus_buf) {
+ printk( KERN_ERR "cpqarray: out of memory.\n");
+ goto err_2;
+ }
+
+ sense_config_buf = kzalloc(sizeof(config_t), GFP_KERNEL);
+ if (!sense_config_buf) {
+ printk( KERN_ERR "cpqarray: out of memory.\n");
+ goto err_3;
+ }
+
+ info_p->phys_drives = 0;
+ info_p->log_drv_map = 0;
+ info_p->drv_assign_map = 0;
+ info_p->drv_spare_map = 0;
+ info_p->mp_failed_drv_map = 0; /* only initialized here */
+ /* Get controllers info for this logical drive */
+ ret_code = sendcmd(ID_CTLR, ctlr, id_ctlr_buf, 0, 0, 0, 0);
+ if (ret_code == IO_ERROR) {
+ /*
+	 * If we can't get controller info, set the logical drive map to 0,
+ * so the idastubopen will fail on all logical drives
+ * on the controller.
+ */
+ printk(KERN_ERR "cpqarray: error sending ID controller\n");
+ goto err_4;
+ }
+
+ info_p->log_drives = id_ctlr_buf->nr_drvs;
+ for(i=0;i<4;i++)
+ info_p->firm_rev[i] = id_ctlr_buf->firm_rev[i];
+ info_p->ctlr_sig = id_ctlr_buf->cfg_sig;
+
+ printk(" (%s)\n", info_p->product_name);
+ /*
+ * Initialize logical drive map to zero
+ */
+ log_index = 0;
+ /*
+ * Get drive geometry for all logical drives
+ */
+ if (id_ctlr_buf->nr_drvs > 16)
+		printk(KERN_WARNING "cpqarray ida%d: This driver supports "
+			"16 logical drives per controller. "
+			"Additional drives will not be detected.\n", ctlr);
+
+ for (log_unit = 0;
+ (log_index < id_ctlr_buf->nr_drvs)
+ && (log_unit < NWD);
+ log_unit++) {
+ size = sizeof(sense_log_drv_stat_t);
+
+ /*
+ Send "Identify logical drive status" cmd
+ */
+ ret_code = sendcmd(SENSE_LOG_DRV_STAT,
+ ctlr, id_lstatus_buf, size, 0, 0, log_unit);
+ if (ret_code == IO_ERROR) {
+ /*
+			  If we can't get the logical drive status, set
+ the logical drive map to 0, so the
+ idastubopen will fail for all logical drives
+ on the controller.
+ */
+ info_p->log_drv_map = 0;
+ printk( KERN_WARNING
+ "cpqarray ida%d: idaGetGeometry - Controller"
+ " failed to report status of logical drive %d\n"
+ "Access to this controller has been disabled\n",
+ ctlr, log_unit);
+ goto err_4;
+ }
+ /*
+ Make sure the logical drive is configured
+ */
+ if (id_lstatus_buf->status != LOG_NOT_CONF) {
+ ret_code = sendcmd(ID_LOG_DRV, ctlr, id_ldrive,
+ sizeof(id_log_drv_t), 0, 0, log_unit);
+ /*
+ If error, the bit for this
+ logical drive won't be set and
+ idastubopen will return error.
+ */
+ if (ret_code != IO_ERROR) {
+ drv = &info_p->drv[log_unit];
+ drv->blk_size = id_ldrive->blk_size;
+ drv->nr_blks = id_ldrive->nr_blks;
+ drv->cylinders = id_ldrive->drv.cyl;
+ drv->heads = id_ldrive->drv.heads;
+ drv->sectors = id_ldrive->drv.sect_per_track;
+ info_p->log_drv_map |= (1 << log_unit);
+
+ printk(KERN_INFO "cpqarray ida/c%dd%d: blksz=%d nr_blks=%d\n",
+ ctlr, log_unit, drv->blk_size, drv->nr_blks);
+ ret_code = sendcmd(SENSE_CONFIG,
+ ctlr, sense_config_buf,
+ sizeof(config_t), 0, 0, log_unit);
+ if (ret_code == IO_ERROR) {
+ info_p->log_drv_map = 0;
+ printk(KERN_ERR "cpqarray: error sending sense config\n");
+ goto err_4;
+ }
+
+ info_p->phys_drives =
+ sense_config_buf->ctlr_phys_drv;
+ info_p->drv_assign_map
+ |= sense_config_buf->drv_asgn_map;
+ info_p->drv_assign_map
+ |= sense_config_buf->spare_asgn_map;
+ info_p->drv_spare_map
+ |= sense_config_buf->spare_asgn_map;
+ } /* end of if no error on id_ldrive */
+ log_index = log_index + 1;
+ } /* end of if logical drive configured */
+ } /* end of for log_unit */
+
+ /* Free all the buffers and return */
+err_4:
+ kfree(sense_config_buf);
+err_3:
+ kfree(id_lstatus_buf);
+err_2:
+ kfree(id_ctlr_buf);
+err_1:
+ kfree(id_ldrive);
+err_0:
+ return;
+}
+
+static void __exit cpqarray_exit(void)
+{
+ int i;
+
+ pci_unregister_driver(&cpqarray_pci_driver);
+
+ /* Double check that all controller entries have been removed */
+ for(i=0; i<MAX_CTLR; i++) {
+ if (hba[i] != NULL) {
+ printk(KERN_WARNING "cpqarray: Removing EISA "
+ "controller %d\n", i);
+ cpqarray_remove_one_eisa(i);
+ }
+ }
+
+ remove_proc_entry("driver/cpqarray", NULL);
+}
+
+module_init(cpqarray_init)
+module_exit(cpqarray_exit)
diff --git a/drivers/block/cpqarray.h b/drivers/block/cpqarray.h
new file mode 100644
index 00000000..be73e9d5
--- /dev/null
+++ b/drivers/block/cpqarray.h
@@ -0,0 +1,126 @@
+/*
+ * Disk Array driver for Compaq SMART2 Controllers
+ * Copyright 1998 Compaq Computer Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Questions/Comments/Bugfixes to iss_storagedev@hp.com
+ *
+ * If you want to make changes, improve or add functionality to this
+ * driver, you'll probably need the Compaq Array Controller Interface
+ * Specification (Document number ECG086/1198)
+ */
+#ifndef CPQARRAY_H
+#define CPQARRAY_H
+
+#ifdef __KERNEL__
+#include <linux/blkdev.h>
+#include <linux/slab.h>
+#include <linux/proc_fs.h>
+#include <linux/timer.h>
+#endif
+
+#include "ida_cmd.h"
+
+#define IO_OK 0
+#define IO_ERROR 1
+#define NWD 16
+#define NWD_SHIFT 4
+
+#define IDA_TIMER (5*HZ)
+#define IDA_TIMEOUT (10*HZ)
+
+#define MISC_NONFATAL_WARN 0x01
+
+typedef struct {
+ unsigned blk_size;
+ unsigned nr_blks;
+ unsigned cylinders;
+ unsigned heads;
+ unsigned sectors;
+ int usage_count;
+} drv_info_t;
+
+#ifdef __KERNEL__
+
+struct ctlr_info;
+typedef struct ctlr_info ctlr_info_t;
+
+struct access_method {
+ void (*submit_command)(ctlr_info_t *h, cmdlist_t *c);
+ void (*set_intr_mask)(ctlr_info_t *h, unsigned long val);
+ unsigned long (*fifo_full)(ctlr_info_t *h);
+ unsigned long (*intr_pending)(ctlr_info_t *h);
+ unsigned long (*command_completed)(ctlr_info_t *h);
+};
+
+struct board_type {
+ __u32 board_id;
+ char *product_name;
+ struct access_method *access;
+};
+
+struct ctlr_info {
+ int ctlr;
+ char devname[8];
+ __u32 log_drv_map;
+ __u32 drv_assign_map;
+ __u32 drv_spare_map;
+ __u32 mp_failed_drv_map;
+
+ char firm_rev[4];
+ int ctlr_sig;
+
+ int log_drives;
+ int phys_drives;
+
+ struct pci_dev *pci_dev; /* NULL if EISA */
+ __u32 board_id;
+ char *product_name;
+
+ void __iomem *vaddr;
+ unsigned long paddr;
+ unsigned long io_mem_addr;
+ unsigned long io_mem_length;
+ int intr;
+ int usage_count;
+ drv_info_t drv[NWD];
+ struct proc_dir_entry *proc;
+
+ struct access_method access;
+
+ cmdlist_t *reqQ;
+ cmdlist_t *cmpQ;
+ cmdlist_t *cmd_pool;
+ dma_addr_t cmd_pool_dhandle;
+ unsigned long *cmd_pool_bits;
+ struct request_queue *queue;
+ spinlock_t lock;
+
+ unsigned int Qdepth;
+ unsigned int maxQsinceinit;
+
+ unsigned int nr_requests;
+ unsigned int nr_allocs;
+ unsigned int nr_frees;
+ struct timer_list timer;
+ unsigned int misc_tflags;
+};
+
+#define IDA_LOCK(i) (&hba[i]->lock)
+
+#endif
+
+#endif /* CPQARRAY_H */
diff --git a/drivers/block/cryptoloop.c b/drivers/block/cryptoloop.c
new file mode 100644
index 00000000..8b6bb764
--- /dev/null
+++ b/drivers/block/cryptoloop.c
@@ -0,0 +1,216 @@
+/*
+ Linux loop encryption enabling module
+
+ Copyright (C) 2002 Herbert Valerio Riedel <hvr@gnu.org>
+ Copyright (C) 2003 Fruhwirth Clemens <clemens@endorphin.org>
+
+ This module is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This module is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this module; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/module.h>
+
+#include <linux/init.h>
+#include <linux/string.h>
+#include <linux/crypto.h>
+#include <linux/blkdev.h>
+#include <linux/loop.h>
+#include <linux/scatterlist.h>
+#include <asm/uaccess.h>
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("loop blockdevice transferfunction adaptor / CryptoAPI");
+MODULE_AUTHOR("Herbert Valerio Riedel <hvr@gnu.org>");
+
+#define LOOP_IV_SECTOR_BITS 9
+#define LOOP_IV_SECTOR_SIZE (1 << LOOP_IV_SECTOR_BITS)
+
+static int
+cryptoloop_init(struct loop_device *lo, const struct loop_info64 *info)
+{
+ int err = -EINVAL;
+ int cipher_len;
+ int mode_len;
+ char cms[LO_NAME_SIZE]; /* cipher-mode string */
+ char *cipher;
+ char *mode;
+ char *cmsp = cms; /* c-m string pointer */
+ struct crypto_blkcipher *tfm;
+
+ /* encryption breaks for non sector aligned offsets */
+
+ if (info->lo_offset % LOOP_IV_SECTOR_SIZE)
+ goto out;
+
+ strncpy(cms, info->lo_crypt_name, LO_NAME_SIZE);
+ cms[LO_NAME_SIZE - 1] = 0;
+
+ cipher = cmsp;
+ cipher_len = strcspn(cmsp, "-");
+
+ mode = cmsp + cipher_len;
+ mode_len = 0;
+ if (*mode) {
+ mode++;
+ mode_len = strcspn(mode, "-");
+ }
+
+ if (!mode_len) {
+ mode = "cbc";
+ mode_len = 3;
+ }
+
+ if (cipher_len + mode_len + 3 > LO_NAME_SIZE)
+ return -EINVAL;
+
+ memmove(cms, mode, mode_len);
+ cmsp = cms + mode_len;
+ *cmsp++ = '(';
+ memcpy(cmsp, info->lo_crypt_name, cipher_len);
+ cmsp += cipher_len;
+ *cmsp++ = ')';
+ *cmsp = 0;
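+	/* The user-supplied "cipher-mode" name has been rewritten into the
+	 * crypto API template form "mode(cipher)", e.g. "aes" with the
+	 * default mode becomes "cbc(aes)". */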
+
+ tfm = crypto_alloc_blkcipher(cms, 0, CRYPTO_ALG_ASYNC);
+ if (IS_ERR(tfm))
+ return PTR_ERR(tfm);
+
+ err = crypto_blkcipher_setkey(tfm, info->lo_encrypt_key,
+ info->lo_encrypt_key_size);
+
+ if (err != 0)
+ goto out_free_tfm;
+
+ lo->key_data = tfm;
+ return 0;
+
+ out_free_tfm:
+ crypto_free_blkcipher(tfm);
+
+ out:
+ return err;
+}
+
+
+typedef int (*encdec_cbc_t)(struct blkcipher_desc *desc,
+ struct scatterlist *sg_out,
+ struct scatterlist *sg_in,
+ unsigned int nsg);
+
+static int
+cryptoloop_transfer(struct loop_device *lo, int cmd,
+ struct page *raw_page, unsigned raw_off,
+ struct page *loop_page, unsigned loop_off,
+ int size, sector_t IV)
+{
+ struct crypto_blkcipher *tfm = lo->key_data;
+ struct blkcipher_desc desc = {
+ .tfm = tfm,
+ .flags = CRYPTO_TFM_REQ_MAY_SLEEP,
+ };
+ struct scatterlist sg_out;
+ struct scatterlist sg_in;
+
+ encdec_cbc_t encdecfunc;
+ struct page *in_page, *out_page;
+ unsigned in_offs, out_offs;
+ int err;
+
+ sg_init_table(&sg_out, 1);
+ sg_init_table(&sg_in, 1);
+
+ if (cmd == READ) {
+ in_page = raw_page;
+ in_offs = raw_off;
+ out_page = loop_page;
+ out_offs = loop_off;
+ encdecfunc = crypto_blkcipher_crt(tfm)->decrypt;
+ } else {
+ in_page = loop_page;
+ in_offs = loop_off;
+ out_page = raw_page;
+ out_offs = raw_off;
+ encdecfunc = crypto_blkcipher_crt(tfm)->encrypt;
+ }
+
+ while (size > 0) {
+ const int sz = min(size, LOOP_IV_SECTOR_SIZE);
+ u32 iv[4] = { 0, };
+ iv[0] = cpu_to_le32(IV & 0xffffffff);
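+		/* The CBC IV is the 512-byte sector number, little endian, in
+		 * the low 32 bits; the remaining IV words stay zero. */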
+
+ sg_set_page(&sg_in, in_page, sz, in_offs);
+ sg_set_page(&sg_out, out_page, sz, out_offs);
+
+ desc.info = iv;
+ err = encdecfunc(&desc, &sg_out, &sg_in, sz);
+ if (err)
+ return err;
+
+ IV++;
+ size -= sz;
+ in_offs += sz;
+ out_offs += sz;
+ }
+
+ return 0;
+}
+
+static int
+cryptoloop_ioctl(struct loop_device *lo, int cmd, unsigned long arg)
+{
+ return -EINVAL;
+}
+
+static int
+cryptoloop_release(struct loop_device *lo)
+{
+ struct crypto_blkcipher *tfm = lo->key_data;
+ if (tfm != NULL) {
+ crypto_free_blkcipher(tfm);
+ lo->key_data = NULL;
+ return 0;
+ }
+ printk(KERN_ERR "cryptoloop_release(): tfm == NULL?\n");
+ return -EINVAL;
+}
+
+static struct loop_func_table cryptoloop_funcs = {
+ .number = LO_CRYPT_CRYPTOAPI,
+ .init = cryptoloop_init,
+ .ioctl = cryptoloop_ioctl,
+ .transfer = cryptoloop_transfer,
+ .release = cryptoloop_release,
+ .owner = THIS_MODULE
+};
+
+static int __init
+init_cryptoloop(void)
+{
+ int rc = loop_register_transfer(&cryptoloop_funcs);
+
+ if (rc)
+ printk(KERN_ERR "cryptoloop: loop_register_transfer failed\n");
+ return rc;
+}
+
+static void __exit
+cleanup_cryptoloop(void)
+{
+ if (loop_unregister_transfer(LO_CRYPT_CRYPTOAPI))
+ printk(KERN_ERR
+ "cryptoloop: loop_unregister_transfer failed\n");
+}
+
+module_init(init_cryptoloop);
+module_exit(cleanup_cryptoloop);
diff --git a/drivers/block/drbd/Kconfig b/drivers/block/drbd/Kconfig
new file mode 100644
index 00000000..df098378
--- /dev/null
+++ b/drivers/block/drbd/Kconfig
@@ -0,0 +1,71 @@
+#
+# DRBD device driver configuration
+#
+
+comment "DRBD disabled because PROC_FS, INET or CONNECTOR not selected"
+ depends on PROC_FS='n' || INET='n' || CONNECTOR='n'
+
+config BLK_DEV_DRBD
+ tristate "DRBD Distributed Replicated Block Device support"
+ depends on PROC_FS && INET && CONNECTOR
+ select LRU_CACHE
+ default n
+ help
+
+ NOTE: In order to authenticate connections you have to select
+ CRYPTO_HMAC and a hash function as well.
+
+ DRBD is a shared-nothing, synchronously replicated block device. It
+ is designed to serve as a building block for high availability
+ clusters and in this context, is a "drop-in" replacement for shared
+ storage. Simplistically, you could see it as a network RAID 1.
+
+ Each minor device has a role, which can be 'primary' or 'secondary'.
+ On the node with the primary device the application is supposed to
+ run and to access the device (/dev/drbdX). Every write is sent to
+ the local 'lower level block device' and, across the network, to the
+ node with the device in 'secondary' state. The secondary device
+ simply writes the data to its lower level block device.
+
+ DRBD can also be used in dual-Primary mode (device writable on both
+ nodes), which means it can exhibit shared disk semantics in a
+ shared-nothing cluster. Needless to say, on top of dual-Primary
+ DRBD utilizing a cluster file system is necessary to maintain for
+	  DRBD, utilizing a cluster file system is necessary to maintain
+
+ For automatic failover you need a cluster manager (e.g. heartbeat).
+ See also: http://www.drbd.org/, http://www.linux-ha.org
+
+ If unsure, say N.
+
+config DRBD_FAULT_INJECTION
+ bool "DRBD fault injection"
+ depends on BLK_DEV_DRBD
+ help
+
+ Say Y here if you want to simulate IO errors, in order to test DRBD's
+ behavior.
+
+ The actual simulation of IO errors is done by writing 3 values to
+ /sys/module/drbd/parameters/
+
+ enable_faults: bitmask of...
+ 1 meta data write
+ 2 read
+ 4 resync data write
+ 8 read
+ 16 data write
+ 32 data read
+ 64 read ahead
+ 128 kmalloc of bitmap
+ 256 allocation of EE (epoch_entries)
+
+ fault_devs: bitmask of minor numbers
+ fault_rate: frequency in percent
+
+ Example: Simulate data write errors on /dev/drbd0 with a probability of 5%.
+ echo 16 > /sys/module/drbd/parameters/enable_faults
+ echo 1 > /sys/module/drbd/parameters/fault_devs
+ echo 5 > /sys/module/drbd/parameters/fault_rate
+
+ If unsure, say N.
diff --git a/drivers/block/drbd/Makefile b/drivers/block/drbd/Makefile
new file mode 100644
index 00000000..0d3f337f
--- /dev/null
+++ b/drivers/block/drbd/Makefile
@@ -0,0 +1,5 @@
+drbd-y := drbd_bitmap.o drbd_proc.o
+drbd-y += drbd_worker.o drbd_receiver.o drbd_req.o drbd_actlog.o
+drbd-y += drbd_main.o drbd_strings.o drbd_nl.o
+
+obj-$(CONFIG_BLK_DEV_DRBD) += drbd.o
diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c
new file mode 100644
index 00000000..cf0e63dd
--- /dev/null
+++ b/drivers/block/drbd/drbd_actlog.c
@@ -0,0 +1,1261 @@
+/*
+ drbd_actlog.c
+
+ This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
+
+ Copyright (C) 2003-2008, LINBIT Information Technologies GmbH.
+ Copyright (C) 2003-2008, Philipp Reisner <philipp.reisner@linbit.com>.
+ Copyright (C) 2003-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
+
+ drbd is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ drbd is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with drbd; see the file COPYING. If not, write to
+ the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ */
+
+#include <linux/slab.h>
+#include <linux/drbd.h>
+#include "drbd_int.h"
+#include "drbd_wrappers.h"
+
+/* We maintain a trivial checksum in our on disk activity log.
+ * With that we can ensure correct operation even when the storage
+ * device might do a partial (last) sector write while losing power.
+ */
+struct __packed al_transaction {
+ u32 magic;
+ u32 tr_number;
+ struct __packed {
+ u32 pos;
+ u32 extent; } updates[1 + AL_EXTENTS_PT];
+ u32 xor_sum;
+};
+
+struct update_odbm_work {
+ struct drbd_work w;
+ unsigned int enr;
+};
+
+struct update_al_work {
+ struct drbd_work w;
+ struct lc_element *al_ext;
+ struct completion event;
+ unsigned int enr;
+ /* if old_enr != LC_FREE, write corresponding bitmap sector, too */
+ unsigned int old_enr;
+};
+
+struct drbd_atodb_wait {
+ atomic_t count;
+ struct completion io_done;
+ struct drbd_conf *mdev;
+ int error;
+};
+
+
+int w_al_write_transaction(struct drbd_conf *, struct drbd_work *, int);
+
+static int _drbd_md_sync_page_io(struct drbd_conf *mdev,
+ struct drbd_backing_dev *bdev,
+ struct page *page, sector_t sector,
+ int rw, int size)
+{
+ struct bio *bio;
+ struct drbd_md_io md_io;
+ int ok;
+
+ md_io.mdev = mdev;
+ init_completion(&md_io.event);
+ md_io.error = 0;
+
+ if ((rw & WRITE) && !test_bit(MD_NO_FUA, &mdev->flags))
+ rw |= REQ_FUA | REQ_FLUSH;
+ rw |= REQ_SYNC;
+
+ bio = bio_alloc(GFP_NOIO, 1);
+ bio->bi_bdev = bdev->md_bdev;
+ bio->bi_sector = sector;
+ ok = (bio_add_page(bio, page, size, 0) == size);
+ if (!ok)
+ goto out;
+ bio->bi_private = &md_io;
+ bio->bi_end_io = drbd_md_io_complete;
+ bio->bi_rw = rw;
+
+ if (drbd_insert_fault(mdev, (rw & WRITE) ? DRBD_FAULT_MD_WR : DRBD_FAULT_MD_RD))
+ bio_endio(bio, -EIO);
+ else
+ submit_bio(rw, bio);
+ wait_for_completion(&md_io.event);
+ ok = bio_flagged(bio, BIO_UPTODATE) && md_io.error == 0;
+
+ out:
+ bio_put(bio);
+ return ok;
+}
+
+int drbd_md_sync_page_io(struct drbd_conf *mdev, struct drbd_backing_dev *bdev,
+ sector_t sector, int rw)
+{
+ int logical_block_size, mask, ok;
+ int offset = 0;
+ struct page *iop = mdev->md_io_page;
+
+ D_ASSERT(mutex_is_locked(&mdev->md_io_mutex));
+
+ BUG_ON(!bdev->md_bdev);
+
+ logical_block_size = bdev_logical_block_size(bdev->md_bdev);
+ if (logical_block_size == 0)
+ logical_block_size = MD_SECTOR_SIZE;
+
+ /* in case logical_block_size != 512 [ s390 only? ] */
+ if (logical_block_size != MD_SECTOR_SIZE) {
+ mask = (logical_block_size / MD_SECTOR_SIZE) - 1;
+ D_ASSERT(mask == 1 || mask == 3 || mask == 7);
+ D_ASSERT(logical_block_size == (mask+1) * MD_SECTOR_SIZE);
+ offset = sector & mask;
+ sector = sector & ~mask;
+ iop = mdev->md_io_tmpp;
+
+ if (rw & WRITE) {
+ /* these are GFP_KERNEL pages, pre-allocated
+ * on device initialization */
+ void *p = page_address(mdev->md_io_page);
+ void *hp = page_address(mdev->md_io_tmpp);
+
+ ok = _drbd_md_sync_page_io(mdev, bdev, iop, sector,
+ READ, logical_block_size);
+
+ if (unlikely(!ok)) {
+ dev_err(DEV, "drbd_md_sync_page_io(,%llus,"
+ "READ [logical_block_size!=512]) failed!\n",
+ (unsigned long long)sector);
+ return 0;
+ }
+
+ memcpy(hp + offset*MD_SECTOR_SIZE, p, MD_SECTOR_SIZE);
+ }
+ }
+
+ if (sector < drbd_md_first_sector(bdev) ||
+ sector > drbd_md_last_sector(bdev))
+ dev_alert(DEV, "%s [%d]:%s(,%llus,%s) out of range md access!\n",
+ current->comm, current->pid, __func__,
+ (unsigned long long)sector, (rw & WRITE) ? "WRITE" : "READ");
+
+ ok = _drbd_md_sync_page_io(mdev, bdev, iop, sector, rw, logical_block_size);
+ if (unlikely(!ok)) {
+ dev_err(DEV, "drbd_md_sync_page_io(,%llus,%s) failed!\n",
+ (unsigned long long)sector, (rw & WRITE) ? "WRITE" : "READ");
+ return 0;
+ }
+
+ if (logical_block_size != MD_SECTOR_SIZE && !(rw & WRITE)) {
+ void *p = page_address(mdev->md_io_page);
+ void *hp = page_address(mdev->md_io_tmpp);
+
+ memcpy(p, hp + offset*MD_SECTOR_SIZE, MD_SECTOR_SIZE);
+ }
+
+ return ok;
+}
+
+static struct lc_element *_al_get(struct drbd_conf *mdev, unsigned int enr)
+{
+ struct lc_element *al_ext;
+ struct lc_element *tmp;
+ unsigned long al_flags = 0;
+ int wake;
+
+ spin_lock_irq(&mdev->al_lock);
+ tmp = lc_find(mdev->resync, enr/AL_EXT_PER_BM_SECT);
+ if (unlikely(tmp != NULL)) {
+ struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce);
+ if (test_bit(BME_NO_WRITES, &bm_ext->flags)) {
+ wake = !test_and_set_bit(BME_PRIORITY, &bm_ext->flags);
+ spin_unlock_irq(&mdev->al_lock);
+ if (wake)
+ wake_up(&mdev->al_wait);
+ return NULL;
+ }
+ }
+ al_ext = lc_get(mdev->act_log, enr);
+ al_flags = mdev->act_log->flags;
+ spin_unlock_irq(&mdev->al_lock);
+
+ /*
+ if (!al_ext) {
+ if (al_flags & LC_STARVING)
+ dev_warn(DEV, "Have to wait for LRU element (AL too small?)\n");
+ if (al_flags & LC_DIRTY)
+ dev_warn(DEV, "Ongoing AL update (AL device too slow?)\n");
+ }
+ */
+
+ return al_ext;
+}
+
+void drbd_al_begin_io(struct drbd_conf *mdev, sector_t sector)
+{
+ unsigned int enr = (sector >> (AL_EXTENT_SHIFT-9));
+ struct lc_element *al_ext;
+ struct update_al_work al_work;
+
+ D_ASSERT(atomic_read(&mdev->local_cnt) > 0);
+
+ wait_event(mdev->al_wait, (al_ext = _al_get(mdev, enr)));
+
+ if (al_ext->lc_number != enr) {
+ /* drbd_al_write_transaction(mdev,al_ext,enr);
+ * recurses into generic_make_request(), which
+ * disallows recursion, bios being serialized on the
+ * current->bio_tail list now.
+ * we have to delegate updates to the activity log
+ * to the worker thread. */
+ init_completion(&al_work.event);
+ al_work.al_ext = al_ext;
+ al_work.enr = enr;
+ al_work.old_enr = al_ext->lc_number;
+ al_work.w.cb = w_al_write_transaction;
+ drbd_queue_work_front(&mdev->data.work, &al_work.w);
+ wait_for_completion(&al_work.event);
+
+ mdev->al_writ_cnt++;
+
+ spin_lock_irq(&mdev->al_lock);
+ lc_changed(mdev->act_log, al_ext);
+ spin_unlock_irq(&mdev->al_lock);
+ wake_up(&mdev->al_wait);
+ }
+}
+
+void drbd_al_complete_io(struct drbd_conf *mdev, sector_t sector)
+{
+ unsigned int enr = (sector >> (AL_EXTENT_SHIFT-9));
+ struct lc_element *extent;
+ unsigned long flags;
+
+ spin_lock_irqsave(&mdev->al_lock, flags);
+
+ extent = lc_find(mdev->act_log, enr);
+
+ if (!extent) {
+ spin_unlock_irqrestore(&mdev->al_lock, flags);
+ dev_err(DEV, "al_complete_io() called on inactive extent %u\n", enr);
+ return;
+ }
+
+ if (lc_put(mdev->act_log, extent) == 0)
+ wake_up(&mdev->al_wait);
+
+ spin_unlock_irqrestore(&mdev->al_lock, flags);
+}
+
+#if (PAGE_SHIFT + 3) < (AL_EXTENT_SHIFT - BM_BLOCK_SHIFT)
+/* Currently BM_BLOCK_SHIFT, BM_EXT_SHIFT and AL_EXTENT_SHIFT
+ * are still coupled, or assume too much about their relation.
+ * Code below will not work if this is violated.
+ * Will be cleaned up with some followup patch.
+ */
+# error FIXME
+#endif
+
+static unsigned int al_extent_to_bm_page(unsigned int al_enr)
+{
+ return al_enr >>
+ /* bit to page */
+ ((PAGE_SHIFT + 3) -
+ /* al extent number to bit */
+ (AL_EXTENT_SHIFT - BM_BLOCK_SHIFT));
+}
+
+static unsigned int rs_extent_to_bm_page(unsigned int rs_enr)
+{
+ return rs_enr >>
+ /* bit to page */
+ ((PAGE_SHIFT + 3) -
+ /* al extent number to bit */
+ (BM_EXT_SHIFT - BM_BLOCK_SHIFT));
+}
+
+int
+w_al_write_transaction(struct drbd_conf *mdev, struct drbd_work *w, int unused)
+{
+ struct update_al_work *aw = container_of(w, struct update_al_work, w);
+ struct lc_element *updated = aw->al_ext;
+ const unsigned int new_enr = aw->enr;
+ const unsigned int evicted = aw->old_enr;
+ struct al_transaction *buffer;
+ sector_t sector;
+ int i, n, mx;
+ unsigned int extent_nr;
+ u32 xor_sum = 0;
+
+ if (!get_ldev(mdev)) {
+ dev_err(DEV,
+ "disk is %s, cannot start al transaction (-%d +%d)\n",
+ drbd_disk_str(mdev->state.disk), evicted, new_enr);
+ complete(&((struct update_al_work *)w)->event);
+ return 1;
+ }
+ /* do we have to do a bitmap write, first?
+ * TODO reduce maximum latency:
+ * submit both bios, then wait for both,
+ * instead of doing two synchronous sector writes.
+ * For now, we must not write the transaction,
+ * if we cannot write out the bitmap of the evicted extent. */
+ if (mdev->state.conn < C_CONNECTED && evicted != LC_FREE)
+ drbd_bm_write_page(mdev, al_extent_to_bm_page(evicted));
+
+ /* The bitmap write may have failed, causing a state change. */
+ if (mdev->state.disk < D_INCONSISTENT) {
+ dev_err(DEV,
+ "disk is %s, cannot write al transaction (-%d +%d)\n",
+ drbd_disk_str(mdev->state.disk), evicted, new_enr);
+ complete(&((struct update_al_work *)w)->event);
+ put_ldev(mdev);
+ return 1;
+ }
+
+ mutex_lock(&mdev->md_io_mutex); /* protects md_io_buffer, al_tr_cycle, ... */
+ buffer = (struct al_transaction *)page_address(mdev->md_io_page);
+
+ buffer->magic = __constant_cpu_to_be32(DRBD_MAGIC);
+ buffer->tr_number = cpu_to_be32(mdev->al_tr_number);
+
+ n = lc_index_of(mdev->act_log, updated);
+
+ buffer->updates[0].pos = cpu_to_be32(n);
+ buffer->updates[0].extent = cpu_to_be32(new_enr);
+
+ xor_sum ^= new_enr;
+
+ mx = min_t(int, AL_EXTENTS_PT,
+ mdev->act_log->nr_elements - mdev->al_tr_cycle);
+ for (i = 0; i < mx; i++) {
+ unsigned idx = mdev->al_tr_cycle + i;
+ extent_nr = lc_element_by_index(mdev->act_log, idx)->lc_number;
+ buffer->updates[i+1].pos = cpu_to_be32(idx);
+ buffer->updates[i+1].extent = cpu_to_be32(extent_nr);
+ xor_sum ^= extent_nr;
+ }
+ for (; i < AL_EXTENTS_PT; i++) {
+ buffer->updates[i+1].pos = __constant_cpu_to_be32(-1);
+ buffer->updates[i+1].extent = __constant_cpu_to_be32(LC_FREE);
+ xor_sum ^= LC_FREE;
+ }
+ mdev->al_tr_cycle += AL_EXTENTS_PT;
+ if (mdev->al_tr_cycle >= mdev->act_log->nr_elements)
+ mdev->al_tr_cycle = 0;
+
+ buffer->xor_sum = cpu_to_be32(xor_sum);
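+	/* xor_sum is the trivial checksum mentioned at the top of this file:
+	 * the XOR of all extent numbers in the transaction.  It is verified
+	 * again by drbd_al_read_tr() when the log is replayed. */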
+
+ sector = mdev->ldev->md.md_offset
+ + mdev->ldev->md.al_offset + mdev->al_tr_pos;
+
+ if (!drbd_md_sync_page_io(mdev, mdev->ldev, sector, WRITE))
+ drbd_chk_io_error(mdev, 1, true);
+
+ if (++mdev->al_tr_pos >
+ div_ceil(mdev->act_log->nr_elements, AL_EXTENTS_PT))
+ mdev->al_tr_pos = 0;
+
+ D_ASSERT(mdev->al_tr_pos < MD_AL_MAX_SIZE);
+ mdev->al_tr_number++;
+
+ mutex_unlock(&mdev->md_io_mutex);
+
+ complete(&((struct update_al_work *)w)->event);
+ put_ldev(mdev);
+
+ return 1;
+}
+
+/**
+ * drbd_al_read_tr() - Read a single transaction from the on disk activity log
+ * @mdev: DRBD device.
+ * @bdev:	Block device to read from.
+ * @b: pointer to an al_transaction.
+ * @index: On disk slot of the transaction to read.
+ *
+ * Returns -1 on IO error, 0 on checksum error and 1 upon success.
+ */
+static int drbd_al_read_tr(struct drbd_conf *mdev,
+ struct drbd_backing_dev *bdev,
+ struct al_transaction *b,
+ int index)
+{
+ sector_t sector;
+ int rv, i;
+ u32 xor_sum = 0;
+
+ sector = bdev->md.md_offset + bdev->md.al_offset + index;
+
+	/* Don't process errors normally,
+	 * as this is done before the disk is attached! */
+ if (!drbd_md_sync_page_io(mdev, bdev, sector, READ))
+ return -1;
+
+ rv = (be32_to_cpu(b->magic) == DRBD_MAGIC);
+
+ for (i = 0; i < AL_EXTENTS_PT + 1; i++)
+ xor_sum ^= be32_to_cpu(b->updates[i].extent);
+ rv &= (xor_sum == be32_to_cpu(b->xor_sum));
+
+ return rv;
+}
+
+/**
+ * drbd_al_read_log() - Restores the activity log from its on disk representation.
+ * @mdev: DRBD device.
+ * @bdev:	Block device to read from.
+ *
+ * Returns 1 on success, returns 0 when reading the log failed due to IO errors.
+ */
+int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
+{
+ struct al_transaction *buffer;
+ int i;
+ int rv;
+ int mx;
+ int active_extents = 0;
+ int transactions = 0;
+ int found_valid = 0;
+ int from = 0;
+ int to = 0;
+ u32 from_tnr = 0;
+ u32 to_tnr = 0;
+ u32 cnr;
+
+ mx = div_ceil(mdev->act_log->nr_elements, AL_EXTENTS_PT);
+
+ /* lock out all other meta data io for now,
+ * and make sure the page is mapped.
+ */
+ mutex_lock(&mdev->md_io_mutex);
+ buffer = page_address(mdev->md_io_page);
+
+ /* Find the valid transaction in the log */
+ for (i = 0; i <= mx; i++) {
+ rv = drbd_al_read_tr(mdev, bdev, buffer, i);
+ if (rv == 0)
+ continue;
+ if (rv == -1) {
+ mutex_unlock(&mdev->md_io_mutex);
+ return 0;
+ }
+ cnr = be32_to_cpu(buffer->tr_number);
+
+ if (++found_valid == 1) {
+ from = i;
+ to = i;
+ from_tnr = cnr;
+ to_tnr = cnr;
+ continue;
+ }
+ if ((int)cnr - (int)from_tnr < 0) {
+ D_ASSERT(from_tnr - cnr + i - from == mx+1);
+ from = i;
+ from_tnr = cnr;
+ }
+ if ((int)cnr - (int)to_tnr > 0) {
+ D_ASSERT(cnr - to_tnr == i - to);
+ to = i;
+ to_tnr = cnr;
+ }
+ }
+
+ if (!found_valid) {
+ dev_warn(DEV, "No usable activity log found.\n");
+ mutex_unlock(&mdev->md_io_mutex);
+ return 1;
+ }
+
+ /* Read the valid transactions.
+ * dev_info(DEV, "Reading from %d to %d.\n",from,to); */
+ i = from;
+ while (1) {
+ int j, pos;
+ unsigned int extent_nr;
+ unsigned int trn;
+
+ rv = drbd_al_read_tr(mdev, bdev, buffer, i);
+ ERR_IF(rv == 0) goto cancel;
+ if (rv == -1) {
+ mutex_unlock(&mdev->md_io_mutex);
+ return 0;
+ }
+
+ trn = be32_to_cpu(buffer->tr_number);
+
+ spin_lock_irq(&mdev->al_lock);
+
+ /* This loop runs backwards because in the cyclic
+ elements there might be an old version of the
+ updated element (in slot 0). So the element in slot 0
+ can overwrite old versions. */
+ for (j = AL_EXTENTS_PT; j >= 0; j--) {
+ pos = be32_to_cpu(buffer->updates[j].pos);
+ extent_nr = be32_to_cpu(buffer->updates[j].extent);
+
+ if (extent_nr == LC_FREE)
+ continue;
+
+ lc_set(mdev->act_log, extent_nr, pos);
+ active_extents++;
+ }
+ spin_unlock_irq(&mdev->al_lock);
+
+ transactions++;
+
+cancel:
+ if (i == to)
+ break;
+ i++;
+ if (i > mx)
+ i = 0;
+ }
+
+ mdev->al_tr_number = to_tnr+1;
+ mdev->al_tr_pos = to;
+ if (++mdev->al_tr_pos >
+ div_ceil(mdev->act_log->nr_elements, AL_EXTENTS_PT))
+ mdev->al_tr_pos = 0;
+
+ /* ok, we are done with it */
+ mutex_unlock(&mdev->md_io_mutex);
+
+ dev_info(DEV, "Found %d transactions (%d active extents) in activity log.\n",
+ transactions, active_extents);
+
+ return 1;
+}
+
+/**
+ * drbd_al_apply_to_bm() - Sets the bitmap to dirty (1) where covered by active AL extents
+ * @mdev: DRBD device.
+ */
+void drbd_al_apply_to_bm(struct drbd_conf *mdev)
+{
+ unsigned int enr;
+ unsigned long add = 0;
+ char ppb[10];
+ int i, tmp;
+
+ wait_event(mdev->al_wait, lc_try_lock(mdev->act_log));
+
+ for (i = 0; i < mdev->act_log->nr_elements; i++) {
+ enr = lc_element_by_index(mdev->act_log, i)->lc_number;
+ if (enr == LC_FREE)
+ continue;
+ tmp = drbd_bm_ALe_set_all(mdev, enr);
+ dynamic_dev_dbg(DEV, "AL: set %d bits in extent %u\n", tmp, enr);
+ add += tmp;
+ }
+
+ lc_unlock(mdev->act_log);
+ wake_up(&mdev->al_wait);
+
+ dev_info(DEV, "Marked additional %s as out-of-sync based on AL.\n",
+ ppsize(ppb, Bit2KB(add)));
+}
+
+static int _try_lc_del(struct drbd_conf *mdev, struct lc_element *al_ext)
+{
+ int rv;
+
+ spin_lock_irq(&mdev->al_lock);
+ rv = (al_ext->refcnt == 0);
+ if (likely(rv))
+ lc_del(mdev->act_log, al_ext);
+ spin_unlock_irq(&mdev->al_lock);
+
+ return rv;
+}
+
+/**
+ * drbd_al_shrink() - Removes all active extents from the activity log
+ * @mdev: DRBD device.
+ *
+ * Removes all active extents from the activity log, waiting until
+ * the reference count of each entry has dropped to 0 first, of course.
+ *
+ * You need to lock mdev->act_log with lc_try_lock() / lc_unlock()
+ */
+void drbd_al_shrink(struct drbd_conf *mdev)
+{
+ struct lc_element *al_ext;
+ int i;
+
+ D_ASSERT(test_bit(__LC_DIRTY, &mdev->act_log->flags));
+
+ for (i = 0; i < mdev->act_log->nr_elements; i++) {
+ al_ext = lc_element_by_index(mdev->act_log, i);
+ if (al_ext->lc_number == LC_FREE)
+ continue;
+ wait_event(mdev->al_wait, _try_lc_del(mdev, al_ext));
+ }
+
+ wake_up(&mdev->al_wait);
+}
+
+static int w_update_odbm(struct drbd_conf *mdev, struct drbd_work *w, int unused)
+{
+ struct update_odbm_work *udw = container_of(w, struct update_odbm_work, w);
+
+ if (!get_ldev(mdev)) {
+ if (__ratelimit(&drbd_ratelimit_state))
+ dev_warn(DEV, "Can not update on disk bitmap, local IO disabled.\n");
+ kfree(udw);
+ return 1;
+ }
+
+ drbd_bm_write_page(mdev, rs_extent_to_bm_page(udw->enr));
+ put_ldev(mdev);
+
+ kfree(udw);
+
+ if (drbd_bm_total_weight(mdev) <= mdev->rs_failed) {
+ switch (mdev->state.conn) {
+ case C_SYNC_SOURCE: case C_SYNC_TARGET:
+ case C_PAUSED_SYNC_S: case C_PAUSED_SYNC_T:
+ drbd_resync_finished(mdev);
+ default:
+ /* nothing to do */
+ break;
+ }
+ }
+ drbd_bcast_sync_progress(mdev);
+
+ return 1;
+}
+
+
+/* ATTENTION. The AL's extents are 4MB each, while the extents in the
+ * resync LRU-cache are 16MB each.
+ * The caller of this function has to hold a get_ldev() reference.
+ *
+ * TODO will be obsoleted once we have a caching lru of the on disk bitmap
+ */
+static void drbd_try_clear_on_disk_bm(struct drbd_conf *mdev, sector_t sector,
+ int count, int success)
+{
+ struct lc_element *e;
+ struct update_odbm_work *udw;
+
+ unsigned int enr;
+
+ D_ASSERT(atomic_read(&mdev->local_cnt));
+
+ /* I simply assume that a sector/size pair never crosses
+ * a 16 MB extent border. (Currently this is true...) */
+ enr = BM_SECT_TO_EXT(sector);
+
+ e = lc_get(mdev->resync, enr);
+ if (e) {
+ struct bm_extent *ext = lc_entry(e, struct bm_extent, lce);
+ if (ext->lce.lc_number == enr) {
+ if (success)
+ ext->rs_left -= count;
+ else
+ ext->rs_failed += count;
+ if (ext->rs_left < ext->rs_failed) {
+ dev_err(DEV, "BAD! sector=%llus enr=%u rs_left=%d "
+ "rs_failed=%d count=%d\n",
+ (unsigned long long)sector,
+ ext->lce.lc_number, ext->rs_left,
+ ext->rs_failed, count);
+ dump_stack();
+
+ lc_put(mdev->resync, &ext->lce);
+ drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
+ return;
+ }
+ } else {
+ /* Normally this element should be in the cache,
+ * since drbd_rs_begin_io() pulled it already in.
+ *
+ * But maybe an application write finished, and we set
+ * something outside the resync lru_cache in sync.
+ */
+ int rs_left = drbd_bm_e_weight(mdev, enr);
+ if (ext->flags != 0) {
+ dev_warn(DEV, "changing resync lce: %d[%u;%02lx]"
+ " -> %d[%u;00]\n",
+ ext->lce.lc_number, ext->rs_left,
+ ext->flags, enr, rs_left);
+ ext->flags = 0;
+ }
+ if (ext->rs_failed) {
+ dev_warn(DEV, "Kicking resync_lru element enr=%u "
+ "out with rs_failed=%d\n",
+ ext->lce.lc_number, ext->rs_failed);
+ }
+ ext->rs_left = rs_left;
+ ext->rs_failed = success ? 0 : count;
+ lc_changed(mdev->resync, &ext->lce);
+ }
+ lc_put(mdev->resync, &ext->lce);
+ /* no race, we are within the al_lock! */
+
+ if (ext->rs_left == ext->rs_failed) {
+ ext->rs_failed = 0;
+
+ udw = kmalloc(sizeof(*udw), GFP_ATOMIC);
+ if (udw) {
+ udw->enr = ext->lce.lc_number;
+ udw->w.cb = w_update_odbm;
+ drbd_queue_work_front(&mdev->data.work, &udw->w);
+ } else {
+ dev_warn(DEV, "Could not kmalloc an udw\n");
+ }
+ }
+ } else {
+ dev_err(DEV, "lc_get() failed! locked=%d/%d flags=%lu\n",
+ mdev->resync_locked,
+ mdev->resync->nr_elements,
+ mdev->resync->flags);
+ }
+}
+
+void drbd_advance_rs_marks(struct drbd_conf *mdev, unsigned long still_to_go)
+{
+ unsigned long now = jiffies;
+ unsigned long last = mdev->rs_mark_time[mdev->rs_last_mark];
+ int next = (mdev->rs_last_mark + 1) % DRBD_SYNC_MARKS;
+ if (time_after_eq(now, last + DRBD_SYNC_MARK_STEP)) {
+ if (mdev->rs_mark_left[mdev->rs_last_mark] != still_to_go &&
+ mdev->state.conn != C_PAUSED_SYNC_T &&
+ mdev->state.conn != C_PAUSED_SYNC_S) {
+ mdev->rs_mark_time[next] = now;
+ mdev->rs_mark_left[next] = still_to_go;
+ mdev->rs_last_mark = next;
+ }
+ }
+}
+
+/* clear the bit corresponding to the piece of storage in question:
+ * size bytes of data starting from sector.  Only clear bits of the affected
+ * one or more _aligned_ BM_BLOCK_SIZE blocks.
+ *
+ * called by worker on C_SYNC_TARGET and receiver on SyncSource.
+ *
+ */
+void __drbd_set_in_sync(struct drbd_conf *mdev, sector_t sector, int size,
+ const char *file, const unsigned int line)
+{
+ /* Is called from worker and receiver context _only_ */
+ unsigned long sbnr, ebnr, lbnr;
+ unsigned long count = 0;
+ sector_t esector, nr_sectors;
+ int wake_up = 0;
+ unsigned long flags;
+
+ if (size <= 0 || (size & 0x1ff) != 0 || size > DRBD_MAX_BIO_SIZE) {
+ dev_err(DEV, "drbd_set_in_sync: sector=%llus size=%d nonsense!\n",
+ (unsigned long long)sector, size);
+ return;
+ }
+ nr_sectors = drbd_get_capacity(mdev->this_bdev);
+ esector = sector + (size >> 9) - 1;
+
+ ERR_IF(sector >= nr_sectors) return;
+ ERR_IF(esector >= nr_sectors) esector = (nr_sectors-1);
+
+ lbnr = BM_SECT_TO_BIT(nr_sectors-1);
+
+ /* we clear it (in sync).
+ * round up start sector, round down end sector. we make sure we only
+ * clear full, aligned, BM_BLOCK_SIZE (4K) blocks */
+ if (unlikely(esector < BM_SECT_PER_BIT-1))
+ return;
+ if (unlikely(esector == (nr_sectors-1)))
+ ebnr = lbnr;
+ else
+ ebnr = BM_SECT_TO_BIT(esector - (BM_SECT_PER_BIT-1));
+ sbnr = BM_SECT_TO_BIT(sector + BM_SECT_PER_BIT-1);
+
+ if (sbnr > ebnr)
+ return;
+
+ /*
+ * ok, (capacity & 7) != 0 sometimes, but who cares...
+ * we count rs_{total,left} in bits, not sectors.
+ */
+ count = drbd_bm_clear_bits(mdev, sbnr, ebnr);
+ if (count && get_ldev(mdev)) {
+ drbd_advance_rs_marks(mdev, drbd_bm_total_weight(mdev));
+ spin_lock_irqsave(&mdev->al_lock, flags);
+ drbd_try_clear_on_disk_bm(mdev, sector, count, true);
+ spin_unlock_irqrestore(&mdev->al_lock, flags);
+
+		/* just wake_up unconditionally now; various lc_changed(),
+		 * lc_put() in drbd_try_clear_on_disk_bm(). */
+ wake_up = 1;
+ put_ldev(mdev);
+ }
+ if (wake_up)
+ wake_up(&mdev->al_wait);
+}
+
+/*
+ * this is intended to set one request worth of data out of sync.
+ * affects at least 1 bit,
+ * and at most 1+DRBD_MAX_BIO_SIZE/BM_BLOCK_SIZE bits.
+ *
+ * called by tl_clear and drbd_send_dblock (==drbd_make_request).
+ * so this can be _any_ process.
+ */
+int __drbd_set_out_of_sync(struct drbd_conf *mdev, sector_t sector, int size,
+ const char *file, const unsigned int line)
+{
+ unsigned long sbnr, ebnr, lbnr, flags;
+ sector_t esector, nr_sectors;
+ unsigned int enr, count = 0;
+ struct lc_element *e;
+
+ if (size <= 0 || (size & 0x1ff) != 0 || size > DRBD_MAX_BIO_SIZE) {
+ dev_err(DEV, "sector: %llus, size: %d\n",
+ (unsigned long long)sector, size);
+ return 0;
+ }
+
+ if (!get_ldev(mdev))
+ return 0; /* no disk, no metadata, no bitmap to set bits in */
+
+ nr_sectors = drbd_get_capacity(mdev->this_bdev);
+ esector = sector + (size >> 9) - 1;
+
+ ERR_IF(sector >= nr_sectors)
+ goto out;
+ ERR_IF(esector >= nr_sectors)
+ esector = (nr_sectors-1);
+
+ lbnr = BM_SECT_TO_BIT(nr_sectors-1);
+
+ /* we set it out of sync,
+ * we do not need to round anything here */
+ sbnr = BM_SECT_TO_BIT(sector);
+ ebnr = BM_SECT_TO_BIT(esector);
+
+ /* ok, (capacity & 7) != 0 sometimes, but who cares...
+ * we count rs_{total,left} in bits, not sectors. */
+ spin_lock_irqsave(&mdev->al_lock, flags);
+ count = drbd_bm_set_bits(mdev, sbnr, ebnr);
+
+ enr = BM_SECT_TO_EXT(sector);
+ e = lc_find(mdev->resync, enr);
+ if (e)
+ lc_entry(e, struct bm_extent, lce)->rs_left += count;
+ spin_unlock_irqrestore(&mdev->al_lock, flags);
+
+out:
+ put_ldev(mdev);
+
+ return count;
+}
+
+static
+struct bm_extent *_bme_get(struct drbd_conf *mdev, unsigned int enr)
+{
+ struct lc_element *e;
+ struct bm_extent *bm_ext;
+ int wakeup = 0;
+ unsigned long rs_flags;
+
+ spin_lock_irq(&mdev->al_lock);
+ if (mdev->resync_locked > mdev->resync->nr_elements/2) {
+ spin_unlock_irq(&mdev->al_lock);
+ return NULL;
+ }
+ e = lc_get(mdev->resync, enr);
+ bm_ext = e ? lc_entry(e, struct bm_extent, lce) : NULL;
+ if (bm_ext) {
+ if (bm_ext->lce.lc_number != enr) {
+ bm_ext->rs_left = drbd_bm_e_weight(mdev, enr);
+ bm_ext->rs_failed = 0;
+ lc_changed(mdev->resync, &bm_ext->lce);
+ wakeup = 1;
+ }
+ if (bm_ext->lce.refcnt == 1)
+ mdev->resync_locked++;
+ set_bit(BME_NO_WRITES, &bm_ext->flags);
+ }
+ rs_flags = mdev->resync->flags;
+ spin_unlock_irq(&mdev->al_lock);
+ if (wakeup)
+ wake_up(&mdev->al_wait);
+
+ if (!bm_ext) {
+ if (rs_flags & LC_STARVING)
+ dev_warn(DEV, "Have to wait for element"
+ " (resync LRU too small?)\n");
+ BUG_ON(rs_flags & LC_DIRTY);
+ }
+
+ return bm_ext;
+}
+
+static int _is_in_al(struct drbd_conf *mdev, unsigned int enr)
+{
+ struct lc_element *al_ext;
+ int rv = 0;
+
+ spin_lock_irq(&mdev->al_lock);
+ if (unlikely(enr == mdev->act_log->new_number))
+ rv = 1;
+ else {
+ al_ext = lc_find(mdev->act_log, enr);
+ if (al_ext) {
+ if (al_ext->refcnt)
+ rv = 1;
+ }
+ }
+ spin_unlock_irq(&mdev->al_lock);
+
+ /*
+ if (unlikely(rv)) {
+ dev_info(DEV, "Delaying sync read until app's write is done\n");
+ }
+ */
+ return rv;
+}
+
+/**
+ * drbd_rs_begin_io() - Gets an extent in the resync LRU cache and sets it to BME_LOCKED
+ * @mdev: DRBD device.
+ * @sector: The sector number.
+ *
+ * This function sleeps on al_wait. Returns 0 on success, -EINTR if interrupted.
+ */
+int drbd_rs_begin_io(struct drbd_conf *mdev, sector_t sector)
+{
+ unsigned int enr = BM_SECT_TO_EXT(sector);
+ struct bm_extent *bm_ext;
+ int i, sig;
+ int sa = 200; /* Step aside 200 times, then grab the extent and let app-IO wait.
+ 200 times -> 20 seconds. */
+
+retry:
+ sig = wait_event_interruptible(mdev->al_wait,
+ (bm_ext = _bme_get(mdev, enr)));
+ if (sig)
+ return -EINTR;
+
+ if (test_bit(BME_LOCKED, &bm_ext->flags))
+ return 0;
+
+ for (i = 0; i < AL_EXT_PER_BM_SECT; i++) {
+ sig = wait_event_interruptible(mdev->al_wait,
+ !_is_in_al(mdev, enr * AL_EXT_PER_BM_SECT + i) ||
+ test_bit(BME_PRIORITY, &bm_ext->flags));
+
+ if (sig || (test_bit(BME_PRIORITY, &bm_ext->flags) && sa)) {
+ spin_lock_irq(&mdev->al_lock);
+ if (lc_put(mdev->resync, &bm_ext->lce) == 0) {
+ bm_ext->flags = 0; /* clears BME_NO_WRITES and eventually BME_PRIORITY */
+ mdev->resync_locked--;
+ wake_up(&mdev->al_wait);
+ }
+ spin_unlock_irq(&mdev->al_lock);
+ if (sig)
+ return -EINTR;
+ if (schedule_timeout_interruptible(HZ/10))
+ return -EINTR;
+ if (sa && --sa == 0)
+ dev_warn(DEV, "drbd_rs_begin_io() stepped aside for 20sec. "
+ "Resync stalled?\n");
+ goto retry;
+ }
+ }
+ set_bit(BME_LOCKED, &bm_ext->flags);
+ return 0;
+}
+
+/**
+ * drbd_try_rs_begin_io() - Gets an extent in the resync LRU cache, does not sleep
+ * @mdev: DRBD device.
+ * @sector: The sector number.
+ *
+ * Gets an extent in the resync LRU cache, sets it to BME_NO_WRITES, then
+ * tries to set it to BME_LOCKED. Returns 0 upon success, and -EAGAIN
+ * if there is still application IO going on in this area.
+ */
+int drbd_try_rs_begin_io(struct drbd_conf *mdev, sector_t sector)
+{
+ unsigned int enr = BM_SECT_TO_EXT(sector);
+ const unsigned int al_enr = enr*AL_EXT_PER_BM_SECT;
+ struct lc_element *e;
+ struct bm_extent *bm_ext;
+ int i;
+
+ spin_lock_irq(&mdev->al_lock);
+ if (mdev->resync_wenr != LC_FREE && mdev->resync_wenr != enr) {
+ /* in case you have very heavy scattered io, it may
+ * stall the syncer indefinitely if we give up the ref count
+ * when we try again and requeue.
+ *
+ * if we don't give up the refcount, but the next time
+ * we are scheduled this extent has been "synced" by new
+ * application writes, we'd miss the lc_put on the
+ * extent we keep the refcount on.
+ * so we remembered which extent we had to try again, and
+ * if the next requested one is something else, we do
+ * the lc_put here...
+ * we also have to wake_up
+ */
+ e = lc_find(mdev->resync, mdev->resync_wenr);
+ bm_ext = e ? lc_entry(e, struct bm_extent, lce) : NULL;
+ if (bm_ext) {
+ D_ASSERT(!test_bit(BME_LOCKED, &bm_ext->flags));
+ D_ASSERT(test_bit(BME_NO_WRITES, &bm_ext->flags));
+ clear_bit(BME_NO_WRITES, &bm_ext->flags);
+ mdev->resync_wenr = LC_FREE;
+ if (lc_put(mdev->resync, &bm_ext->lce) == 0)
+ mdev->resync_locked--;
+ wake_up(&mdev->al_wait);
+ } else {
+ dev_alert(DEV, "LOGIC BUG\n");
+ }
+ }
+ /* TRY. */
+ e = lc_try_get(mdev->resync, enr);
+ bm_ext = e ? lc_entry(e, struct bm_extent, lce) : NULL;
+ if (bm_ext) {
+ if (test_bit(BME_LOCKED, &bm_ext->flags))
+ goto proceed;
+ if (!test_and_set_bit(BME_NO_WRITES, &bm_ext->flags)) {
+ mdev->resync_locked++;
+ } else {
+ /* we did set the BME_NO_WRITES,
+ * but then could not set BME_LOCKED,
+ * so we tried again.
+ * drop the extra reference. */
+ bm_ext->lce.refcnt--;
+ D_ASSERT(bm_ext->lce.refcnt > 0);
+ }
+ goto check_al;
+ } else {
+ /* do we rather want to try later? */
+ if (mdev->resync_locked > mdev->resync->nr_elements-3)
+ goto try_again;
+ /* Do or do not. There is no try. -- Yoda */
+ e = lc_get(mdev->resync, enr);
+ bm_ext = e ? lc_entry(e, struct bm_extent, lce) : NULL;
+ if (!bm_ext) {
+ const unsigned long rs_flags = mdev->resync->flags;
+ if (rs_flags & LC_STARVING)
+ dev_warn(DEV, "Have to wait for element"
+ " (resync LRU too small?)\n");
+ BUG_ON(rs_flags & LC_DIRTY);
+ goto try_again;
+ }
+ if (bm_ext->lce.lc_number != enr) {
+ bm_ext->rs_left = drbd_bm_e_weight(mdev, enr);
+ bm_ext->rs_failed = 0;
+ lc_changed(mdev->resync, &bm_ext->lce);
+ wake_up(&mdev->al_wait);
+ D_ASSERT(test_bit(BME_LOCKED, &bm_ext->flags) == 0);
+ }
+ set_bit(BME_NO_WRITES, &bm_ext->flags);
+ D_ASSERT(bm_ext->lce.refcnt == 1);
+ mdev->resync_locked++;
+ goto check_al;
+ }
+check_al:
+ for (i = 0; i < AL_EXT_PER_BM_SECT; i++) {
+ if (unlikely(al_enr+i == mdev->act_log->new_number))
+ goto try_again;
+ if (lc_is_used(mdev->act_log, al_enr+i))
+ goto try_again;
+ }
+ set_bit(BME_LOCKED, &bm_ext->flags);
+proceed:
+ mdev->resync_wenr = LC_FREE;
+ spin_unlock_irq(&mdev->al_lock);
+ return 0;
+
+try_again:
+ if (bm_ext)
+ mdev->resync_wenr = enr;
+ spin_unlock_irq(&mdev->al_lock);
+ return -EAGAIN;
+}
+
+void drbd_rs_complete_io(struct drbd_conf *mdev, sector_t sector)
+{
+ unsigned int enr = BM_SECT_TO_EXT(sector);
+ struct lc_element *e;
+ struct bm_extent *bm_ext;
+ unsigned long flags;
+
+ spin_lock_irqsave(&mdev->al_lock, flags);
+ e = lc_find(mdev->resync, enr);
+ bm_ext = e ? lc_entry(e, struct bm_extent, lce) : NULL;
+ if (!bm_ext) {
+ spin_unlock_irqrestore(&mdev->al_lock, flags);
+ if (__ratelimit(&drbd_ratelimit_state))
+ dev_err(DEV, "drbd_rs_complete_io() called, but extent not found\n");
+ return;
+ }
+
+ if (bm_ext->lce.refcnt == 0) {
+ spin_unlock_irqrestore(&mdev->al_lock, flags);
+ dev_err(DEV, "drbd_rs_complete_io(,%llu [=%u]) called, "
+ "but refcnt is 0!?\n",
+ (unsigned long long)sector, enr);
+ return;
+ }
+
+ if (lc_put(mdev->resync, &bm_ext->lce) == 0) {
+ bm_ext->flags = 0; /* clear BME_LOCKED, BME_NO_WRITES and BME_PRIORITY */
+ mdev->resync_locked--;
+ wake_up(&mdev->al_wait);
+ }
+
+ spin_unlock_irqrestore(&mdev->al_lock, flags);
+}
+
+/**
+ * drbd_rs_cancel_all() - Removes all extents from the resync LRU (even BME_LOCKED)
+ * @mdev: DRBD device.
+ */
+void drbd_rs_cancel_all(struct drbd_conf *mdev)
+{
+ spin_lock_irq(&mdev->al_lock);
+
+ if (get_ldev_if_state(mdev, D_FAILED)) { /* Makes sure ->resync is there. */
+ lc_reset(mdev->resync);
+ put_ldev(mdev);
+ }
+ mdev->resync_locked = 0;
+ mdev->resync_wenr = LC_FREE;
+ spin_unlock_irq(&mdev->al_lock);
+ wake_up(&mdev->al_wait);
+}
+
+/**
+ * drbd_rs_del_all() - Gracefully remove all extents from the resync LRU
+ * @mdev: DRBD device.
+ *
+ * Returns 0 upon success, -EAGAIN if at least one reference count was
+ * not zero.
+ */
+int drbd_rs_del_all(struct drbd_conf *mdev)
+{
+ struct lc_element *e;
+ struct bm_extent *bm_ext;
+ int i;
+
+ spin_lock_irq(&mdev->al_lock);
+
+ if (get_ldev_if_state(mdev, D_FAILED)) {
+ /* ok, ->resync is there. */
+ for (i = 0; i < mdev->resync->nr_elements; i++) {
+ e = lc_element_by_index(mdev->resync, i);
+ bm_ext = lc_entry(e, struct bm_extent, lce);
+ if (bm_ext->lce.lc_number == LC_FREE)
+ continue;
+ if (bm_ext->lce.lc_number == mdev->resync_wenr) {
+ dev_info(DEV, "dropping %u in drbd_rs_del_all, apparently"
+ " got 'synced' by application io\n",
+ mdev->resync_wenr);
+ D_ASSERT(!test_bit(BME_LOCKED, &bm_ext->flags));
+ D_ASSERT(test_bit(BME_NO_WRITES, &bm_ext->flags));
+ clear_bit(BME_NO_WRITES, &bm_ext->flags);
+ mdev->resync_wenr = LC_FREE;
+ lc_put(mdev->resync, &bm_ext->lce);
+ }
+ if (bm_ext->lce.refcnt != 0) {
+ dev_info(DEV, "Retrying drbd_rs_del_all() later. "
+ "refcnt=%d\n", bm_ext->lce.refcnt);
+ put_ldev(mdev);
+ spin_unlock_irq(&mdev->al_lock);
+ return -EAGAIN;
+ }
+ D_ASSERT(!test_bit(BME_LOCKED, &bm_ext->flags));
+ D_ASSERT(!test_bit(BME_NO_WRITES, &bm_ext->flags));
+ lc_del(mdev->resync, &bm_ext->lce);
+ }
+ D_ASSERT(mdev->resync->used == 0);
+ put_ldev(mdev);
+ }
+ spin_unlock_irq(&mdev->al_lock);
+
+ return 0;
+}
+
+/**
+ * drbd_rs_failed_io() - Record information on a failure to resync the specified blocks
+ * @mdev: DRBD device.
+ * @sector: The sector number.
+ * @size: Size of failed IO operation, in bytes.
+ */
+void drbd_rs_failed_io(struct drbd_conf *mdev, sector_t sector, int size)
+{
+ /* Is called from worker and receiver context _only_ */
+ unsigned long sbnr, ebnr, lbnr;
+ unsigned long count;
+ sector_t esector, nr_sectors;
+ int wake_up = 0;
+
+ if (size <= 0 || (size & 0x1ff) != 0 || size > DRBD_MAX_BIO_SIZE) {
+ dev_err(DEV, "drbd_rs_failed_io: sector=%llus size=%d nonsense!\n",
+ (unsigned long long)sector, size);
+ return;
+ }
+ nr_sectors = drbd_get_capacity(mdev->this_bdev);
+ esector = sector + (size >> 9) - 1;
+
+ ERR_IF(sector >= nr_sectors) return;
+ ERR_IF(esector >= nr_sectors) esector = (nr_sectors-1);
+
+ lbnr = BM_SECT_TO_BIT(nr_sectors-1);
+
+ /*
+ * round up start sector, round down end sector. we make sure we only
+ * handle full, aligned, BM_BLOCK_SIZE (4K) blocks */
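+ /* e.g. with 512-byte sectors and 4K bitmap blocks (BM_SECT_PER_BIT == 8),
+ * a request covering sectors 3..17 is trimmed to bit 1 only, i.e. the
+ * single fully covered block spanning sectors 8..15. */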
+ if (unlikely(esector < BM_SECT_PER_BIT-1))
+ return;
+ if (unlikely(esector == (nr_sectors-1)))
+ ebnr = lbnr;
+ else
+ ebnr = BM_SECT_TO_BIT(esector - (BM_SECT_PER_BIT-1));
+ sbnr = BM_SECT_TO_BIT(sector + BM_SECT_PER_BIT-1);
+
+ if (sbnr > ebnr)
+ return;
+
+ /*
+ * ok, (capacity & 7) != 0 sometimes, but who cares...
+ * we count rs_{total,left} in bits, not sectors.
+ */
+ spin_lock_irq(&mdev->al_lock);
+ count = drbd_bm_count_bits(mdev, sbnr, ebnr);
+ if (count) {
+ mdev->rs_failed += count;
+
+ if (get_ldev(mdev)) {
+ drbd_try_clear_on_disk_bm(mdev, sector, count, false);
+ put_ldev(mdev);
+ }
+
+ /* just wake_up unconditionally now; various lc_changed(),
+ * lc_put() in drbd_try_clear_on_disk_bm(). */
+ wake_up = 1;
+ }
+ spin_unlock_irq(&mdev->al_lock);
+ if (wake_up)
+ wake_up(&mdev->al_wait);
+}
diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c
new file mode 100644
index 00000000..7b976296
--- /dev/null
+++ b/drivers/block/drbd/drbd_bitmap.c
@@ -0,0 +1,1589 @@
+/*
+ drbd_bitmap.c
+
+ This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
+
+ Copyright (C) 2004-2008, LINBIT Information Technologies GmbH.
+ Copyright (C) 2004-2008, Philipp Reisner <philipp.reisner@linbit.com>.
+ Copyright (C) 2004-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
+
+ drbd is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ drbd is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with drbd; see the file COPYING. If not, write to
+ the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/bitops.h>
+#include <linux/vmalloc.h>
+#include <linux/string.h>
+#include <linux/drbd.h>
+#include <linux/slab.h>
+#include <asm/kmap_types.h>
+
+#include "drbd_int.h"
+
+
+/* OPAQUE outside this file!
+ * interface defined in drbd_int.h
+
+ * convention:
+ * function name drbd_bm_... => used elsewhere, "public".
+ * function name bm_... => internal to implementation, "private".
+ */
+
+
+/*
+ * LIMITATIONS:
+ * We want to support >= 1 petabyte of backend storage, while for now still using
+ * a granularity of one bit per 4KiB of storage.
+ * 1 << 50 bytes backend storage (1 PiB)
+ * 1 << (50 - 12) bits needed
+ * 38 --> we need u64 to index and count bits
+ * 1 << (38 - 3) bitmap bytes needed
+ * 35 --> we still need u64 to index and count bytes
+ * (that's 32 GiB of bitmap for 1 PiB storage)
+ * 1 << (35 - 2) 32bit longs needed
+ * 33 --> we'd even need u64 to index and count 32bit long words.
+ * 1 << (35 - 3) 64bit longs needed
+ * 32 --> we could get away with a 32bit unsigned int to index and count
+ * 64bit long words, but I'd rather stay with unsigned long for now.
+ * We probably should neither count nor point to bytes or long words
+ * directly, but either by bitnumber, or by page index and offset.
+ * 1 << (35 - 12)
+ * 22 --> we need that many 4KiB pages of bitmap.
+ * 1 << (22 + 3) --> on a 64bit arch,
+ * we need 32 MiB to store the array of page pointers.
+ *
+ * Because I'm lazy, and because the resulting patch was too large, too ugly
+ * and still incomplete, on 32bit we still "only" support 16 TiB (minus some),
+ * (1 << 32) bits * 4k storage.
+ *
+
+ * bitmap storage and IO:
+ * Bitmap is stored little endian on disk, and is kept little endian in
+ * core memory. Currently we still hold the full bitmap in core as long
+ * as we are "attached" to a local disk, which at 32 GiB for 1PiB storage
+ * seems excessive.
+ *
+ * We plan to reduce the amount of in-core bitmap pages by paging them in
+ * and out against their on-disk location as necessary, but need to make
+ * sure we don't cause too much meta data IO, and must not deadlock in
+ * tight memory situations. This needs some more work.
+ */
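+/*
+ * For a rough feel of these numbers at a smaller scale: a 1 TiB backing
+ * device at one bit per 4 KiB needs 1 << (40 - 12) = 2^28 bits, i.e. 32 MiB
+ * of bitmap spread over 8192 4 KiB pages, so the array of page pointers
+ * costs 64 KiB on a 64bit arch.
+ */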
+
+/*
+ * NOTE
+ * Access to the *bm_pages is protected by bm_lock.
+ * It is safe to read the other members within the lock.
+ *
+ * drbd_bm_set_bits is called from bio_endio callbacks;
+ * we may be called with irq already disabled,
+ * so we need spin_lock_irqsave().
+ * And we need the kmap_atomic.
+ */
+struct drbd_bitmap {
+ struct page **bm_pages;
+ spinlock_t bm_lock;
+
+ /* see LIMITATIONS: above */
+
+ unsigned long bm_set; /* nr of set bits; THINK maybe atomic_t? */
+ unsigned long bm_bits;
+ size_t bm_words;
+ size_t bm_number_of_pages;
+ sector_t bm_dev_capacity;
+ struct mutex bm_change; /* serializes resize operations */
+
+ wait_queue_head_t bm_io_wait; /* used to serialize IO of single pages */
+
+ enum bm_flag bm_flags;
+
+ /* debugging aid, in case we are still racy somewhere */
+ char *bm_why;
+ struct task_struct *bm_task;
+};
+
+#define bm_print_lock_info(m) __bm_print_lock_info(m, __func__)
+static void __bm_print_lock_info(struct drbd_conf *mdev, const char *func)
+{
+ struct drbd_bitmap *b = mdev->bitmap;
+ if (!__ratelimit(&drbd_ratelimit_state))
+ return;
+ dev_err(DEV, "FIXME %s in %s, bitmap locked for '%s' by %s\n",
+ current == mdev->receiver.task ? "receiver" :
+ current == mdev->asender.task ? "asender" :
+ current == mdev->worker.task ? "worker" : current->comm,
+ func, b->bm_why ?: "?",
+ b->bm_task == mdev->receiver.task ? "receiver" :
+ b->bm_task == mdev->asender.task ? "asender" :
+ b->bm_task == mdev->worker.task ? "worker" : "?");
+}
+
+void drbd_bm_lock(struct drbd_conf *mdev, char *why, enum bm_flag flags)
+{
+ struct drbd_bitmap *b = mdev->bitmap;
+ int trylock_failed;
+
+ if (!b) {
+ dev_err(DEV, "FIXME no bitmap in drbd_bm_lock!?\n");
+ return;
+ }
+
+ trylock_failed = !mutex_trylock(&b->bm_change);
+
+ if (trylock_failed) {
+ dev_warn(DEV, "%s going to '%s' but bitmap already locked for '%s' by %s\n",
+ current == mdev->receiver.task ? "receiver" :
+ current == mdev->asender.task ? "asender" :
+ current == mdev->worker.task ? "worker" : current->comm,
+ why, b->bm_why ?: "?",
+ b->bm_task == mdev->receiver.task ? "receiver" :
+ b->bm_task == mdev->asender.task ? "asender" :
+ b->bm_task == mdev->worker.task ? "worker" : "?");
+ mutex_lock(&b->bm_change);
+ }
+ if (BM_LOCKED_MASK & b->bm_flags)
+ dev_err(DEV, "FIXME bitmap already locked in bm_lock\n");
+ b->bm_flags |= flags & BM_LOCKED_MASK;
+
+ b->bm_why = why;
+ b->bm_task = current;
+}
+
+void drbd_bm_unlock(struct drbd_conf *mdev)
+{
+ struct drbd_bitmap *b = mdev->bitmap;
+ if (!b) {
+ dev_err(DEV, "FIXME no bitmap in drbd_bm_unlock!?\n");
+ return;
+ }
+
+ if (!(BM_LOCKED_MASK & mdev->bitmap->bm_flags))
+ dev_err(DEV, "FIXME bitmap not locked in bm_unlock\n");
+
+ b->bm_flags &= ~BM_LOCKED_MASK;
+ b->bm_why = NULL;
+ b->bm_task = NULL;
+ mutex_unlock(&b->bm_change);
+}
+
+/* we store some "meta" info about our pages in page->private */
+/* at a granularity of 4k storage per bitmap bit:
+ * one petabyte of storage: 1<<50 bytes, 1<<38 * 4k storage blocks
+ * 1<<38 bits,
+ * 1<<23 4k bitmap pages.
+ * Use 24 bits as page index, covers 2 petabytes of storage
+ * at a granularity of 4k per bit.
+ * Used to report the failed page idx on io error from the endio handlers.
+ */
+#define BM_PAGE_IDX_MASK ((1UL<<24)-1)
+/* this page is currently read in, or written back */
+#define BM_PAGE_IO_LOCK 31
+/* if there has been an IO error for this page */
+#define BM_PAGE_IO_ERROR 30
+/* this is to be able to intelligently skip disk IO,
+ * set if bits have been set since last IO. */
+#define BM_PAGE_NEED_WRITEOUT 29
+/* to mark for lazy writeout once syncer cleared all clearable bits,
+ * set if bits have been cleared since last IO. */
+#define BM_PAGE_LAZY_WRITEOUT 28
+
+/* store_page_idx uses non-atomic assignment. It is only used directly after
+ * allocating the page. All other bm_set_page_* and bm_clear_page_* need to
+ * use atomic bit manipulation, as set_out_of_sync (and therefore bitmap
+ * changes) may happen from various contexts, and wait_on_bit/wake_up_bit
+ * requires it all to be atomic as well. */
+static void bm_store_page_idx(struct page *page, unsigned long idx)
+{
+ BUG_ON(0 != (idx & ~BM_PAGE_IDX_MASK));
+ page_private(page) |= idx;
+}
+
+static unsigned long bm_page_to_idx(struct page *page)
+{
+ return page_private(page) & BM_PAGE_IDX_MASK;
+}
+
+/* As it is very unlikely that the same page is under IO from more than one
+ * context, we can get away with a bit per page and one wait queue per bitmap.
+ */
+static void bm_page_lock_io(struct drbd_conf *mdev, int page_nr)
+{
+ struct drbd_bitmap *b = mdev->bitmap;
+ void *addr = &page_private(b->bm_pages[page_nr]);
+ wait_event(b->bm_io_wait, !test_and_set_bit(BM_PAGE_IO_LOCK, addr));
+}
+
+static void bm_page_unlock_io(struct drbd_conf *mdev, int page_nr)
+{
+ struct drbd_bitmap *b = mdev->bitmap;
+ void *addr = &page_private(b->bm_pages[page_nr]);
+ clear_bit(BM_PAGE_IO_LOCK, addr);
+ smp_mb__after_clear_bit();
+ wake_up(&mdev->bitmap->bm_io_wait);
+}
+
+/* set _before_ submit_io, so it may be reset due to being changed
+ * while this page is in flight... will get submitted later again */
+static void bm_set_page_unchanged(struct page *page)
+{
+ /* use cmpxchg? */
+ clear_bit(BM_PAGE_NEED_WRITEOUT, &page_private(page));
+ clear_bit(BM_PAGE_LAZY_WRITEOUT, &page_private(page));
+}
+
+static void bm_set_page_need_writeout(struct page *page)
+{
+ set_bit(BM_PAGE_NEED_WRITEOUT, &page_private(page));
+}
+
+static int bm_test_page_unchanged(struct page *page)
+{
+ volatile const unsigned long *addr = &page_private(page);
+ return (*addr & ((1UL<<BM_PAGE_NEED_WRITEOUT)|(1UL<<BM_PAGE_LAZY_WRITEOUT))) == 0;
+}
+
+static void bm_set_page_io_err(struct page *page)
+{
+ set_bit(BM_PAGE_IO_ERROR, &page_private(page));
+}
+
+static void bm_clear_page_io_err(struct page *page)
+{
+ clear_bit(BM_PAGE_IO_ERROR, &page_private(page));
+}
+
+static void bm_set_page_lazy_writeout(struct page *page)
+{
+ set_bit(BM_PAGE_LAZY_WRITEOUT, &page_private(page));
+}
+
+static int bm_test_page_lazy_writeout(struct page *page)
+{
+ return test_bit(BM_PAGE_LAZY_WRITEOUT, &page_private(page));
+}
+
+/* on a 32bit box, this would allow for exactly (2<<38) bits. */
+static unsigned int bm_word_to_page_idx(struct drbd_bitmap *b, unsigned long long_nr)
+{
+ /* page_nr = (word*sizeof(long)) >> PAGE_SHIFT; */
+ unsigned int page_nr = long_nr >> (PAGE_SHIFT - LN2_BPL + 3);
+ BUG_ON(page_nr >= b->bm_number_of_pages);
+ return page_nr;
+}
+
+static unsigned int bm_bit_to_page_idx(struct drbd_bitmap *b, u64 bitnr)
+{
+ /* page_nr = (bitnr/8) >> PAGE_SHIFT; */
+ unsigned int page_nr = bitnr >> (PAGE_SHIFT + 3);
+ BUG_ON(page_nr >= b->bm_number_of_pages);
+ return page_nr;
+}
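+/* e.g. with 4 KiB pages, bits 0..32767 live in page 0, so bit 40000 maps to
+ * page 40000 >> 15 == 1. */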
+
+static unsigned long *__bm_map_pidx(struct drbd_bitmap *b, unsigned int idx, const enum km_type km)
+{
+ struct page *page = b->bm_pages[idx];
+ return (unsigned long *) kmap_atomic(page, km);
+}
+
+static unsigned long *bm_map_pidx(struct drbd_bitmap *b, unsigned int idx)
+{
+ return __bm_map_pidx(b, idx, KM_IRQ1);
+}
+
+static void __bm_unmap(unsigned long *p_addr, const enum km_type km)
+{
+ kunmap_atomic(p_addr, km);
+}
+
+static void bm_unmap(unsigned long *p_addr)
+{
+ return __bm_unmap(p_addr, KM_IRQ1);
+}
+
+/* long word offset of _bitmap_ sector */
+#define S2W(s) ((s)<<(BM_EXT_SHIFT-BM_BLOCK_SHIFT-LN2_BPL))
+/* word offset from start of bitmap to word number _in_page_
+ * modulo longs per page
+#define MLPP(X) ((X) % (PAGE_SIZE/sizeof(long))
+ hm, well, Philipp thinks gcc might not optimize the % into & (... - 1)
+ so do it explicitly:
+ */
+#define MLPP(X) ((X) & ((PAGE_SIZE/sizeof(long))-1))
+
+/* Long words per page */
+#define LWPP (PAGE_SIZE/sizeof(long))
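+/* For example (assuming 4 KiB pages): LWPP is 512 with 8-byte longs and 1024
+ * with 4-byte longs, so MLPP(1234) is 1234 & 511 == 210 on a 64bit arch. */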
+
+/*
+ * actually most functions herein should take a struct drbd_bitmap*, not a
+ * struct drbd_conf*, but for the debug macros I like to have the mdev around
+ * to be able to report device specific messages.
+ */
+
+
+static void bm_free_pages(struct page **pages, unsigned long number)
+{
+ unsigned long i;
+ if (!pages)
+ return;
+
+ for (i = 0; i < number; i++) {
+ if (!pages[i]) {
+ printk(KERN_ALERT "drbd: bm_free_pages tried to free "
+ "a NULL pointer; i=%lu n=%lu\n",
+ i, number);
+ continue;
+ }
+ __free_page(pages[i]);
+ pages[i] = NULL;
+ }
+}
+
+static void bm_vk_free(void *ptr, int v)
+{
+ if (v)
+ vfree(ptr);
+ else
+ kfree(ptr);
+}
+
+/*
+ * "have" and "want" are NUMBER OF PAGES.
+ */
+static struct page **bm_realloc_pages(struct drbd_bitmap *b, unsigned long want)
+{
+ struct page **old_pages = b->bm_pages;
+ struct page **new_pages, *page;
+ unsigned int i, bytes, vmalloced = 0;
+ unsigned long have = b->bm_number_of_pages;
+
+ BUG_ON(have == 0 && old_pages != NULL);
+ BUG_ON(have != 0 && old_pages == NULL);
+
+ if (have == want)
+ return old_pages;
+
+ /* Trying kmalloc first, falling back to vmalloc.
+ * GFP_KERNEL is ok, as this is done when a lower level disk is
+ * "attached" to the drbd. Context is receiver thread or cqueue
+ * thread. As we have no disk yet, we are not in the IO path,
+ * not even the IO path of the peer. */
+ bytes = sizeof(struct page *)*want;
+ new_pages = kmalloc(bytes, GFP_KERNEL);
+ if (!new_pages) {
+ new_pages = vmalloc(bytes);
+ if (!new_pages)
+ return NULL;
+ vmalloced = 1;
+ }
+
+ memset(new_pages, 0, bytes);
+ if (want >= have) {
+ for (i = 0; i < have; i++)
+ new_pages[i] = old_pages[i];
+ for (; i < want; i++) {
+ page = alloc_page(GFP_HIGHUSER);
+ if (!page) {
+ bm_free_pages(new_pages + have, i - have);
+ bm_vk_free(new_pages, vmalloced);
+ return NULL;
+ }
+ /* we want to know which page it is
+ * from the endio handlers */
+ bm_store_page_idx(page, i);
+ new_pages[i] = page;
+ }
+ } else {
+ for (i = 0; i < want; i++)
+ new_pages[i] = old_pages[i];
+ /* NOT HERE, we are outside the spinlock!
+ bm_free_pages(old_pages + want, have - want);
+ */
+ }
+
+ if (vmalloced)
+ b->bm_flags |= BM_P_VMALLOCED;
+ else
+ b->bm_flags &= ~BM_P_VMALLOCED;
+
+ return new_pages;
+}
+
+/*
+ * called on driver init only. TODO: call when a device is created.
+ * allocates the drbd_bitmap, and stores it in mdev->bitmap.
+ */
+int drbd_bm_init(struct drbd_conf *mdev)
+{
+ struct drbd_bitmap *b = mdev->bitmap;
+ WARN_ON(b != NULL);
+ b = kzalloc(sizeof(struct drbd_bitmap), GFP_KERNEL);
+ if (!b)
+ return -ENOMEM;
+ spin_lock_init(&b->bm_lock);
+ mutex_init(&b->bm_change);
+ init_waitqueue_head(&b->bm_io_wait);
+
+ mdev->bitmap = b;
+
+ return 0;
+}
+
+sector_t drbd_bm_capacity(struct drbd_conf *mdev)
+{
+ ERR_IF(!mdev->bitmap) return 0;
+ return mdev->bitmap->bm_dev_capacity;
+}
+
+/* called on driver unload. TODO: call when a device is destroyed.
+ */
+void drbd_bm_cleanup(struct drbd_conf *mdev)
+{
+ ERR_IF (!mdev->bitmap) return;
+ bm_free_pages(mdev->bitmap->bm_pages, mdev->bitmap->bm_number_of_pages);
+ bm_vk_free(mdev->bitmap->bm_pages, (BM_P_VMALLOCED & mdev->bitmap->bm_flags));
+ kfree(mdev->bitmap);
+ mdev->bitmap = NULL;
+}
+
+/*
+ * when (b->bm_bits % BITS_PER_LONG) != 0,
+ * this masks out the remaining bits.
+ * Returns the number of bits cleared.
+ */
+#define BITS_PER_PAGE (1UL << (PAGE_SHIFT + 3))
+#define BITS_PER_PAGE_MASK (BITS_PER_PAGE - 1)
+#define BITS_PER_LONG_MASK (BITS_PER_LONG - 1)
+static int bm_clear_surplus(struct drbd_bitmap *b)
+{
+ unsigned long mask;
+ unsigned long *p_addr, *bm;
+ int tmp;
+ int cleared = 0;
+
+ /* number of bits modulo bits per page */
+ tmp = (b->bm_bits & BITS_PER_PAGE_MASK);
+ /* mask the used bits of the word containing the last bit */
+ mask = (1UL << (tmp & BITS_PER_LONG_MASK)) -1;
+ /* bitmap is always stored little endian,
+ * on disk and in core memory alike */
+ mask = cpu_to_lel(mask);
+
+ p_addr = bm_map_pidx(b, b->bm_number_of_pages - 1);
+ bm = p_addr + (tmp/BITS_PER_LONG);
+ if (mask) {
+ /* If mask != 0, we are not exactly aligned, so bm now points
+ * to the long containing the last bit.
+ * If mask == 0, bm already points to the word immediately
+ * after the last (long word aligned) bit. */
+ cleared = hweight_long(*bm & ~mask);
+ *bm &= mask;
+ bm++;
+ }
+
+ if (BITS_PER_LONG == 32 && ((bm - p_addr) & 1) == 1) {
+ /* on a 32bit arch, we may need to zero out
+ * a padding long to align with a 64bit remote */
+ cleared += hweight_long(*bm);
+ *bm = 0;
+ }
+ bm_unmap(p_addr);
+ return cleared;
+}
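+/* As an example of the masking above: with bm_bits == 100 on a 64bit arch,
+ * tmp == 100 and mask covers bits 0..35 of the long holding the last valid
+ * bit, so the surplus bits 100..127 of the bitmap are the ones cleared. */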
+
+static void bm_set_surplus(struct drbd_bitmap *b)
+{
+ unsigned long mask;
+ unsigned long *p_addr, *bm;
+ int tmp;
+
+ /* number of bits modulo bits per page */
+ tmp = (b->bm_bits & BITS_PER_PAGE_MASK);
+ /* mask the used bits of the word containing the last bit */
+ mask = (1UL << (tmp & BITS_PER_LONG_MASK)) -1;
+ /* bitmap is always stored little endian,
+ * on disk and in core memory alike */
+ mask = cpu_to_lel(mask);
+
+ p_addr = bm_map_pidx(b, b->bm_number_of_pages - 1);
+ bm = p_addr + (tmp/BITS_PER_LONG);
+ if (mask) {
+ /* If mask != 0, we are not exactly aligned, so bm now points
+ * to the long containing the last bit.
+ * If mask == 0, bm already points to the word immediately
+ * after the last (long word aligned) bit. */
+ *bm |= ~mask;
+ bm++;
+ }
+
+ if (BITS_PER_LONG == 32 && ((bm - p_addr) & 1) == 1) {
+ /* on a 32bit arch, we may need to set a padding long
+ * to all ones to align with a 64bit remote */
+ *bm = ~0UL;
+ }
+ bm_unmap(p_addr);
+}
+
+/* you better not modify the bitmap while this is running,
+ * or its results will be stale */
+static unsigned long bm_count_bits(struct drbd_bitmap *b)
+{
+ unsigned long *p_addr;
+ unsigned long bits = 0;
+ unsigned long mask = (1UL << (b->bm_bits & BITS_PER_LONG_MASK)) -1;
+ int idx, i, last_word;
+
+ /* all but last page */
+ for (idx = 0; idx < b->bm_number_of_pages - 1; idx++) {
+ p_addr = __bm_map_pidx(b, idx, KM_USER0);
+ for (i = 0; i < LWPP; i++)
+ bits += hweight_long(p_addr[i]);
+ __bm_unmap(p_addr, KM_USER0);
+ cond_resched();
+ }
+ /* last (or only) page */
+ last_word = ((b->bm_bits - 1) & BITS_PER_PAGE_MASK) >> LN2_BPL;
+ p_addr = __bm_map_pidx(b, idx, KM_USER0);
+ for (i = 0; i < last_word; i++)
+ bits += hweight_long(p_addr[i]);
+ p_addr[last_word] &= cpu_to_lel(mask);
+ bits += hweight_long(p_addr[last_word]);
+ /* 32bit arch, may have an unused padding long */
+ if (BITS_PER_LONG == 32 && (last_word & 1) == 0)
+ p_addr[last_word+1] = 0;
+ __bm_unmap(p_addr, KM_USER0);
+ return bits;
+}
+
+/* offset and len in long words. */
+static void bm_memset(struct drbd_bitmap *b, size_t offset, int c, size_t len)
+{
+ unsigned long *p_addr, *bm;
+ unsigned int idx;
+ size_t do_now, end;
+
+ end = offset + len;
+
+ if (end > b->bm_words) {
+ printk(KERN_ALERT "drbd: bm_memset end > bm_words\n");
+ return;
+ }
+
+ while (offset < end) {
+ do_now = min_t(size_t, ALIGN(offset + 1, LWPP), end) - offset;
+ idx = bm_word_to_page_idx(b, offset);
+ p_addr = bm_map_pidx(b, idx);
+ bm = p_addr + MLPP(offset);
+ if (bm+do_now > p_addr + LWPP) {
+ printk(KERN_ALERT "drbd: BUG BUG BUG! p_addr:%p bm:%p do_now:%d\n",
+ p_addr, bm, (int)do_now);
+ } else
+ memset(bm, c, do_now * sizeof(long));
+ bm_unmap(p_addr);
+ bm_set_page_need_writeout(b->bm_pages[idx]);
+ offset += do_now;
+ }
+}
+
+/*
+ * make sure the bitmap has enough room for the attached storage,
+ * if necessary, resize.
+ * called whenever we may have changed the device size.
+ * returns -ENOMEM if we could not allocate enough memory, 0 on success.
+ * In case this is actually a resize, we copy the old bitmap into the new one.
+ * Otherwise, the bitmap is initialized to all bits set.
+ */
+int drbd_bm_resize(struct drbd_conf *mdev, sector_t capacity, int set_new_bits)
+{
+ struct drbd_bitmap *b = mdev->bitmap;
+ unsigned long bits, words, owords, obits;
+ unsigned long want, have, onpages; /* number of pages */
+ struct page **npages, **opages = NULL;
+ int err = 0, growing;
+ int opages_vmalloced;
+
+ ERR_IF(!b) return -ENOMEM;
+
+ drbd_bm_lock(mdev, "resize", BM_LOCKED_MASK);
+
+ dev_info(DEV, "drbd_bm_resize called with capacity == %llu\n",
+ (unsigned long long)capacity);
+
+ if (capacity == b->bm_dev_capacity)
+ goto out;
+
+ opages_vmalloced = (BM_P_VMALLOCED & b->bm_flags);
+
+ if (capacity == 0) {
+ spin_lock_irq(&b->bm_lock);
+ opages = b->bm_pages;
+ onpages = b->bm_number_of_pages;
+ owords = b->bm_words;
+ b->bm_pages = NULL;
+ b->bm_number_of_pages =
+ b->bm_set =
+ b->bm_bits =
+ b->bm_words =
+ b->bm_dev_capacity = 0;
+ spin_unlock_irq(&b->bm_lock);
+ bm_free_pages(opages, onpages);
+ bm_vk_free(opages, opages_vmalloced);
+ goto out;
+ }
+ bits = BM_SECT_TO_BIT(ALIGN(capacity, BM_SECT_PER_BIT));
+
+ /* if we would use
+ words = ALIGN(bits,BITS_PER_LONG) >> LN2_BPL;
+ a 32bit host could present the wrong number of words
+ to a 64bit host.
+ */
+ words = ALIGN(bits, 64) >> LN2_BPL;
+
+ if (get_ldev(mdev)) {
+ u64 bits_on_disk = ((u64)mdev->ldev->md.md_size_sect-MD_BM_OFFSET) << 12;
+ put_ldev(mdev);
+ if (bits > bits_on_disk) {
+ dev_info(DEV, "bits = %lu\n", bits);
+ dev_info(DEV, "bits_on_disk = %llu\n", bits_on_disk);
+ err = -ENOSPC;
+ goto out;
+ }
+ }
+
+ want = ALIGN(words*sizeof(long), PAGE_SIZE) >> PAGE_SHIFT;
+ have = b->bm_number_of_pages;
+ if (want == have) {
+ D_ASSERT(b->bm_pages != NULL);
+ npages = b->bm_pages;
+ } else {
+ if (drbd_insert_fault(mdev, DRBD_FAULT_BM_ALLOC))
+ npages = NULL;
+ else
+ npages = bm_realloc_pages(b, want);
+ }
+
+ if (!npages) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ spin_lock_irq(&b->bm_lock);
+ opages = b->bm_pages;
+ owords = b->bm_words;
+ obits = b->bm_bits;
+
+ growing = bits > obits;
+ if (opages && growing && set_new_bits)
+ bm_set_surplus(b);
+
+ b->bm_pages = npages;
+ b->bm_number_of_pages = want;
+ b->bm_bits = bits;
+ b->bm_words = words;
+ b->bm_dev_capacity = capacity;
+
+ if (growing) {
+ if (set_new_bits) {
+ bm_memset(b, owords, 0xff, words-owords);
+ b->bm_set += bits - obits;
+ } else
+ bm_memset(b, owords, 0x00, words-owords);
+
+ }
+
+ if (want < have) {
+ /* implicit: (opages != NULL) && (opages != npages) */
+ bm_free_pages(opages + want, have - want);
+ }
+
+ (void)bm_clear_surplus(b);
+
+ spin_unlock_irq(&b->bm_lock);
+ if (opages != npages)
+ bm_vk_free(opages, opages_vmalloced);
+ if (!growing)
+ b->bm_set = bm_count_bits(b);
+ dev_info(DEV, "resync bitmap: bits=%lu words=%lu pages=%lu\n", bits, words, want);
+
+ out:
+ drbd_bm_unlock(mdev);
+ return err;
+}
+
+/* inherently racy:
+ * if not protected by other means, return value may be out of date when
+ * leaving this function...
+ * we still need to lock it, since it is important that this returns
+ * bm_set == 0 precisely.
+ *
+ * maybe bm_set should be atomic_t ?
+ */
+unsigned long _drbd_bm_total_weight(struct drbd_conf *mdev)
+{
+ struct drbd_bitmap *b = mdev->bitmap;
+ unsigned long s;
+ unsigned long flags;
+
+ ERR_IF(!b) return 0;
+ ERR_IF(!b->bm_pages) return 0;
+
+ spin_lock_irqsave(&b->bm_lock, flags);
+ s = b->bm_set;
+ spin_unlock_irqrestore(&b->bm_lock, flags);
+
+ return s;
+}
+
+unsigned long drbd_bm_total_weight(struct drbd_conf *mdev)
+{
+ unsigned long s;
+ /* if I don't have a disk, I don't know about out-of-sync status */
+ if (!get_ldev_if_state(mdev, D_NEGOTIATING))
+ return 0;
+ s = _drbd_bm_total_weight(mdev);
+ put_ldev(mdev);
+ return s;
+}
+
+size_t drbd_bm_words(struct drbd_conf *mdev)
+{
+ struct drbd_bitmap *b = mdev->bitmap;
+ ERR_IF(!b) return 0;
+ ERR_IF(!b->bm_pages) return 0;
+
+ return b->bm_words;
+}
+
+unsigned long drbd_bm_bits(struct drbd_conf *mdev)
+{
+ struct drbd_bitmap *b = mdev->bitmap;
+ ERR_IF(!b) return 0;
+
+ return b->bm_bits;
+}
+
+/* merge number words from buffer into the bitmap starting at offset.
+ * buffer[i] is expected to be little endian unsigned long.
+ * bitmap must be locked by drbd_bm_lock.
+ * currently only used from receive_bitmap.
+ */
+void drbd_bm_merge_lel(struct drbd_conf *mdev, size_t offset, size_t number,
+ unsigned long *buffer)
+{
+ struct drbd_bitmap *b = mdev->bitmap;
+ unsigned long *p_addr, *bm;
+ unsigned long word, bits;
+ unsigned int idx;
+ size_t end, do_now;
+
+ end = offset + number;
+
+ ERR_IF(!b) return;
+ ERR_IF(!b->bm_pages) return;
+ if (number == 0)
+ return;
+ WARN_ON(offset >= b->bm_words);
+ WARN_ON(end > b->bm_words);
+
+ spin_lock_irq(&b->bm_lock);
+ while (offset < end) {
+ do_now = min_t(size_t, ALIGN(offset+1, LWPP), end) - offset;
+ idx = bm_word_to_page_idx(b, offset);
+ p_addr = bm_map_pidx(b, idx);
+ bm = p_addr + MLPP(offset);
+ offset += do_now;
+ while (do_now--) {
+ bits = hweight_long(*bm);
+ word = *bm | *buffer++;
+ *bm++ = word;
+ b->bm_set += hweight_long(word) - bits;
+ }
+ bm_unmap(p_addr);
+ bm_set_page_need_writeout(b->bm_pages[idx]);
+ }
+ /* with 32bit <-> 64bit cross-platform connect
+ * this is only correct for current usage,
+ * where we _know_ that we are 64 bit aligned,
+ * and know that this function is used in this way, too...
+ */
+ if (end == b->bm_words)
+ b->bm_set -= bm_clear_surplus(b);
+ spin_unlock_irq(&b->bm_lock);
+}
+
+/* copy number words from the bitmap starting at offset into the buffer.
+ * buffer[i] will be little endian unsigned long.
+ */
+void drbd_bm_get_lel(struct drbd_conf *mdev, size_t offset, size_t number,
+ unsigned long *buffer)
+{
+ struct drbd_bitmap *b = mdev->bitmap;
+ unsigned long *p_addr, *bm;
+ size_t end, do_now;
+
+ end = offset + number;
+
+ ERR_IF(!b) return;
+ ERR_IF(!b->bm_pages) return;
+
+ spin_lock_irq(&b->bm_lock);
+ if ((offset >= b->bm_words) ||
+ (end > b->bm_words) ||
+ (number <= 0))
+ dev_err(DEV, "offset=%lu number=%lu bm_words=%lu\n",
+ (unsigned long) offset,
+ (unsigned long) number,
+ (unsigned long) b->bm_words);
+ else {
+ while (offset < end) {
+ do_now = min_t(size_t, ALIGN(offset+1, LWPP), end) - offset;
+ p_addr = bm_map_pidx(b, bm_word_to_page_idx(b, offset));
+ bm = p_addr + MLPP(offset);
+ offset += do_now;
+ while (do_now--)
+ *buffer++ = *bm++;
+ bm_unmap(p_addr);
+ }
+ }
+ spin_unlock_irq(&b->bm_lock);
+}
+
+/* set all bits in the bitmap */
+void drbd_bm_set_all(struct drbd_conf *mdev)
+{
+ struct drbd_bitmap *b = mdev->bitmap;
+ ERR_IF(!b) return;
+ ERR_IF(!b->bm_pages) return;
+
+ spin_lock_irq(&b->bm_lock);
+ bm_memset(b, 0, 0xff, b->bm_words);
+ (void)bm_clear_surplus(b);
+ b->bm_set = b->bm_bits;
+ spin_unlock_irq(&b->bm_lock);
+}
+
+/* clear all bits in the bitmap */
+void drbd_bm_clear_all(struct drbd_conf *mdev)
+{
+ struct drbd_bitmap *b = mdev->bitmap;
+ ERR_IF(!b) return;
+ ERR_IF(!b->bm_pages) return;
+
+ spin_lock_irq(&b->bm_lock);
+ bm_memset(b, 0, 0, b->bm_words);
+ b->bm_set = 0;
+ spin_unlock_irq(&b->bm_lock);
+}
+
+struct bm_aio_ctx {
+ struct drbd_conf *mdev;
+ atomic_t in_flight;
+ struct completion done;
+ unsigned flags;
+#define BM_AIO_COPY_PAGES 1
+ int error;
+};
+
+/* bv_page may be a copy, or may be the original */
+static void bm_async_io_complete(struct bio *bio, int error)
+{
+ struct bm_aio_ctx *ctx = bio->bi_private;
+ struct drbd_conf *mdev = ctx->mdev;
+ struct drbd_bitmap *b = mdev->bitmap;
+ unsigned int idx = bm_page_to_idx(bio->bi_io_vec[0].bv_page);
+ int uptodate = bio_flagged(bio, BIO_UPTODATE);
+
+
+ /* strange behavior of some lower level drivers...
+ * fail the request by clearing the uptodate flag,
+ * but do not return any error?!
+ * do we want to WARN() on this? */
+ if (!error && !uptodate)
+ error = -EIO;
+
+ if ((ctx->flags & BM_AIO_COPY_PAGES) == 0 &&
+ !bm_test_page_unchanged(b->bm_pages[idx]))
+ dev_warn(DEV, "bitmap page idx %u changed during IO!\n", idx);
+
+ if (error) {
+ /* ctx error will hold the completed-last non-zero error code,
+ * in case error codes differ. */
+ ctx->error = error;
+ bm_set_page_io_err(b->bm_pages[idx]);
+ /* Not identical to on disk version of it.
+ * Is BM_PAGE_IO_ERROR enough? */
+ if (__ratelimit(&drbd_ratelimit_state))
+ dev_err(DEV, "IO ERROR %d on bitmap page idx %u\n",
+ error, idx);
+ } else {
+ bm_clear_page_io_err(b->bm_pages[idx]);
+ dynamic_dev_dbg(DEV, "bitmap page idx %u completed\n", idx);
+ }
+
+ bm_page_unlock_io(mdev, idx);
+
+ /* FIXME give back to page pool */
+ if (ctx->flags & BM_AIO_COPY_PAGES)
+ put_page(bio->bi_io_vec[0].bv_page);
+
+ bio_put(bio);
+
+ if (atomic_dec_and_test(&ctx->in_flight))
+ complete(&ctx->done);
+}
+
+static void bm_page_io_async(struct bm_aio_ctx *ctx, int page_nr, int rw) __must_hold(local)
+{
+ /* we are process context. we always get a bio */
+ struct bio *bio = bio_alloc(GFP_KERNEL, 1);
+ struct drbd_conf *mdev = ctx->mdev;
+ struct drbd_bitmap *b = mdev->bitmap;
+ struct page *page;
+ unsigned int len;
+
+ sector_t on_disk_sector =
+ mdev->ldev->md.md_offset + mdev->ldev->md.bm_offset;
+ on_disk_sector += ((sector_t)page_nr) << (PAGE_SHIFT-9);
+
+ /* this might happen with very small
+ * flexible external meta data device,
+ * or with PAGE_SIZE > 4k */
+ len = min_t(unsigned int, PAGE_SIZE,
+ (drbd_md_last_sector(mdev->ldev) - on_disk_sector + 1)<<9);
+
+ /* serialize IO on this page */
+ bm_page_lock_io(mdev, page_nr);
+ /* before memcpy and submit,
+ * so it can be redirtied any time */
+ bm_set_page_unchanged(b->bm_pages[page_nr]);
+
+ if (ctx->flags & BM_AIO_COPY_PAGES) {
+ /* FIXME alloc_page is good enough for now, but actually needs
+ * to use pre-allocated page pool */
+ void *src, *dest;
+ page = alloc_page(__GFP_HIGHMEM|__GFP_WAIT);
+ dest = kmap_atomic(page, KM_USER0);
+ src = kmap_atomic(b->bm_pages[page_nr], KM_USER1);
+ memcpy(dest, src, PAGE_SIZE);
+ kunmap_atomic(src, KM_USER1);
+ kunmap_atomic(dest, KM_USER0);
+ bm_store_page_idx(page, page_nr);
+ } else
+ page = b->bm_pages[page_nr];
+
+ bio->bi_bdev = mdev->ldev->md_bdev;
+ bio->bi_sector = on_disk_sector;
+ bio_add_page(bio, page, len, 0);
+ bio->bi_private = ctx;
+ bio->bi_end_io = bm_async_io_complete;
+
+ if (drbd_insert_fault(mdev, (rw & WRITE) ? DRBD_FAULT_MD_WR : DRBD_FAULT_MD_RD)) {
+ bio->bi_rw |= rw;
+ bio_endio(bio, -EIO);
+ } else {
+ submit_bio(rw, bio);
+ /* this should not count as user activity and cause the
+ * resync to throttle -- see drbd_rs_should_slow_down(). */
+ atomic_add(len >> 9, &mdev->rs_sect_ev);
+ }
+}
+
+/*
+ * bm_rw: read/write the whole bitmap from/to its on disk location.
+ */
+static int bm_rw(struct drbd_conf *mdev, int rw, unsigned lazy_writeout_upper_idx) __must_hold(local)
+{
+ struct bm_aio_ctx ctx = {
+ .mdev = mdev,
+ .in_flight = ATOMIC_INIT(1),
+ .done = COMPLETION_INITIALIZER_ONSTACK(ctx.done),
+ .flags = lazy_writeout_upper_idx ? BM_AIO_COPY_PAGES : 0,
+ };
+ struct drbd_bitmap *b = mdev->bitmap;
+ int num_pages, i, count = 0;
+ unsigned long now;
+ char ppb[10];
+ int err = 0;
+
+ /*
+ * We are protected against bitmap disappearing/resizing by holding an
+ * ldev reference (caller must have called get_ldev()).
+ * For read/write, we are protected against changes to the bitmap by
+ * the bitmap lock (see drbd_bitmap_io).
+ * For lazy writeout, we don't care for ongoing changes to the bitmap,
+ * as we submit copies of pages anyways.
+ */
+ if (!ctx.flags)
+ WARN_ON(!(BM_LOCKED_MASK & b->bm_flags));
+
+ num_pages = b->bm_number_of_pages;
+
+ now = jiffies;
+
+ /* let the layers below us try to merge these bios... */
+ for (i = 0; i < num_pages; i++) {
+ /* ignore completely unchanged pages */
+ if (lazy_writeout_upper_idx && i == lazy_writeout_upper_idx)
+ break;
+ if (rw & WRITE) {
+ if (bm_test_page_unchanged(b->bm_pages[i])) {
+ dynamic_dev_dbg(DEV, "skipped bm write for idx %u\n", i);
+ continue;
+ }
+ /* during lazy writeout,
+ * ignore those pages not marked for lazy writeout. */
+ if (lazy_writeout_upper_idx &&
+ !bm_test_page_lazy_writeout(b->bm_pages[i])) {
+ dynamic_dev_dbg(DEV, "skipped bm lazy write for idx %u\n", i);
+ continue;
+ }
+ }
+ atomic_inc(&ctx.in_flight);
+ bm_page_io_async(&ctx, i, rw);
+ ++count;
+ cond_resched();
+ }
+
+ /*
+ * We initialize ctx.in_flight to one to make sure bm_async_io_complete
+ * will not complete() early, and decrement / test it here. If there
+ * are still some bios in flight, we need to wait for them here.
+ */
+ if (!atomic_dec_and_test(&ctx.in_flight))
+ wait_for_completion(&ctx.done);
+ dev_info(DEV, "bitmap %s of %u pages took %lu jiffies\n",
+ rw == WRITE ? "WRITE" : "READ",
+ count, jiffies - now);
+
+ if (ctx.error) {
+ dev_alert(DEV, "we had at least one MD IO ERROR during bitmap IO\n");
+ drbd_chk_io_error(mdev, 1, true);
+ err = -EIO; /* ctx.error ? */
+ }
+
+ now = jiffies;
+ if (rw == WRITE) {
+ drbd_md_flush(mdev);
+ } else /* rw == READ */ {
+ b->bm_set = bm_count_bits(b);
+ dev_info(DEV, "recounting of set bits took additional %lu jiffies\n",
+ jiffies - now);
+ }
+ now = b->bm_set;
+
+ dev_info(DEV, "%s (%lu bits) marked out-of-sync by on disk bit-map.\n",
+ ppsize(ppb, now << (BM_BLOCK_SHIFT-10)), now);
+
+ return err;
+}
+
+/**
+ * drbd_bm_read() - Read the whole bitmap from its on disk location.
+ * @mdev: DRBD device.
+ */
+int drbd_bm_read(struct drbd_conf *mdev) __must_hold(local)
+{
+ return bm_rw(mdev, READ, 0);
+}
+
+/**
+ * drbd_bm_write() - Write the whole bitmap to its on disk location.
+ * @mdev: DRBD device.
+ *
+ * Will only write pages that have changed since last IO.
+ */
+int drbd_bm_write(struct drbd_conf *mdev) __must_hold(local)
+{
+ return bm_rw(mdev, WRITE, 0);
+}
+
+/**
+ * drbd_bm_write_lazy() - Write bitmap pages 0 to @upper_idx-1, if they have changed.
+ * @mdev: DRBD device.
+ * @upper_idx: 0: write all changed pages; +ve: page index to stop scanning for changed pages
+ */
+int drbd_bm_write_lazy(struct drbd_conf *mdev, unsigned upper_idx) __must_hold(local)
+{
+ return bm_rw(mdev, WRITE, upper_idx);
+}
+
+
+/**
+ * drbd_bm_write_page() - Writes a PAGE_SIZE aligned piece of bitmap
+ * @mdev: DRBD device.
+ * @idx: bitmap page index
+ *
+ * We don't want to special case on logical_block_size of the backend device,
+ * so we submit PAGE_SIZE aligned pieces.
+ * Note that on "most" systems, PAGE_SIZE is 4k.
+ *
+ * In case this becomes an issue on systems with larger PAGE_SIZE,
+ * we may want to change this again to write 4k aligned 4k pieces.
+ */
+int drbd_bm_write_page(struct drbd_conf *mdev, unsigned int idx) __must_hold(local)
+{
+ struct bm_aio_ctx ctx = {
+ .mdev = mdev,
+ .in_flight = ATOMIC_INIT(1),
+ .done = COMPLETION_INITIALIZER_ONSTACK(ctx.done),
+ .flags = BM_AIO_COPY_PAGES,
+ };
+
+ if (bm_test_page_unchanged(mdev->bitmap->bm_pages[idx])) {
+ dynamic_dev_dbg(DEV, "skipped bm page write for idx %u\n", idx);
+ return 0;
+ }
+
+ bm_page_io_async(&ctx, idx, WRITE_SYNC);
+ wait_for_completion(&ctx.done);
+
+ if (ctx.error)
+ drbd_chk_io_error(mdev, 1, true);
+ /* that should force detach, so the in memory bitmap will be
+ * gone in a moment as well. */
+
+ mdev->bm_writ_cnt++;
+ return ctx.error;
+}
+
+/* NOTE
+ * find_first_bit returns int, we return unsigned long.
+ * For this to work on 32bit arch with bitnumbers > (1<<32),
+ * we'd need to return u64, and get a whole lot of other places
+ * fixed where we still use unsigned long.
+ *
+ * this returns a bit number, NOT a sector!
+ */
+static unsigned long __bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo,
+ const int find_zero_bit, const enum km_type km)
+{
+ struct drbd_bitmap *b = mdev->bitmap;
+ unsigned long *p_addr;
+ unsigned long bit_offset;
+ unsigned i;
+
+
+ if (bm_fo > b->bm_bits) {
+ dev_err(DEV, "bm_fo=%lu bm_bits=%lu\n", bm_fo, b->bm_bits);
+ bm_fo = DRBD_END_OF_BITMAP;
+ } else {
+ while (bm_fo < b->bm_bits) {
+ /* bit offset of the first bit in the page */
+ bit_offset = bm_fo & ~BITS_PER_PAGE_MASK;
+ p_addr = __bm_map_pidx(b, bm_bit_to_page_idx(b, bm_fo), km);
+
+ if (find_zero_bit)
+ i = find_next_zero_bit_le(p_addr,
+ PAGE_SIZE*8, bm_fo & BITS_PER_PAGE_MASK);
+ else
+ i = find_next_bit_le(p_addr,
+ PAGE_SIZE*8, bm_fo & BITS_PER_PAGE_MASK);
+
+ __bm_unmap(p_addr, km);
+ if (i < PAGE_SIZE*8) {
+ bm_fo = bit_offset + i;
+ if (bm_fo >= b->bm_bits)
+ break;
+ goto found;
+ }
+ bm_fo = bit_offset + PAGE_SIZE*8;
+ }
+ bm_fo = DRBD_END_OF_BITMAP;
+ }
+ found:
+ return bm_fo;
+}
+
+static unsigned long bm_find_next(struct drbd_conf *mdev,
+ unsigned long bm_fo, const int find_zero_bit)
+{
+ struct drbd_bitmap *b = mdev->bitmap;
+ unsigned long i = DRBD_END_OF_BITMAP;
+
+ ERR_IF(!b) return i;
+ ERR_IF(!b->bm_pages) return i;
+
+ spin_lock_irq(&b->bm_lock);
+ if (BM_DONT_TEST & b->bm_flags)
+ bm_print_lock_info(mdev);
+
+ i = __bm_find_next(mdev, bm_fo, find_zero_bit, KM_IRQ1);
+
+ spin_unlock_irq(&b->bm_lock);
+ return i;
+}
+
+unsigned long drbd_bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo)
+{
+ return bm_find_next(mdev, bm_fo, 0);
+}
+
+#if 0
+/* not yet needed for anything. */
+unsigned long drbd_bm_find_next_zero(struct drbd_conf *mdev, unsigned long bm_fo)
+{
+ return bm_find_next(mdev, bm_fo, 1);
+}
+#endif
+
+/* does not spin_lock_irqsave.
+ * you must take drbd_bm_lock() first */
+unsigned long _drbd_bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo)
+{
+ /* WARN_ON(!(BM_DONT_SET & mdev->b->bm_flags)); */
+ return __bm_find_next(mdev, bm_fo, 0, KM_USER1);
+}
+
+unsigned long _drbd_bm_find_next_zero(struct drbd_conf *mdev, unsigned long bm_fo)
+{
+ /* WARN_ON(!(BM_DONT_SET & mdev->b->bm_flags)); */
+ return __bm_find_next(mdev, bm_fo, 1, KM_USER1);
+}
+
+/* returns number of bits actually changed.
+ * for val != 0, we change 0 -> 1, return code positive
+ * for val == 0, we change 1 -> 0, return code negative
+ * wants bitnr, not sector.
+ * expected to be called for only a few bits (e - s about BITS_PER_LONG).
+ * Must hold bitmap lock already. */
+static int __bm_change_bits_to(struct drbd_conf *mdev, const unsigned long s,
+ unsigned long e, int val)
+{
+ struct drbd_bitmap *b = mdev->bitmap;
+ unsigned long *p_addr = NULL;
+ unsigned long bitnr;
+ unsigned int last_page_nr = -1U;
+ int c = 0;
+ int changed_total = 0;
+
+ if (e >= b->bm_bits) {
+ dev_err(DEV, "ASSERT FAILED: bit_s=%lu bit_e=%lu bm_bits=%lu\n",
+ s, e, b->bm_bits);
+ e = b->bm_bits ? b->bm_bits -1 : 0;
+ }
+ for (bitnr = s; bitnr <= e; bitnr++) {
+ unsigned int page_nr = bm_bit_to_page_idx(b, bitnr);
+ if (page_nr != last_page_nr) {
+ if (p_addr)
+ __bm_unmap(p_addr, KM_IRQ1);
+ if (c < 0)
+ bm_set_page_lazy_writeout(b->bm_pages[last_page_nr]);
+ else if (c > 0)
+ bm_set_page_need_writeout(b->bm_pages[last_page_nr]);
+ changed_total += c;
+ c = 0;
+ p_addr = __bm_map_pidx(b, page_nr, KM_IRQ1);
+ last_page_nr = page_nr;
+ }
+ if (val)
+ c += (0 == __test_and_set_bit_le(bitnr & BITS_PER_PAGE_MASK, p_addr));
+ else
+ c -= (0 != __test_and_clear_bit_le(bitnr & BITS_PER_PAGE_MASK, p_addr));
+ }
+ if (p_addr)
+ __bm_unmap(p_addr, KM_IRQ1);
+ if (c < 0)
+ bm_set_page_lazy_writeout(b->bm_pages[last_page_nr]);
+ else if (c > 0)
+ bm_set_page_need_writeout(b->bm_pages[last_page_nr]);
+ changed_total += c;
+ b->bm_set += changed_total;
+ return changed_total;
+}
+
+/* returns number of bits actually changed.
+ * for val != 0, we change 0 -> 1, return code positive
+ * for val == 0, we change 1 -> 0, return code negative
+ * wants bitnr, not sector */
+static int bm_change_bits_to(struct drbd_conf *mdev, const unsigned long s,
+ const unsigned long e, int val)
+{
+ unsigned long flags;
+ struct drbd_bitmap *b = mdev->bitmap;
+ int c = 0;
+
+ ERR_IF(!b) return 1;
+ ERR_IF(!b->bm_pages) return 0;
+
+ spin_lock_irqsave(&b->bm_lock, flags);
+ if ((val ? BM_DONT_SET : BM_DONT_CLEAR) & b->bm_flags)
+ bm_print_lock_info(mdev);
+
+ c = __bm_change_bits_to(mdev, s, e, val);
+
+ spin_unlock_irqrestore(&b->bm_lock, flags);
+ return c;
+}
+
+/* returns number of bits changed 0 -> 1 */
+int drbd_bm_set_bits(struct drbd_conf *mdev, const unsigned long s, const unsigned long e)
+{
+ return bm_change_bits_to(mdev, s, e, 1);
+}
+
+/* returns number of bits changed 1 -> 0 */
+int drbd_bm_clear_bits(struct drbd_conf *mdev, const unsigned long s, const unsigned long e)
+{
+ return -bm_change_bits_to(mdev, s, e, 0);
+}
+
+/* sets all bits in full words,
+ * from first_word up to, but not including, last_word */
+static inline void bm_set_full_words_within_one_page(struct drbd_bitmap *b,
+ int page_nr, int first_word, int last_word)
+{
+ int i;
+ int bits;
+ unsigned long *paddr = kmap_atomic(b->bm_pages[page_nr], KM_IRQ1);
+ for (i = first_word; i < last_word; i++) {
+ bits = hweight_long(paddr[i]);
+ paddr[i] = ~0UL;
+ b->bm_set += BITS_PER_LONG - bits;
+ }
+ kunmap_atomic(paddr, KM_IRQ1);
+}
+
+/* Same thing as drbd_bm_set_bits,
+ * but more efficient for a large bit range.
+ * You must first drbd_bm_lock().
+ * Can be called to set the whole bitmap in one go.
+ * Sets bits from s to e _inclusive_. */
+void _drbd_bm_set_bits(struct drbd_conf *mdev, const unsigned long s, const unsigned long e)
+{
+ /* First set_bit from the first bit (s)
+ * up to the next long boundary (sl),
+ * then assign full words up to the last long boundary (el),
+ * then set_bit up to and including the last bit (e).
+ *
+ * Do not use memset, because we must account for changes,
+ * so we need to loop over the words with hweight() anyways.
+ */
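+ /* For instance (64bit arch): s == 70, e == 1000 gives sl == 128 and
+ * el == 960, so bits 70..127 and 960..1000 go through
+ * __bm_change_bits_to() and bits 128..959 are set as full words. */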
+ struct drbd_bitmap *b = mdev->bitmap;
+ unsigned long sl = ALIGN(s,BITS_PER_LONG);
+ unsigned long el = (e+1) & ~((unsigned long)BITS_PER_LONG-1);
+ int first_page;
+ int last_page;
+ int page_nr;
+ int first_word;
+ int last_word;
+
+ if (e - s <= 3*BITS_PER_LONG) {
+ /* don't bother; el and sl may even be wrong. */
+ spin_lock_irq(&b->bm_lock);
+ __bm_change_bits_to(mdev, s, e, 1);
+ spin_unlock_irq(&b->bm_lock);
+ return;
+ }
+
+ /* difference is large enough that we can trust sl and el */
+
+ spin_lock_irq(&b->bm_lock);
+
+ /* bits filling the current long */
+ if (sl)
+ __bm_change_bits_to(mdev, s, sl-1, 1);
+
+ first_page = sl >> (3 + PAGE_SHIFT);
+ last_page = el >> (3 + PAGE_SHIFT);
+
+ /* MLPP: modulo longs per page */
+ /* LWPP: long words per page */
+ first_word = MLPP(sl >> LN2_BPL);
+ last_word = LWPP;
+
+ /* first and full pages, unless first page == last page */
+ for (page_nr = first_page; page_nr < last_page; page_nr++) {
+ bm_set_full_words_within_one_page(mdev->bitmap, page_nr, first_word, last_word);
+ spin_unlock_irq(&b->bm_lock);
+ cond_resched();
+ first_word = 0;
+ spin_lock_irq(&b->bm_lock);
+ }
+
+ /* last page (respectively only page, for first page == last page) */
+ last_word = MLPP(el >> LN2_BPL);
+ bm_set_full_words_within_one_page(mdev->bitmap, last_page, first_word, last_word);
+
+ /* possibly trailing bits.
+ * example: (e & 63) == 63, el will be e+1.
+ * if that even was the very last bit,
+ * it would trigger an assert in __bm_change_bits_to()
+ */
+ if (el <= e)
+ __bm_change_bits_to(mdev, el, e, 1);
+ spin_unlock_irq(&b->bm_lock);
+}
+
+/* returns bit state
+ * wants bitnr, NOT sector.
+ * inherently racy... area needs to be locked by means of {al,rs}_lru
+ * 1 ... bit set
+ * 0 ... bit not set
+ * -1 ... first out of bounds access, stop testing for bits!
+ */
+int drbd_bm_test_bit(struct drbd_conf *mdev, const unsigned long bitnr)
+{
+ unsigned long flags;
+ struct drbd_bitmap *b = mdev->bitmap;
+ unsigned long *p_addr;
+ int i;
+
+ ERR_IF(!b) return 0;
+ ERR_IF(!b->bm_pages) return 0;
+
+ spin_lock_irqsave(&b->bm_lock, flags);
+ if (BM_DONT_TEST & b->bm_flags)
+ bm_print_lock_info(mdev);
+ if (bitnr < b->bm_bits) {
+ p_addr = bm_map_pidx(b, bm_bit_to_page_idx(b, bitnr));
+ i = test_bit_le(bitnr & BITS_PER_PAGE_MASK, p_addr) ? 1 : 0;
+ bm_unmap(p_addr);
+ } else if (bitnr == b->bm_bits) {
+ i = -1;
+ } else { /* (bitnr > b->bm_bits) */
+ dev_err(DEV, "bitnr=%lu > bm_bits=%lu\n", bitnr, b->bm_bits);
+ i = 0;
+ }
+
+ spin_unlock_irqrestore(&b->bm_lock, flags);
+ return i;
+}
+
+/* returns number of bits set in the range [s, e] */
+int drbd_bm_count_bits(struct drbd_conf *mdev, const unsigned long s, const unsigned long e)
+{
+ unsigned long flags;
+ struct drbd_bitmap *b = mdev->bitmap;
+ unsigned long *p_addr = NULL;
+ unsigned long bitnr;
+ unsigned int page_nr = -1U;
+ int c = 0;
+
+ /* If this is called without a bitmap, that is a bug. But just to be
+ * robust in case we screwed up elsewhere, pretend there
+ * was one dirty bit in the requested area, so we won't try to do a
+ * local read there (no bitmap probably implies no disk) */
+ ERR_IF(!b) return 1;
+ ERR_IF(!b->bm_pages) return 1;
+
+ spin_lock_irqsave(&b->bm_lock, flags);
+ if (BM_DONT_TEST & b->bm_flags)
+ bm_print_lock_info(mdev);
+ for (bitnr = s; bitnr <= e; bitnr++) {
+ unsigned int idx = bm_bit_to_page_idx(b, bitnr);
+ if (page_nr != idx) {
+ page_nr = idx;
+ if (p_addr)
+ bm_unmap(p_addr);
+ p_addr = bm_map_pidx(b, idx);
+ }
+ ERR_IF (bitnr >= b->bm_bits) {
+ dev_err(DEV, "bitnr=%lu bm_bits=%lu\n", bitnr, b->bm_bits);
+ } else {
+ c += (0 != test_bit_le(bitnr - (page_nr << (PAGE_SHIFT+3)), p_addr));
+ }
+ }
+ if (p_addr)
+ bm_unmap(p_addr);
+ spin_unlock_irqrestore(&b->bm_lock, flags);
+ return c;
+}
+
+
+/* inherently racy...
+ * return value may be already out-of-date when this function returns.
+ * but the general usage is that this is only used during a cstate when bits are
+ * only cleared, not set, and typically we only care for the case when the return
+ * value is zero, or we already "locked" this "bitmap extent" by other means.
+ *
+ * enr is bm-extent number, since we chose to name one sector (512 bytes)
+ * worth of the bitmap a "bitmap extent".
+ *
+ * TODO
+ * I think since we use it like a reference count, we should use the real
+ * reference count of some bitmap extent element from some lru instead...
+ *
+ */
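+/* Put differently: one 512-byte bitmap sector holds 4096 bits, each covering
+ * 4 KiB, so a "bitmap extent" describes 16 MiB of storage and S2W(enr)
+ * amounts to 64 long words per extent on a 64bit arch. */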
+int drbd_bm_e_weight(struct drbd_conf *mdev, unsigned long enr)
+{
+ struct drbd_bitmap *b = mdev->bitmap;
+ int count, s, e;
+ unsigned long flags;
+ unsigned long *p_addr, *bm;
+
+ ERR_IF(!b) return 0;
+ ERR_IF(!b->bm_pages) return 0;
+
+ spin_lock_irqsave(&b->bm_lock, flags);
+ if (BM_DONT_TEST & b->bm_flags)
+ bm_print_lock_info(mdev);
+
+ s = S2W(enr);
+ e = min((size_t)S2W(enr+1), b->bm_words);
+ count = 0;
+ if (s < b->bm_words) {
+ int n = e-s;
+ p_addr = bm_map_pidx(b, bm_word_to_page_idx(b, s));
+ bm = p_addr + MLPP(s);
+ while (n--)
+ count += hweight_long(*bm++);
+ bm_unmap(p_addr);
+ } else {
+ dev_err(DEV, "start offset (%d) too large in drbd_bm_e_weight\n", s);
+ }
+ spin_unlock_irqrestore(&b->bm_lock, flags);
+ return count;
+}
+
+/* Set all bits covered by the AL-extent al_enr.
+ * Returns number of bits changed. */
+unsigned long drbd_bm_ALe_set_all(struct drbd_conf *mdev, unsigned long al_enr)
+{
+ struct drbd_bitmap *b = mdev->bitmap;
+ unsigned long *p_addr, *bm;
+ unsigned long weight;
+ unsigned long s, e;
+ int count, i, do_now;
+ ERR_IF(!b) return 0;
+ ERR_IF(!b->bm_pages) return 0;
+
+ spin_lock_irq(&b->bm_lock);
+ if (BM_DONT_SET & b->bm_flags)
+ bm_print_lock_info(mdev);
+ weight = b->bm_set;
+
+ s = al_enr * BM_WORDS_PER_AL_EXT;
+ e = min_t(size_t, s + BM_WORDS_PER_AL_EXT, b->bm_words);
+ /* assert that s and e are on the same page */
+ D_ASSERT((e-1) >> (PAGE_SHIFT - LN2_BPL + 3)
+ == s >> (PAGE_SHIFT - LN2_BPL + 3));
+ count = 0;
+ if (s < b->bm_words) {
+ i = do_now = e-s;
+ p_addr = bm_map_pidx(b, bm_word_to_page_idx(b, s));
+ bm = p_addr + MLPP(s);
+ while (i--) {
+ count += hweight_long(*bm);
+ *bm = -1UL;
+ bm++;
+ }
+ bm_unmap(p_addr);
+ b->bm_set += do_now*BITS_PER_LONG - count;
+ if (e == b->bm_words)
+ b->bm_set -= bm_clear_surplus(b);
+ } else {
+ dev_err(DEV, "start offset (%lu) too large in drbd_bm_ALe_set_all\n", s);
+ }
+ weight = b->bm_set - weight;
+ spin_unlock_irq(&b->bm_lock);
+ return weight;
+}
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
new file mode 100644
index 00000000..ef2ceed3
--- /dev/null
+++ b/drivers/block/drbd/drbd_int.h
@@ -0,0 +1,2465 @@
+/*
+ drbd_int.h
+
+ This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
+
+ Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
+ Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
+ Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
+
+ drbd is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ drbd is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with drbd; see the file COPYING. If not, write to
+ the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+
+*/
+
+#ifndef _DRBD_INT_H
+#define _DRBD_INT_H
+
+#include <linux/compiler.h>
+#include <linux/types.h>
+#include <linux/version.h>
+#include <linux/list.h>
+#include <linux/sched.h>
+#include <linux/bitops.h>
+#include <linux/slab.h>
+#include <linux/crypto.h>
+#include <linux/ratelimit.h>
+#include <linux/tcp.h>
+#include <linux/mutex.h>
+#include <linux/major.h>
+#include <linux/blkdev.h>
+#include <linux/genhd.h>
+#include <net/tcp.h>
+#include <linux/lru_cache.h>
+#include <linux/prefetch.h>
+
+#ifdef __CHECKER__
+# define __protected_by(x) __attribute__((require_context(x,1,999,"rdwr")))
+# define __protected_read_by(x) __attribute__((require_context(x,1,999,"read")))
+# define __protected_write_by(x) __attribute__((require_context(x,1,999,"write")))
+# define __must_hold(x) __attribute__((context(x,1,1), require_context(x,1,999,"call")))
+#else
+# define __protected_by(x)
+# define __protected_read_by(x)
+# define __protected_write_by(x)
+# define __must_hold(x)
+#endif
+
+#define __no_warn(lock, stmt) do { __acquire(lock); stmt; __release(lock); } while (0)
+
+/* module parameter, defined in drbd_main.c */
+extern unsigned int minor_count;
+extern int disable_sendpage;
+extern int allow_oos;
+extern unsigned int cn_idx;
+
+#ifdef CONFIG_DRBD_FAULT_INJECTION
+extern int enable_faults;
+extern int fault_rate;
+extern int fault_devs;
+#endif
+
+extern char usermode_helper[];
+
+
+/* I don't remember why XCPU ...
+ * This is used to wake the asender,
+ * and to interrupt the sending task
+ * on disconnect.
+ */
+#define DRBD_SIG SIGXCPU
+
+/* This is used to stop/restart our threads.
+ * Cannot use SIGTERM nor SIGKILL, since these
+ * are sent out by init on runlevel changes
+ * I chose SIGHUP for now.
+ */
+#define DRBD_SIGKILL SIGHUP
+
+/* All EEs on the free list should have ID_VACANT (== 0)
+ * freshly allocated EEs get !ID_VACANT (== 1)
+ * so if it says "cannot dereference null pointer at address 0x00000001",
+ * it is most likely one of these :( */
+
+#define ID_IN_SYNC (4711ULL)
+#define ID_OUT_OF_SYNC (4712ULL)
+
+#define ID_SYNCER (-1ULL)
+#define ID_VACANT 0
+#define is_syncer_block_id(id) ((id) == ID_SYNCER)
+#define UUID_NEW_BM_OFFSET ((u64)0x0001000000000000ULL)
+
+struct drbd_conf;
+
+
+/* to shorten dev_warn(DEV, "msg"); and relatives statements */
+#define DEV (disk_to_dev(mdev->vdisk))
+
+#define D_ASSERT(exp) if (!(exp)) \
+ dev_err(DEV, "ASSERT( " #exp " ) in %s:%d\n", __FILE__, __LINE__)
+
+#define ERR_IF(exp) if (({ \
+ int _b = (exp) != 0; \
+ if (_b) dev_err(DEV, "ASSERT FAILED: %s: (%s) in %s:%d\n", \
+ __func__, #exp, __FILE__, __LINE__); \
+ _b; \
+ }))
+
+/* Defines to control fault insertion */
+enum {
+ DRBD_FAULT_MD_WR = 0, /* meta data write */
+ DRBD_FAULT_MD_RD = 1, /* read */
+ DRBD_FAULT_RS_WR = 2, /* resync */
+ DRBD_FAULT_RS_RD = 3,
+ DRBD_FAULT_DT_WR = 4, /* data */
+ DRBD_FAULT_DT_RD = 5,
+ DRBD_FAULT_DT_RA = 6, /* data read ahead */
+ DRBD_FAULT_BM_ALLOC = 7, /* bitmap allocation */
+ DRBD_FAULT_AL_EE = 8, /* alloc ee */
+ DRBD_FAULT_RECEIVE = 9, /* Changes some bytes upon receiving a [rs]data block */
+
+ DRBD_FAULT_MAX,
+};
+
+extern unsigned int
+_drbd_insert_fault(struct drbd_conf *mdev, unsigned int type);
+
+static inline int
+drbd_insert_fault(struct drbd_conf *mdev, unsigned int type) {
+#ifdef CONFIG_DRBD_FAULT_INJECTION
+ return fault_rate &&
+ (enable_faults & (1<<type)) &&
+ _drbd_insert_fault(mdev, type);
+#else
+ return 0;
+#endif
+}
+
+/* integer division, round _UP_ to the next integer */
+#define div_ceil(A, B) ((A)/(B) + ((A)%(B) ? 1 : 0))
+/* usual integer division */
+#define div_floor(A, B) ((A)/(B))
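+/* e.g. (illustrative only): div_ceil(7, 2) == 4 while div_floor(7, 2) == 3 */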
+
+/* drbd_meta-data.c (still in drbd_main.c) */
+/* 4th incarnation of the disk layout. */
+#define DRBD_MD_MAGIC (DRBD_MAGIC+4)
+
+extern struct drbd_conf **minor_table;
+extern struct ratelimit_state drbd_ratelimit_state;
+
+/* on the wire */
+enum drbd_packets {
+ /* receiver (data socket) */
+ P_DATA = 0x00,
+ P_DATA_REPLY = 0x01, /* Response to P_DATA_REQUEST */
+ P_RS_DATA_REPLY = 0x02, /* Response to P_RS_DATA_REQUEST */
+ P_BARRIER = 0x03,
+ P_BITMAP = 0x04,
+ P_BECOME_SYNC_TARGET = 0x05,
+ P_BECOME_SYNC_SOURCE = 0x06,
+ P_UNPLUG_REMOTE = 0x07, /* Used at various times to hint the peer */
+ P_DATA_REQUEST = 0x08, /* Used to ask for a data block */
+ P_RS_DATA_REQUEST = 0x09, /* Used to ask for a data block for resync */
+ P_SYNC_PARAM = 0x0a,
+ P_PROTOCOL = 0x0b,
+ P_UUIDS = 0x0c,
+ P_SIZES = 0x0d,
+ P_STATE = 0x0e,
+ P_SYNC_UUID = 0x0f,
+ P_AUTH_CHALLENGE = 0x10,
+ P_AUTH_RESPONSE = 0x11,
+ P_STATE_CHG_REQ = 0x12,
+
+ /* asender (meta socket) */
+ P_PING = 0x13,
+ P_PING_ACK = 0x14,
+ P_RECV_ACK = 0x15, /* Used in protocol B */
+ P_WRITE_ACK = 0x16, /* Used in protocol C */
+ P_RS_WRITE_ACK = 0x17, /* Is a P_WRITE_ACK, additionally call set_in_sync(). */
+ P_DISCARD_ACK = 0x18, /* Used in proto C, two-primaries conflict detection */
+ P_NEG_ACK = 0x19, /* Sent if local disk is unusable */
+ P_NEG_DREPLY = 0x1a, /* Local disk is broken... */
+ P_NEG_RS_DREPLY = 0x1b, /* Local disk is broken... */
+ P_BARRIER_ACK = 0x1c,
+ P_STATE_CHG_REPLY = 0x1d,
+
+ /* "new" commands, no longer fitting into the ordering scheme above */
+
+ P_OV_REQUEST = 0x1e, /* data socket */
+ P_OV_REPLY = 0x1f,
+ P_OV_RESULT = 0x20, /* meta socket */
+ P_CSUM_RS_REQUEST = 0x21, /* data socket */
+ P_RS_IS_IN_SYNC = 0x22, /* meta socket */
+ P_SYNC_PARAM89 = 0x23, /* data socket, protocol version 89 replacement for P_SYNC_PARAM */
+ P_COMPRESSED_BITMAP = 0x24, /* compressed or otherwise encoded bitmap transfer */
+ /* P_CKPT_FENCE_REQ = 0x25, * currently reserved for protocol D */
+ /* P_CKPT_DISABLE_REQ = 0x26, * currently reserved for protocol D */
+ P_DELAY_PROBE = 0x27, /* is used on BOTH sockets */
+ P_OUT_OF_SYNC = 0x28, /* Mark as out of sync (Outrunning), data socket */
+ P_RS_CANCEL = 0x29, /* meta: Used to cancel RS_DATA_REQUEST packet by SyncSource */
+
+ P_MAX_CMD = 0x2A,
+ P_MAY_IGNORE = 0x100, /* Flag to test if (cmd > P_MAY_IGNORE) ... */
+ P_MAX_OPT_CMD = 0x101,
+
+ /* special command ids for handshake */
+
+ P_HAND_SHAKE_M = 0xfff1, /* First Packet on the MetaSock */
+ P_HAND_SHAKE_S = 0xfff2, /* First Packet on the Socket */
+
+ P_HAND_SHAKE = 0xfffe /* FIXED for the next century! */
+};
+
+static inline const char *cmdname(enum drbd_packets cmd)
+{
+ /* THINK may need to become several global tables
+ * when we want to support more than
+ * one PRO_VERSION */
+ static const char *cmdnames[] = {
+ [P_DATA] = "Data",
+ [P_DATA_REPLY] = "DataReply",
+ [P_RS_DATA_REPLY] = "RSDataReply",
+ [P_BARRIER] = "Barrier",
+ [P_BITMAP] = "ReportBitMap",
+ [P_BECOME_SYNC_TARGET] = "BecomeSyncTarget",
+ [P_BECOME_SYNC_SOURCE] = "BecomeSyncSource",
+ [P_UNPLUG_REMOTE] = "UnplugRemote",
+ [P_DATA_REQUEST] = "DataRequest",
+ [P_RS_DATA_REQUEST] = "RSDataRequest",
+ [P_SYNC_PARAM] = "SyncParam",
+ [P_SYNC_PARAM89] = "SyncParam89",
+ [P_PROTOCOL] = "ReportProtocol",
+ [P_UUIDS] = "ReportUUIDs",
+ [P_SIZES] = "ReportSizes",
+ [P_STATE] = "ReportState",
+ [P_SYNC_UUID] = "ReportSyncUUID",
+ [P_AUTH_CHALLENGE] = "AuthChallenge",
+ [P_AUTH_RESPONSE] = "AuthResponse",
+ [P_PING] = "Ping",
+ [P_PING_ACK] = "PingAck",
+ [P_RECV_ACK] = "RecvAck",
+ [P_WRITE_ACK] = "WriteAck",
+ [P_RS_WRITE_ACK] = "RSWriteAck",
+ [P_DISCARD_ACK] = "DiscardAck",
+ [P_NEG_ACK] = "NegAck",
+ [P_NEG_DREPLY] = "NegDReply",
+ [P_NEG_RS_DREPLY] = "NegRSDReply",
+ [P_BARRIER_ACK] = "BarrierAck",
+ [P_STATE_CHG_REQ] = "StateChgRequest",
+ [P_STATE_CHG_REPLY] = "StateChgReply",
+ [P_OV_REQUEST] = "OVRequest",
+ [P_OV_REPLY] = "OVReply",
+ [P_OV_RESULT] = "OVResult",
+ [P_CSUM_RS_REQUEST] = "CsumRSRequest",
+ [P_RS_IS_IN_SYNC] = "CsumRSIsInSync",
+ [P_COMPRESSED_BITMAP] = "CBitmap",
+ [P_DELAY_PROBE] = "DelayProbe",
+ [P_OUT_OF_SYNC] = "OutOfSync",
+ [P_MAX_CMD] = NULL,
+ };
+
+ if (cmd == P_HAND_SHAKE_M)
+ return "HandShakeM";
+ if (cmd == P_HAND_SHAKE_S)
+ return "HandShakeS";
+ if (cmd == P_HAND_SHAKE)
+ return "HandShake";
+ if (cmd >= P_MAX_CMD)
+ return "Unknown";
+ return cmdnames[cmd];
+}
+
+/* for sending/receiving the bitmap,
+ * possibly in some encoding scheme */
+struct bm_xfer_ctx {
+ /* "const"
+ * stores total bits and long words
+ * of the bitmap, so we don't need to
+ * call the accessor functions over and again. */
+ unsigned long bm_bits;
+ unsigned long bm_words;
+ /* during xfer, current position within the bitmap */
+ unsigned long bit_offset;
+ unsigned long word_offset;
+
+ /* statistics; index: (h->command == P_BITMAP) */
+ unsigned packets[2];
+ unsigned bytes[2];
+};
+
+extern void INFO_bm_xfer_stats(struct drbd_conf *mdev,
+ const char *direction, struct bm_xfer_ctx *c);
+
+static inline void bm_xfer_ctx_bit_to_word_offset(struct bm_xfer_ctx *c)
+{
+ /* word_offset counts "native long words" (32 or 64 bit),
+ * aligned at 64 bit.
+ * Encoded packet may end at an unaligned bit offset.
+ * In case a fallback clear text packet is transmitted in
+ * between, we adjust this offset back to the last 64bit
+ * aligned "native long word", which makes coding and decoding
+ * the plain text bitmap much more convenient. */
+#if BITS_PER_LONG == 64
+ c->word_offset = c->bit_offset >> 6;
+#elif BITS_PER_LONG == 32
+ c->word_offset = c->bit_offset >> 5;
+ c->word_offset &= ~(1UL);
+#else
+# error "unsupported BITS_PER_LONG"
+#endif
+}
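+/* Brief worked example of the adjustment above (illustrative, not part of
+ * the original source): on a 32 bit arch, bit_offset = 100 gives
+ * word_offset = 100 >> 5 = 3, which the "&= ~1" rounds down to 2, i.e. back
+ * to the last 64 bit aligned long word; on a 64 bit arch, 100 >> 6 = 1
+ * points at the same 64 bit word directly. */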
+
+#ifndef __packed
+#define __packed __attribute__((packed))
+#endif
+
+/* This is the layout for a packet on the wire.
+ * The byteorder is the network byte order.
+ * (except block_id and barrier fields.
+ * these are pointers to local structs
+ * and have no relevance for the partner,
+ * which just echoes them as received.)
+ *
+ * NOTE that the payload starts at a long aligned offset,
+ * regardless of 32 or 64 bit arch!
+ */
+struct p_header80 {
+ u32 magic;
+ u16 command;
+ u16 length; /* bytes of data after this header */
+ u8 payload[0];
+} __packed;
+
+/* Header for big packets, used for data packets exceeding 64kB */
+struct p_header95 {
+ u16 magic; /* use DRBD_MAGIC_BIG here */
+ u16 command;
+ u32 length; /* Use only 24 bits of that. Ignore the highest 8 bits. */
+ u8 payload[0];
+} __packed;
+
+union p_header {
+ struct p_header80 h80;
+ struct p_header95 h95;
+};
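+/* For reference (illustrative note, derived from the struct layouts above):
+ * struct p_header80 is 8 bytes on the wire (u32 + u16 + u16), as is
+ * struct p_header95 (u16 + u16 + u32), so in either case the payload
+ * starts at a 64 bit aligned offset, as noted in the comment above. */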
+
+/*
+ * short commands, packets without payload, plain p_header:
+ * P_PING
+ * P_PING_ACK
+ * P_BECOME_SYNC_TARGET
+ * P_BECOME_SYNC_SOURCE
+ * P_UNPLUG_REMOTE
+ */
+
+/*
+ * commands with out-of-struct payload:
+ * P_BITMAP (no additional fields)
+ * P_DATA, P_DATA_REPLY (see p_data)
+ * P_COMPRESSED_BITMAP (see receive_compressed_bitmap)
+ */
+
+/* these defines must not be changed without changing the protocol version */
+#define DP_HARDBARRIER 1 /* deprecated */
+#define DP_RW_SYNC 2 /* equals REQ_SYNC */
+#define DP_MAY_SET_IN_SYNC 4
+#define DP_UNPLUG 8 /* not used anymore */
+#define DP_FUA 16 /* equals REQ_FUA */
+#define DP_FLUSH 32 /* equals REQ_FLUSH */
+#define DP_DISCARD 64 /* equals REQ_DISCARD */
+
+struct p_data {
+ union p_header head;
+ u64 sector; /* 64 bits sector number */
+ u64 block_id; /* to identify the request in protocol B&C */
+ u32 seq_num;
+ u32 dp_flags;
+} __packed;
+
+/*
+ * commands which share a struct:
+ * p_block_ack:
+ * P_RECV_ACK (proto B), P_WRITE_ACK (proto C),
+ * P_DISCARD_ACK (proto C, two-primaries conflict detection)
+ * p_block_req:
+ * P_DATA_REQUEST, P_RS_DATA_REQUEST
+ */
+struct p_block_ack {
+ struct p_header80 head;
+ u64 sector;
+ u64 block_id;
+ u32 blksize;
+ u32 seq_num;
+} __packed;
+
+
+struct p_block_req {
+ struct p_header80 head;
+ u64 sector;
+ u64 block_id;
+ u32 blksize;
+ u32 pad; /* to multiple of 8 Byte */
+} __packed;
+
+/*
+ * commands with their own struct for additional fields:
+ * P_HAND_SHAKE
+ * P_BARRIER
+ * P_BARRIER_ACK
+ * P_SYNC_PARAM
+ * ReportParams
+ */
+
+struct p_handshake {
+ struct p_header80 head; /* 8 bytes */
+ u32 protocol_min;
+ u32 feature_flags;
+ u32 protocol_max;
+
+ /* should be more than enough for future enhancements
+ * for now, feature_flags and the reserverd array shall be zero.
+ */
+
+ u32 _pad;
+ u64 reserverd[7];
+} __packed;
+/* 80 bytes, FIXED for the next century */
+
+struct p_barrier {
+ struct p_header80 head;
+ u32 barrier; /* barrier number _handle_ only */
+ u32 pad; /* to multiple of 8 Byte */
+} __packed;
+
+struct p_barrier_ack {
+ struct p_header80 head;
+ u32 barrier;
+ u32 set_size;
+} __packed;
+
+struct p_rs_param {
+ struct p_header80 head;
+ u32 rate;
+
+ /* Since protocol version 88 and higher. */
+ char verify_alg[0];
+} __packed;
+
+struct p_rs_param_89 {
+ struct p_header80 head;
+ u32 rate;
+ /* protocol version 89: */
+ char verify_alg[SHARED_SECRET_MAX];
+ char csums_alg[SHARED_SECRET_MAX];
+} __packed;
+
+struct p_rs_param_95 {
+ struct p_header80 head;
+ u32 rate;
+ char verify_alg[SHARED_SECRET_MAX];
+ char csums_alg[SHARED_SECRET_MAX];
+ u32 c_plan_ahead;
+ u32 c_delay_target;
+ u32 c_fill_target;
+ u32 c_max_rate;
+} __packed;
+
+enum drbd_conn_flags {
+ CF_WANT_LOSE = 1,
+ CF_DRY_RUN = 2,
+};
+
+struct p_protocol {
+ struct p_header80 head;
+ u32 protocol;
+ u32 after_sb_0p;
+ u32 after_sb_1p;
+ u32 after_sb_2p;
+ u32 conn_flags;
+ u32 two_primaries;
+
+ /* Since protocol version 87 and higher. */
+ char integrity_alg[0];
+
+} __packed;
+
+struct p_uuids {
+ struct p_header80 head;
+ u64 uuid[UI_EXTENDED_SIZE];
+} __packed;
+
+struct p_rs_uuid {
+ struct p_header80 head;
+ u64 uuid;
+} __packed;
+
+struct p_sizes {
+ struct p_header80 head;
+ u64 d_size; /* size of disk */
+ u64 u_size; /* user requested size */
+ u64 c_size; /* current exported size */
+ u32 max_bio_size; /* Maximal size of a BIO */
+ u16 queue_order_type; /* not yet implemented in DRBD */
+ u16 dds_flags; /* use enum dds_flags here. */
+} __packed;
+
+struct p_state {
+ struct p_header80 head;
+ u32 state;
+} __packed;
+
+struct p_req_state {
+ struct p_header80 head;
+ u32 mask;
+ u32 val;
+} __packed;
+
+struct p_req_state_reply {
+ struct p_header80 head;
+ u32 retcode;
+} __packed;
+
+struct p_drbd06_param {
+ u64 size;
+ u32 state;
+ u32 blksize;
+ u32 protocol;
+ u32 version;
+ u32 gen_cnt[5];
+ u32 bit_map_gen[5];
+} __packed;
+
+struct p_discard {
+ struct p_header80 head;
+ u64 block_id;
+ u32 seq_num;
+ u32 pad;
+} __packed;
+
+struct p_block_desc {
+ struct p_header80 head;
+ u64 sector;
+ u32 blksize;
+ u32 pad; /* to multiple of 8 Byte */
+} __packed;
+
+/* Valid values for the encoding field.
+ * Bump proto version when changing this. */
+enum drbd_bitmap_code {
+ /* RLE_VLI_Bytes = 0,
+ * and other bit variants had been defined during
+ * algorithm evaluation. */
+ RLE_VLI_Bits = 2,
+};
+
+struct p_compressed_bm {
+ struct p_header80 head;
+ /* (encoding & 0x0f): actual encoding, see enum drbd_bitmap_code
+ * (encoding & 0x80): polarity (set/unset) of first runlength
+ * ((encoding >> 4) & 0x07): pad_bits, number of trailing zero bits
+ * used to pad up to head.length bytes
+ */
+ u8 encoding;
+
+ u8 code[0];
+} __packed;
+
+struct p_delay_probe93 {
+ struct p_header80 head;
+ u32 seq_num; /* sequence number to match the two probe packets */
+ u32 offset; /* usecs the probe got sent after the reference time point */
+} __packed;
+
+/* DCBP: Drbd Compressed Bitmap Packet ... */
+static inline enum drbd_bitmap_code
+DCBP_get_code(struct p_compressed_bm *p)
+{
+ return (enum drbd_bitmap_code)(p->encoding & 0x0f);
+}
+
+static inline void
+DCBP_set_code(struct p_compressed_bm *p, enum drbd_bitmap_code code)
+{
+ BUG_ON(code & ~0xf);
+ p->encoding = (p->encoding & ~0xf) | code;
+}
+
+static inline int
+DCBP_get_start(struct p_compressed_bm *p)
+{
+ return (p->encoding & 0x80) != 0;
+}
+
+static inline void
+DCBP_set_start(struct p_compressed_bm *p, int set)
+{
+ p->encoding = (p->encoding & ~0x80) | (set ? 0x80 : 0);
+}
+
+static inline int
+DCBP_get_pad_bits(struct p_compressed_bm *p)
+{
+ return (p->encoding >> 4) & 0x7;
+}
+
+static inline void
+DCBP_set_pad_bits(struct p_compressed_bm *p, int n)
+{
+ BUG_ON(n & ~0x7);
+ p->encoding = (p->encoding & (~0x7 << 4)) | (n << 4);
+}
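+/* Example of the encoding byte layout handled by the helpers above
+ * (illustrative only): encoding = 0x92 means code = 0x2 (RLE_VLI_Bits),
+ * pad_bits = (0x92 >> 4) & 0x7 = 1, and start = 1 (the first run length
+ * describes set bits), since bit 7 is set. */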
+
+/* one bitmap packet, including the p_header,
+ * should fit within one _architecture independent_ page.
+ * so we need to use the fixed size 4KiB page size
+ * most architectures have used for a long time.
+ */
+#define BM_PACKET_PAYLOAD_BYTES (4096 - sizeof(struct p_header80))
+#define BM_PACKET_WORDS (BM_PACKET_PAYLOAD_BYTES/sizeof(long))
+#define BM_PACKET_VLI_BYTES_MAX (4096 - sizeof(struct p_compressed_bm))
+#if (PAGE_SIZE < 4096)
+/* drbd_send_bitmap / receive_bitmap would break horribly */
+#error "PAGE_SIZE too small"
+#endif
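+/* Illustrative numbers for the defines above: with a 4 KiB page and an
+ * 8 byte p_header80, BM_PACKET_PAYLOAD_BYTES is 4096 - 8 = 4088 bytes,
+ * i.e. 511 long words on a 64 bit arch (1022 on 32 bit). */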
+
+union p_polymorph {
+ union p_header header;
+ struct p_handshake handshake;
+ struct p_data data;
+ struct p_block_ack block_ack;
+ struct p_barrier barrier;
+ struct p_barrier_ack barrier_ack;
+ struct p_rs_param_89 rs_param_89;
+ struct p_rs_param_95 rs_param_95;
+ struct p_protocol protocol;
+ struct p_sizes sizes;
+ struct p_uuids uuids;
+ struct p_state state;
+ struct p_req_state req_state;
+ struct p_req_state_reply req_state_reply;
+ struct p_block_req block_req;
+ struct p_delay_probe93 delay_probe93;
+ struct p_rs_uuid rs_uuid;
+ struct p_block_desc block_desc;
+} __packed;
+
+/**********************************************************************/
+enum drbd_thread_state {
+ None,
+ Running,
+ Exiting,
+ Restarting
+};
+
+struct drbd_thread {
+ spinlock_t t_lock;
+ struct task_struct *task;
+ struct completion stop;
+ enum drbd_thread_state t_state;
+ int (*function) (struct drbd_thread *);
+ struct drbd_conf *mdev;
+ int reset_cpu_mask;
+};
+
+static inline enum drbd_thread_state get_t_state(struct drbd_thread *thi)
+{
+ /* THINK testing the t_state seems to be uncritical in all cases
+ * (but thread_{start,stop}), so we can read it *without* the lock.
+ * --lge */
+
+ smp_rmb();
+ return thi->t_state;
+}
+
+struct drbd_work;
+typedef int (*drbd_work_cb)(struct drbd_conf *, struct drbd_work *, int cancel);
+struct drbd_work {
+ struct list_head list;
+ drbd_work_cb cb;
+};
+
+struct drbd_tl_epoch;
+struct drbd_request {
+ struct drbd_work w;
+ struct drbd_conf *mdev;
+
+ /* if local IO is not allowed, will be NULL.
+ * if local IO _is_ allowed, holds the locally submitted bio clone,
+ * or, after local IO completion, the ERR_PTR(error).
+ * see drbd_endio_pri(). */
+ struct bio *private_bio;
+
+ struct hlist_node collision;
+ sector_t sector;
+ unsigned int size;
+ unsigned int epoch; /* barrier_nr */
+
+ /* barrier_nr: used to check on "completion" whether this req was in
+ * the current epoch, and we therefore have to close it,
+ * starting a new epoch...
+ */
+
+ struct list_head tl_requests; /* ring list in the transfer log */
+ struct bio *master_bio; /* master bio pointer */
+ unsigned long rq_state; /* see comments above _req_mod() */
+ int seq_num;
+ unsigned long start_time;
+};
+
+struct drbd_tl_epoch {
+ struct drbd_work w;
+ struct list_head requests; /* requests before */
+ struct drbd_tl_epoch *next; /* pointer to the next barrier */
+ unsigned int br_number; /* the barriers identifier. */
+ int n_writes; /* number of requests attached before this barrier */
+};
+
+struct drbd_request;
+
+/* These Tl_epoch_entries may be in one of 6 lists:
+ active_ee .. data packet being written
+ sync_ee .. syncer block being written
+ done_ee .. block written, need to send P_WRITE_ACK
+ read_ee .. [RS]P_DATA_REQUEST being read
+*/
+
+struct drbd_epoch {
+ struct list_head list;
+ unsigned int barrier_nr;
+ atomic_t epoch_size; /* increased on every request added. */
+ atomic_t active; /* increased on every req. added, and dec on every finished. */
+ unsigned long flags;
+};
+
+/* drbd_epoch flag bits */
+enum {
+ DE_HAVE_BARRIER_NUMBER,
+};
+
+enum epoch_event {
+ EV_PUT,
+ EV_GOT_BARRIER_NR,
+ EV_BECAME_LAST,
+ EV_CLEANUP = 32, /* used as flag */
+};
+
+struct drbd_wq_barrier {
+ struct drbd_work w;
+ struct completion done;
+};
+
+struct digest_info {
+ int digest_size;
+ void *digest;
+};
+
+struct drbd_epoch_entry {
+ struct drbd_work w;
+ struct hlist_node collision;
+ struct drbd_epoch *epoch; /* for writes */
+ struct drbd_conf *mdev;
+ struct page *pages;
+ atomic_t pending_bios;
+ unsigned int size;
+ /* see comments on ee flag bits below */
+ unsigned long flags;
+ sector_t sector;
+ union {
+ u64 block_id;
+ struct digest_info *digest;
+ };
+};
+
+/* ee flag bits.
+ * While corresponding bios are in flight, the only modification will be
+ * set_bit WAS_ERROR, which has to be atomic.
+ * If no bios are in flight yet, or all have been completed,
+ * non-atomic modification to ee->flags is ok.
+ */
+enum {
+ __EE_CALL_AL_COMPLETE_IO,
+ __EE_MAY_SET_IN_SYNC,
+
+ /* In case a barrier failed,
+ * we need to resubmit without the barrier flag. */
+ __EE_RESUBMITTED,
+
+ /* we may have several bios per epoch entry.
+ * if any of those fail, we set this flag atomically
+ * from the endio callback */
+ __EE_WAS_ERROR,
+
+ /* This ee has a pointer to a digest instead of a block id */
+ __EE_HAS_DIGEST,
+};
+#define EE_CALL_AL_COMPLETE_IO (1<<__EE_CALL_AL_COMPLETE_IO)
+#define EE_MAY_SET_IN_SYNC (1<<__EE_MAY_SET_IN_SYNC)
+#define EE_RESUBMITTED (1<<__EE_RESUBMITTED)
+#define EE_WAS_ERROR (1<<__EE_WAS_ERROR)
+#define EE_HAS_DIGEST (1<<__EE_HAS_DIGEST)
+
+/* global flag bits */
+enum {
+ CREATE_BARRIER, /* next P_DATA is preceded by a P_BARRIER */
+ SIGNAL_ASENDER, /* whether asender wants to be interrupted */
+ SEND_PING, /* whether asender should send a ping asap */
+
+ UNPLUG_QUEUED, /* only relevant with kernel 2.4 */
+ UNPLUG_REMOTE, /* sending a "UnplugRemote" could help */
+ MD_DIRTY, /* current uuids and flags not yet on disk */
+ DISCARD_CONCURRENT, /* Set on one node, cleared on the peer! */
+ USE_DEGR_WFC_T, /* degr-wfc-timeout instead of wfc-timeout. */
+ CLUSTER_ST_CHANGE, /* Cluster wide state change going on... */
+ CL_ST_CHG_SUCCESS,
+ CL_ST_CHG_FAIL,
+ CRASHED_PRIMARY, /* This node was a crashed primary.
+ * Gets cleared when the state.conn
+ * goes into C_CONNECTED state. */
+ NO_BARRIER_SUPP, /* underlying block device doesn't implement barriers */
+ CONSIDER_RESYNC,
+
+ MD_NO_FUA, /* User wants us not to use FUA/FLUSH on the meta data dev */
+ SUSPEND_IO, /* suspend application io */
+ BITMAP_IO, /* suspend application io;
+ once no more io in flight, start bitmap io */
+ BITMAP_IO_QUEUED, /* Started bitmap IO */
+ GO_DISKLESS, /* Disk is being detached, on io-error or admin request. */
+ WAS_IO_ERROR, /* Local disk failed, returned IO error */
+ RESYNC_AFTER_NEG, /* Resync after online grow after the attach&negotiate finished. */
+ NET_CONGESTED, /* The data socket is congested */
+
+ CONFIG_PENDING, /* serialization of (re)configuration requests.
+ * if set, also prevents the device from dying */
+ DEVICE_DYING, /* device became unconfigured,
+ * but worker thread is still handling the cleanup.
+ * reconfiguring (nl_disk_conf, nl_net_conf) is disallowed,
+ * while this is set. */
+ RESIZE_PENDING, /* Size change detected locally, waiting for the response from
+ * the peer, if it changed there as well. */
+ CONN_DRY_RUN, /* Expect disconnect after resync handshake. */
+ GOT_PING_ACK, /* set when we receive a ping_ack packet, misc wait gets woken */
+ NEW_CUR_UUID, /* Create new current UUID when thawing IO */
+ AL_SUSPENDED, /* Activity logging is currently suspended. */
+ AHEAD_TO_SYNC_SOURCE, /* Ahead -> SyncSource queued */
+};
+
+struct drbd_bitmap; /* opaque for drbd_conf */
+
+/* definition of bits in bm_flags to be used in drbd_bm_lock
+ * and drbd_bitmap_io and friends. */
+enum bm_flag {
+ /* do we need to kfree, or vfree bm_pages? */
+ BM_P_VMALLOCED = 0x10000, /* internal use only, will be masked out */
+
+ /* currently locked for bulk operation */
+ BM_LOCKED_MASK = 0x7,
+
+ /* in detail, that is: */
+ BM_DONT_CLEAR = 0x1,
+ BM_DONT_SET = 0x2,
+ BM_DONT_TEST = 0x4,
+
+ /* (test bit, count bit) allowed (common case) */
+ BM_LOCKED_TEST_ALLOWED = 0x3,
+
+ /* testing bits, as well as setting new bits allowed, but clearing bits
+ * would be unexpected. Used during bitmap receive. Setting new bits
+ * requires sending of "out-of-sync" information, though. */
+ BM_LOCKED_SET_ALLOWED = 0x1,
+
+ /* clear is not expected while bitmap is locked for bulk operation */
+};
+
+
+/* TODO sort members for performance
+ * MAYBE group them further */
+
+/* THINK maybe we actually want to use the default "event/%s" worker threads
+ * or similar in linux 2.6, which uses per cpu data and threads.
+ */
+struct drbd_work_queue {
+ struct list_head q;
+ struct semaphore s; /* producers up it, worker down()s it */
+ spinlock_t q_lock; /* to protect the list. */
+};
+
+struct drbd_socket {
+ struct drbd_work_queue work;
+ struct mutex mutex;
+ struct socket *socket;
+ /* this way we get our
+ * send/receive buffers off the stack */
+ union p_polymorph sbuf;
+ union p_polymorph rbuf;
+};
+
+struct drbd_md {
+ u64 md_offset; /* sector offset to 'super' block */
+
+ u64 la_size_sect; /* last agreed size, unit sectors */
+ u64 uuid[UI_SIZE];
+ u64 device_uuid;
+ u32 flags;
+ u32 md_size_sect;
+
+ s32 al_offset; /* signed relative sector offset to al area */
+ s32 bm_offset; /* signed relative sector offset to bitmap */
+
+ /* u32 al_nr_extents; important for restoring the AL
+ * is stored into sync_conf.al_extents, which in turn
+ * gets applied to act_log->nr_elements
+ */
+};
+
+/* for sync_conf and other types... */
+#define NL_PACKET(name, number, fields) struct name { fields };
+#define NL_INTEGER(pn,pr,member) int member;
+#define NL_INT64(pn,pr,member) __u64 member;
+#define NL_BIT(pn,pr,member) unsigned member:1;
+#define NL_STRING(pn,pr,member,len) unsigned char member[len]; int member ## _len;
+#include "linux/drbd_nl.h"
+
+struct drbd_backing_dev {
+ struct block_device *backing_bdev;
+ struct block_device *md_bdev;
+ struct drbd_md md;
+ struct disk_conf dc; /* The user provided config... */
+ sector_t known_size; /* last known size of that backing device */
+};
+
+struct drbd_md_io {
+ struct drbd_conf *mdev;
+ struct completion event;
+ int error;
+};
+
+struct bm_io_work {
+ struct drbd_work w;
+ char *why;
+ enum bm_flag flags;
+ int (*io_fn)(struct drbd_conf *mdev);
+ void (*done)(struct drbd_conf *mdev, int rv);
+};
+
+enum write_ordering_e {
+ WO_none,
+ WO_drain_io,
+ WO_bdev_flush,
+};
+
+struct fifo_buffer {
+ int *values;
+ unsigned int head_index;
+ unsigned int size;
+};
+
+struct drbd_conf {
+ /* things that are stored as / read from meta data on disk */
+ unsigned long flags;
+
+ /* configured by drbdsetup */
+ struct net_conf *net_conf; /* protected by get_net_conf() and put_net_conf() */
+ struct syncer_conf sync_conf;
+ struct drbd_backing_dev *ldev __protected_by(local);
+
+ sector_t p_size; /* partner's disk size */
+ struct request_queue *rq_queue;
+ struct block_device *this_bdev;
+ struct gendisk *vdisk;
+
+ struct drbd_socket data; /* data/barrier/cstate/parameter packets */
+ struct drbd_socket meta; /* ping/ack (metadata) packets */
+ int agreed_pro_version; /* actually used protocol version */
+ unsigned long last_received; /* in jiffies, either socket */
+ unsigned int ko_count;
+ struct drbd_work resync_work,
+ unplug_work,
+ go_diskless,
+ md_sync_work,
+ start_resync_work;
+ struct timer_list resync_timer;
+ struct timer_list md_sync_timer;
+ struct timer_list start_resync_timer;
+ struct timer_list request_timer;
+#ifdef DRBD_DEBUG_MD_SYNC
+ struct {
+ unsigned int line;
+ const char* func;
+ } last_md_mark_dirty;
+#endif
+
+ /* Used after attach while negotiating new disk state. */
+ union drbd_state new_state_tmp;
+
+ union drbd_state state;
+ wait_queue_head_t misc_wait;
+ wait_queue_head_t state_wait; /* upon each state change. */
+ wait_queue_head_t net_cnt_wait;
+ unsigned int send_cnt;
+ unsigned int recv_cnt;
+ unsigned int read_cnt;
+ unsigned int writ_cnt;
+ unsigned int al_writ_cnt;
+ unsigned int bm_writ_cnt;
+ atomic_t ap_bio_cnt; /* Requests we need to complete */
+ atomic_t ap_pending_cnt; /* AP data packets on the wire, ack expected */
+ atomic_t rs_pending_cnt; /* RS request/data packets on the wire */
+ atomic_t unacked_cnt; /* Need to send replies for */
+ atomic_t local_cnt; /* Waiting for local completion */
+ atomic_t net_cnt; /* Users of net_conf */
+ spinlock_t req_lock;
+ struct drbd_tl_epoch *unused_spare_tle; /* for pre-allocation */
+ struct drbd_tl_epoch *newest_tle;
+ struct drbd_tl_epoch *oldest_tle;
+ struct list_head out_of_sequence_requests;
+ struct hlist_head *tl_hash;
+ unsigned int tl_hash_s;
+
+ /* blocks to resync in this run [unit BM_BLOCK_SIZE] */
+ unsigned long rs_total;
+ /* number of resync blocks that failed in this run */
+ unsigned long rs_failed;
+ /* Syncer's start time [unit jiffies] */
+ unsigned long rs_start;
+ /* cumulated time in PausedSyncX state [unit jiffies] */
+ unsigned long rs_paused;
+ /* skipped because csum was equal [unit BM_BLOCK_SIZE] */
+ unsigned long rs_same_csum;
+#define DRBD_SYNC_MARKS 8
+#define DRBD_SYNC_MARK_STEP (3*HZ)
+ /* block not up-to-date at mark [unit BM_BLOCK_SIZE] */
+ unsigned long rs_mark_left[DRBD_SYNC_MARKS];
+ /* marks's time [unit jiffies] */
+ unsigned long rs_mark_time[DRBD_SYNC_MARKS];
+ /* current index into rs_mark_{left,time} */
+ int rs_last_mark;
+
+ /* where does the admin want us to start? (sector) */
+ sector_t ov_start_sector;
+ /* where are we now? (sector) */
+ sector_t ov_position;
+ /* Start sector of out of sync range (to merge printk reporting). */
+ sector_t ov_last_oos_start;
+ /* size of out-of-sync range in sectors. */
+ sector_t ov_last_oos_size;
+ unsigned long ov_left; /* in bits */
+ struct crypto_hash *csums_tfm;
+ struct crypto_hash *verify_tfm;
+
+ struct drbd_thread receiver;
+ struct drbd_thread worker;
+ struct drbd_thread asender;
+ struct drbd_bitmap *bitmap;
+ unsigned long bm_resync_fo; /* bit offset for drbd_bm_find_next */
+
+ /* Used to track operations of resync... */
+ struct lru_cache *resync;
+ /* Number of locked elements in resync LRU */
+ unsigned int resync_locked;
+ /* resync extent number waiting for application requests */
+ unsigned int resync_wenr;
+
+ int open_cnt;
+ u64 *p_uuid;
+ struct drbd_epoch *current_epoch;
+ spinlock_t epoch_lock;
+ unsigned int epochs;
+ enum write_ordering_e write_ordering;
+ struct list_head active_ee; /* IO in progress (P_DATA gets written to disk) */
+ struct list_head sync_ee; /* IO in progress (P_RS_DATA_REPLY gets written to disk) */
+ struct list_head done_ee; /* send ack */
+ struct list_head read_ee; /* IO in progress (any read) */
+ struct list_head net_ee; /* zero-copy network send in progress */
+ struct hlist_head *ee_hash; /* is protected by req_lock! */
+ unsigned int ee_hash_s;
+
+ /* this one is protected by ee_lock, single thread */
+ struct drbd_epoch_entry *last_write_w_barrier;
+
+ int next_barrier_nr;
+ struct hlist_head *app_reads_hash; /* is protected by req_lock */
+ struct list_head resync_reads;
+ atomic_t pp_in_use; /* allocated from page pool */
+ atomic_t pp_in_use_by_net; /* sendpage()d, still referenced by tcp */
+ wait_queue_head_t ee_wait;
+ struct page *md_io_page; /* one page buffer for md_io */
+ struct page *md_io_tmpp; /* for logical_block_size != 512 */
+ struct mutex md_io_mutex; /* protects the md_io_buffer */
+ spinlock_t al_lock;
+ wait_queue_head_t al_wait;
+ struct lru_cache *act_log; /* activity log */
+ unsigned int al_tr_number;
+ int al_tr_cycle;
+ int al_tr_pos; /* position of the next transaction in the journal */
+ struct crypto_hash *cram_hmac_tfm;
+ struct crypto_hash *integrity_w_tfm; /* to be used by the worker thread */
+ struct crypto_hash *integrity_r_tfm; /* to be used by the receiver thread */
+ void *int_dig_out;
+ void *int_dig_in;
+ void *int_dig_vv;
+ wait_queue_head_t seq_wait;
+ atomic_t packet_seq;
+ unsigned int peer_seq;
+ spinlock_t peer_seq_lock;
+ unsigned int minor;
+ unsigned long comm_bm_set; /* communicated number of set bits. */
+ cpumask_var_t cpu_mask;
+ struct bm_io_work bm_io_work;
+ u64 ed_uuid; /* UUID of the exposed data */
+ struct mutex state_mutex;
+ char congestion_reason; /* Why we were congested... */
+ atomic_t rs_sect_in; /* for incoming resync data rate, SyncTarget */
+ atomic_t rs_sect_ev; /* for submitted resync data rate, both */
+ int rs_last_sect_ev; /* counter to compare with */
+ int rs_last_events; /* counter of read or write "events" (unit sectors)
+ * on the lower level device when we last looked. */
+ int c_sync_rate; /* current resync rate after syncer throttle magic */
+ struct fifo_buffer rs_plan_s; /* correction values of resync planner */
+ int rs_in_flight; /* resync sectors in flight (to proxy, in proxy and from proxy) */
+ int rs_planed; /* resync sectors already planned */
+ atomic_t ap_in_flight; /* App sectors in flight (waiting for ack) */
+ int peer_max_bio_size;
+ int local_max_bio_size;
+};
+
+static inline struct drbd_conf *minor_to_mdev(unsigned int minor)
+{
+ struct drbd_conf *mdev;
+
+ mdev = minor < minor_count ? minor_table[minor] : NULL;
+
+ return mdev;
+}
+
+static inline unsigned int mdev_to_minor(struct drbd_conf *mdev)
+{
+ return mdev->minor;
+}
+
+/* returns 1 if it was successful,
+ * returns 0 if there was no data socket.
+ * so wherever you are going to use the data.socket, e.g. do
+ * if (!drbd_get_data_sock(mdev))
+ * return 0;
+ * CODE();
+ * drbd_put_data_sock(mdev);
+ */
+static inline int drbd_get_data_sock(struct drbd_conf *mdev)
+{
+ mutex_lock(&mdev->data.mutex);
+ /* drbd_disconnect() could have called drbd_free_sock()
+ * while we were waiting in down()... */
+ if (unlikely(mdev->data.socket == NULL)) {
+ mutex_unlock(&mdev->data.mutex);
+ return 0;
+ }
+ return 1;
+}
+
+static inline void drbd_put_data_sock(struct drbd_conf *mdev)
+{
+ mutex_unlock(&mdev->data.mutex);
+}
+
+/*
+ * function declarations
+ *************************/
+
+/* drbd_main.c */
+
+enum chg_state_flags {
+ CS_HARD = 1,
+ CS_VERBOSE = 2,
+ CS_WAIT_COMPLETE = 4,
+ CS_SERIALIZE = 8,
+ CS_ORDERED = CS_WAIT_COMPLETE + CS_SERIALIZE,
+};
+
+enum dds_flags {
+ DDSF_FORCED = 1,
+ DDSF_NO_RESYNC = 2, /* Do not run a resync for the new space */
+};
+
+extern void drbd_init_set_defaults(struct drbd_conf *mdev);
+extern enum drbd_state_rv drbd_change_state(struct drbd_conf *mdev,
+ enum chg_state_flags f,
+ union drbd_state mask,
+ union drbd_state val);
+extern void drbd_force_state(struct drbd_conf *, union drbd_state,
+ union drbd_state);
+extern enum drbd_state_rv _drbd_request_state(struct drbd_conf *,
+ union drbd_state,
+ union drbd_state,
+ enum chg_state_flags);
+extern enum drbd_state_rv __drbd_set_state(struct drbd_conf *, union drbd_state,
+ enum chg_state_flags,
+ struct completion *done);
+extern void print_st_err(struct drbd_conf *, union drbd_state,
+ union drbd_state, int);
+extern int drbd_thread_start(struct drbd_thread *thi);
+extern void _drbd_thread_stop(struct drbd_thread *thi, int restart, int wait);
+#ifdef CONFIG_SMP
+extern void drbd_thread_current_set_cpu(struct drbd_conf *mdev);
+extern void drbd_calc_cpu_mask(struct drbd_conf *mdev);
+#else
+#define drbd_thread_current_set_cpu(A) ({})
+#define drbd_calc_cpu_mask(A) ({})
+#endif
+extern void drbd_free_resources(struct drbd_conf *mdev);
+extern void tl_release(struct drbd_conf *mdev, unsigned int barrier_nr,
+ unsigned int set_size);
+extern void tl_clear(struct drbd_conf *mdev);
+extern void _tl_add_barrier(struct drbd_conf *, struct drbd_tl_epoch *);
+extern void drbd_free_sock(struct drbd_conf *mdev);
+extern int drbd_send(struct drbd_conf *mdev, struct socket *sock,
+ void *buf, size_t size, unsigned msg_flags);
+extern int drbd_send_protocol(struct drbd_conf *mdev);
+extern int drbd_send_uuids(struct drbd_conf *mdev);
+extern int drbd_send_uuids_skip_initial_sync(struct drbd_conf *mdev);
+extern int drbd_gen_and_send_sync_uuid(struct drbd_conf *mdev);
+extern int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_flags flags);
+extern int _drbd_send_state(struct drbd_conf *mdev);
+extern int drbd_send_state(struct drbd_conf *mdev);
+extern int _drbd_send_cmd(struct drbd_conf *mdev, struct socket *sock,
+ enum drbd_packets cmd, struct p_header80 *h,
+ size_t size, unsigned msg_flags);
+#define USE_DATA_SOCKET 1
+#define USE_META_SOCKET 0
+extern int drbd_send_cmd(struct drbd_conf *mdev, int use_data_socket,
+ enum drbd_packets cmd, struct p_header80 *h,
+ size_t size);
+extern int drbd_send_cmd2(struct drbd_conf *mdev, enum drbd_packets cmd,
+ char *data, size_t size);
+extern int drbd_send_sync_param(struct drbd_conf *mdev, struct syncer_conf *sc);
+extern int drbd_send_b_ack(struct drbd_conf *mdev, u32 barrier_nr,
+ u32 set_size);
+extern int drbd_send_ack(struct drbd_conf *mdev, enum drbd_packets cmd,
+ struct drbd_epoch_entry *e);
+extern int drbd_send_ack_rp(struct drbd_conf *mdev, enum drbd_packets cmd,
+ struct p_block_req *rp);
+extern int drbd_send_ack_dp(struct drbd_conf *mdev, enum drbd_packets cmd,
+ struct p_data *dp, int data_size);
+extern int drbd_send_ack_ex(struct drbd_conf *mdev, enum drbd_packets cmd,
+ sector_t sector, int blksize, u64 block_id);
+extern int drbd_send_oos(struct drbd_conf *mdev, struct drbd_request *req);
+extern int drbd_send_block(struct drbd_conf *mdev, enum drbd_packets cmd,
+ struct drbd_epoch_entry *e);
+extern int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req);
+extern int drbd_send_drequest(struct drbd_conf *mdev, int cmd,
+ sector_t sector, int size, u64 block_id);
+extern int drbd_send_drequest_csum(struct drbd_conf *mdev,
+ sector_t sector,int size,
+ void *digest, int digest_size,
+ enum drbd_packets cmd);
+extern int drbd_send_ov_request(struct drbd_conf *mdev,sector_t sector,int size);
+
+extern int drbd_send_bitmap(struct drbd_conf *mdev);
+extern int _drbd_send_bitmap(struct drbd_conf *mdev);
+extern int drbd_send_sr_reply(struct drbd_conf *mdev, enum drbd_state_rv retcode);
+extern void drbd_free_bc(struct drbd_backing_dev *ldev);
+extern void drbd_mdev_cleanup(struct drbd_conf *mdev);
+void drbd_print_uuids(struct drbd_conf *mdev, const char *text);
+
+extern void drbd_md_sync(struct drbd_conf *mdev);
+extern int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev);
+extern void drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local);
+extern void _drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local);
+extern void drbd_uuid_new_current(struct drbd_conf *mdev) __must_hold(local);
+extern void _drbd_uuid_new_current(struct drbd_conf *mdev) __must_hold(local);
+extern void drbd_uuid_set_bm(struct drbd_conf *mdev, u64 val) __must_hold(local);
+extern void drbd_md_set_flag(struct drbd_conf *mdev, int flags) __must_hold(local);
+extern void drbd_md_clear_flag(struct drbd_conf *mdev, int flags)__must_hold(local);
+extern int drbd_md_test_flag(struct drbd_backing_dev *, int);
+#ifndef DRBD_DEBUG_MD_SYNC
+extern void drbd_md_mark_dirty(struct drbd_conf *mdev);
+#else
+#define drbd_md_mark_dirty(m) drbd_md_mark_dirty_(m, __LINE__ , __func__ )
+extern void drbd_md_mark_dirty_(struct drbd_conf *mdev,
+ unsigned int line, const char *func);
+#endif
+extern void drbd_queue_bitmap_io(struct drbd_conf *mdev,
+ int (*io_fn)(struct drbd_conf *),
+ void (*done)(struct drbd_conf *, int),
+ char *why, enum bm_flag flags);
+extern int drbd_bitmap_io(struct drbd_conf *mdev,
+ int (*io_fn)(struct drbd_conf *),
+ char *why, enum bm_flag flags);
+extern int drbd_bmio_set_n_write(struct drbd_conf *mdev);
+extern int drbd_bmio_clear_n_write(struct drbd_conf *mdev);
+extern void drbd_go_diskless(struct drbd_conf *mdev);
+extern void drbd_ldev_destroy(struct drbd_conf *mdev);
+
+
+/* Meta data layout
+ We reserve a 128MB Block (4k aligned)
+ * either at the end of the backing device
+ * or on a separate meta data device. */
+
+#define MD_RESERVED_SECT (128LU << 11) /* 128 MB, unit sectors */
+/* The following numbers are sectors */
+#define MD_AL_OFFSET 8 /* 8 Sectors after start of meta area */
+#define MD_AL_MAX_SIZE 64 /* = 32 kb LOG ~ 3776 extents ~ 14 GB Storage */
+/* Allows up to about 3.8TB */
+#define MD_BM_OFFSET (MD_AL_OFFSET + MD_AL_MAX_SIZE)
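+/* Illustrative numbers for the layout above: MD_RESERVED_SECT is
+ * 128 << 11 = 262144 sectors (128 MB); the activity log starts at sector 8
+ * of the meta data area and the on disk bitmap at sector 8 + 64 = 72. */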
+
+/* Since the smallest IO unit is usually 512 bytes */
+#define MD_SECTOR_SHIFT 9
+#define MD_SECTOR_SIZE (1<<MD_SECTOR_SHIFT)
+
+/* activity log */
+#define AL_EXTENTS_PT ((MD_SECTOR_SIZE-12)/8-1) /* 61 ; Extents per 512B sector */
+#define AL_EXTENT_SHIFT 22 /* One extent represents 4M Storage */
+#define AL_EXTENT_SIZE (1<<AL_EXTENT_SHIFT)
+
+#if BITS_PER_LONG == 32
+#define LN2_BPL 5
+#define cpu_to_lel(A) cpu_to_le32(A)
+#define lel_to_cpu(A) le32_to_cpu(A)
+#elif BITS_PER_LONG == 64
+#define LN2_BPL 6
+#define cpu_to_lel(A) cpu_to_le64(A)
+#define lel_to_cpu(A) le64_to_cpu(A)
+#else
+#error "LN2 of BITS_PER_LONG unknown!"
+#endif
+
+/* resync bitmap */
+/* 16MB sized 'bitmap extent' to track syncer usage */
+struct bm_extent {
+ int rs_left; /* number of bits set (out of sync) in this extent. */
+ int rs_failed; /* number of failed resync requests in this extent. */
+ unsigned long flags;
+ struct lc_element lce;
+};
+
+#define BME_NO_WRITES 0 /* bm_extent.flags: no more requests on this one! */
+#define BME_LOCKED 1 /* bm_extent.flags: syncer active on this one. */
+#define BME_PRIORITY 2 /* finish resync IO on this extent ASAP! App IO waiting! */
+
+/* drbd_bitmap.c */
+/*
+ * We need to store one bit for a block.
+ * Example: 1GB disk @ 4096 byte blocks ==> we need 32 KB bitmap.
+ * Bit 0 ==> local node thinks this block is binary identical on both nodes
+ * Bit 1 ==> local node thinks this block needs to be synced.
+ */
+
+#define SLEEP_TIME (HZ/10)
+
+#define BM_BLOCK_SHIFT 12 /* 4k per bit */
+#define BM_BLOCK_SIZE (1<<BM_BLOCK_SHIFT)
+/* (9+3) : 512 bytes @ 8 bits; representing 16M storage
+ * per sector of on disk bitmap */
+#define BM_EXT_SHIFT (BM_BLOCK_SHIFT + MD_SECTOR_SHIFT + 3) /* = 24 */
+#define BM_EXT_SIZE (1<<BM_EXT_SHIFT)
+
+#if (BM_EXT_SHIFT != 24) || (BM_BLOCK_SHIFT != 12)
+#error "HAVE YOU FIXED drbdmeta AS WELL??"
+#endif
+
+/* thus many _storage_ sectors are described by one bit */
+#define BM_SECT_TO_BIT(x) ((x)>>(BM_BLOCK_SHIFT-9))
+#define BM_BIT_TO_SECT(x) ((sector_t)(x)<<(BM_BLOCK_SHIFT-9))
+#define BM_SECT_PER_BIT BM_BIT_TO_SECT(1)
+
+/* bit to represented kilo byte conversion */
+#define Bit2KB(bits) ((bits)<<(BM_BLOCK_SHIFT-10))
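+/* Worked example for the conversions above (illustrative only): with
+ * BM_BLOCK_SHIFT = 12, one bit covers 4 KiB = 8 sectors, so
+ * BM_SECT_TO_BIT(16) = 2, BM_BIT_TO_SECT(2) = 16 and Bit2KB(1) = 4. */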
+
+/* in which _bitmap_ extent (resp. sector) the bit for a certain
+ * _storage_ sector is located in */
+#define BM_SECT_TO_EXT(x) ((x)>>(BM_EXT_SHIFT-9))
+
+/* how much _storage_ sectors we have per bitmap sector */
+#define BM_EXT_TO_SECT(x) ((sector_t)(x) << (BM_EXT_SHIFT-9))
+#define BM_SECT_PER_EXT BM_EXT_TO_SECT(1)
+
+/* in one sector of the bitmap, we have this many activity_log extents. */
+#define AL_EXT_PER_BM_SECT (1 << (BM_EXT_SHIFT - AL_EXTENT_SHIFT))
+#define BM_WORDS_PER_AL_EXT (1 << (AL_EXTENT_SHIFT-BM_BLOCK_SHIFT-LN2_BPL))
+
+#define BM_BLOCKS_PER_BM_EXT_B (BM_EXT_SHIFT - BM_BLOCK_SHIFT)
+#define BM_BLOCKS_PER_BM_EXT_MASK ((1<<BM_BLOCKS_PER_BM_EXT_B) - 1)
+
+/* the extent in "PER_EXTENT" below is an activity log extent
+ * we need that many (long words/bytes) to store the bitmap
+ * of one AL_EXTENT_SIZE chunk of storage.
+ * we can store the bitmap for that many AL_EXTENTS within
+ * one sector of the _on_disk_ bitmap:
+ * bit 0 bit 37 bit 38 bit (512*8)-1
+ * ...|........|........|.. // ..|........|
+ * sect. 0 `296 `304 ^(512*8*8)-1
+ *
+#define BM_WORDS_PER_EXT ( (AL_EXT_SIZE/BM_BLOCK_SIZE) / BITS_PER_LONG )
+#define BM_BYTES_PER_EXT ( (AL_EXT_SIZE/BM_BLOCK_SIZE) / 8 ) // 128
+#define BM_EXT_PER_SECT ( 512 / BM_BYTES_PER_EXTENT ) // 4
+ */
+
+#define DRBD_MAX_SECTORS_32 (0xffffffffLU)
+#define DRBD_MAX_SECTORS_BM \
+ ((MD_RESERVED_SECT - MD_BM_OFFSET) * (1LL<<(BM_EXT_SHIFT-9)))
+#if DRBD_MAX_SECTORS_BM < DRBD_MAX_SECTORS_32
+#define DRBD_MAX_SECTORS DRBD_MAX_SECTORS_BM
+#define DRBD_MAX_SECTORS_FLEX DRBD_MAX_SECTORS_BM
+#elif !defined(CONFIG_LBDAF) && BITS_PER_LONG == 32
+#define DRBD_MAX_SECTORS DRBD_MAX_SECTORS_32
+#define DRBD_MAX_SECTORS_FLEX DRBD_MAX_SECTORS_32
+#else
+#define DRBD_MAX_SECTORS DRBD_MAX_SECTORS_BM
+/* 16 TB in units of sectors */
+#if BITS_PER_LONG == 32
+/* adjust by one page worth of bitmap,
+ * so we won't wrap around in drbd_bm_find_next_bit.
+ * you should use a 64bit OS for that much storage, anyway. */
+#define DRBD_MAX_SECTORS_FLEX BM_BIT_TO_SECT(0xffff7fff)
+#else
+/* we allow up to 1 PiB now on 64bit architecture with "flexible" meta data */
+#define DRBD_MAX_SECTORS_FLEX (1UL << 51)
+/* corresponds to (1UL << 38) bits right now. */
+#endif
+#endif
+
+/* Sector shift value for the "hash" functions of tl_hash and ee_hash tables.
+ * With a value of 8, all IO in one 128K block makes it to the same slot of the
+ * hash table. */
+#define HT_SHIFT 8
+#define DRBD_MAX_BIO_SIZE (1U<<(9+HT_SHIFT))
+#define DRBD_MAX_BIO_SIZE_SAFE (1 << 12) /* Works always = 4k */
+
+#define DRBD_MAX_SIZE_H80_PACKET (1 << 15) /* The old header only allows packets up to 32KiB of data */
+
+/* Number of elements in the app_reads_hash */
+#define APP_R_HSIZE 15
+
+extern int drbd_bm_init(struct drbd_conf *mdev);
+extern int drbd_bm_resize(struct drbd_conf *mdev, sector_t sectors, int set_new_bits);
+extern void drbd_bm_cleanup(struct drbd_conf *mdev);
+extern void drbd_bm_set_all(struct drbd_conf *mdev);
+extern void drbd_bm_clear_all(struct drbd_conf *mdev);
+/* set/clear/test only a few bits at a time */
+extern int drbd_bm_set_bits(
+ struct drbd_conf *mdev, unsigned long s, unsigned long e);
+extern int drbd_bm_clear_bits(
+ struct drbd_conf *mdev, unsigned long s, unsigned long e);
+extern int drbd_bm_count_bits(
+ struct drbd_conf *mdev, const unsigned long s, const unsigned long e);
+/* bm_set_bits variant for use while holding drbd_bm_lock,
+ * may process the whole bitmap in one go */
+extern void _drbd_bm_set_bits(struct drbd_conf *mdev,
+ const unsigned long s, const unsigned long e);
+extern int drbd_bm_test_bit(struct drbd_conf *mdev, unsigned long bitnr);
+extern int drbd_bm_e_weight(struct drbd_conf *mdev, unsigned long enr);
+extern int drbd_bm_write_page(struct drbd_conf *mdev, unsigned int idx) __must_hold(local);
+extern int drbd_bm_read(struct drbd_conf *mdev) __must_hold(local);
+extern int drbd_bm_write(struct drbd_conf *mdev) __must_hold(local);
+extern unsigned long drbd_bm_ALe_set_all(struct drbd_conf *mdev,
+ unsigned long al_enr);
+extern size_t drbd_bm_words(struct drbd_conf *mdev);
+extern unsigned long drbd_bm_bits(struct drbd_conf *mdev);
+extern sector_t drbd_bm_capacity(struct drbd_conf *mdev);
+
+#define DRBD_END_OF_BITMAP (~(unsigned long)0)
+extern unsigned long drbd_bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo);
+/* bm_find_next variants for use while you hold drbd_bm_lock() */
+extern unsigned long _drbd_bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo);
+extern unsigned long _drbd_bm_find_next_zero(struct drbd_conf *mdev, unsigned long bm_fo);
+extern unsigned long _drbd_bm_total_weight(struct drbd_conf *mdev);
+extern unsigned long drbd_bm_total_weight(struct drbd_conf *mdev);
+extern int drbd_bm_rs_done(struct drbd_conf *mdev);
+/* for receive_bitmap */
+extern void drbd_bm_merge_lel(struct drbd_conf *mdev, size_t offset,
+ size_t number, unsigned long *buffer);
+/* for _drbd_send_bitmap */
+extern void drbd_bm_get_lel(struct drbd_conf *mdev, size_t offset,
+ size_t number, unsigned long *buffer);
+
+extern void drbd_bm_lock(struct drbd_conf *mdev, char *why, enum bm_flag flags);
+extern void drbd_bm_unlock(struct drbd_conf *mdev);
+/* drbd_main.c */
+
+extern struct kmem_cache *drbd_request_cache;
+extern struct kmem_cache *drbd_ee_cache; /* epoch entries */
+extern struct kmem_cache *drbd_bm_ext_cache; /* bitmap extents */
+extern struct kmem_cache *drbd_al_ext_cache; /* activity log extents */
+extern mempool_t *drbd_request_mempool;
+extern mempool_t *drbd_ee_mempool;
+
+extern struct page *drbd_pp_pool; /* drbd's page pool */
+extern spinlock_t drbd_pp_lock;
+extern int drbd_pp_vacant;
+extern wait_queue_head_t drbd_pp_wait;
+
+extern rwlock_t global_state_lock;
+
+extern struct drbd_conf *drbd_new_device(unsigned int minor);
+extern void drbd_free_mdev(struct drbd_conf *mdev);
+
+extern int proc_details;
+
+/* drbd_req */
+extern int drbd_make_request(struct request_queue *q, struct bio *bio);
+extern int drbd_read_remote(struct drbd_conf *mdev, struct drbd_request *req);
+extern int drbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bvm, struct bio_vec *bvec);
+extern int is_valid_ar_handle(struct drbd_request *, sector_t);
+
+
+/* drbd_nl.c */
+extern void drbd_suspend_io(struct drbd_conf *mdev);
+extern void drbd_resume_io(struct drbd_conf *mdev);
+extern char *ppsize(char *buf, unsigned long long size);
+extern sector_t drbd_new_dev_size(struct drbd_conf *, struct drbd_backing_dev *, int);
+enum determine_dev_size { dev_size_error = -1, unchanged = 0, shrunk = 1, grew = 2 };
+extern enum determine_dev_size drbd_determine_dev_size(struct drbd_conf *, enum dds_flags) __must_hold(local);
+extern void resync_after_online_grow(struct drbd_conf *);
+extern void drbd_reconsider_max_bio_size(struct drbd_conf *mdev);
+extern enum drbd_state_rv drbd_set_role(struct drbd_conf *mdev,
+ enum drbd_role new_role,
+ int force);
+extern enum drbd_disk_state drbd_try_outdate_peer(struct drbd_conf *mdev);
+extern void drbd_try_outdate_peer_async(struct drbd_conf *mdev);
+extern int drbd_khelper(struct drbd_conf *mdev, char *cmd);
+
+/* drbd_worker.c */
+extern int drbd_worker(struct drbd_thread *thi);
+extern int drbd_alter_sa(struct drbd_conf *mdev, int na);
+extern void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side);
+extern void resume_next_sg(struct drbd_conf *mdev);
+extern void suspend_other_sg(struct drbd_conf *mdev);
+extern int drbd_resync_finished(struct drbd_conf *mdev);
+/* maybe rather drbd_main.c ? */
+extern int drbd_md_sync_page_io(struct drbd_conf *mdev,
+ struct drbd_backing_dev *bdev, sector_t sector, int rw);
+extern void drbd_ov_oos_found(struct drbd_conf*, sector_t, int);
+extern void drbd_rs_controller_reset(struct drbd_conf *mdev);
+
+static inline void ov_oos_print(struct drbd_conf *mdev)
+{
+ if (mdev->ov_last_oos_size) {
+ dev_err(DEV, "Out of sync: start=%llu, size=%lu (sectors)\n",
+ (unsigned long long)mdev->ov_last_oos_start,
+ (unsigned long)mdev->ov_last_oos_size);
+ }
+ mdev->ov_last_oos_size=0;
+}
+
+
+extern void drbd_csum_bio(struct drbd_conf *, struct crypto_hash *, struct bio *, void *);
+extern void drbd_csum_ee(struct drbd_conf *, struct crypto_hash *, struct drbd_epoch_entry *, void *);
+/* worker callbacks */
+extern int w_req_cancel_conflict(struct drbd_conf *, struct drbd_work *, int);
+extern int w_read_retry_remote(struct drbd_conf *, struct drbd_work *, int);
+extern int w_e_end_data_req(struct drbd_conf *, struct drbd_work *, int);
+extern int w_e_end_rsdata_req(struct drbd_conf *, struct drbd_work *, int);
+extern int w_e_end_csum_rs_req(struct drbd_conf *, struct drbd_work *, int);
+extern int w_e_end_ov_reply(struct drbd_conf *, struct drbd_work *, int);
+extern int w_e_end_ov_req(struct drbd_conf *, struct drbd_work *, int);
+extern int w_ov_finished(struct drbd_conf *, struct drbd_work *, int);
+extern int w_resync_timer(struct drbd_conf *, struct drbd_work *, int);
+extern int w_resume_next_sg(struct drbd_conf *, struct drbd_work *, int);
+extern int w_send_write_hint(struct drbd_conf *, struct drbd_work *, int);
+extern int w_send_dblock(struct drbd_conf *, struct drbd_work *, int);
+extern int w_send_barrier(struct drbd_conf *, struct drbd_work *, int);
+extern int w_send_read_req(struct drbd_conf *, struct drbd_work *, int);
+extern int w_prev_work_done(struct drbd_conf *, struct drbd_work *, int);
+extern int w_e_reissue(struct drbd_conf *, struct drbd_work *, int);
+extern int w_restart_disk_io(struct drbd_conf *, struct drbd_work *, int);
+extern int w_send_oos(struct drbd_conf *, struct drbd_work *, int);
+extern int w_start_resync(struct drbd_conf *, struct drbd_work *, int);
+
+extern void resync_timer_fn(unsigned long data);
+extern void start_resync_timer_fn(unsigned long data);
+
+/* drbd_receiver.c */
+extern int drbd_rs_should_slow_down(struct drbd_conf *mdev, sector_t sector);
+extern int drbd_submit_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e,
+ const unsigned rw, const int fault_type);
+extern int drbd_release_ee(struct drbd_conf *mdev, struct list_head *list);
+extern struct drbd_epoch_entry *drbd_alloc_ee(struct drbd_conf *mdev,
+ u64 id,
+ sector_t sector,
+ unsigned int data_size,
+ gfp_t gfp_mask) __must_hold(local);
+extern void drbd_free_some_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e,
+ int is_net);
+#define drbd_free_ee(m,e) drbd_free_some_ee(m, e, 0)
+#define drbd_free_net_ee(m,e) drbd_free_some_ee(m, e, 1)
+extern void drbd_wait_ee_list_empty(struct drbd_conf *mdev,
+ struct list_head *head);
+extern void _drbd_wait_ee_list_empty(struct drbd_conf *mdev,
+ struct list_head *head);
+extern void drbd_set_recv_tcq(struct drbd_conf *mdev, int tcq_enabled);
+extern void _drbd_clear_done_ee(struct drbd_conf *mdev, struct list_head *to_be_freed);
+extern void drbd_flush_workqueue(struct drbd_conf *mdev);
+extern void drbd_free_tl_hash(struct drbd_conf *mdev);
+
+/* yes, there is kernel_setsockopt, but only since 2.6.18. we don't need to
+ * mess with get_fs/set_fs, we know we are KERNEL_DS always. */
+static inline int drbd_setsockopt(struct socket *sock, int level, int optname,
+ char __user *optval, int optlen)
+{
+ int err;
+ if (level == SOL_SOCKET)
+ err = sock_setsockopt(sock, level, optname, optval, optlen);
+ else
+ err = sock->ops->setsockopt(sock, level, optname, optval,
+ optlen);
+ return err;
+}
+
+static inline void drbd_tcp_cork(struct socket *sock)
+{
+ int __user val = 1;
+ (void) drbd_setsockopt(sock, SOL_TCP, TCP_CORK,
+ (char __user *)&val, sizeof(val));
+}
+
+static inline void drbd_tcp_uncork(struct socket *sock)
+{
+ int __user val = 0;
+ (void) drbd_setsockopt(sock, SOL_TCP, TCP_CORK,
+ (char __user *)&val, sizeof(val));
+}
+
+static inline void drbd_tcp_nodelay(struct socket *sock)
+{
+ int __user val = 1;
+ (void) drbd_setsockopt(sock, SOL_TCP, TCP_NODELAY,
+ (char __user *)&val, sizeof(val));
+}
+
+static inline void drbd_tcp_quickack(struct socket *sock)
+{
+ int __user val = 2;
+ (void) drbd_setsockopt(sock, SOL_TCP, TCP_QUICKACK,
+ (char __user *)&val, sizeof(val));
+}
+
+void drbd_bump_write_ordering(struct drbd_conf *mdev, enum write_ordering_e wo);
+
+/* drbd_proc.c */
+extern struct proc_dir_entry *drbd_proc;
+extern const struct file_operations drbd_proc_fops;
+extern const char *drbd_conn_str(enum drbd_conns s);
+extern const char *drbd_role_str(enum drbd_role s);
+
+/* drbd_actlog.c */
+extern void drbd_al_begin_io(struct drbd_conf *mdev, sector_t sector);
+extern void drbd_al_complete_io(struct drbd_conf *mdev, sector_t sector);
+extern void drbd_rs_complete_io(struct drbd_conf *mdev, sector_t sector);
+extern int drbd_rs_begin_io(struct drbd_conf *mdev, sector_t sector);
+extern int drbd_try_rs_begin_io(struct drbd_conf *mdev, sector_t sector);
+extern void drbd_rs_cancel_all(struct drbd_conf *mdev);
+extern int drbd_rs_del_all(struct drbd_conf *mdev);
+extern void drbd_rs_failed_io(struct drbd_conf *mdev,
+ sector_t sector, int size);
+extern int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *);
+extern void drbd_advance_rs_marks(struct drbd_conf *mdev, unsigned long still_to_go);
+extern void __drbd_set_in_sync(struct drbd_conf *mdev, sector_t sector,
+ int size, const char *file, const unsigned int line);
+#define drbd_set_in_sync(mdev, sector, size) \
+ __drbd_set_in_sync(mdev, sector, size, __FILE__, __LINE__)
+extern int __drbd_set_out_of_sync(struct drbd_conf *mdev, sector_t sector,
+ int size, const char *file, const unsigned int line);
+#define drbd_set_out_of_sync(mdev, sector, size) \
+ __drbd_set_out_of_sync(mdev, sector, size, __FILE__, __LINE__)
+extern void drbd_al_apply_to_bm(struct drbd_conf *mdev);
+extern void drbd_al_shrink(struct drbd_conf *mdev);
+
+
+/* drbd_nl.c */
+
+void drbd_nl_cleanup(void);
+int __init drbd_nl_init(void);
+void drbd_bcast_state(struct drbd_conf *mdev, union drbd_state);
+void drbd_bcast_sync_progress(struct drbd_conf *mdev);
+void drbd_bcast_ee(struct drbd_conf *mdev,
+ const char *reason, const int dgs,
+ const char* seen_hash, const char* calc_hash,
+ const struct drbd_epoch_entry* e);
+
+
+/**
+ * DOC: DRBD State macros
+ *
+ * These macros are used to express state changes in easily readable form.
+ *
+ * The NS macros expand to a mask and a value that can be bit-ORed onto the
+ * current state once the spinlock (req_lock) has been taken.
+ *
+ * The _NS macros are used for state functions that get called with the
+ * spinlock already held. These macros expand directly to the new state value.
+ *
+ * Besides the basic forms NS() and _NS() additional _?NS[23] are defined
+ * to express state changes that affect more than one aspect of the state.
+ *
+ * E.g. NS2(conn, C_CONNECTED, peer, R_SECONDARY)
+ * means that the network connection was established and that the peer
+ * is in the secondary role.
+ */
+#define role_MASK R_MASK
+#define peer_MASK R_MASK
+#define disk_MASK D_MASK
+#define pdsk_MASK D_MASK
+#define conn_MASK C_MASK
+#define susp_MASK 1
+#define user_isp_MASK 1
+#define aftr_isp_MASK 1
+#define susp_nod_MASK 1
+#define susp_fen_MASK 1
+
+#define NS(T, S) \
+ ({ union drbd_state mask; mask.i = 0; mask.T = T##_MASK; mask; }), \
+ ({ union drbd_state val; val.i = 0; val.T = (S); val; })
+#define NS2(T1, S1, T2, S2) \
+ ({ union drbd_state mask; mask.i = 0; mask.T1 = T1##_MASK; \
+ mask.T2 = T2##_MASK; mask; }), \
+ ({ union drbd_state val; val.i = 0; val.T1 = (S1); \
+ val.T2 = (S2); val; })
+#define NS3(T1, S1, T2, S2, T3, S3) \
+ ({ union drbd_state mask; mask.i = 0; mask.T1 = T1##_MASK; \
+ mask.T2 = T2##_MASK; mask.T3 = T3##_MASK; mask; }), \
+ ({ union drbd_state val; val.i = 0; val.T1 = (S1); \
+ val.T2 = (S2); val.T3 = (S3); val; })
+
+#define _NS(D, T, S) \
+ D, ({ union drbd_state __ns; __ns.i = D->state.i; __ns.T = (S); __ns; })
+#define _NS2(D, T1, S1, T2, S2) \
+ D, ({ union drbd_state __ns; __ns.i = D->state.i; __ns.T1 = (S1); \
+ __ns.T2 = (S2); __ns; })
+#define _NS3(D, T1, S1, T2, S2, T3, S3) \
+ D, ({ union drbd_state __ns; __ns.i = D->state.i; __ns.T1 = (S1); \
+ __ns.T2 = (S2); __ns.T3 = (S3); __ns; })
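+
+/* Illustrative use only (not a complete list of call sites): a caller such as
+ *   drbd_request_state(mdev, NS(conn, C_DISCONNECTING));
+ * passes mask = { .conn = C_MASK } and val = { .conn = C_DISCONNECTING },
+ * while code that already holds req_lock uses the _NS() form, e.g.
+ *   _drbd_set_state(_NS(mdev, disk, D_FAILED), CS_HARD, NULL);
+ * which expands to (mdev, <current state with .disk = D_FAILED>),
+ * as done in __drbd_chk_io_error_() below. */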
+
+/*
+ * inline helper functions
+ *************************/
+
+/* see also page_chain_add and friends in drbd_receiver.c */
+static inline struct page *page_chain_next(struct page *page)
+{
+ return (struct page *)page_private(page);
+}
+#define page_chain_for_each(page) \
+ for (; page && ({ prefetch(page_chain_next(page)); 1; }); \
+ page = page_chain_next(page))
+#define page_chain_for_each_safe(page, n) \
+ for (; page && ({ n = page_chain_next(page); 1; }); page = n)
+
+static inline int drbd_bio_has_active_page(struct bio *bio)
+{
+ struct bio_vec *bvec;
+ int i;
+
+ __bio_for_each_segment(bvec, bio, i, 0) {
+ if (page_count(bvec->bv_page) > 1)
+ return 1;
+ }
+
+ return 0;
+}
+
+static inline int drbd_ee_has_active_page(struct drbd_epoch_entry *e)
+{
+ struct page *page = e->pages;
+ page_chain_for_each(page) {
+ if (page_count(page) > 1)
+ return 1;
+ }
+ return 0;
+}
+
+
+static inline void drbd_state_lock(struct drbd_conf *mdev)
+{
+ wait_event(mdev->misc_wait,
+ !test_and_set_bit(CLUSTER_ST_CHANGE, &mdev->flags));
+}
+
+static inline void drbd_state_unlock(struct drbd_conf *mdev)
+{
+ clear_bit(CLUSTER_ST_CHANGE, &mdev->flags);
+ wake_up(&mdev->misc_wait);
+}
+
+static inline enum drbd_state_rv
+_drbd_set_state(struct drbd_conf *mdev, union drbd_state ns,
+ enum chg_state_flags flags, struct completion *done)
+{
+ enum drbd_state_rv rv;
+
+ read_lock(&global_state_lock);
+ rv = __drbd_set_state(mdev, ns, flags, done);
+ read_unlock(&global_state_lock);
+
+ return rv;
+}
+
+/**
+ * drbd_request_state() - Request a state change
+ * @mdev: DRBD device.
+ * @mask: mask of state bits to change.
+ * @val: value of new state bits.
+ *
+ * This is the most graceful way of requesting a state change. It is quite
+ * verbose in case the state change is not possible, and all those
+ * state changes are globally serialized.
+ */
+static inline int drbd_request_state(struct drbd_conf *mdev,
+ union drbd_state mask,
+ union drbd_state val)
+{
+ return _drbd_request_state(mdev, mask, val, CS_VERBOSE + CS_ORDERED);
+}
+
+#define __drbd_chk_io_error(m,f) __drbd_chk_io_error_(m,f, __func__)
+static inline void __drbd_chk_io_error_(struct drbd_conf *mdev, int forcedetach, const char *where)
+{
+ switch (mdev->ldev->dc.on_io_error) {
+ case EP_PASS_ON:
+ if (!forcedetach) {
+ if (__ratelimit(&drbd_ratelimit_state))
+ dev_err(DEV, "Local IO failed in %s.\n", where);
+ if (mdev->state.disk > D_INCONSISTENT)
+ _drbd_set_state(_NS(mdev, disk, D_INCONSISTENT), CS_HARD, NULL);
+ break;
+ }
+ /* NOTE fall through to detach case if forcedetach set */
+ case EP_DETACH:
+ case EP_CALL_HELPER:
+ set_bit(WAS_IO_ERROR, &mdev->flags);
+ if (mdev->state.disk > D_FAILED) {
+ _drbd_set_state(_NS(mdev, disk, D_FAILED), CS_HARD, NULL);
+ dev_err(DEV,
+ "Local IO failed in %s. Detaching...\n", where);
+ }
+ break;
+ }
+}
+
+/**
+ * drbd_chk_io_error() - Handle the on_io_error setting, should be called from all io completion handlers
+ * @mdev: DRBD device.
+ * @error: Error code passed to the IO completion callback
+ * @forcedetach: Force detach. I.e. the error happened while accessing the meta data
+ *
+ * See also drbd_main.c:after_state_ch() if (os.disk > D_FAILED && ns.disk == D_FAILED)
+ */
+#define drbd_chk_io_error(m,e,f) drbd_chk_io_error_(m,e,f, __func__)
+static inline void drbd_chk_io_error_(struct drbd_conf *mdev,
+ int error, int forcedetach, const char *where)
+{
+ if (error) {
+ unsigned long flags;
+ spin_lock_irqsave(&mdev->req_lock, flags);
+ __drbd_chk_io_error_(mdev, forcedetach, where);
+ spin_unlock_irqrestore(&mdev->req_lock, flags);
+ }
+}
+
+
+/**
+ * drbd_md_first_sector() - Returns the first sector number of the meta data area
+ * @bdev: Meta data block device.
+ *
+ * BTW, for internal meta data, this happens to be the maximum capacity
+ * we could agree upon with our peer node.
+ */
+static inline sector_t drbd_md_first_sector(struct drbd_backing_dev *bdev)
+{
+ switch (bdev->dc.meta_dev_idx) {
+ case DRBD_MD_INDEX_INTERNAL:
+ case DRBD_MD_INDEX_FLEX_INT:
+ return bdev->md.md_offset + bdev->md.bm_offset;
+ case DRBD_MD_INDEX_FLEX_EXT:
+ default:
+ return bdev->md.md_offset;
+ }
+}
+
+/**
+ * drbd_md_last_sector() - Return the last sector number of the meta data area
+ * @bdev: Meta data block device.
+ */
+static inline sector_t drbd_md_last_sector(struct drbd_backing_dev *bdev)
+{
+ switch (bdev->dc.meta_dev_idx) {
+ case DRBD_MD_INDEX_INTERNAL:
+ case DRBD_MD_INDEX_FLEX_INT:
+ return bdev->md.md_offset + MD_AL_OFFSET - 1;
+ case DRBD_MD_INDEX_FLEX_EXT:
+ default:
+ return bdev->md.md_offset + bdev->md.md_size_sect;
+ }
+}
+
+/* Returns the number of 512 byte sectors of the device */
+static inline sector_t drbd_get_capacity(struct block_device *bdev)
+{
+ /* return bdev ? get_capacity(bdev->bd_disk) : 0; */
+ return bdev ? i_size_read(bdev->bd_inode) >> 9 : 0;
+}
+
+/**
+ * drbd_get_max_capacity() - Returns the capacity we announce to our peer
+ * @bdev: Meta data block device.
+ *
+ * Returns the capacity we announce to our peer. We clip ourselves at the
+ * various MAX_SECTORS, because if we don't, the current implementation will
+ * oops sooner or later.
+ */
+static inline sector_t drbd_get_max_capacity(struct drbd_backing_dev *bdev)
+{
+ sector_t s;
+ switch (bdev->dc.meta_dev_idx) {
+ case DRBD_MD_INDEX_INTERNAL:
+ case DRBD_MD_INDEX_FLEX_INT:
+ s = drbd_get_capacity(bdev->backing_bdev)
+ ? min_t(sector_t, DRBD_MAX_SECTORS_FLEX,
+ drbd_md_first_sector(bdev))
+ : 0;
+ break;
+ case DRBD_MD_INDEX_FLEX_EXT:
+ s = min_t(sector_t, DRBD_MAX_SECTORS_FLEX,
+ drbd_get_capacity(bdev->backing_bdev));
+ /* clip at maximum size the meta device can support */
+ s = min_t(sector_t, s,
+ BM_EXT_TO_SECT(bdev->md.md_size_sect
+ - bdev->md.bm_offset));
+ break;
+ default:
+ s = min_t(sector_t, DRBD_MAX_SECTORS,
+ drbd_get_capacity(bdev->backing_bdev));
+ }
+ return s;
+}
+
+/**
+ * drbd_md_ss__() - Return the sector number of our meta data super block
+ * @mdev: DRBD device.
+ * @bdev: Meta data block device.
+ */
+static inline sector_t drbd_md_ss__(struct drbd_conf *mdev,
+ struct drbd_backing_dev *bdev)
+{
+ switch (bdev->dc.meta_dev_idx) {
+ default: /* external, some index */
+ return MD_RESERVED_SECT * bdev->dc.meta_dev_idx;
+ case DRBD_MD_INDEX_INTERNAL:
+ /* with drbd08, internal meta data is always "flexible" */
+ case DRBD_MD_INDEX_FLEX_INT:
+ /* sizeof(struct md_on_disk_07) == 4k
+ * position: last 4k aligned block of 4k size */
+ if (!bdev->backing_bdev) {
+ if (__ratelimit(&drbd_ratelimit_state)) {
+ dev_err(DEV, "bdev->backing_bdev==NULL\n");
+ dump_stack();
+ }
+ return 0;
+ }
+ return (drbd_get_capacity(bdev->backing_bdev) & ~7ULL)
+ - MD_AL_OFFSET;
+ case DRBD_MD_INDEX_FLEX_EXT:
+ return 0;
+ }
+}
+
+static inline void
+drbd_queue_work_front(struct drbd_work_queue *q, struct drbd_work *w)
+{
+ unsigned long flags;
+ spin_lock_irqsave(&q->q_lock, flags);
+ list_add(&w->list, &q->q);
+ up(&q->s); /* within the spinlock,
+ see comment near end of drbd_worker() */
+ spin_unlock_irqrestore(&q->q_lock, flags);
+}
+
+static inline void
+drbd_queue_work(struct drbd_work_queue *q, struct drbd_work *w)
+{
+ unsigned long flags;
+ spin_lock_irqsave(&q->q_lock, flags);
+ list_add_tail(&w->list, &q->q);
+ up(&q->s); /* within the spinlock,
+ see comment near end of drbd_worker() */
+ spin_unlock_irqrestore(&q->q_lock, flags);
+}
+
+static inline void wake_asender(struct drbd_conf *mdev)
+{
+ if (test_bit(SIGNAL_ASENDER, &mdev->flags))
+ force_sig(DRBD_SIG, mdev->asender.task);
+}
+
+static inline void request_ping(struct drbd_conf *mdev)
+{
+ set_bit(SEND_PING, &mdev->flags);
+ wake_asender(mdev);
+}
+
+static inline int drbd_send_short_cmd(struct drbd_conf *mdev,
+ enum drbd_packets cmd)
+{
+ struct p_header80 h;
+ return drbd_send_cmd(mdev, USE_DATA_SOCKET, cmd, &h, sizeof(h));
+}
+
+static inline int drbd_send_ping(struct drbd_conf *mdev)
+{
+ struct p_header80 h;
+ return drbd_send_cmd(mdev, USE_META_SOCKET, P_PING, &h, sizeof(h));
+}
+
+static inline int drbd_send_ping_ack(struct drbd_conf *mdev)
+{
+ struct p_header80 h;
+ return drbd_send_cmd(mdev, USE_META_SOCKET, P_PING_ACK, &h, sizeof(h));
+}
+
+static inline void drbd_thread_stop(struct drbd_thread *thi)
+{
+ _drbd_thread_stop(thi, false, true);
+}
+
+static inline void drbd_thread_stop_nowait(struct drbd_thread *thi)
+{
+ _drbd_thread_stop(thi, false, false);
+}
+
+static inline void drbd_thread_restart_nowait(struct drbd_thread *thi)
+{
+ _drbd_thread_stop(thi, true, false);
+}
+
+/* counts how many answer packets we expect from our peer,
+ * for either explicit application requests,
+ * or implicit barrier packets as necessary.
+ * increased:
+ * w_send_barrier
+ * _req_mod(req, queue_for_net_write or queue_for_net_read);
+ * it is much easier and equally valid to count what we queue for the
+ * worker, even before it actually was queued or sent.
+ * (drbd_make_request_common; recovery path on read io-error)
+ * decreased:
+ * got_BarrierAck (respective tl_clear, tl_clear_barrier)
+ * _req_mod(req, data_received)
+ * [from receive_DataReply]
+ * _req_mod(req, write_acked_by_peer or recv_acked_by_peer or neg_acked)
+ * [from got_BlockAck (P_WRITE_ACK, P_RECV_ACK)]
+ * for some reason it is NOT decreased in got_NegAck,
+ * but in the resulting cleanup code from report_params.
+ * we should try to remember the reason for that...
+ * _req_mod(req, send_failed or send_canceled)
+ * _req_mod(req, connection_lost_while_pending)
+ * [from tl_clear_barrier]
+ */
+static inline void inc_ap_pending(struct drbd_conf *mdev)
+{
+ atomic_inc(&mdev->ap_pending_cnt);
+}
+
+#define ERR_IF_CNT_IS_NEGATIVE(which) \
+ if (atomic_read(&mdev->which) < 0) \
+ dev_err(DEV, "in %s:%d: " #which " = %d < 0 !\n", \
+ __func__ , __LINE__ , \
+ atomic_read(&mdev->which))
+
+#define dec_ap_pending(mdev) do { \
+ typecheck(struct drbd_conf *, mdev); \
+ if (atomic_dec_and_test(&mdev->ap_pending_cnt)) \
+ wake_up(&mdev->misc_wait); \
+ ERR_IF_CNT_IS_NEGATIVE(ap_pending_cnt); } while (0)
+
+/* counts how many resync-related answers we still expect from the peer
+ * increase decrease
+ * C_SYNC_TARGET sends P_RS_DATA_REQUEST (and expects P_RS_DATA_REPLY)
+ * C_SYNC_SOURCE sends P_RS_DATA_REPLY (and expects P_WRITE_ACK with ID_SYNCER)
+ * (or P_NEG_ACK with ID_SYNCER)
+ */
+static inline void inc_rs_pending(struct drbd_conf *mdev)
+{
+ atomic_inc(&mdev->rs_pending_cnt);
+}
+
+#define dec_rs_pending(mdev) do { \
+ typecheck(struct drbd_conf *, mdev); \
+ atomic_dec(&mdev->rs_pending_cnt); \
+ ERR_IF_CNT_IS_NEGATIVE(rs_pending_cnt); } while (0)
+
+/* counts how many answers we still need to send to the peer.
+ * increased on
+ * receive_Data unless protocol A;
+ * we need to send a P_RECV_ACK (proto B)
+ * or P_WRITE_ACK (proto C)
+ * receive_RSDataReply (recv_resync_read) we need to send a P_WRITE_ACK
+ * receive_DataRequest (receive_RSDataRequest) we need to send back P_DATA
+ * receive_Barrier_* we need to send a P_BARRIER_ACK
+ */
+static inline void inc_unacked(struct drbd_conf *mdev)
+{
+ atomic_inc(&mdev->unacked_cnt);
+}
+
+#define dec_unacked(mdev) do { \
+ typecheck(struct drbd_conf *, mdev); \
+ atomic_dec(&mdev->unacked_cnt); \
+ ERR_IF_CNT_IS_NEGATIVE(unacked_cnt); } while (0)
+
+#define sub_unacked(mdev, n) do { \
+ typecheck(struct drbd_conf *, mdev); \
+ atomic_sub(n, &mdev->unacked_cnt); \
+ ERR_IF_CNT_IS_NEGATIVE(unacked_cnt); } while (0)
+
+
+static inline void put_net_conf(struct drbd_conf *mdev)
+{
+ if (atomic_dec_and_test(&mdev->net_cnt))
+ wake_up(&mdev->net_cnt_wait);
+}
+
+/**
+ * get_net_conf() - Increase ref count on mdev->net_conf; Returns 0 if nothing there
+ * @mdev: DRBD device.
+ *
+ * You have to call put_net_conf() when finished working with mdev->net_conf.
+ */
+static inline int get_net_conf(struct drbd_conf *mdev)
+{
+ int have_net_conf;
+
+ atomic_inc(&mdev->net_cnt);
+ have_net_conf = mdev->state.conn >= C_UNCONNECTED;
+ if (!have_net_conf)
+ put_net_conf(mdev);
+ return have_net_conf;
+}
+
+/**
+ * get_ldev() - Increase the ref count on mdev->ldev. Returns 0 if there is no ldev
+ * @M: DRBD device.
+ *
+ * You have to call put_ldev() when finished working with mdev->ldev.
+ */
+#define get_ldev(M) __cond_lock(local, _get_ldev_if_state(M,D_INCONSISTENT))
+#define get_ldev_if_state(M,MINS) __cond_lock(local, _get_ldev_if_state(M,MINS))
+
+static inline void put_ldev(struct drbd_conf *mdev)
+{
+ int i = atomic_dec_return(&mdev->local_cnt);
+
+ /* This may be called from some endio handler,
+ * so we must not sleep here. */
+
+ __release(local);
+ D_ASSERT(i >= 0);
+ if (i == 0) {
+ if (mdev->state.disk == D_DISKLESS)
+ /* even internal references gone, safe to destroy */
+ drbd_ldev_destroy(mdev);
+ if (mdev->state.disk == D_FAILED)
+ /* all application IO references gone. */
+ drbd_go_diskless(mdev);
+ wake_up(&mdev->misc_wait);
+ }
+}
+
+#ifndef __CHECKER__
+static inline int _get_ldev_if_state(struct drbd_conf *mdev, enum drbd_disk_state mins)
+{
+ int io_allowed;
+
+ /* never get a reference while D_DISKLESS */
+ if (mdev->state.disk == D_DISKLESS)
+ return 0;
+
+ atomic_inc(&mdev->local_cnt);
+ io_allowed = (mdev->state.disk >= mins);
+ if (!io_allowed)
+ put_ldev(mdev);
+ return io_allowed;
+}
+#else
+extern int _get_ldev_if_state(struct drbd_conf *mdev, enum drbd_disk_state mins);
+#endif
+
+/* you must have a "get_ldev" reference */
+static inline void drbd_get_syncer_progress(struct drbd_conf *mdev,
+ unsigned long *bits_left, unsigned int *per_mil_done)
+{
+ /* this is to break it at compile time when we change that, in case we
+ * want to support more than (1<<32) bits on a 32bit arch. */
+ typecheck(unsigned long, mdev->rs_total);
+
+ /* note: both rs_total and rs_left are in bits, i.e. in
+ * units of BM_BLOCK_SIZE.
+ * for the percentage, we don't care. */
+
+ if (mdev->state.conn == C_VERIFY_S || mdev->state.conn == C_VERIFY_T)
+ *bits_left = mdev->ov_left;
+ else
+ *bits_left = drbd_bm_total_weight(mdev) - mdev->rs_failed;
+ /* >> 10 to prevent overflow,
+ * +1 to prevent division by zero */
+ if (*bits_left > mdev->rs_total) {
+ /* doh. maybe a logic bug somewhere.
+ * may also be just a race condition
+ * between this and a disconnect during sync.
+ * for now, just prevent in-kernel buffer overflow.
+ */
+ smp_rmb();
+ dev_warn(DEV, "cs:%s rs_left=%lu > rs_total=%lu (rs_failed %lu)\n",
+ drbd_conn_str(mdev->state.conn),
+ *bits_left, mdev->rs_total, mdev->rs_failed);
+ *per_mil_done = 0;
+ } else {
+ /* Make sure the division happens in long context.
+ * We allow up to one petabyte storage right now,
+ * at a granularity of 4k per bit that is 2**38 bits.
+ * After shift right and multiplication by 1000,
+ * this should still fit easily into a 32bit long,
+ * so we don't need a 64bit division on 32bit arch.
+ * Note: currently we don't support such large bitmaps on 32bit
+ * arch anyways, but no harm done to be prepared for it here.
+ */
+ unsigned int shift = mdev->rs_total >= (1ULL << 32) ? 16 : 10;
+ unsigned long left = *bits_left >> shift;
+ unsigned long total = 1UL + (mdev->rs_total >> shift);
+ unsigned long tmp = 1000UL - left * 1000UL/total;
+ *per_mil_done = tmp;
+ }
+}
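+
+/* Worked example of the calculation above (illustrative numbers only):
+ * with rs_total = 1048576 bits (4 GiB at 4 KiB per bit) and
+ * bits_left = 524288, shift is 10, so left = 512, total = 1025 and
+ * per_mil_done = 1000 - 512 * 1000 / 1025 = 501, i.e. roughly 50.1%. */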
+
+
+/* this throttles on-the-fly application requests
+ * according to max_buffers settings;
+ * maybe re-implement using semaphores? */
+static inline int drbd_get_max_buffers(struct drbd_conf *mdev)
+{
+ int mxb = 1000000; /* arbitrary limit on open requests */
+ if (get_net_conf(mdev)) {
+ mxb = mdev->net_conf->max_buffers;
+ put_net_conf(mdev);
+ }
+ return mxb;
+}
+
+static inline int drbd_state_is_stable(struct drbd_conf *mdev)
+{
+ union drbd_state s = mdev->state;
+
+ /* DO NOT add a default clause, we want the compiler to warn us
+ * for any newly introduced state we may have forgotten to add here */
+
+ switch ((enum drbd_conns)s.conn) {
+ /* new io only accepted when there is no connection, ... */
+ case C_STANDALONE:
+ case C_WF_CONNECTION:
+ /* ... or there is a well established connection. */
+ case C_CONNECTED:
+ case C_SYNC_SOURCE:
+ case C_SYNC_TARGET:
+ case C_VERIFY_S:
+ case C_VERIFY_T:
+ case C_PAUSED_SYNC_S:
+ case C_PAUSED_SYNC_T:
+ case C_AHEAD:
+ case C_BEHIND:
+ /* transitional states, IO allowed */
+ case C_DISCONNECTING:
+ case C_UNCONNECTED:
+ case C_TIMEOUT:
+ case C_BROKEN_PIPE:
+ case C_NETWORK_FAILURE:
+ case C_PROTOCOL_ERROR:
+ case C_TEAR_DOWN:
+ case C_WF_REPORT_PARAMS:
+ case C_STARTING_SYNC_S:
+ case C_STARTING_SYNC_T:
+ break;
+
+ /* Allow IO in BM exchange states with new protocols */
+ case C_WF_BITMAP_S:
+ if (mdev->agreed_pro_version < 96)
+ return 0;
+ break;
+
+ /* no new io accepted in these states */
+ case C_WF_BITMAP_T:
+ case C_WF_SYNC_UUID:
+ case C_MASK:
+ /* not "stable" */
+ return 0;
+ }
+
+ switch ((enum drbd_disk_state)s.disk) {
+ case D_DISKLESS:
+ case D_INCONSISTENT:
+ case D_OUTDATED:
+ case D_CONSISTENT:
+ case D_UP_TO_DATE:
+ /* disk state is stable as well. */
+ break;
+
+	/* no new io accepted during transitional states */
+ case D_ATTACHING:
+ case D_FAILED:
+ case D_NEGOTIATING:
+ case D_UNKNOWN:
+ case D_MASK:
+ /* not "stable" */
+ return 0;
+ }
+
+ return 1;
+}
+
+static inline int is_susp(union drbd_state s)
+{
+ return s.susp || s.susp_nod || s.susp_fen;
+}
+
+static inline bool may_inc_ap_bio(struct drbd_conf *mdev)
+{
+ int mxb = drbd_get_max_buffers(mdev);
+
+ if (is_susp(mdev->state))
+ return false;
+ if (test_bit(SUSPEND_IO, &mdev->flags))
+ return false;
+
+ /* to avoid potential deadlock or bitmap corruption,
+ * in various places, we only allow new application io
+ * to start during "stable" states. */
+
+ /* no new io accepted when attaching or detaching the disk */
+ if (!drbd_state_is_stable(mdev))
+ return false;
+
+ /* since some older kernels don't have atomic_add_unless,
+ * and we are within the spinlock anyways, we have this workaround. */
+ if (atomic_read(&mdev->ap_bio_cnt) > mxb)
+ return false;
+ if (test_bit(BITMAP_IO, &mdev->flags))
+ return false;
+ return true;
+}
+
+static inline bool inc_ap_bio_cond(struct drbd_conf *mdev, int count)
+{
+ bool rv = false;
+
+ spin_lock_irq(&mdev->req_lock);
+ rv = may_inc_ap_bio(mdev);
+ if (rv)
+ atomic_add(count, &mdev->ap_bio_cnt);
+ spin_unlock_irq(&mdev->req_lock);
+
+ return rv;
+}
+
+static inline void inc_ap_bio(struct drbd_conf *mdev, int count)
+{
+ /* we wait here
+ * as long as the device is suspended
+	 * until the bitmap is no longer on the fly during connection handshake,
+	 * as long as we would exceed the max_buffer limit.
+ *
+ * to avoid races with the reconnect code,
+ * we need to atomic_inc within the spinlock. */
+
+ wait_event(mdev->misc_wait, inc_ap_bio_cond(mdev, count));
+}
+
+static inline void dec_ap_bio(struct drbd_conf *mdev)
+{
+ int mxb = drbd_get_max_buffers(mdev);
+ int ap_bio = atomic_dec_return(&mdev->ap_bio_cnt);
+
+ D_ASSERT(ap_bio >= 0);
+ /* this currently does wake_up for every dec_ap_bio!
+ * maybe rather introduce some type of hysteresis?
+ * e.g. (ap_bio == mxb/2 || ap_bio == 0) ? */
+ if (ap_bio < mxb)
+ wake_up(&mdev->misc_wait);
+ if (ap_bio == 0 && test_bit(BITMAP_IO, &mdev->flags)) {
+ if (!test_and_set_bit(BITMAP_IO_QUEUED, &mdev->flags))
+ drbd_queue_work(&mdev->data.work, &mdev->bm_io_work.w);
+ }
+}
+
+static inline int drbd_set_ed_uuid(struct drbd_conf *mdev, u64 val)
+{
+ int changed = mdev->ed_uuid != val;
+ mdev->ed_uuid = val;
+ return changed;
+}
+
+static inline int seq_cmp(u32 a, u32 b)
+{
+ /* we assume wrap around at 32bit.
+ * for wrap around at 24bit (old atomic_t),
+ * we'd have to
+ * a <<= 8; b <<= 8;
+ */
+ return (s32)(a) - (s32)(b);
+}
+#define seq_lt(a, b) (seq_cmp((a), (b)) < 0)
+#define seq_gt(a, b) (seq_cmp((a), (b)) > 0)
+#define seq_ge(a, b) (seq_cmp((a), (b)) >= 0)
+#define seq_le(a, b) (seq_cmp((a), (b)) <= 0)
+/* CAUTION: please no side effects in arguments! */
+#define seq_max(a, b) ((u32)(seq_gt((a), (b)) ? (a) : (b)))
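+
+/* Example of the wrap-around semantics (illustrative values): with
+ * a = 2 and b = 0xfffffffe, seq_cmp(a, b) = 2 - (-2) = 4 > 0, so
+ * seq_gt(a, b) is true and seq_max(a, b) yields 2, i.e. a sequence
+ * number just past the wrap counts as newer than one just before it. */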
+
+static inline void update_peer_seq(struct drbd_conf *mdev, unsigned int new_seq)
+{
+ unsigned int m;
+ spin_lock(&mdev->peer_seq_lock);
+ m = seq_max(mdev->peer_seq, new_seq);
+ mdev->peer_seq = m;
+ spin_unlock(&mdev->peer_seq_lock);
+ if (m == new_seq)
+ wake_up(&mdev->seq_wait);
+}
+
+static inline void drbd_update_congested(struct drbd_conf *mdev)
+{
+ struct sock *sk = mdev->data.socket->sk;
+ if (sk->sk_wmem_queued > sk->sk_sndbuf * 4 / 5)
+ set_bit(NET_CONGESTED, &mdev->flags);
+}
+
+static inline int drbd_queue_order_type(struct drbd_conf *mdev)
+{
+ /* sorry, we currently have no working implementation
+ * of distributed TCQ stuff */
+#ifndef QUEUE_ORDERED_NONE
+#define QUEUE_ORDERED_NONE 0
+#endif
+ return QUEUE_ORDERED_NONE;
+}
+
+static inline void drbd_md_flush(struct drbd_conf *mdev)
+{
+ int r;
+
+ if (test_bit(MD_NO_FUA, &mdev->flags))
+ return;
+
+ r = blkdev_issue_flush(mdev->ldev->md_bdev, GFP_KERNEL, NULL);
+ if (r) {
+ set_bit(MD_NO_FUA, &mdev->flags);
+ dev_err(DEV, "meta data flush failed with status %d, disabling md-flushes\n", r);
+ }
+}
+
+#endif
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
new file mode 100644
index 00000000..0358e553
--- /dev/null
+++ b/drivers/block/drbd/drbd_main.c
@@ -0,0 +1,4203 @@
+/*
+ drbd.c
+
+ This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
+
+ Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
+ Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
+ Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
+
+ Thanks to Carter Burden, Bart Grantham and Gennadiy Nerubayev
+ from Logicworks, Inc. for making SDP replication support possible.
+
+ drbd is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ drbd is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with drbd; see the file COPYING. If not, write to
+ the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ */
+
+#include <linux/module.h>
+#include <linux/drbd.h>
+#include <asm/uaccess.h>
+#include <asm/types.h>
+#include <net/sock.h>
+#include <linux/ctype.h>
+#include <linux/mutex.h>
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/proc_fs.h>
+#include <linux/init.h>
+#include <linux/mm.h>
+#include <linux/memcontrol.h>
+#include <linux/mm_inline.h>
+#include <linux/slab.h>
+#include <linux/random.h>
+#include <linux/reboot.h>
+#include <linux/notifier.h>
+#include <linux/kthread.h>
+
+#define __KERNEL_SYSCALLS__
+#include <linux/unistd.h>
+#include <linux/vmalloc.h>
+
+#include <linux/drbd_limits.h>
+#include "drbd_int.h"
+#include "drbd_req.h" /* only for _req_mod in tl_release and tl_clear */
+
+#include "drbd_vli.h"
+
+struct after_state_chg_work {
+ struct drbd_work w;
+ union drbd_state os;
+ union drbd_state ns;
+ enum chg_state_flags flags;
+ struct completion *done;
+};
+
+static DEFINE_MUTEX(drbd_main_mutex);
+int drbdd_init(struct drbd_thread *);
+int drbd_worker(struct drbd_thread *);
+int drbd_asender(struct drbd_thread *);
+
+int drbd_init(void);
+static int drbd_open(struct block_device *bdev, fmode_t mode);
+static int drbd_release(struct gendisk *gd, fmode_t mode);
+static int w_after_state_ch(struct drbd_conf *mdev, struct drbd_work *w, int unused);
+static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
+ union drbd_state ns, enum chg_state_flags flags);
+static int w_md_sync(struct drbd_conf *mdev, struct drbd_work *w, int unused);
+static void md_sync_timer_fn(unsigned long data);
+static int w_bitmap_io(struct drbd_conf *mdev, struct drbd_work *w, int unused);
+static int w_go_diskless(struct drbd_conf *mdev, struct drbd_work *w, int unused);
+
+MODULE_AUTHOR("Philipp Reisner <phil@linbit.com>, "
+ "Lars Ellenberg <lars@linbit.com>");
+MODULE_DESCRIPTION("drbd - Distributed Replicated Block Device v" REL_VERSION);
+MODULE_VERSION(REL_VERSION);
+MODULE_LICENSE("GPL");
+MODULE_PARM_DESC(minor_count, "Maximum number of drbd devices ("
+ __stringify(DRBD_MINOR_COUNT_MIN) "-" __stringify(DRBD_MINOR_COUNT_MAX) ")");
+MODULE_ALIAS_BLOCKDEV_MAJOR(DRBD_MAJOR);
+
+#include <linux/moduleparam.h>
+/* allow_open_on_secondary */
+MODULE_PARM_DESC(allow_oos, "DONT USE!");
+/* thanks to these macros, if compiled into the kernel (not-module),
+ * this becomes the boot parameter drbd.minor_count */
+module_param(minor_count, uint, 0444);
+module_param(disable_sendpage, bool, 0644);
+module_param(allow_oos, bool, 0);
+module_param(cn_idx, uint, 0444);
+module_param(proc_details, int, 0644);
+
+#ifdef CONFIG_DRBD_FAULT_INJECTION
+int enable_faults;
+int fault_rate;
+static int fault_count;
+int fault_devs;
+/* bitmap of enabled faults */
+module_param(enable_faults, int, 0664);
+/* fault rate % value - applies to all enabled faults */
+module_param(fault_rate, int, 0664);
+/* count of faults inserted */
+module_param(fault_count, int, 0664);
+/* bitmap of devices to insert faults on */
+module_param(fault_devs, int, 0644);
+#endif
+
+/* module parameter, defined */
+unsigned int minor_count = DRBD_MINOR_COUNT_DEF;
+int disable_sendpage;
+int allow_oos;
+unsigned int cn_idx = CN_IDX_DRBD;
+int proc_details;       /* Detail level in proc drbd */
+
+/* Module parameter for setting the user mode helper program
+ * to run. Default is /sbin/drbdadm */
+char usermode_helper[80] = "/sbin/drbdadm";
+
+module_param_string(usermode_helper, usermode_helper, sizeof(usermode_helper), 0644);
+
+/* in 2.6.x, our device mapping and config info contains our virtual gendisks
+ * as member "struct gendisk *vdisk;"
+ */
+struct drbd_conf **minor_table;
+
+struct kmem_cache *drbd_request_cache;
+struct kmem_cache *drbd_ee_cache; /* epoch entries */
+struct kmem_cache *drbd_bm_ext_cache; /* bitmap extents */
+struct kmem_cache *drbd_al_ext_cache; /* activity log extents */
+mempool_t *drbd_request_mempool;
+mempool_t *drbd_ee_mempool;
+
+/* I do not use a standard mempool, because:
+ 1) I want to hand out the pre-allocated objects first.
+ 2) I want to be able to interrupt sleeping allocation with a signal.
+   Note: This is a singly linked list, the next pointer is the private
+ member of struct page.
+ */
+struct page *drbd_pp_pool;
+spinlock_t drbd_pp_lock;
+int drbd_pp_vacant;
+wait_queue_head_t drbd_pp_wait;
+
+DEFINE_RATELIMIT_STATE(drbd_ratelimit_state, 5 * HZ, 5);
+
+static const struct block_device_operations drbd_ops = {
+ .owner = THIS_MODULE,
+ .open = drbd_open,
+ .release = drbd_release,
+};
+
+#define ARRY_SIZE(A) (sizeof(A)/sizeof(A[0]))
+
+#ifdef __CHECKER__
+/* When checking with sparse, and this is an inline function, sparse will
+   give tons of false positives. When this is a real function, sparse works.
+ */
+int _get_ldev_if_state(struct drbd_conf *mdev, enum drbd_disk_state mins)
+{
+ int io_allowed;
+
+ atomic_inc(&mdev->local_cnt);
+ io_allowed = (mdev->state.disk >= mins);
+ if (!io_allowed) {
+ if (atomic_dec_and_test(&mdev->local_cnt))
+ wake_up(&mdev->misc_wait);
+ }
+ return io_allowed;
+}
+
+#endif
+
+/**
+ * DOC: The transfer log
+ *
+ * The transfer log is a singly linked list of &struct drbd_tl_epoch objects.
+ * mdev->newest_tle points to the head, mdev->oldest_tle points to the tail
+ * of the list. There is always at least one &struct drbd_tl_epoch object.
+ *
+ * Each &struct drbd_tl_epoch has a circular, doubly linked list of requests
+ * attached.
+ */
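+/* A rough sketch of the structure described above (illustrative only):
+ *
+ *   oldest_tle --> [epoch] --> [epoch] --> ... --> [epoch] <-- newest_tle
+ *                     |           |                   |
+ *                  requests    requests            requests
+ *            (each epoch carries a circular, doubly linked list
+ *             of struct drbd_request, linked via req->tl_requests)
+ */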
+static int tl_init(struct drbd_conf *mdev)
+{
+ struct drbd_tl_epoch *b;
+
+ /* during device minor initialization, we may well use GFP_KERNEL */
+ b = kmalloc(sizeof(struct drbd_tl_epoch), GFP_KERNEL);
+ if (!b)
+ return 0;
+ INIT_LIST_HEAD(&b->requests);
+ INIT_LIST_HEAD(&b->w.list);
+ b->next = NULL;
+ b->br_number = 4711;
+ b->n_writes = 0;
+ b->w.cb = NULL; /* if this is != NULL, we need to dec_ap_pending in tl_clear */
+
+ mdev->oldest_tle = b;
+ mdev->newest_tle = b;
+ INIT_LIST_HEAD(&mdev->out_of_sequence_requests);
+
+ mdev->tl_hash = NULL;
+ mdev->tl_hash_s = 0;
+
+ return 1;
+}
+
+static void tl_cleanup(struct drbd_conf *mdev)
+{
+ D_ASSERT(mdev->oldest_tle == mdev->newest_tle);
+ D_ASSERT(list_empty(&mdev->out_of_sequence_requests));
+ kfree(mdev->oldest_tle);
+ mdev->oldest_tle = NULL;
+ kfree(mdev->unused_spare_tle);
+ mdev->unused_spare_tle = NULL;
+ kfree(mdev->tl_hash);
+ mdev->tl_hash = NULL;
+ mdev->tl_hash_s = 0;
+}
+
+/**
+ * _tl_add_barrier() - Adds a barrier to the transfer log
+ * @mdev: DRBD device.
+ * @new: Barrier to be added before the current head of the TL.
+ *
+ * The caller must hold the req_lock.
+ */
+void _tl_add_barrier(struct drbd_conf *mdev, struct drbd_tl_epoch *new)
+{
+ struct drbd_tl_epoch *newest_before;
+
+ INIT_LIST_HEAD(&new->requests);
+ INIT_LIST_HEAD(&new->w.list);
+ new->w.cb = NULL; /* if this is != NULL, we need to dec_ap_pending in tl_clear */
+ new->next = NULL;
+ new->n_writes = 0;
+
+ newest_before = mdev->newest_tle;
+ /* never send a barrier number == 0, because that is special-cased
+ * when using TCQ for our write ordering code */
+ new->br_number = (newest_before->br_number+1) ?: 1;
+ if (mdev->newest_tle != new) {
+ mdev->newest_tle->next = new;
+ mdev->newest_tle = new;
+ }
+}
+
+/**
+ * tl_release() - Free or recycle the oldest &struct drbd_tl_epoch object of the TL
+ * @mdev: DRBD device.
+ * @barrier_nr: Expected identifier of the DRBD write barrier packet.
+ * @set_size: Expected number of requests before that barrier.
+ *
+ * In case the passed barrier_nr or set_size does not match the oldest
+ * &struct drbd_tl_epoch objects this function will cause a termination
+ * of the connection.
+ */
+void tl_release(struct drbd_conf *mdev, unsigned int barrier_nr,
+ unsigned int set_size)
+{
+ struct drbd_tl_epoch *b, *nob; /* next old barrier */
+ struct list_head *le, *tle;
+ struct drbd_request *r;
+
+ spin_lock_irq(&mdev->req_lock);
+
+ b = mdev->oldest_tle;
+
+ /* first some paranoia code */
+ if (b == NULL) {
+ dev_err(DEV, "BAD! BarrierAck #%u received, but no epoch in tl!?\n",
+ barrier_nr);
+ goto bail;
+ }
+ if (b->br_number != barrier_nr) {
+ dev_err(DEV, "BAD! BarrierAck #%u received, expected #%u!\n",
+ barrier_nr, b->br_number);
+ goto bail;
+ }
+ if (b->n_writes != set_size) {
+ dev_err(DEV, "BAD! BarrierAck #%u received with n_writes=%u, expected n_writes=%u!\n",
+ barrier_nr, set_size, b->n_writes);
+ goto bail;
+ }
+
+ /* Clean up list of requests processed during current epoch */
+ list_for_each_safe(le, tle, &b->requests) {
+ r = list_entry(le, struct drbd_request, tl_requests);
+ _req_mod(r, barrier_acked);
+ }
+ /* There could be requests on the list waiting for completion
+	   of the write to the local disk. To avoid corruption of
+	   slab's data structures we have to remove the list's head.
+
+ Also there could have been a barrier ack out of sequence, overtaking
+ the write acks - which would be a bug and violating write ordering.
+ To not deadlock in case we lose connection while such requests are
+ still pending, we need some way to find them for the
+	   _req_mod(connection_lost_while_pending).
+
+ These have been list_move'd to the out_of_sequence_requests list in
+ _req_mod(, barrier_acked) above.
+ */
+ list_del_init(&b->requests);
+
+ nob = b->next;
+ if (test_and_clear_bit(CREATE_BARRIER, &mdev->flags)) {
+ _tl_add_barrier(mdev, b);
+ if (nob)
+ mdev->oldest_tle = nob;
+ /* if nob == NULL b was the only barrier, and becomes the new
+		   barrier. Therefore mdev->oldest_tle already points to b */
+ } else {
+ D_ASSERT(nob != NULL);
+ mdev->oldest_tle = nob;
+ kfree(b);
+ }
+
+ spin_unlock_irq(&mdev->req_lock);
+ dec_ap_pending(mdev);
+
+ return;
+
+bail:
+ spin_unlock_irq(&mdev->req_lock);
+ drbd_force_state(mdev, NS(conn, C_PROTOCOL_ERROR));
+}
+
+
+/**
+ * _tl_restart() - Walks the transfer log, and applies an action to all requests
+ * @mdev: DRBD device.
+ * @what: The action/event to perform with all request objects
+ *
+ * @what might be one of connection_lost_while_pending, resend, fail_frozen_disk_io,
+ * restart_frozen_disk_io.
+ */
+static void _tl_restart(struct drbd_conf *mdev, enum drbd_req_event what)
+{
+ struct drbd_tl_epoch *b, *tmp, **pn;
+ struct list_head *le, *tle, carry_reads;
+ struct drbd_request *req;
+ int rv, n_writes, n_reads;
+
+ b = mdev->oldest_tle;
+ pn = &mdev->oldest_tle;
+ while (b) {
+ n_writes = 0;
+ n_reads = 0;
+ INIT_LIST_HEAD(&carry_reads);
+ list_for_each_safe(le, tle, &b->requests) {
+ req = list_entry(le, struct drbd_request, tl_requests);
+ rv = _req_mod(req, what);
+
+ n_writes += (rv & MR_WRITE) >> MR_WRITE_SHIFT;
+ n_reads += (rv & MR_READ) >> MR_READ_SHIFT;
+ }
+ tmp = b->next;
+
+ if (n_writes) {
+ if (what == resend) {
+ b->n_writes = n_writes;
+ if (b->w.cb == NULL) {
+ b->w.cb = w_send_barrier;
+ inc_ap_pending(mdev);
+ set_bit(CREATE_BARRIER, &mdev->flags);
+ }
+
+ drbd_queue_work(&mdev->data.work, &b->w);
+ }
+ pn = &b->next;
+ } else {
+ if (n_reads)
+ list_add(&carry_reads, &b->requests);
+ /* there could still be requests on that ring list,
+ * in case local io is still pending */
+ list_del(&b->requests);
+
+ /* dec_ap_pending corresponding to queue_barrier.
+ * the newest barrier may not have been queued yet,
+ * in which case w.cb is still NULL. */
+ if (b->w.cb != NULL)
+ dec_ap_pending(mdev);
+
+ if (b == mdev->newest_tle) {
+ /* recycle, but reinit! */
+ D_ASSERT(tmp == NULL);
+ INIT_LIST_HEAD(&b->requests);
+ list_splice(&carry_reads, &b->requests);
+ INIT_LIST_HEAD(&b->w.list);
+ b->w.cb = NULL;
+ b->br_number = net_random();
+ b->n_writes = 0;
+
+ *pn = b;
+ break;
+ }
+ *pn = tmp;
+ kfree(b);
+ }
+ b = tmp;
+ list_splice(&carry_reads, &b->requests);
+ }
+}
+
+
+/**
+ * tl_clear() - Clears all requests and &struct drbd_tl_epoch objects out of the TL
+ * @mdev: DRBD device.
+ *
+ * This is called after the connection to the peer was lost. The storage covered
+ * by the requests on the transfer log gets marked as out of sync. Called from the
+ * receiver thread and the worker thread.
+ */
+void tl_clear(struct drbd_conf *mdev)
+{
+ struct list_head *le, *tle;
+ struct drbd_request *r;
+
+ spin_lock_irq(&mdev->req_lock);
+
+ _tl_restart(mdev, connection_lost_while_pending);
+
+ /* we expect this list to be empty. */
+ D_ASSERT(list_empty(&mdev->out_of_sequence_requests));
+
+ /* but just in case, clean it up anyways! */
+ list_for_each_safe(le, tle, &mdev->out_of_sequence_requests) {
+ r = list_entry(le, struct drbd_request, tl_requests);
+ /* It would be nice to complete outside of spinlock.
+ * But this is easier for now. */
+ _req_mod(r, connection_lost_while_pending);
+ }
+
+ /* ensure bit indicating barrier is required is clear */
+ clear_bit(CREATE_BARRIER, &mdev->flags);
+
+ memset(mdev->app_reads_hash, 0, APP_R_HSIZE*sizeof(void *));
+
+ spin_unlock_irq(&mdev->req_lock);
+}
+
+void tl_restart(struct drbd_conf *mdev, enum drbd_req_event what)
+{
+ spin_lock_irq(&mdev->req_lock);
+ _tl_restart(mdev, what);
+ spin_unlock_irq(&mdev->req_lock);
+}
+
+/**
+ * cl_wide_st_chg() - true if the state change is a cluster wide one
+ * @mdev: DRBD device.
+ * @os: old (current) state.
+ * @ns: new (wanted) state.
+ */
+static int cl_wide_st_chg(struct drbd_conf *mdev,
+ union drbd_state os, union drbd_state ns)
+{
+ return (os.conn >= C_CONNECTED && ns.conn >= C_CONNECTED &&
+ ((os.role != R_PRIMARY && ns.role == R_PRIMARY) ||
+ (os.conn != C_STARTING_SYNC_T && ns.conn == C_STARTING_SYNC_T) ||
+ (os.conn != C_STARTING_SYNC_S && ns.conn == C_STARTING_SYNC_S) ||
+ (os.disk != D_DISKLESS && ns.disk == D_DISKLESS))) ||
+ (os.conn >= C_CONNECTED && ns.conn == C_DISCONNECTING) ||
+ (os.conn == C_CONNECTED && ns.conn == C_VERIFY_S);
+}
+
+enum drbd_state_rv
+drbd_change_state(struct drbd_conf *mdev, enum chg_state_flags f,
+ union drbd_state mask, union drbd_state val)
+{
+ unsigned long flags;
+ union drbd_state os, ns;
+ enum drbd_state_rv rv;
+
+ spin_lock_irqsave(&mdev->req_lock, flags);
+ os = mdev->state;
+ ns.i = (os.i & ~mask.i) | val.i;
+ rv = _drbd_set_state(mdev, ns, f, NULL);
+ ns = mdev->state;
+ spin_unlock_irqrestore(&mdev->req_lock, flags);
+
+ return rv;
+}
+
+/**
+ * drbd_force_state() - Impose a change which happens outside our control on our state
+ * @mdev: DRBD device.
+ * @mask: mask of state bits to change.
+ * @val: value of new state bits.
+ */
+void drbd_force_state(struct drbd_conf *mdev,
+ union drbd_state mask, union drbd_state val)
+{
+ drbd_change_state(mdev, CS_HARD, mask, val);
+}
+
+static enum drbd_state_rv is_valid_state(struct drbd_conf *, union drbd_state);
+static enum drbd_state_rv is_valid_state_transition(struct drbd_conf *,
+ union drbd_state,
+ union drbd_state);
+static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state os,
+ union drbd_state ns, const char **warn_sync_abort);
+int drbd_send_state_req(struct drbd_conf *,
+ union drbd_state, union drbd_state);
+
+static enum drbd_state_rv
+_req_st_cond(struct drbd_conf *mdev, union drbd_state mask,
+ union drbd_state val)
+{
+ union drbd_state os, ns;
+ unsigned long flags;
+ enum drbd_state_rv rv;
+
+ if (test_and_clear_bit(CL_ST_CHG_SUCCESS, &mdev->flags))
+ return SS_CW_SUCCESS;
+
+ if (test_and_clear_bit(CL_ST_CHG_FAIL, &mdev->flags))
+ return SS_CW_FAILED_BY_PEER;
+
+ rv = 0;
+ spin_lock_irqsave(&mdev->req_lock, flags);
+ os = mdev->state;
+ ns.i = (os.i & ~mask.i) | val.i;
+ ns = sanitize_state(mdev, os, ns, NULL);
+
+ if (!cl_wide_st_chg(mdev, os, ns))
+ rv = SS_CW_NO_NEED;
+ if (!rv) {
+ rv = is_valid_state(mdev, ns);
+ if (rv == SS_SUCCESS) {
+ rv = is_valid_state_transition(mdev, ns, os);
+ if (rv == SS_SUCCESS)
+ rv = SS_UNKNOWN_ERROR; /* cont waiting, otherwise fail. */
+ }
+ }
+ spin_unlock_irqrestore(&mdev->req_lock, flags);
+
+ return rv;
+}
+
+/**
+ * drbd_req_state() - Perform a possibly cluster-wide state change
+ * @mdev: DRBD device.
+ * @mask: mask of state bits to change.
+ * @val: value of new state bits.
+ * @f: flags
+ *
+ * Should not be called directly, use drbd_request_state() or
+ * _drbd_request_state().
+ */
+static enum drbd_state_rv
+drbd_req_state(struct drbd_conf *mdev, union drbd_state mask,
+ union drbd_state val, enum chg_state_flags f)
+{
+ struct completion done;
+ unsigned long flags;
+ union drbd_state os, ns;
+ enum drbd_state_rv rv;
+
+ init_completion(&done);
+
+ if (f & CS_SERIALIZE)
+ mutex_lock(&mdev->state_mutex);
+
+ spin_lock_irqsave(&mdev->req_lock, flags);
+ os = mdev->state;
+ ns.i = (os.i & ~mask.i) | val.i;
+ ns = sanitize_state(mdev, os, ns, NULL);
+
+ if (cl_wide_st_chg(mdev, os, ns)) {
+ rv = is_valid_state(mdev, ns);
+ if (rv == SS_SUCCESS)
+ rv = is_valid_state_transition(mdev, ns, os);
+ spin_unlock_irqrestore(&mdev->req_lock, flags);
+
+ if (rv < SS_SUCCESS) {
+ if (f & CS_VERBOSE)
+ print_st_err(mdev, os, ns, rv);
+ goto abort;
+ }
+
+ drbd_state_lock(mdev);
+ if (!drbd_send_state_req(mdev, mask, val)) {
+ drbd_state_unlock(mdev);
+ rv = SS_CW_FAILED_BY_PEER;
+ if (f & CS_VERBOSE)
+ print_st_err(mdev, os, ns, rv);
+ goto abort;
+ }
+
+ wait_event(mdev->state_wait,
+ (rv = _req_st_cond(mdev, mask, val)));
+
+ if (rv < SS_SUCCESS) {
+ drbd_state_unlock(mdev);
+ if (f & CS_VERBOSE)
+ print_st_err(mdev, os, ns, rv);
+ goto abort;
+ }
+ spin_lock_irqsave(&mdev->req_lock, flags);
+ os = mdev->state;
+ ns.i = (os.i & ~mask.i) | val.i;
+ rv = _drbd_set_state(mdev, ns, f, &done);
+ drbd_state_unlock(mdev);
+ } else {
+ rv = _drbd_set_state(mdev, ns, f, &done);
+ }
+
+ spin_unlock_irqrestore(&mdev->req_lock, flags);
+
+ if (f & CS_WAIT_COMPLETE && rv == SS_SUCCESS) {
+ D_ASSERT(current != mdev->worker.task);
+ wait_for_completion(&done);
+ }
+
+abort:
+ if (f & CS_SERIALIZE)
+ mutex_unlock(&mdev->state_mutex);
+
+ return rv;
+}
+
+/**
+ * _drbd_request_state() - Request a state change (with flags)
+ * @mdev: DRBD device.
+ * @mask: mask of state bits to change.
+ * @val: value of new state bits.
+ * @f: flags
+ *
+ * Cousin of drbd_request_state(), useful with the CS_WAIT_COMPLETE
+ * flag, or when logging of failed state change requests is not desired.
+ */
+enum drbd_state_rv
+_drbd_request_state(struct drbd_conf *mdev, union drbd_state mask,
+ union drbd_state val, enum chg_state_flags f)
+{
+ enum drbd_state_rv rv;
+
+ wait_event(mdev->state_wait,
+ (rv = drbd_req_state(mdev, mask, val, f)) != SS_IN_TRANSIENT_STATE);
+
+ return rv;
+}
+
+static void print_st(struct drbd_conf *mdev, char *name, union drbd_state ns)
+{
+ dev_err(DEV, " %s = { cs:%s ro:%s/%s ds:%s/%s %c%c%c%c }\n",
+ name,
+ drbd_conn_str(ns.conn),
+ drbd_role_str(ns.role),
+ drbd_role_str(ns.peer),
+ drbd_disk_str(ns.disk),
+ drbd_disk_str(ns.pdsk),
+ is_susp(ns) ? 's' : 'r',
+ ns.aftr_isp ? 'a' : '-',
+ ns.peer_isp ? 'p' : '-',
+ ns.user_isp ? 'u' : '-'
+ );
+}
+
+void print_st_err(struct drbd_conf *mdev, union drbd_state os,
+ union drbd_state ns, enum drbd_state_rv err)
+{
+ if (err == SS_IN_TRANSIENT_STATE)
+ return;
+ dev_err(DEV, "State change failed: %s\n", drbd_set_st_err_str(err));
+ print_st(mdev, " state", os);
+ print_st(mdev, "wanted", ns);
+}
+
+
+/**
+ * is_valid_state() - Returns an SS_ error code if ns is not valid
+ * @mdev: DRBD device.
+ * @ns: State to consider.
+ */
+static enum drbd_state_rv
+is_valid_state(struct drbd_conf *mdev, union drbd_state ns)
+{
+ /* See drbd_state_sw_errors in drbd_strings.c */
+
+ enum drbd_fencing_p fp;
+ enum drbd_state_rv rv = SS_SUCCESS;
+
+ fp = FP_DONT_CARE;
+ if (get_ldev(mdev)) {
+ fp = mdev->ldev->dc.fencing;
+ put_ldev(mdev);
+ }
+
+ if (get_net_conf(mdev)) {
+ if (!mdev->net_conf->two_primaries &&
+ ns.role == R_PRIMARY && ns.peer == R_PRIMARY)
+ rv = SS_TWO_PRIMARIES;
+ put_net_conf(mdev);
+ }
+
+ if (rv <= 0)
+ /* already found a reason to abort */;
+ else if (ns.role == R_SECONDARY && mdev->open_cnt)
+ rv = SS_DEVICE_IN_USE;
+
+ else if (ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.disk < D_UP_TO_DATE)
+ rv = SS_NO_UP_TO_DATE_DISK;
+
+ else if (fp >= FP_RESOURCE &&
+ ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.pdsk >= D_UNKNOWN)
+ rv = SS_PRIMARY_NOP;
+
+ else if (ns.role == R_PRIMARY && ns.disk <= D_INCONSISTENT && ns.pdsk <= D_INCONSISTENT)
+ rv = SS_NO_UP_TO_DATE_DISK;
+
+ else if (ns.conn > C_CONNECTED && ns.disk < D_INCONSISTENT)
+ rv = SS_NO_LOCAL_DISK;
+
+ else if (ns.conn > C_CONNECTED && ns.pdsk < D_INCONSISTENT)
+ rv = SS_NO_REMOTE_DISK;
+
+ else if (ns.conn > C_CONNECTED && ns.disk < D_UP_TO_DATE && ns.pdsk < D_UP_TO_DATE)
+ rv = SS_NO_UP_TO_DATE_DISK;
+
+ else if ((ns.conn == C_CONNECTED ||
+ ns.conn == C_WF_BITMAP_S ||
+ ns.conn == C_SYNC_SOURCE ||
+ ns.conn == C_PAUSED_SYNC_S) &&
+ ns.disk == D_OUTDATED)
+ rv = SS_CONNECTED_OUTDATES;
+
+ else if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) &&
+ (mdev->sync_conf.verify_alg[0] == 0))
+ rv = SS_NO_VERIFY_ALG;
+
+ else if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) &&
+ mdev->agreed_pro_version < 88)
+ rv = SS_NOT_SUPPORTED;
+
+ else if (ns.conn >= C_CONNECTED && ns.pdsk == D_UNKNOWN)
+ rv = SS_CONNECTED_OUTDATES;
+
+ return rv;
+}
+
+/**
+ * is_valid_state_transition() - Returns an SS_ error code if the state transition is not possible
+ * @mdev: DRBD device.
+ * @ns: new state.
+ * @os: old state.
+ */
+static enum drbd_state_rv
+is_valid_state_transition(struct drbd_conf *mdev, union drbd_state ns,
+ union drbd_state os)
+{
+ enum drbd_state_rv rv = SS_SUCCESS;
+
+ if ((ns.conn == C_STARTING_SYNC_T || ns.conn == C_STARTING_SYNC_S) &&
+ os.conn > C_CONNECTED)
+ rv = SS_RESYNC_RUNNING;
+
+ if (ns.conn == C_DISCONNECTING && os.conn == C_STANDALONE)
+ rv = SS_ALREADY_STANDALONE;
+
+ if (ns.disk > D_ATTACHING && os.disk == D_DISKLESS)
+ rv = SS_IS_DISKLESS;
+
+ if (ns.conn == C_WF_CONNECTION && os.conn < C_UNCONNECTED)
+ rv = SS_NO_NET_CONFIG;
+
+ if (ns.disk == D_OUTDATED && os.disk < D_OUTDATED && os.disk != D_ATTACHING)
+ rv = SS_LOWER_THAN_OUTDATED;
+
+ if (ns.conn == C_DISCONNECTING && os.conn == C_UNCONNECTED)
+ rv = SS_IN_TRANSIENT_STATE;
+
+ if (ns.conn == os.conn && ns.conn == C_WF_REPORT_PARAMS)
+ rv = SS_IN_TRANSIENT_STATE;
+
+ if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) && os.conn < C_CONNECTED)
+ rv = SS_NEED_CONNECTION;
+
+ if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) &&
+ ns.conn != os.conn && os.conn > C_CONNECTED)
+ rv = SS_RESYNC_RUNNING;
+
+ if ((ns.conn == C_STARTING_SYNC_S || ns.conn == C_STARTING_SYNC_T) &&
+ os.conn < C_CONNECTED)
+ rv = SS_NEED_CONNECTION;
+
+ if ((ns.conn == C_SYNC_TARGET || ns.conn == C_SYNC_SOURCE)
+ && os.conn < C_WF_REPORT_PARAMS)
+ rv = SS_NEED_CONNECTION; /* No NetworkFailure -> SyncTarget etc... */
+
+ return rv;
+}
+
+/**
+ * sanitize_state() - Resolves implicitly necessary additional changes to a state transition
+ * @mdev: DRBD device.
+ * @os: old state.
+ * @ns: new state.
+ * @warn_sync_abort: if not NULL, may be set to the name of the sync operation ("Resync" or "Online-verify") that gets implicitly aborted
+ *
+ * When we lose the connection, we have to set the state of the peer's disk (pdsk)
+ * to D_UNKNOWN. This rule and many more along those lines are in this function.
+ */
+static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state os,
+ union drbd_state ns, const char **warn_sync_abort)
+{
+ enum drbd_fencing_p fp;
+ enum drbd_disk_state disk_min, disk_max, pdsk_min, pdsk_max;
+
+ fp = FP_DONT_CARE;
+ if (get_ldev(mdev)) {
+ fp = mdev->ldev->dc.fencing;
+ put_ldev(mdev);
+ }
+
+	/* A network error state must not be set while the network part is not configured */
+ if ((ns.conn >= C_TIMEOUT && ns.conn <= C_TEAR_DOWN) &&
+ os.conn <= C_DISCONNECTING)
+ ns.conn = os.conn;
+
+ /* After a network error (+C_TEAR_DOWN) only C_UNCONNECTED or C_DISCONNECTING can follow.
+ * If you try to go into some Sync* state, that shall fail (elsewhere). */
+ if (os.conn >= C_TIMEOUT && os.conn <= C_TEAR_DOWN &&
+ ns.conn != C_UNCONNECTED && ns.conn != C_DISCONNECTING && ns.conn <= C_TEAR_DOWN)
+ ns.conn = os.conn;
+
+ /* we cannot fail (again) if we already detached */
+ if (ns.disk == D_FAILED && os.disk == D_DISKLESS)
+ ns.disk = D_DISKLESS;
+
+ /* if we are only D_ATTACHING yet,
+ * we can (and should) go directly to D_DISKLESS. */
+ if (ns.disk == D_FAILED && os.disk == D_ATTACHING)
+ ns.disk = D_DISKLESS;
+
+ /* After C_DISCONNECTING only C_STANDALONE may follow */
+ if (os.conn == C_DISCONNECTING && ns.conn != C_STANDALONE)
+ ns.conn = os.conn;
+
+ if (ns.conn < C_CONNECTED) {
+ ns.peer_isp = 0;
+ ns.peer = R_UNKNOWN;
+ if (ns.pdsk > D_UNKNOWN || ns.pdsk < D_INCONSISTENT)
+ ns.pdsk = D_UNKNOWN;
+ }
+
+ /* Clear the aftr_isp when becoming unconfigured */
+ if (ns.conn == C_STANDALONE && ns.disk == D_DISKLESS && ns.role == R_SECONDARY)
+ ns.aftr_isp = 0;
+
+ /* Abort resync if a disk fails/detaches */
+ if (os.conn > C_CONNECTED && ns.conn > C_CONNECTED &&
+ (ns.disk <= D_FAILED || ns.pdsk <= D_FAILED)) {
+ if (warn_sync_abort)
+ *warn_sync_abort =
+ os.conn == C_VERIFY_S || os.conn == C_VERIFY_T ?
+ "Online-verify" : "Resync";
+ ns.conn = C_CONNECTED;
+ }
+
+ /* Connection breaks down before we finished "Negotiating" */
+ if (ns.conn < C_CONNECTED && ns.disk == D_NEGOTIATING &&
+ get_ldev_if_state(mdev, D_NEGOTIATING)) {
+ if (mdev->ed_uuid == mdev->ldev->md.uuid[UI_CURRENT]) {
+ ns.disk = mdev->new_state_tmp.disk;
+ ns.pdsk = mdev->new_state_tmp.pdsk;
+ } else {
+ dev_alert(DEV, "Connection lost while negotiating, no data!\n");
+ ns.disk = D_DISKLESS;
+ ns.pdsk = D_UNKNOWN;
+ }
+ put_ldev(mdev);
+ }
+
+ /* D_CONSISTENT and D_OUTDATED vanish when we get connected */
+ if (ns.conn >= C_CONNECTED && ns.conn < C_AHEAD) {
+ if (ns.disk == D_CONSISTENT || ns.disk == D_OUTDATED)
+ ns.disk = D_UP_TO_DATE;
+ if (ns.pdsk == D_CONSISTENT || ns.pdsk == D_OUTDATED)
+ ns.pdsk = D_UP_TO_DATE;
+ }
+
+	/* Implications of the connection state on the disk states */
+ disk_min = D_DISKLESS;
+ disk_max = D_UP_TO_DATE;
+ pdsk_min = D_INCONSISTENT;
+ pdsk_max = D_UNKNOWN;
+ switch ((enum drbd_conns)ns.conn) {
+ case C_WF_BITMAP_T:
+ case C_PAUSED_SYNC_T:
+ case C_STARTING_SYNC_T:
+ case C_WF_SYNC_UUID:
+ case C_BEHIND:
+ disk_min = D_INCONSISTENT;
+ disk_max = D_OUTDATED;
+ pdsk_min = D_UP_TO_DATE;
+ pdsk_max = D_UP_TO_DATE;
+ break;
+ case C_VERIFY_S:
+ case C_VERIFY_T:
+ disk_min = D_UP_TO_DATE;
+ disk_max = D_UP_TO_DATE;
+ pdsk_min = D_UP_TO_DATE;
+ pdsk_max = D_UP_TO_DATE;
+ break;
+ case C_CONNECTED:
+ disk_min = D_DISKLESS;
+ disk_max = D_UP_TO_DATE;
+ pdsk_min = D_DISKLESS;
+ pdsk_max = D_UP_TO_DATE;
+ break;
+ case C_WF_BITMAP_S:
+ case C_PAUSED_SYNC_S:
+ case C_STARTING_SYNC_S:
+ case C_AHEAD:
+ disk_min = D_UP_TO_DATE;
+ disk_max = D_UP_TO_DATE;
+ pdsk_min = D_INCONSISTENT;
+ pdsk_max = D_CONSISTENT; /* D_OUTDATED would be nice. But explicit outdate necessary*/
+ break;
+ case C_SYNC_TARGET:
+ disk_min = D_INCONSISTENT;
+ disk_max = D_INCONSISTENT;
+ pdsk_min = D_UP_TO_DATE;
+ pdsk_max = D_UP_TO_DATE;
+ break;
+ case C_SYNC_SOURCE:
+ disk_min = D_UP_TO_DATE;
+ disk_max = D_UP_TO_DATE;
+ pdsk_min = D_INCONSISTENT;
+ pdsk_max = D_INCONSISTENT;
+ break;
+ case C_STANDALONE:
+ case C_DISCONNECTING:
+ case C_UNCONNECTED:
+ case C_TIMEOUT:
+ case C_BROKEN_PIPE:
+ case C_NETWORK_FAILURE:
+ case C_PROTOCOL_ERROR:
+ case C_TEAR_DOWN:
+ case C_WF_CONNECTION:
+ case C_WF_REPORT_PARAMS:
+ case C_MASK:
+ break;
+ }
+ if (ns.disk > disk_max)
+ ns.disk = disk_max;
+
+ if (ns.disk < disk_min) {
+ dev_warn(DEV, "Implicitly set disk from %s to %s\n",
+ drbd_disk_str(ns.disk), drbd_disk_str(disk_min));
+ ns.disk = disk_min;
+ }
+ if (ns.pdsk > pdsk_max)
+ ns.pdsk = pdsk_max;
+
+ if (ns.pdsk < pdsk_min) {
+ dev_warn(DEV, "Implicitly set pdsk from %s to %s\n",
+ drbd_disk_str(ns.pdsk), drbd_disk_str(pdsk_min));
+ ns.pdsk = pdsk_min;
+ }
+
+ if (fp == FP_STONITH &&
+ (ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.pdsk > D_OUTDATED) &&
+ !(os.role == R_PRIMARY && os.conn < C_CONNECTED && os.pdsk > D_OUTDATED))
+ ns.susp_fen = 1; /* Suspend IO while fence-peer handler runs (peer lost) */
+
+ if (mdev->sync_conf.on_no_data == OND_SUSPEND_IO &&
+ (ns.role == R_PRIMARY && ns.disk < D_UP_TO_DATE && ns.pdsk < D_UP_TO_DATE) &&
+ !(os.role == R_PRIMARY && os.disk < D_UP_TO_DATE && os.pdsk < D_UP_TO_DATE))
+		ns.susp_nod = 1; /* Suspend IO while no accessible data is available */
+
+ if (ns.aftr_isp || ns.peer_isp || ns.user_isp) {
+ if (ns.conn == C_SYNC_SOURCE)
+ ns.conn = C_PAUSED_SYNC_S;
+ if (ns.conn == C_SYNC_TARGET)
+ ns.conn = C_PAUSED_SYNC_T;
+ } else {
+ if (ns.conn == C_PAUSED_SYNC_S)
+ ns.conn = C_SYNC_SOURCE;
+ if (ns.conn == C_PAUSED_SYNC_T)
+ ns.conn = C_SYNC_TARGET;
+ }
+
+ return ns;
+}
+
+/* helper for __drbd_set_state */
+static void set_ov_position(struct drbd_conf *mdev, enum drbd_conns cs)
+{
+ if (mdev->agreed_pro_version < 90)
+ mdev->ov_start_sector = 0;
+ mdev->rs_total = drbd_bm_bits(mdev);
+ mdev->ov_position = 0;
+ if (cs == C_VERIFY_T) {
+ /* starting online verify from an arbitrary position
+ * does not fit well into the existing protocol.
+ * on C_VERIFY_T, we initialize ov_left and friends
+ * implicitly in receive_DataRequest once the
+ * first P_OV_REQUEST is received */
+ mdev->ov_start_sector = ~(sector_t)0;
+ } else {
+ unsigned long bit = BM_SECT_TO_BIT(mdev->ov_start_sector);
+ if (bit >= mdev->rs_total) {
+ mdev->ov_start_sector =
+ BM_BIT_TO_SECT(mdev->rs_total - 1);
+ mdev->rs_total = 1;
+ } else
+ mdev->rs_total -= bit;
+ mdev->ov_position = mdev->ov_start_sector;
+ }
+ mdev->ov_left = mdev->rs_total;
+}
+
+static void drbd_resume_al(struct drbd_conf *mdev)
+{
+ if (test_and_clear_bit(AL_SUSPENDED, &mdev->flags))
+ dev_info(DEV, "Resumed AL updates\n");
+}
+
+/**
+ * __drbd_set_state() - Set a new DRBD state
+ * @mdev: DRBD device.
+ * @ns: new state.
+ * @flags: Flags
+ * @done: Optional completion, that will get completed after the after_state_ch() finished
+ *
+ * Caller needs to hold req_lock, and global_state_lock. Do not call directly.
+ */
+enum drbd_state_rv
+__drbd_set_state(struct drbd_conf *mdev, union drbd_state ns,
+ enum chg_state_flags flags, struct completion *done)
+{
+ union drbd_state os;
+ enum drbd_state_rv rv = SS_SUCCESS;
+ const char *warn_sync_abort = NULL;
+ struct after_state_chg_work *ascw;
+
+ os = mdev->state;
+
+ ns = sanitize_state(mdev, os, ns, &warn_sync_abort);
+
+ if (ns.i == os.i)
+ return SS_NOTHING_TO_DO;
+
+ if (!(flags & CS_HARD)) {
+ /* pre-state-change checks ; only look at ns */
+ /* See drbd_state_sw_errors in drbd_strings.c */
+
+ rv = is_valid_state(mdev, ns);
+ if (rv < SS_SUCCESS) {
+ /* If the old state was illegal as well, then let
+ this happen...*/
+
+ if (is_valid_state(mdev, os) == rv)
+ rv = is_valid_state_transition(mdev, ns, os);
+ } else
+ rv = is_valid_state_transition(mdev, ns, os);
+ }
+
+ if (rv < SS_SUCCESS) {
+ if (flags & CS_VERBOSE)
+ print_st_err(mdev, os, ns, rv);
+ return rv;
+ }
+
+ if (warn_sync_abort)
+ dev_warn(DEV, "%s aborted.\n", warn_sync_abort);
+
+ {
+ char *pbp, pb[300];
+ pbp = pb;
+ *pbp = 0;
+ if (ns.role != os.role)
+ pbp += sprintf(pbp, "role( %s -> %s ) ",
+ drbd_role_str(os.role),
+ drbd_role_str(ns.role));
+ if (ns.peer != os.peer)
+ pbp += sprintf(pbp, "peer( %s -> %s ) ",
+ drbd_role_str(os.peer),
+ drbd_role_str(ns.peer));
+ if (ns.conn != os.conn)
+ pbp += sprintf(pbp, "conn( %s -> %s ) ",
+ drbd_conn_str(os.conn),
+ drbd_conn_str(ns.conn));
+ if (ns.disk != os.disk)
+ pbp += sprintf(pbp, "disk( %s -> %s ) ",
+ drbd_disk_str(os.disk),
+ drbd_disk_str(ns.disk));
+ if (ns.pdsk != os.pdsk)
+ pbp += sprintf(pbp, "pdsk( %s -> %s ) ",
+ drbd_disk_str(os.pdsk),
+ drbd_disk_str(ns.pdsk));
+ if (is_susp(ns) != is_susp(os))
+ pbp += sprintf(pbp, "susp( %d -> %d ) ",
+ is_susp(os),
+ is_susp(ns));
+ if (ns.aftr_isp != os.aftr_isp)
+ pbp += sprintf(pbp, "aftr_isp( %d -> %d ) ",
+ os.aftr_isp,
+ ns.aftr_isp);
+ if (ns.peer_isp != os.peer_isp)
+ pbp += sprintf(pbp, "peer_isp( %d -> %d ) ",
+ os.peer_isp,
+ ns.peer_isp);
+ if (ns.user_isp != os.user_isp)
+ pbp += sprintf(pbp, "user_isp( %d -> %d ) ",
+ os.user_isp,
+ ns.user_isp);
+ dev_info(DEV, "%s\n", pb);
+ }
+
+ /* solve the race between becoming unconfigured,
+ * worker doing the cleanup, and
+ * admin reconfiguring us:
+ * on (re)configure, first set CONFIG_PENDING,
+ * then wait for a potentially exiting worker,
+ * start the worker, and schedule one no_op.
+ * then proceed with configuration.
+ */
+ if (ns.disk == D_DISKLESS &&
+ ns.conn == C_STANDALONE &&
+ ns.role == R_SECONDARY &&
+ !test_and_set_bit(CONFIG_PENDING, &mdev->flags))
+ set_bit(DEVICE_DYING, &mdev->flags);
+
+	/* if we are going -> D_FAILED or D_DISKLESS, grab one extra reference
+	 * on the ldev here, to be sure that the transition to D_DISKLESS and
+	 * drbd_ldev_destroy() won't happen before our corresponding
+	 * after_state_ch work has run, where we put_ldev again. */
+ if ((os.disk != D_FAILED && ns.disk == D_FAILED) ||
+ (os.disk != D_DISKLESS && ns.disk == D_DISKLESS))
+ atomic_inc(&mdev->local_cnt);
+
+ mdev->state = ns;
+
+ if (os.disk == D_ATTACHING && ns.disk >= D_NEGOTIATING)
+ drbd_print_uuids(mdev, "attached to UUIDs");
+
+ wake_up(&mdev->misc_wait);
+ wake_up(&mdev->state_wait);
+
+ /* aborted verify run. log the last position */
+ if ((os.conn == C_VERIFY_S || os.conn == C_VERIFY_T) &&
+ ns.conn < C_CONNECTED) {
+ mdev->ov_start_sector =
+ BM_BIT_TO_SECT(drbd_bm_bits(mdev) - mdev->ov_left);
+ dev_info(DEV, "Online Verify reached sector %llu\n",
+ (unsigned long long)mdev->ov_start_sector);
+ }
+
+ if ((os.conn == C_PAUSED_SYNC_T || os.conn == C_PAUSED_SYNC_S) &&
+ (ns.conn == C_SYNC_TARGET || ns.conn == C_SYNC_SOURCE)) {
+ dev_info(DEV, "Syncer continues.\n");
+ mdev->rs_paused += (long)jiffies
+ -(long)mdev->rs_mark_time[mdev->rs_last_mark];
+ if (ns.conn == C_SYNC_TARGET)
+ mod_timer(&mdev->resync_timer, jiffies);
+ }
+
+ if ((os.conn == C_SYNC_TARGET || os.conn == C_SYNC_SOURCE) &&
+ (ns.conn == C_PAUSED_SYNC_T || ns.conn == C_PAUSED_SYNC_S)) {
+ dev_info(DEV, "Resync suspended\n");
+ mdev->rs_mark_time[mdev->rs_last_mark] = jiffies;
+ }
+
+ if (os.conn == C_CONNECTED &&
+ (ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T)) {
+ unsigned long now = jiffies;
+ int i;
+
+ set_ov_position(mdev, ns.conn);
+ mdev->rs_start = now;
+ mdev->rs_last_events = 0;
+ mdev->rs_last_sect_ev = 0;
+ mdev->ov_last_oos_size = 0;
+ mdev->ov_last_oos_start = 0;
+
+ for (i = 0; i < DRBD_SYNC_MARKS; i++) {
+ mdev->rs_mark_left[i] = mdev->ov_left;
+ mdev->rs_mark_time[i] = now;
+ }
+
+ drbd_rs_controller_reset(mdev);
+
+ if (ns.conn == C_VERIFY_S) {
+ dev_info(DEV, "Starting Online Verify from sector %llu\n",
+ (unsigned long long)mdev->ov_position);
+ mod_timer(&mdev->resync_timer, jiffies);
+ }
+ }
+
+ if (get_ldev(mdev)) {
+ u32 mdf = mdev->ldev->md.flags & ~(MDF_CONSISTENT|MDF_PRIMARY_IND|
+ MDF_CONNECTED_IND|MDF_WAS_UP_TO_DATE|
+ MDF_PEER_OUT_DATED|MDF_CRASHED_PRIMARY);
+
+ if (test_bit(CRASHED_PRIMARY, &mdev->flags))
+ mdf |= MDF_CRASHED_PRIMARY;
+ if (mdev->state.role == R_PRIMARY ||
+ (mdev->state.pdsk < D_INCONSISTENT && mdev->state.peer == R_PRIMARY))
+ mdf |= MDF_PRIMARY_IND;
+ if (mdev->state.conn > C_WF_REPORT_PARAMS)
+ mdf |= MDF_CONNECTED_IND;
+ if (mdev->state.disk > D_INCONSISTENT)
+ mdf |= MDF_CONSISTENT;
+ if (mdev->state.disk > D_OUTDATED)
+ mdf |= MDF_WAS_UP_TO_DATE;
+ if (mdev->state.pdsk <= D_OUTDATED && mdev->state.pdsk >= D_INCONSISTENT)
+ mdf |= MDF_PEER_OUT_DATED;
+ if (mdf != mdev->ldev->md.flags) {
+ mdev->ldev->md.flags = mdf;
+ drbd_md_mark_dirty(mdev);
+ }
+ if (os.disk < D_CONSISTENT && ns.disk >= D_CONSISTENT)
+ drbd_set_ed_uuid(mdev, mdev->ldev->md.uuid[UI_CURRENT]);
+ put_ldev(mdev);
+ }
+
+	/* Peer was forced D_UP_TO_DATE & R_PRIMARY, consider resyncing */
+ if (os.disk == D_INCONSISTENT && os.pdsk == D_INCONSISTENT &&
+ os.peer == R_SECONDARY && ns.peer == R_PRIMARY)
+ set_bit(CONSIDER_RESYNC, &mdev->flags);
+
+ /* Receiver should clean up itself */
+ if (os.conn != C_DISCONNECTING && ns.conn == C_DISCONNECTING)
+ drbd_thread_stop_nowait(&mdev->receiver);
+
+ /* Now the receiver finished cleaning up itself, it should die */
+ if (os.conn != C_STANDALONE && ns.conn == C_STANDALONE)
+ drbd_thread_stop_nowait(&mdev->receiver);
+
+ /* Upon network failure, we need to restart the receiver. */
+ if (os.conn > C_TEAR_DOWN &&
+ ns.conn <= C_TEAR_DOWN && ns.conn >= C_TIMEOUT)
+ drbd_thread_restart_nowait(&mdev->receiver);
+
+ /* Resume AL writing if we get a connection */
+ if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED)
+ drbd_resume_al(mdev);
+
+ ascw = kmalloc(sizeof(*ascw), GFP_ATOMIC);
+ if (ascw) {
+ ascw->os = os;
+ ascw->ns = ns;
+ ascw->flags = flags;
+ ascw->w.cb = w_after_state_ch;
+ ascw->done = done;
+ drbd_queue_work(&mdev->data.work, &ascw->w);
+ } else {
+ dev_warn(DEV, "Could not kmalloc an ascw\n");
+ }
+
+ return rv;
+}
+
+static int w_after_state_ch(struct drbd_conf *mdev, struct drbd_work *w, int unused)
+{
+ struct after_state_chg_work *ascw =
+ container_of(w, struct after_state_chg_work, w);
+ after_state_ch(mdev, ascw->os, ascw->ns, ascw->flags);
+ if (ascw->flags & CS_WAIT_COMPLETE) {
+ D_ASSERT(ascw->done != NULL);
+ complete(ascw->done);
+ }
+ kfree(ascw);
+
+ return 1;
+}
+
+static void abw_start_sync(struct drbd_conf *mdev, int rv)
+{
+ if (rv) {
+		dev_err(DEV, "Writing the bitmap failed, not starting resync.\n");
+ _drbd_request_state(mdev, NS(conn, C_CONNECTED), CS_VERBOSE);
+ return;
+ }
+
+ switch (mdev->state.conn) {
+ case C_STARTING_SYNC_T:
+ _drbd_request_state(mdev, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE);
+ break;
+ case C_STARTING_SYNC_S:
+ drbd_start_resync(mdev, C_SYNC_SOURCE);
+ break;
+ }
+}
+
+int drbd_bitmap_io_from_worker(struct drbd_conf *mdev,
+ int (*io_fn)(struct drbd_conf *),
+ char *why, enum bm_flag flags)
+{
+ int rv;
+
+ D_ASSERT(current == mdev->worker.task);
+
+ /* open coded non-blocking drbd_suspend_io(mdev); */
+ set_bit(SUSPEND_IO, &mdev->flags);
+
+ drbd_bm_lock(mdev, why, flags);
+ rv = io_fn(mdev);
+ drbd_bm_unlock(mdev);
+
+ drbd_resume_io(mdev);
+
+ return rv;
+}
+
+/**
+ * after_state_ch() - Perform after state change actions that may sleep
+ * @mdev: DRBD device.
+ * @os: old state.
+ * @ns: new state.
+ * @flags: Flags
+ */
+static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
+ union drbd_state ns, enum chg_state_flags flags)
+{
+ enum drbd_fencing_p fp;
+ enum drbd_req_event what = nothing;
+ union drbd_state nsm = (union drbd_state){ .i = -1 };
+
+ if (os.conn != C_CONNECTED && ns.conn == C_CONNECTED) {
+ clear_bit(CRASHED_PRIMARY, &mdev->flags);
+ if (mdev->p_uuid)
+ mdev->p_uuid[UI_FLAGS] &= ~((u64)2);
+ }
+
+ fp = FP_DONT_CARE;
+ if (get_ldev(mdev)) {
+ fp = mdev->ldev->dc.fencing;
+ put_ldev(mdev);
+ }
+
+ /* Inform userspace about the change... */
+ drbd_bcast_state(mdev, ns);
+
+ if (!(os.role == R_PRIMARY && os.disk < D_UP_TO_DATE && os.pdsk < D_UP_TO_DATE) &&
+ (ns.role == R_PRIMARY && ns.disk < D_UP_TO_DATE && ns.pdsk < D_UP_TO_DATE))
+ drbd_khelper(mdev, "pri-on-incon-degr");
+
+ /* Here we have the actions that are performed after a
+ state change. This function might sleep */
+
+ nsm.i = -1;
+ if (ns.susp_nod) {
+ if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED)
+ what = resend;
+
+ if (os.disk == D_ATTACHING && ns.disk > D_ATTACHING)
+ what = restart_frozen_disk_io;
+
+ if (what != nothing)
+ nsm.susp_nod = 0;
+ }
+
+ if (ns.susp_fen) {
+ /* case1: The outdate peer handler is successful: */
+ if (os.pdsk > D_OUTDATED && ns.pdsk <= D_OUTDATED) {
+ tl_clear(mdev);
+ if (test_bit(NEW_CUR_UUID, &mdev->flags)) {
+ drbd_uuid_new_current(mdev);
+ clear_bit(NEW_CUR_UUID, &mdev->flags);
+ }
+ spin_lock_irq(&mdev->req_lock);
+ _drbd_set_state(_NS(mdev, susp_fen, 0), CS_VERBOSE, NULL);
+ spin_unlock_irq(&mdev->req_lock);
+ }
+ /* case2: The connection was established again: */
+ if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED) {
+ clear_bit(NEW_CUR_UUID, &mdev->flags);
+ what = resend;
+ nsm.susp_fen = 0;
+ }
+ }
+
+ if (what != nothing) {
+ spin_lock_irq(&mdev->req_lock);
+ _tl_restart(mdev, what);
+ nsm.i &= mdev->state.i;
+ _drbd_set_state(mdev, nsm, CS_VERBOSE, NULL);
+ spin_unlock_irq(&mdev->req_lock);
+ }
+
+ /* Became sync source. With protocol >= 96, we still need to send out
+ * the sync uuid now. Need to do that before any drbd_send_state, or
+ * the other side may go "paused sync" before receiving the sync uuids,
+ * which is unexpected. */
+ if ((os.conn != C_SYNC_SOURCE && os.conn != C_PAUSED_SYNC_S) &&
+ (ns.conn == C_SYNC_SOURCE || ns.conn == C_PAUSED_SYNC_S) &&
+ mdev->agreed_pro_version >= 96 && get_ldev(mdev)) {
+ drbd_gen_and_send_sync_uuid(mdev);
+ put_ldev(mdev);
+ }
+
+ /* Do not change the order of the if above and the two below... */
+ if (os.pdsk == D_DISKLESS && ns.pdsk > D_DISKLESS) { /* attach on the peer */
+ drbd_send_uuids(mdev);
+ drbd_send_state(mdev);
+ }
+ /* No point in queuing send_bitmap if we don't have a connection
+ * anymore, so check also the _current_ state, not only the new state
+ * at the time this work was queued. */
+ if (os.conn != C_WF_BITMAP_S && ns.conn == C_WF_BITMAP_S &&
+ mdev->state.conn == C_WF_BITMAP_S)
+ drbd_queue_bitmap_io(mdev, &drbd_send_bitmap, NULL,
+ "send_bitmap (WFBitMapS)",
+ BM_LOCKED_TEST_ALLOWED);
+
+ /* Lost contact to peer's copy of the data */
+ if ((os.pdsk >= D_INCONSISTENT &&
+ os.pdsk != D_UNKNOWN &&
+ os.pdsk != D_OUTDATED)
+ && (ns.pdsk < D_INCONSISTENT ||
+ ns.pdsk == D_UNKNOWN ||
+ ns.pdsk == D_OUTDATED)) {
+ if (get_ldev(mdev)) {
+ if ((ns.role == R_PRIMARY || ns.peer == R_PRIMARY) &&
+ mdev->ldev->md.uuid[UI_BITMAP] == 0 && ns.disk >= D_UP_TO_DATE) {
+ if (is_susp(mdev->state)) {
+ set_bit(NEW_CUR_UUID, &mdev->flags);
+ } else {
+ drbd_uuid_new_current(mdev);
+ drbd_send_uuids(mdev);
+ }
+ }
+ put_ldev(mdev);
+ }
+ }
+
+ if (ns.pdsk < D_INCONSISTENT && get_ldev(mdev)) {
+ if (ns.peer == R_PRIMARY && mdev->ldev->md.uuid[UI_BITMAP] == 0) {
+ drbd_uuid_new_current(mdev);
+ drbd_send_uuids(mdev);
+ }
+
+ /* D_DISKLESS Peer becomes secondary */
+ if (os.peer == R_PRIMARY && ns.peer == R_SECONDARY)
+ /* We may still be Primary ourselves.
+ * No harm done if the bitmap still changes,
+ * redirtied pages will follow later. */
+ drbd_bitmap_io_from_worker(mdev, &drbd_bm_write,
+ "demote diskless peer", BM_LOCKED_SET_ALLOWED);
+ put_ldev(mdev);
+ }
+
+	/* Write out all changed bits on demote.
+	 * Though there is no need to do that just yet
+	 * if a resync is still going on. */
+ if (os.role == R_PRIMARY && ns.role == R_SECONDARY &&
+ mdev->state.conn <= C_CONNECTED && get_ldev(mdev)) {
+		/* No changes to the bitmap expected this time, so assert that,
+		 * even though no harm would be done if it did change. */
+ drbd_bitmap_io_from_worker(mdev, &drbd_bm_write,
+ "demote", BM_LOCKED_TEST_ALLOWED);
+ put_ldev(mdev);
+ }
+
+ /* Last part of the attaching process ... */
+ if (ns.conn >= C_CONNECTED &&
+ os.disk == D_ATTACHING && ns.disk == D_NEGOTIATING) {
+ drbd_send_sizes(mdev, 0, 0); /* to start sync... */
+ drbd_send_uuids(mdev);
+ drbd_send_state(mdev);
+ }
+
+ /* We want to pause/continue resync, tell peer. */
+ if (ns.conn >= C_CONNECTED &&
+ ((os.aftr_isp != ns.aftr_isp) ||
+ (os.user_isp != ns.user_isp)))
+ drbd_send_state(mdev);
+
+ /* In case one of the isp bits got set, suspend other devices. */
+ if ((!os.aftr_isp && !os.peer_isp && !os.user_isp) &&
+ (ns.aftr_isp || ns.peer_isp || ns.user_isp))
+ suspend_other_sg(mdev);
+
+	/* Make sure the peer gets informed about possible state changes
+	 * (ISP bits) that happened while we were in WFReportParams. */
+ if (os.conn == C_WF_REPORT_PARAMS && ns.conn >= C_CONNECTED)
+ drbd_send_state(mdev);
+
+ if (os.conn != C_AHEAD && ns.conn == C_AHEAD)
+ drbd_send_state(mdev);
+
+	/* We are in the process of starting a full sync... */
+ if ((os.conn != C_STARTING_SYNC_T && ns.conn == C_STARTING_SYNC_T) ||
+ (os.conn != C_STARTING_SYNC_S && ns.conn == C_STARTING_SYNC_S))
+ /* no other bitmap changes expected during this phase */
+ drbd_queue_bitmap_io(mdev,
+ &drbd_bmio_set_n_write, &abw_start_sync,
+ "set_n_write from StartingSync", BM_LOCKED_TEST_ALLOWED);
+
+	/* We are invalidating ourselves... */
+ if (os.conn < C_CONNECTED && ns.conn < C_CONNECTED &&
+ os.disk > D_INCONSISTENT && ns.disk == D_INCONSISTENT)
+ /* other bitmap operation expected during this phase */
+ drbd_queue_bitmap_io(mdev, &drbd_bmio_set_n_write, NULL,
+ "set_n_write from invalidate", BM_LOCKED_MASK);
+
+ /* first half of local IO error, failure to attach,
+ * or administrative detach */
+ if (os.disk != D_FAILED && ns.disk == D_FAILED) {
+ enum drbd_io_error_p eh;
+ int was_io_error;
+ /* corresponding get_ldev was in __drbd_set_state, to serialize
+ * our cleanup here with the transition to D_DISKLESS,
+		 * so it is safe to dereference ldev here. */
+ eh = mdev->ldev->dc.on_io_error;
+ was_io_error = test_and_clear_bit(WAS_IO_ERROR, &mdev->flags);
+
+ /* current state still has to be D_FAILED,
+ * there is only one way out: to D_DISKLESS,
+ * and that may only happen after our put_ldev below. */
+ if (mdev->state.disk != D_FAILED)
+ dev_err(DEV,
+ "ASSERT FAILED: disk is %s during detach\n",
+ drbd_disk_str(mdev->state.disk));
+
+ if (drbd_send_state(mdev))
+ dev_warn(DEV, "Notified peer that I am detaching my disk\n");
+ else
+ dev_err(DEV, "Sending state for detaching disk failed\n");
+
+ drbd_rs_cancel_all(mdev);
+
+ /* In case we want to get something to stable storage still,
+ * this may be the last chance.
+ * Following put_ldev may transition to D_DISKLESS. */
+ drbd_md_sync(mdev);
+ put_ldev(mdev);
+
+ if (was_io_error && eh == EP_CALL_HELPER)
+ drbd_khelper(mdev, "local-io-error");
+ }
+
+ /* second half of local IO error, failure to attach,
+ * or administrative detach,
+ * after local_cnt references have reached zero again */
+ if (os.disk != D_DISKLESS && ns.disk == D_DISKLESS) {
+ /* We must still be diskless,
+ * re-attach has to be serialized with this! */
+ if (mdev->state.disk != D_DISKLESS)
+ dev_err(DEV,
+ "ASSERT FAILED: disk is %s while going diskless\n",
+ drbd_disk_str(mdev->state.disk));
+
+ mdev->rs_total = 0;
+ mdev->rs_failed = 0;
+ atomic_set(&mdev->rs_pending_cnt, 0);
+
+ if (drbd_send_state(mdev))
+ dev_warn(DEV, "Notified peer that I'm now diskless.\n");
+ /* corresponding get_ldev in __drbd_set_state
+ * this may finally trigger drbd_ldev_destroy. */
+ put_ldev(mdev);
+ }
+
+	/* Notify peer that I had a local IO error and did not detach. */
+ if (os.disk == D_UP_TO_DATE && ns.disk == D_INCONSISTENT)
+ drbd_send_state(mdev);
+
+ /* Disks got bigger while they were detached */
+ if (ns.disk > D_NEGOTIATING && ns.pdsk > D_NEGOTIATING &&
+ test_and_clear_bit(RESYNC_AFTER_NEG, &mdev->flags)) {
+ if (ns.conn == C_CONNECTED)
+ resync_after_online_grow(mdev);
+ }
+
+ /* A resync finished or aborted, wake paused devices... */
+ if ((os.conn > C_CONNECTED && ns.conn <= C_CONNECTED) ||
+ (os.peer_isp && !ns.peer_isp) ||
+ (os.user_isp && !ns.user_isp))
+ resume_next_sg(mdev);
+
+ /* sync target done with resync. Explicitly notify peer, even though
+ * it should (at least for non-empty resyncs) already know itself. */
+ if (os.disk < D_UP_TO_DATE && os.conn >= C_SYNC_SOURCE && ns.conn == C_CONNECTED)
+ drbd_send_state(mdev);
+
+ /* This triggers bitmap writeout of potentially still unwritten pages
+ * if the resync finished cleanly, or aborted because of peer disk
+ * failure, or because of connection loss.
+ * For resync aborted because of local disk failure, we cannot do
+ * any bitmap writeout anymore.
+ * No harm done if some bits change during this phase.
+ */
+ if (os.conn > C_CONNECTED && ns.conn <= C_CONNECTED && get_ldev(mdev)) {
+ drbd_queue_bitmap_io(mdev, &drbd_bm_write, NULL,
+ "write from resync_finished", BM_LOCKED_SET_ALLOWED);
+ put_ldev(mdev);
+ }
+
+ /* free tl_hash if we Got thawed and are C_STANDALONE */
+	/* free tl_hash if we got thawed and are C_STANDALONE */
+ drbd_free_tl_hash(mdev);
+
+ /* Upon network connection, we need to start the receiver */
+ if (os.conn == C_STANDALONE && ns.conn == C_UNCONNECTED)
+ drbd_thread_start(&mdev->receiver);
+
+ /* Terminate worker thread if we are unconfigured - it will be
+ restarted as needed... */
+ if (ns.disk == D_DISKLESS &&
+ ns.conn == C_STANDALONE &&
+ ns.role == R_SECONDARY) {
+ if (os.aftr_isp != ns.aftr_isp)
+ resume_next_sg(mdev);
+ /* set in __drbd_set_state, unless CONFIG_PENDING was set */
+ if (test_bit(DEVICE_DYING, &mdev->flags))
+ drbd_thread_stop_nowait(&mdev->worker);
+ }
+
+ drbd_md_sync(mdev);
+}
+
+
+static int drbd_thread_setup(void *arg)
+{
+ struct drbd_thread *thi = (struct drbd_thread *) arg;
+ struct drbd_conf *mdev = thi->mdev;
+ unsigned long flags;
+ int retval;
+
+restart:
+ retval = thi->function(thi);
+
+ spin_lock_irqsave(&thi->t_lock, flags);
+
+ /* if the receiver has been "Exiting", the last thing it did
+ * was set the conn state to "StandAlone",
+ * if now a re-connect request comes in, conn state goes C_UNCONNECTED,
+ * and receiver thread will be "started".
+ * drbd_thread_start needs to set "Restarting" in that case.
+ * t_state check and assignment needs to be within the same spinlock,
+ * so either thread_start sees Exiting, and can remap to Restarting,
+	 * or thread_start sees None, and can proceed as normal.
+ */
+
+ if (thi->t_state == Restarting) {
+ dev_info(DEV, "Restarting %s\n", current->comm);
+ thi->t_state = Running;
+ spin_unlock_irqrestore(&thi->t_lock, flags);
+ goto restart;
+ }
+
+ thi->task = NULL;
+ thi->t_state = None;
+ smp_mb();
+ complete(&thi->stop);
+ spin_unlock_irqrestore(&thi->t_lock, flags);
+
+ dev_info(DEV, "Terminating %s\n", current->comm);
+
+ /* Release mod reference taken when thread was started */
+ module_put(THIS_MODULE);
+ return retval;
+}
+
+static void drbd_thread_init(struct drbd_conf *mdev, struct drbd_thread *thi,
+ int (*func) (struct drbd_thread *))
+{
+ spin_lock_init(&thi->t_lock);
+ thi->task = NULL;
+ thi->t_state = None;
+ thi->function = func;
+ thi->mdev = mdev;
+}
+
+int drbd_thread_start(struct drbd_thread *thi)
+{
+ struct drbd_conf *mdev = thi->mdev;
+ struct task_struct *nt;
+ unsigned long flags;
+
+ const char *me =
+ thi == &mdev->receiver ? "receiver" :
+ thi == &mdev->asender ? "asender" :
+ thi == &mdev->worker ? "worker" : "NONSENSE";
+
+ /* is used from state engine doing drbd_thread_stop_nowait,
+ * while holding the req lock irqsave */
+ spin_lock_irqsave(&thi->t_lock, flags);
+
+ switch (thi->t_state) {
+ case None:
+ dev_info(DEV, "Starting %s thread (from %s [%d])\n",
+ me, current->comm, current->pid);
+
+ /* Get ref on module for thread - this is released when thread exits */
+ if (!try_module_get(THIS_MODULE)) {
+ dev_err(DEV, "Failed to get module reference in drbd_thread_start\n");
+ spin_unlock_irqrestore(&thi->t_lock, flags);
+ return false;
+ }
+
+ init_completion(&thi->stop);
+ D_ASSERT(thi->task == NULL);
+ thi->reset_cpu_mask = 1;
+ thi->t_state = Running;
+ spin_unlock_irqrestore(&thi->t_lock, flags);
+		flush_signals(current); /* otherwise we may get -ERESTARTNOINTR */
+
+ nt = kthread_create(drbd_thread_setup, (void *) thi,
+ "drbd%d_%s", mdev_to_minor(mdev), me);
+
+ if (IS_ERR(nt)) {
+ dev_err(DEV, "Couldn't start thread\n");
+
+ module_put(THIS_MODULE);
+ return false;
+ }
+ spin_lock_irqsave(&thi->t_lock, flags);
+ thi->task = nt;
+ thi->t_state = Running;
+ spin_unlock_irqrestore(&thi->t_lock, flags);
+ wake_up_process(nt);
+ break;
+ case Exiting:
+ thi->t_state = Restarting;
+ dev_info(DEV, "Restarting %s thread (from %s [%d])\n",
+ me, current->comm, current->pid);
+ /* fall through */
+ case Running:
+ case Restarting:
+ default:
+ spin_unlock_irqrestore(&thi->t_lock, flags);
+ break;
+ }
+
+ return true;
+}
+
+
+void _drbd_thread_stop(struct drbd_thread *thi, int restart, int wait)
+{
+ unsigned long flags;
+
+ enum drbd_thread_state ns = restart ? Restarting : Exiting;
+
+ /* may be called from state engine, holding the req lock irqsave */
+ spin_lock_irqsave(&thi->t_lock, flags);
+
+ if (thi->t_state == None) {
+ spin_unlock_irqrestore(&thi->t_lock, flags);
+ if (restart)
+ drbd_thread_start(thi);
+ return;
+ }
+
+ if (thi->t_state != ns) {
+ if (thi->task == NULL) {
+ spin_unlock_irqrestore(&thi->t_lock, flags);
+ return;
+ }
+
+ thi->t_state = ns;
+ smp_mb();
+ init_completion(&thi->stop);
+ if (thi->task != current)
+ force_sig(DRBD_SIGKILL, thi->task);
+
+ }
+
+ spin_unlock_irqrestore(&thi->t_lock, flags);
+
+ if (wait)
+ wait_for_completion(&thi->stop);
+}
+
+#ifdef CONFIG_SMP
+/**
+ * drbd_calc_cpu_mask() - Generate CPU masks, spread over all CPUs
+ * @mdev: DRBD device.
+ *
+ * Forces all threads of a device onto the same CPU. This is beneficial for
+ * DRBD's performance. May be overridden by the user's configuration.
+ */
+void drbd_calc_cpu_mask(struct drbd_conf *mdev)
+{
+ int ord, cpu;
+
+ /* user override. */
+ if (cpumask_weight(mdev->cpu_mask))
+ return;
+
+ ord = mdev_to_minor(mdev) % cpumask_weight(cpu_online_mask);
+ for_each_online_cpu(cpu) {
+ if (ord-- == 0) {
+ cpumask_set_cpu(cpu, mdev->cpu_mask);
+ return;
+ }
+ }
+ /* should not be reached */
+ cpumask_setall(mdev->cpu_mask);
+}
+
+/**
+ * drbd_thread_current_set_cpu() - modifies the cpu mask of the _current_ thread
+ * @mdev: DRBD device.
+ *
+ * call in the "main loop" of _all_ threads, no need for any mutex, current won't die
+ * prematurely.
+ */
+void drbd_thread_current_set_cpu(struct drbd_conf *mdev)
+{
+ struct task_struct *p = current;
+ struct drbd_thread *thi =
+ p == mdev->asender.task ? &mdev->asender :
+ p == mdev->receiver.task ? &mdev->receiver :
+ p == mdev->worker.task ? &mdev->worker :
+ NULL;
+ ERR_IF(thi == NULL)
+ return;
+ if (!thi->reset_cpu_mask)
+ return;
+ thi->reset_cpu_mask = 0;
+ set_cpus_allowed_ptr(p, mdev->cpu_mask);
+}
+#endif
+
+/* the appropriate socket mutex must be held already */
+int _drbd_send_cmd(struct drbd_conf *mdev, struct socket *sock,
+ enum drbd_packets cmd, struct p_header80 *h,
+ size_t size, unsigned msg_flags)
+{
+ int sent, ok;
+
+ ERR_IF(!h) return false;
+ ERR_IF(!size) return false;
+
+ h->magic = BE_DRBD_MAGIC;
+ h->command = cpu_to_be16(cmd);
+ h->length = cpu_to_be16(size-sizeof(struct p_header80));
+
+ sent = drbd_send(mdev, sock, h, size, msg_flags);
+
+ ok = (sent == size);
+ if (!ok && !signal_pending(current))
+ dev_warn(DEV, "short sent %s size=%d sent=%d\n",
+ cmdname(cmd), (int)size, sent);
+ return ok;
+}
+
+/* don't pass the socket. we may only look at it
+ * when we hold the appropriate socket mutex.
+ */
+int drbd_send_cmd(struct drbd_conf *mdev, int use_data_socket,
+ enum drbd_packets cmd, struct p_header80 *h, size_t size)
+{
+ int ok = 0;
+ struct socket *sock;
+
+ if (use_data_socket) {
+ mutex_lock(&mdev->data.mutex);
+ sock = mdev->data.socket;
+ } else {
+ mutex_lock(&mdev->meta.mutex);
+ sock = mdev->meta.socket;
+ }
+
+ /* drbd_disconnect() could have called drbd_free_sock()
+ * while we were waiting in down()... */
+ if (likely(sock != NULL))
+ ok = _drbd_send_cmd(mdev, sock, cmd, h, size, 0);
+
+ if (use_data_socket)
+ mutex_unlock(&mdev->data.mutex);
+ else
+ mutex_unlock(&mdev->meta.mutex);
+ return ok;
+}
+
+int drbd_send_cmd2(struct drbd_conf *mdev, enum drbd_packets cmd, char *data,
+ size_t size)
+{
+ struct p_header80 h;
+ int ok;
+
+ h.magic = BE_DRBD_MAGIC;
+ h.command = cpu_to_be16(cmd);
+ h.length = cpu_to_be16(size);
+
+ if (!drbd_get_data_sock(mdev))
+ return 0;
+
+ ok = (sizeof(h) ==
+ drbd_send(mdev, mdev->data.socket, &h, sizeof(h), 0));
+ ok = ok && (size ==
+ drbd_send(mdev, mdev->data.socket, data, size, 0));
+
+ drbd_put_data_sock(mdev);
+
+ return ok;
+}
+
+int drbd_send_sync_param(struct drbd_conf *mdev, struct syncer_conf *sc)
+{
+ struct p_rs_param_95 *p;
+ struct socket *sock;
+ int size, rv;
+ const int apv = mdev->agreed_pro_version;
+
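+	/* the sync parameter packet grew over the protocol versions;
+	 * pick the layout size matching the agreed protocol version */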
+ size = apv <= 87 ? sizeof(struct p_rs_param)
+ : apv == 88 ? sizeof(struct p_rs_param)
+ + strlen(mdev->sync_conf.verify_alg) + 1
+ : apv <= 94 ? sizeof(struct p_rs_param_89)
+ : /* apv >= 95 */ sizeof(struct p_rs_param_95);
+
+ /* used from admin command context and receiver/worker context.
+ * to avoid kmalloc, grab the socket right here,
+ * then use the pre-allocated sbuf there */
+ mutex_lock(&mdev->data.mutex);
+ sock = mdev->data.socket;
+
+ if (likely(sock != NULL)) {
+ enum drbd_packets cmd = apv >= 89 ? P_SYNC_PARAM89 : P_SYNC_PARAM;
+
+ p = &mdev->data.sbuf.rs_param_95;
+
+ /* initialize verify_alg and csums_alg */
+ memset(p->verify_alg, 0, 2 * SHARED_SECRET_MAX);
+
+ p->rate = cpu_to_be32(sc->rate);
+ p->c_plan_ahead = cpu_to_be32(sc->c_plan_ahead);
+ p->c_delay_target = cpu_to_be32(sc->c_delay_target);
+ p->c_fill_target = cpu_to_be32(sc->c_fill_target);
+ p->c_max_rate = cpu_to_be32(sc->c_max_rate);
+
+ if (apv >= 88)
+ strcpy(p->verify_alg, mdev->sync_conf.verify_alg);
+ if (apv >= 89)
+ strcpy(p->csums_alg, mdev->sync_conf.csums_alg);
+
+ rv = _drbd_send_cmd(mdev, sock, cmd, &p->head, size, 0);
+ } else
+ rv = 0; /* not ok */
+
+ mutex_unlock(&mdev->data.mutex);
+
+ return rv;
+}
+
+int drbd_send_protocol(struct drbd_conf *mdev)
+{
+ struct p_protocol *p;
+ int size, cf, rv;
+
+ size = sizeof(struct p_protocol);
+
+ if (mdev->agreed_pro_version >= 87)
+ size += strlen(mdev->net_conf->integrity_alg) + 1;
+
+ /* we must not recurse into our own queue,
+ * as that is blocked during handshake */
+ p = kmalloc(size, GFP_NOIO);
+ if (p == NULL)
+ return 0;
+
+ p->protocol = cpu_to_be32(mdev->net_conf->wire_protocol);
+ p->after_sb_0p = cpu_to_be32(mdev->net_conf->after_sb_0p);
+ p->after_sb_1p = cpu_to_be32(mdev->net_conf->after_sb_1p);
+ p->after_sb_2p = cpu_to_be32(mdev->net_conf->after_sb_2p);
+ p->two_primaries = cpu_to_be32(mdev->net_conf->two_primaries);
+
+ cf = 0;
+ if (mdev->net_conf->want_lose)
+ cf |= CF_WANT_LOSE;
+ if (mdev->net_conf->dry_run) {
+ if (mdev->agreed_pro_version >= 92)
+ cf |= CF_DRY_RUN;
+ else {
+ dev_err(DEV, "--dry-run is not supported by peer");
+ kfree(p);
+ return -1;
+ }
+ }
+ p->conn_flags = cpu_to_be32(cf);
+
+ if (mdev->agreed_pro_version >= 87)
+ strcpy(p->integrity_alg, mdev->net_conf->integrity_alg);
+
+ rv = drbd_send_cmd(mdev, USE_DATA_SOCKET, P_PROTOCOL,
+ (struct p_header80 *)p, size);
+ kfree(p);
+ return rv;
+}
+
+int _drbd_send_uuids(struct drbd_conf *mdev, u64 uuid_flags)
+{
+ struct p_uuids p;
+ int i;
+
+ if (!get_ldev_if_state(mdev, D_NEGOTIATING))
+ return 1;
+
+ for (i = UI_CURRENT; i < UI_SIZE; i++)
+ p.uuid[i] = mdev->ldev ? cpu_to_be64(mdev->ldev->md.uuid[i]) : 0;
+
+ mdev->comm_bm_set = drbd_bm_total_weight(mdev);
+ p.uuid[UI_SIZE] = cpu_to_be64(mdev->comm_bm_set);
+ uuid_flags |= mdev->net_conf->want_lose ? 1 : 0;
+ uuid_flags |= test_bit(CRASHED_PRIMARY, &mdev->flags) ? 2 : 0;
+ uuid_flags |= mdev->new_state_tmp.disk == D_INCONSISTENT ? 4 : 0;
+ p.uuid[UI_FLAGS] = cpu_to_be64(uuid_flags);
+
+ put_ldev(mdev);
+
+ return drbd_send_cmd(mdev, USE_DATA_SOCKET, P_UUIDS,
+ (struct p_header80 *)&p, sizeof(p));
+}
+
+int drbd_send_uuids(struct drbd_conf *mdev)
+{
+ return _drbd_send_uuids(mdev, 0);
+}
+
+int drbd_send_uuids_skip_initial_sync(struct drbd_conf *mdev)
+{
+ return _drbd_send_uuids(mdev, 8);
+}
+
+void drbd_print_uuids(struct drbd_conf *mdev, const char *text)
+{
+ if (get_ldev_if_state(mdev, D_NEGOTIATING)) {
+ u64 *uuid = mdev->ldev->md.uuid;
+ dev_info(DEV, "%s %016llX:%016llX:%016llX:%016llX\n",
+ text,
+ (unsigned long long)uuid[UI_CURRENT],
+ (unsigned long long)uuid[UI_BITMAP],
+ (unsigned long long)uuid[UI_HISTORY_START],
+ (unsigned long long)uuid[UI_HISTORY_END]);
+ put_ldev(mdev);
+ } else {
+ dev_info(DEV, "%s effective data uuid: %016llX\n",
+ text,
+ (unsigned long long)mdev->ed_uuid);
+ }
+}
+
+int drbd_gen_and_send_sync_uuid(struct drbd_conf *mdev)
+{
+ struct p_rs_uuid p;
+ u64 uuid;
+
+ D_ASSERT(mdev->state.disk == D_UP_TO_DATE);
+
+ uuid = mdev->ldev->md.uuid[UI_BITMAP] + UUID_NEW_BM_OFFSET;
+ drbd_uuid_set(mdev, UI_BITMAP, uuid);
+ drbd_print_uuids(mdev, "updated sync UUID");
+ drbd_md_sync(mdev);
+ p.uuid = cpu_to_be64(uuid);
+
+ return drbd_send_cmd(mdev, USE_DATA_SOCKET, P_SYNC_UUID,
+ (struct p_header80 *)&p, sizeof(p));
+}
+
+int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_flags flags)
+{
+ struct p_sizes p;
+ sector_t d_size, u_size;
+ int q_order_type, max_bio_size;
+ int ok;
+
+ if (get_ldev_if_state(mdev, D_NEGOTIATING)) {
+ D_ASSERT(mdev->ldev->backing_bdev);
+ d_size = drbd_get_max_capacity(mdev->ldev);
+ u_size = mdev->ldev->dc.disk_size;
+ q_order_type = drbd_queue_order_type(mdev);
+ max_bio_size = queue_max_hw_sectors(mdev->ldev->backing_bdev->bd_disk->queue) << 9;
+ max_bio_size = min_t(int, max_bio_size, DRBD_MAX_BIO_SIZE);
+ put_ldev(mdev);
+ } else {
+ d_size = 0;
+ u_size = 0;
+ q_order_type = QUEUE_ORDERED_NONE;
+ max_bio_size = DRBD_MAX_BIO_SIZE; /* ... multiple BIOs per peer_request */
+ }
+
+ p.d_size = cpu_to_be64(d_size);
+ p.u_size = cpu_to_be64(u_size);
+ p.c_size = cpu_to_be64(trigger_reply ? 0 : drbd_get_capacity(mdev->this_bdev));
+ p.max_bio_size = cpu_to_be32(max_bio_size);
+ p.queue_order_type = cpu_to_be16(q_order_type);
+ p.dds_flags = cpu_to_be16(flags);
+
+ ok = drbd_send_cmd(mdev, USE_DATA_SOCKET, P_SIZES,
+ (struct p_header80 *)&p, sizeof(p));
+ return ok;
+}
+
+/**
+ * drbd_send_state() - Sends the drbd state to the peer
+ * @mdev: DRBD device.
+ */
+int drbd_send_state(struct drbd_conf *mdev)
+{
+ struct socket *sock;
+ struct p_state p;
+ int ok = 0;
+
+	/* Grab the state lock so we won't send state while we're in the middle
+	 * of a cluster-wide state change on another thread */
+ drbd_state_lock(mdev);
+
+ mutex_lock(&mdev->data.mutex);
+
+ p.state = cpu_to_be32(mdev->state.i); /* Within the send mutex */
+ sock = mdev->data.socket;
+
+ if (likely(sock != NULL)) {
+ ok = _drbd_send_cmd(mdev, sock, P_STATE,
+ (struct p_header80 *)&p, sizeof(p), 0);
+ }
+
+ mutex_unlock(&mdev->data.mutex);
+
+ drbd_state_unlock(mdev);
+ return ok;
+}
+
+int drbd_send_state_req(struct drbd_conf *mdev,
+ union drbd_state mask, union drbd_state val)
+{
+ struct p_req_state p;
+
+ p.mask = cpu_to_be32(mask.i);
+ p.val = cpu_to_be32(val.i);
+
+ return drbd_send_cmd(mdev, USE_DATA_SOCKET, P_STATE_CHG_REQ,
+ (struct p_header80 *)&p, sizeof(p));
+}
+
+int drbd_send_sr_reply(struct drbd_conf *mdev, enum drbd_state_rv retcode)
+{
+ struct p_req_state_reply p;
+
+ p.retcode = cpu_to_be32(retcode);
+
+ return drbd_send_cmd(mdev, USE_META_SOCKET, P_STATE_CHG_REPLY,
+ (struct p_header80 *)&p, sizeof(p));
+}
+
+int fill_bitmap_rle_bits(struct drbd_conf *mdev,
+ struct p_compressed_bm *p,
+ struct bm_xfer_ctx *c)
+{
+ struct bitstream bs;
+ unsigned long plain_bits;
+ unsigned long tmp;
+ unsigned long rl;
+ unsigned len;
+ unsigned toggle;
+ int bits;
+
+ /* may we use this feature? */
+ if ((mdev->sync_conf.use_rle == 0) ||
+ (mdev->agreed_pro_version < 90))
+ return 0;
+
+ if (c->bit_offset >= c->bm_bits)
+ return 0; /* nothing to do. */
+
+	/* use at most this many bytes */
+ bitstream_init(&bs, p->code, BM_PACKET_VLI_BYTES_MAX, 0);
+ memset(p->code, 0, BM_PACKET_VLI_BYTES_MAX);
+ /* plain bits covered in this code string */
+ plain_bits = 0;
+
+ /* p->encoding & 0x80 stores whether the first run length is set.
+ * bit offset is implicit.
+ * start with toggle == 2 to be able to tell the first iteration */
+ toggle = 2;
+
+	/* see how many plain bits we can stuff into one packet
+	 * using RLE and VLI. */
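+	/* For example: runs of 4 clear, 4 set, 1 clear and 1 set bits are
+	 * encoded as start=0 followed by the VLI run lengths 4, 4, 1, 1. */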
+ do {
+ tmp = (toggle == 0) ? _drbd_bm_find_next_zero(mdev, c->bit_offset)
+ : _drbd_bm_find_next(mdev, c->bit_offset);
+ if (tmp == -1UL)
+ tmp = c->bm_bits;
+ rl = tmp - c->bit_offset;
+
+ if (toggle == 2) { /* first iteration */
+ if (rl == 0) {
+ /* the first checked bit was set,
+ * store start value, */
+ DCBP_set_start(p, 1);
+ /* but skip encoding of zero run length */
+ toggle = !toggle;
+ continue;
+ }
+ DCBP_set_start(p, 0);
+ }
+
+ /* paranoia: catch zero runlength.
+ * can only happen if bitmap is modified while we scan it. */
+ if (rl == 0) {
+ dev_err(DEV, "unexpected zero runlength while encoding bitmap "
+ "t:%u bo:%lu\n", toggle, c->bit_offset);
+ return -1;
+ }
+
+ bits = vli_encode_bits(&bs, rl);
+ if (bits == -ENOBUFS) /* buffer full */
+ break;
+ if (bits <= 0) {
+ dev_err(DEV, "error while encoding bitmap: %d\n", bits);
+ return 0;
+ }
+
+ toggle = !toggle;
+ plain_bits += rl;
+ c->bit_offset = tmp;
+ } while (c->bit_offset < c->bm_bits);
+
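+	/* number of code bytes actually used; a partially filled last byte still counts */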
+ len = bs.cur.b - p->code + !!bs.cur.bit;
+
+ if (plain_bits < (len << 3)) {
+ /* incompressible with this method.
+ * we need to rewind both word and bit position. */
+ c->bit_offset -= plain_bits;
+ bm_xfer_ctx_bit_to_word_offset(c);
+ c->bit_offset = c->word_offset * BITS_PER_LONG;
+ return 0;
+ }
+
+ /* RLE + VLI was able to compress it just fine.
+ * update c->word_offset. */
+ bm_xfer_ctx_bit_to_word_offset(c);
+
+ /* store pad_bits */
+ DCBP_set_pad_bits(p, (8 - bs.cur.bit) & 0x7);
+
+ return len;
+}
+
+/**
+ * send_bitmap_rle_or_plain
+ *
+ * Return 0 when done, 1 when another iteration is needed, and a negative error
+ * code upon failure.
+ */
+static int
+send_bitmap_rle_or_plain(struct drbd_conf *mdev,
+ struct p_header80 *h, struct bm_xfer_ctx *c)
+{
+ struct p_compressed_bm *p = (void*)h;
+ unsigned long num_words;
+ int len;
+ int ok;
+
+ len = fill_bitmap_rle_bits(mdev, p, c);
+
+ if (len < 0)
+ return -EIO;
+
+ if (len) {
+ DCBP_set_code(p, RLE_VLI_Bits);
+ ok = _drbd_send_cmd(mdev, mdev->data.socket, P_COMPRESSED_BITMAP, h,
+ sizeof(*p) + len, 0);
+
+ c->packets[0]++;
+ c->bytes[0] += sizeof(*p) + len;
+
+ if (c->bit_offset >= c->bm_bits)
+ len = 0; /* DONE */
+ } else {
+ /* was not compressible.
+ * send a buffer full of plain text bits instead. */
+ num_words = min_t(size_t, BM_PACKET_WORDS, c->bm_words - c->word_offset);
+ len = num_words * sizeof(long);
+ if (len)
+ drbd_bm_get_lel(mdev, c->word_offset, num_words, (unsigned long*)h->payload);
+ ok = _drbd_send_cmd(mdev, mdev->data.socket, P_BITMAP,
+ h, sizeof(struct p_header80) + len, 0);
+ c->word_offset += num_words;
+ c->bit_offset = c->word_offset * BITS_PER_LONG;
+
+ c->packets[1]++;
+ c->bytes[1] += sizeof(struct p_header80) + len;
+
+ if (c->bit_offset > c->bm_bits)
+ c->bit_offset = c->bm_bits;
+ }
+ if (ok) {
+ if (len == 0) {
+ INFO_bm_xfer_stats(mdev, "send", c);
+ return 0;
+ } else
+ return 1;
+ }
+ return -EIO;
+}
+
+/* See the comment at receive_bitmap() */
+int _drbd_send_bitmap(struct drbd_conf *mdev)
+{
+ struct bm_xfer_ctx c;
+ struct p_header80 *p;
+ int err;
+
+ ERR_IF(!mdev->bitmap) return false;
+
+ /* maybe we should use some per thread scratch page,
+ * and allocate that during initial device creation? */
+ p = (struct p_header80 *) __get_free_page(GFP_NOIO);
+ if (!p) {
+ dev_err(DEV, "failed to allocate one page buffer in %s\n", __func__);
+ return false;
+ }
+
+ if (get_ldev(mdev)) {
+ if (drbd_md_test_flag(mdev->ldev, MDF_FULL_SYNC)) {
+ dev_info(DEV, "Writing the whole bitmap, MDF_FullSync was set.\n");
+ drbd_bm_set_all(mdev);
+ if (drbd_bm_write(mdev)) {
+ /* write_bm did fail! Leave full sync flag set in Meta P_DATA
+ * but otherwise process as per normal - need to tell other
+ * side that a full resync is required! */
+ dev_err(DEV, "Failed to write bitmap to disk!\n");
+ } else {
+ drbd_md_clear_flag(mdev, MDF_FULL_SYNC);
+ drbd_md_sync(mdev);
+ }
+ }
+ put_ldev(mdev);
+ }
+
+ c = (struct bm_xfer_ctx) {
+ .bm_bits = drbd_bm_bits(mdev),
+ .bm_words = drbd_bm_words(mdev),
+ };
+
+ do {
+ err = send_bitmap_rle_or_plain(mdev, p, &c);
+ } while (err > 0);
+
+ free_page((unsigned long) p);
+ return err == 0;
+}
+
+int drbd_send_bitmap(struct drbd_conf *mdev)
+{
+ int err;
+
+ if (!drbd_get_data_sock(mdev))
+ return -1;
+ err = !_drbd_send_bitmap(mdev);
+ drbd_put_data_sock(mdev);
+ return err;
+}
+
+int drbd_send_b_ack(struct drbd_conf *mdev, u32 barrier_nr, u32 set_size)
+{
+ int ok;
+ struct p_barrier_ack p;
+
+ p.barrier = barrier_nr;
+ p.set_size = cpu_to_be32(set_size);
+
+ if (mdev->state.conn < C_CONNECTED)
+ return false;
+ ok = drbd_send_cmd(mdev, USE_META_SOCKET, P_BARRIER_ACK,
+ (struct p_header80 *)&p, sizeof(p));
+ return ok;
+}
+
+/**
+ * _drbd_send_ack() - Sends an ack packet
+ * @mdev: DRBD device.
+ * @cmd: Packet command code.
+ * @sector: sector, needs to be in big endian byte order
+ * @blksize: size in bytes, needs to be in big endian byte order
+ * @block_id: Id, big endian byte order
+ */
+static int _drbd_send_ack(struct drbd_conf *mdev, enum drbd_packets cmd,
+ u64 sector,
+ u32 blksize,
+ u64 block_id)
+{
+ int ok;
+ struct p_block_ack p;
+
+ p.sector = sector;
+ p.block_id = block_id;
+ p.blksize = blksize;
+ p.seq_num = cpu_to_be32(atomic_add_return(1, &mdev->packet_seq));
+
+ if (!mdev->meta.socket || mdev->state.conn < C_CONNECTED)
+ return false;
+ ok = drbd_send_cmd(mdev, USE_META_SOCKET, cmd,
+ (struct p_header80 *)&p, sizeof(p));
+ return ok;
+}
+
+/* dp->sector and dp->block_id already/still in network byte order,
+ * data_size is payload size according to dp->head,
+ * and may need to be corrected for digest size. */
+int drbd_send_ack_dp(struct drbd_conf *mdev, enum drbd_packets cmd,
+ struct p_data *dp, int data_size)
+{
+ data_size -= (mdev->agreed_pro_version >= 87 && mdev->integrity_r_tfm) ?
+ crypto_hash_digestsize(mdev->integrity_r_tfm) : 0;
+ return _drbd_send_ack(mdev, cmd, dp->sector, cpu_to_be32(data_size),
+ dp->block_id);
+}
+
+int drbd_send_ack_rp(struct drbd_conf *mdev, enum drbd_packets cmd,
+ struct p_block_req *rp)
+{
+ return _drbd_send_ack(mdev, cmd, rp->sector, rp->blksize, rp->block_id);
+}
+
+/**
+ * drbd_send_ack() - Sends an ack packet
+ * @mdev: DRBD device.
+ * @cmd: Packet command code.
+ * @e: Epoch entry.
+ */
+int drbd_send_ack(struct drbd_conf *mdev,
+ enum drbd_packets cmd, struct drbd_epoch_entry *e)
+{
+ return _drbd_send_ack(mdev, cmd,
+ cpu_to_be64(e->sector),
+ cpu_to_be32(e->size),
+ e->block_id);
+}
+
+/* This function misuses the block_id field to signal if the blocks
+ * are in sync or not. */
+int drbd_send_ack_ex(struct drbd_conf *mdev, enum drbd_packets cmd,
+ sector_t sector, int blksize, u64 block_id)
+{
+ return _drbd_send_ack(mdev, cmd,
+ cpu_to_be64(sector),
+ cpu_to_be32(blksize),
+ cpu_to_be64(block_id));
+}
+
+int drbd_send_drequest(struct drbd_conf *mdev, int cmd,
+ sector_t sector, int size, u64 block_id)
+{
+ int ok;
+ struct p_block_req p;
+
+ p.sector = cpu_to_be64(sector);
+ p.block_id = block_id;
+ p.blksize = cpu_to_be32(size);
+
+ ok = drbd_send_cmd(mdev, USE_DATA_SOCKET, cmd,
+ (struct p_header80 *)&p, sizeof(p));
+ return ok;
+}
+
+int drbd_send_drequest_csum(struct drbd_conf *mdev,
+ sector_t sector, int size,
+ void *digest, int digest_size,
+ enum drbd_packets cmd)
+{
+ int ok;
+ struct p_block_req p;
+
+ p.sector = cpu_to_be64(sector);
+ p.block_id = BE_DRBD_MAGIC + 0xbeef;
+ p.blksize = cpu_to_be32(size);
+
+ p.head.magic = BE_DRBD_MAGIC;
+ p.head.command = cpu_to_be16(cmd);
+ p.head.length = cpu_to_be16(sizeof(p) - sizeof(struct p_header80) + digest_size);
+
+ mutex_lock(&mdev->data.mutex);
+
+ ok = (sizeof(p) == drbd_send(mdev, mdev->data.socket, &p, sizeof(p), 0));
+ ok = ok && (digest_size == drbd_send(mdev, mdev->data.socket, digest, digest_size, 0));
+
+ mutex_unlock(&mdev->data.mutex);
+
+ return ok;
+}
+
+int drbd_send_ov_request(struct drbd_conf *mdev, sector_t sector, int size)
+{
+ int ok;
+ struct p_block_req p;
+
+ p.sector = cpu_to_be64(sector);
+ p.block_id = BE_DRBD_MAGIC + 0xbabe;
+ p.blksize = cpu_to_be32(size);
+
+ ok = drbd_send_cmd(mdev, USE_DATA_SOCKET, P_OV_REQUEST,
+ (struct p_header80 *)&p, sizeof(p));
+ return ok;
+}
+
+/* called on sndtimeo
+ * returns false if we should retry,
+ * true if we think the connection is dead
+ */
+static int we_should_drop_the_connection(struct drbd_conf *mdev, struct socket *sock)
+{
+ int drop_it;
+ /* long elapsed = (long)(jiffies - mdev->last_received); */
+
+ drop_it = mdev->meta.socket == sock
+ || !mdev->asender.task
+ || get_t_state(&mdev->asender) != Running
+ || mdev->state.conn < C_CONNECTED;
+
+ if (drop_it)
+ return true;
+
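+	/* each expired send timeout decrements ko_count;
+	 * once it reaches zero, give up on this connection */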
+ drop_it = !--mdev->ko_count;
+ if (!drop_it) {
+ dev_err(DEV, "[%s/%d] sock_sendmsg time expired, ko = %u\n",
+ current->comm, current->pid, mdev->ko_count);
+ request_ping(mdev);
+ }
+
+ return drop_it; /* && (mdev->state == R_PRIMARY) */;
+}
+
+/* The idea of sendpage seems to be to put some kind of reference
+ * to the page into the skb, and to hand it over to the NIC. In
+ * this process get_page() gets called.
+ *
+ * As soon as the page was really sent over the network put_page()
+ * gets called by some part of the network layer. [ NIC driver? ]
+ *
+ * [ get_page() / put_page() increment/decrement the count. If count
+ * reaches 0 the page will be freed. ]
+ *
+ * This works nicely with pages from FSs.
+ * But this means that in protocol A we might signal IO completion too early!
+ *
+ * In order not to corrupt data during a resync we must make sure
+ * that we do not reuse our own buffer pages (EEs) too early; therefore
+ * we have the net_ee list.
+ *
+ * XFS seems to have problems, still, it submits pages with page_count == 0!
+ * As a workaround, we disable sendpage on pages
+ * with page_count == 0 or PageSlab.
+ */
+static int _drbd_no_send_page(struct drbd_conf *mdev, struct page *page,
+ int offset, size_t size, unsigned msg_flags)
+{
+ int sent = drbd_send(mdev, mdev->data.socket, kmap(page) + offset, size, msg_flags);
+ kunmap(page);
+ if (sent == size)
+ mdev->send_cnt += size>>9;
+ return sent == size;
+}
+
+static int _drbd_send_page(struct drbd_conf *mdev, struct page *page,
+ int offset, size_t size, unsigned msg_flags)
+{
+ mm_segment_t oldfs = get_fs();
+ int sent, ok;
+ int len = size;
+
+ /* e.g. XFS meta- & log-data is in slab pages, which have a
+ * page_count of 0 and/or have PageSlab() set.
+ * we cannot use send_page for those, as that does get_page();
+ * put_page(); and would cause either a VM_BUG directly, or
+ * __page_cache_release a page that would actually still be referenced
+ * by someone, leading to some obscure delayed Oops somewhere else. */
+ if (disable_sendpage || (page_count(page) < 1) || PageSlab(page))
+ return _drbd_no_send_page(mdev, page, offset, size, msg_flags);
+
+ msg_flags |= MSG_NOSIGNAL;
+ drbd_update_congested(mdev);
+ set_fs(KERNEL_DS);
+ do {
+ sent = mdev->data.socket->ops->sendpage(mdev->data.socket, page,
+ offset, len,
+ msg_flags);
+ if (sent == -EAGAIN) {
+ if (we_should_drop_the_connection(mdev,
+ mdev->data.socket))
+ break;
+ else
+ continue;
+ }
+ if (sent <= 0) {
+ dev_warn(DEV, "%s: size=%d len=%d sent=%d\n",
+ __func__, (int)size, len, sent);
+ break;
+ }
+ len -= sent;
+ offset += sent;
+ } while (len > 0 /* THINK && mdev->cstate >= C_CONNECTED*/);
+ set_fs(oldfs);
+ clear_bit(NET_CONGESTED, &mdev->flags);
+
+ ok = (len == 0);
+ if (likely(ok))
+ mdev->send_cnt += size>>9;
+ return ok;
+}
+
+static int _drbd_send_bio(struct drbd_conf *mdev, struct bio *bio)
+{
+ struct bio_vec *bvec;
+ int i;
+ /* hint all but last page with MSG_MORE */
+ __bio_for_each_segment(bvec, bio, i, 0) {
+ if (!_drbd_no_send_page(mdev, bvec->bv_page,
+ bvec->bv_offset, bvec->bv_len,
+ i == bio->bi_vcnt -1 ? 0 : MSG_MORE))
+ return 0;
+ }
+ return 1;
+}
+
+static int _drbd_send_zc_bio(struct drbd_conf *mdev, struct bio *bio)
+{
+ struct bio_vec *bvec;
+ int i;
+ /* hint all but last page with MSG_MORE */
+ __bio_for_each_segment(bvec, bio, i, 0) {
+ if (!_drbd_send_page(mdev, bvec->bv_page,
+ bvec->bv_offset, bvec->bv_len,
+ i == bio->bi_vcnt -1 ? 0 : MSG_MORE))
+ return 0;
+ }
+ return 1;
+}
+
+static int _drbd_send_zc_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e)
+{
+ struct page *page = e->pages;
+ unsigned len = e->size;
+ /* hint all but last page with MSG_MORE */
+ page_chain_for_each(page) {
+ unsigned l = min_t(unsigned, len, PAGE_SIZE);
+ if (!_drbd_send_page(mdev, page, 0, l,
+ page_chain_next(page) ? MSG_MORE : 0))
+ return 0;
+ len -= l;
+ }
+ return 1;
+}
+
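+/* translate the relevant bio flags into their DRBD wire representation;
+ * peers speaking a protocol older than 95 only understand the SYNC hint */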
+static u32 bio_flags_to_wire(struct drbd_conf *mdev, unsigned long bi_rw)
+{
+ if (mdev->agreed_pro_version >= 95)
+ return (bi_rw & REQ_SYNC ? DP_RW_SYNC : 0) |
+ (bi_rw & REQ_FUA ? DP_FUA : 0) |
+ (bi_rw & REQ_FLUSH ? DP_FLUSH : 0) |
+ (bi_rw & REQ_DISCARD ? DP_DISCARD : 0);
+ else
+ return bi_rw & REQ_SYNC ? DP_RW_SYNC : 0;
+}
+
+/* Used to send write requests
+ * R_PRIMARY -> Peer (P_DATA)
+ */
+int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req)
+{
+ int ok = 1;
+ struct p_data p;
+ unsigned int dp_flags = 0;
+ void *dgb;
+ int dgs;
+
+ if (!drbd_get_data_sock(mdev))
+ return 0;
+
+ dgs = (mdev->agreed_pro_version >= 87 && mdev->integrity_w_tfm) ?
+ crypto_hash_digestsize(mdev->integrity_w_tfm) : 0;
+
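+	/* requests that fit the 16-bit length field use the compact h80 header,
+	 * larger ones need the h95 header with its 32-bit length field */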
+ if (req->size <= DRBD_MAX_SIZE_H80_PACKET) {
+ p.head.h80.magic = BE_DRBD_MAGIC;
+ p.head.h80.command = cpu_to_be16(P_DATA);
+ p.head.h80.length =
+ cpu_to_be16(sizeof(p) - sizeof(union p_header) + dgs + req->size);
+ } else {
+ p.head.h95.magic = BE_DRBD_MAGIC_BIG;
+ p.head.h95.command = cpu_to_be16(P_DATA);
+ p.head.h95.length =
+ cpu_to_be32(sizeof(p) - sizeof(union p_header) + dgs + req->size);
+ }
+
+ p.sector = cpu_to_be64(req->sector);
+ p.block_id = (unsigned long)req;
+ p.seq_num = cpu_to_be32(req->seq_num =
+ atomic_add_return(1, &mdev->packet_seq));
+
+ dp_flags = bio_flags_to_wire(mdev, req->master_bio->bi_rw);
+
+ if (mdev->state.conn >= C_SYNC_SOURCE &&
+ mdev->state.conn <= C_PAUSED_SYNC_T)
+ dp_flags |= DP_MAY_SET_IN_SYNC;
+
+ p.dp_flags = cpu_to_be32(dp_flags);
+ set_bit(UNPLUG_REMOTE, &mdev->flags);
+ ok = (sizeof(p) ==
+ drbd_send(mdev, mdev->data.socket, &p, sizeof(p), dgs ? MSG_MORE : 0));
+ if (ok && dgs) {
+ dgb = mdev->int_dig_out;
+ drbd_csum_bio(mdev, mdev->integrity_w_tfm, req->master_bio, dgb);
+ ok = dgs == drbd_send(mdev, mdev->data.socket, dgb, dgs, 0);
+ }
+ if (ok) {
+ /* For protocol A, we have to memcpy the payload into
+ * socket buffers, as we may complete right away
+ * as soon as we handed it over to tcp, at which point the data
+ * pages may become invalid.
+ *
+		 * With data integrity enabled, we copy it as well, so we can be
+ * sure that even if the bio pages may still be modified, it
+ * won't change the data on the wire, thus if the digest checks
+ * out ok after sending on this side, but does not fit on the
+ * receiving side, we sure have detected corruption elsewhere.
+ */
+ if (mdev->net_conf->wire_protocol == DRBD_PROT_A || dgs)
+ ok = _drbd_send_bio(mdev, req->master_bio);
+ else
+ ok = _drbd_send_zc_bio(mdev, req->master_bio);
+
+ /* double check digest, sometimes buffers have been modified in flight. */
+ if (dgs > 0 && dgs <= 64) {
+ /* 64 byte, 512 bit, is the largest digest size
+ * currently supported in kernel crypto. */
+ unsigned char digest[64];
+ drbd_csum_bio(mdev, mdev->integrity_w_tfm, req->master_bio, digest);
+ if (memcmp(mdev->int_dig_out, digest, dgs)) {
+ dev_warn(DEV,
+ "Digest mismatch, buffer modified by upper layers during write: %llus +%u\n",
+ (unsigned long long)req->sector, req->size);
+ }
+ } /* else if (dgs > 64) {
+ ... Be noisy about digest too large ...
+ } */
+ }
+
+ drbd_put_data_sock(mdev);
+
+ return ok;
+}
+
+/* answer packet, used to send data back for read requests:
+ * Peer -> (diskless) R_PRIMARY (P_DATA_REPLY)
+ * C_SYNC_SOURCE -> C_SYNC_TARGET (P_RS_DATA_REPLY)
+ */
+int drbd_send_block(struct drbd_conf *mdev, enum drbd_packets cmd,
+ struct drbd_epoch_entry *e)
+{
+ int ok;
+ struct p_data p;
+ void *dgb;
+ int dgs;
+
+ dgs = (mdev->agreed_pro_version >= 87 && mdev->integrity_w_tfm) ?
+ crypto_hash_digestsize(mdev->integrity_w_tfm) : 0;
+
+ if (e->size <= DRBD_MAX_SIZE_H80_PACKET) {
+ p.head.h80.magic = BE_DRBD_MAGIC;
+ p.head.h80.command = cpu_to_be16(cmd);
+ p.head.h80.length =
+ cpu_to_be16(sizeof(p) - sizeof(struct p_header80) + dgs + e->size);
+ } else {
+ p.head.h95.magic = BE_DRBD_MAGIC_BIG;
+ p.head.h95.command = cpu_to_be16(cmd);
+ p.head.h95.length =
+ cpu_to_be32(sizeof(p) - sizeof(struct p_header80) + dgs + e->size);
+ }
+
+ p.sector = cpu_to_be64(e->sector);
+ p.block_id = e->block_id;
+ /* p.seq_num = 0; No sequence numbers here.. */
+
+ /* Only called by our kernel thread.
+ * This one may be interrupted by DRBD_SIG and/or DRBD_SIGKILL
+ * in response to admin command or module unload.
+ */
+ if (!drbd_get_data_sock(mdev))
+ return 0;
+
+ ok = sizeof(p) == drbd_send(mdev, mdev->data.socket, &p, sizeof(p), dgs ? MSG_MORE : 0);
+ if (ok && dgs) {
+ dgb = mdev->int_dig_out;
+ drbd_csum_ee(mdev, mdev->integrity_w_tfm, e, dgb);
+ ok = dgs == drbd_send(mdev, mdev->data.socket, dgb, dgs, 0);
+ }
+ if (ok)
+ ok = _drbd_send_zc_ee(mdev, e);
+
+ drbd_put_data_sock(mdev);
+
+ return ok;
+}
+
+int drbd_send_oos(struct drbd_conf *mdev, struct drbd_request *req)
+{
+ struct p_block_desc p;
+
+ p.sector = cpu_to_be64(req->sector);
+ p.blksize = cpu_to_be32(req->size);
+
+ return drbd_send_cmd(mdev, USE_DATA_SOCKET, P_OUT_OF_SYNC, &p.head, sizeof(p));
+}
+
+/*
+ drbd_send distinguishes two cases:
+
+ Packets sent via the data socket "sock"
+ and packets sent via the meta data socket "msock"
+
+ sock msock
+ -----------------+-------------------------+------------------------------
+ timeout conf.timeout / 2 conf.timeout / 2
+ timeout action send a ping via msock Abort communication
+ and close all sockets
+*/
+
+/*
+ * you must have down()ed the appropriate [m]sock_mutex elsewhere!
+ */
+int drbd_send(struct drbd_conf *mdev, struct socket *sock,
+ void *buf, size_t size, unsigned msg_flags)
+{
+ struct kvec iov;
+ struct msghdr msg;
+ int rv, sent = 0;
+
+ if (!sock)
+ return -1000;
+
+ /* THINK if (signal_pending) return ... ? */
+
+ iov.iov_base = buf;
+ iov.iov_len = size;
+
+ msg.msg_name = NULL;
+ msg.msg_namelen = 0;
+ msg.msg_control = NULL;
+ msg.msg_controllen = 0;
+ msg.msg_flags = msg_flags | MSG_NOSIGNAL;
+
+ if (sock == mdev->data.socket) {
+ mdev->ko_count = mdev->net_conf->ko_count;
+ drbd_update_congested(mdev);
+ }
+ do {
+ /* STRANGE
+ * tcp_sendmsg does _not_ use its size parameter at all ?
+ *
+ * -EAGAIN on timeout, -EINTR on signal.
+ */
+/* THINK
+ * do we need to block DRBD_SIG if sock == &meta.socket ??
+ * otherwise wake_asender() might interrupt some send_*Ack !
+ */
+ rv = kernel_sendmsg(sock, &msg, &iov, 1, size);
+ if (rv == -EAGAIN) {
+ if (we_should_drop_the_connection(mdev, sock))
+ break;
+ else
+ continue;
+ }
+ D_ASSERT(rv != 0);
+ if (rv == -EINTR) {
+ flush_signals(current);
+ rv = 0;
+ }
+ if (rv < 0)
+ break;
+ sent += rv;
+ iov.iov_base += rv;
+ iov.iov_len -= rv;
+ } while (sent < size);
+
+ if (sock == mdev->data.socket)
+ clear_bit(NET_CONGESTED, &mdev->flags);
+
+ if (rv <= 0) {
+ if (rv != -EAGAIN) {
+ dev_err(DEV, "%s_sendmsg returned %d\n",
+ sock == mdev->meta.socket ? "msock" : "sock",
+ rv);
+ drbd_force_state(mdev, NS(conn, C_BROKEN_PIPE));
+ } else
+ drbd_force_state(mdev, NS(conn, C_TIMEOUT));
+ }
+
+ return sent;
+}
+
+static int drbd_open(struct block_device *bdev, fmode_t mode)
+{
+ struct drbd_conf *mdev = bdev->bd_disk->private_data;
+ unsigned long flags;
+ int rv = 0;
+
+ mutex_lock(&drbd_main_mutex);
+ spin_lock_irqsave(&mdev->req_lock, flags);
+ /* to have a stable mdev->state.role
+ * and no race with updating open_cnt */
+
+ if (mdev->state.role != R_PRIMARY) {
+ if (mode & FMODE_WRITE)
+ rv = -EROFS;
+ else if (!allow_oos)
+ rv = -EMEDIUMTYPE;
+ }
+
+ if (!rv)
+ mdev->open_cnt++;
+ spin_unlock_irqrestore(&mdev->req_lock, flags);
+ mutex_unlock(&drbd_main_mutex);
+
+ return rv;
+}
+
+static int drbd_release(struct gendisk *gd, fmode_t mode)
+{
+ struct drbd_conf *mdev = gd->private_data;
+ mutex_lock(&drbd_main_mutex);
+ mdev->open_cnt--;
+ mutex_unlock(&drbd_main_mutex);
+ return 0;
+}
+
+static void drbd_set_defaults(struct drbd_conf *mdev)
+{
+ /* This way we get a compile error when sync_conf grows,
+	   and forget to initialize it here */
+ mdev->sync_conf = (struct syncer_conf) {
+ /* .rate = */ DRBD_RATE_DEF,
+ /* .after = */ DRBD_AFTER_DEF,
+ /* .al_extents = */ DRBD_AL_EXTENTS_DEF,
+ /* .verify_alg = */ {}, 0,
+ /* .cpu_mask = */ {}, 0,
+ /* .csums_alg = */ {}, 0,
+ /* .use_rle = */ 0,
+ /* .on_no_data = */ DRBD_ON_NO_DATA_DEF,
+ /* .c_plan_ahead = */ DRBD_C_PLAN_AHEAD_DEF,
+ /* .c_delay_target = */ DRBD_C_DELAY_TARGET_DEF,
+ /* .c_fill_target = */ DRBD_C_FILL_TARGET_DEF,
+ /* .c_max_rate = */ DRBD_C_MAX_RATE_DEF,
+ /* .c_min_rate = */ DRBD_C_MIN_RATE_DEF
+ };
+
+	/* Have to do it this way, because the layout differs between
+	   big endian and little endian */
+ mdev->state = (union drbd_state) {
+ { .role = R_SECONDARY,
+ .peer = R_UNKNOWN,
+ .conn = C_STANDALONE,
+ .disk = D_DISKLESS,
+ .pdsk = D_UNKNOWN,
+ .susp = 0,
+ .susp_nod = 0,
+ .susp_fen = 0
+ } };
+}
+
+void drbd_init_set_defaults(struct drbd_conf *mdev)
+{
+ /* the memset(,0,) did most of this.
+ * note: only assignments, no allocation in here */
+
+ drbd_set_defaults(mdev);
+
+ atomic_set(&mdev->ap_bio_cnt, 0);
+ atomic_set(&mdev->ap_pending_cnt, 0);
+ atomic_set(&mdev->rs_pending_cnt, 0);
+ atomic_set(&mdev->unacked_cnt, 0);
+ atomic_set(&mdev->local_cnt, 0);
+ atomic_set(&mdev->net_cnt, 0);
+ atomic_set(&mdev->packet_seq, 0);
+ atomic_set(&mdev->pp_in_use, 0);
+ atomic_set(&mdev->pp_in_use_by_net, 0);
+ atomic_set(&mdev->rs_sect_in, 0);
+ atomic_set(&mdev->rs_sect_ev, 0);
+ atomic_set(&mdev->ap_in_flight, 0);
+
+ mutex_init(&mdev->md_io_mutex);
+ mutex_init(&mdev->data.mutex);
+ mutex_init(&mdev->meta.mutex);
+ sema_init(&mdev->data.work.s, 0);
+ sema_init(&mdev->meta.work.s, 0);
+ mutex_init(&mdev->state_mutex);
+
+ spin_lock_init(&mdev->data.work.q_lock);
+ spin_lock_init(&mdev->meta.work.q_lock);
+
+ spin_lock_init(&mdev->al_lock);
+ spin_lock_init(&mdev->req_lock);
+ spin_lock_init(&mdev->peer_seq_lock);
+ spin_lock_init(&mdev->epoch_lock);
+
+ INIT_LIST_HEAD(&mdev->active_ee);
+ INIT_LIST_HEAD(&mdev->sync_ee);
+ INIT_LIST_HEAD(&mdev->done_ee);
+ INIT_LIST_HEAD(&mdev->read_ee);
+ INIT_LIST_HEAD(&mdev->net_ee);
+ INIT_LIST_HEAD(&mdev->resync_reads);
+ INIT_LIST_HEAD(&mdev->data.work.q);
+ INIT_LIST_HEAD(&mdev->meta.work.q);
+ INIT_LIST_HEAD(&mdev->resync_work.list);
+ INIT_LIST_HEAD(&mdev->unplug_work.list);
+ INIT_LIST_HEAD(&mdev->go_diskless.list);
+ INIT_LIST_HEAD(&mdev->md_sync_work.list);
+ INIT_LIST_HEAD(&mdev->start_resync_work.list);
+ INIT_LIST_HEAD(&mdev->bm_io_work.w.list);
+
+ mdev->resync_work.cb = w_resync_timer;
+ mdev->unplug_work.cb = w_send_write_hint;
+ mdev->go_diskless.cb = w_go_diskless;
+ mdev->md_sync_work.cb = w_md_sync;
+ mdev->bm_io_work.w.cb = w_bitmap_io;
+ mdev->start_resync_work.cb = w_start_resync;
+ init_timer(&mdev->resync_timer);
+ init_timer(&mdev->md_sync_timer);
+ init_timer(&mdev->start_resync_timer);
+ init_timer(&mdev->request_timer);
+ mdev->resync_timer.function = resync_timer_fn;
+ mdev->resync_timer.data = (unsigned long) mdev;
+ mdev->md_sync_timer.function = md_sync_timer_fn;
+ mdev->md_sync_timer.data = (unsigned long) mdev;
+ mdev->start_resync_timer.function = start_resync_timer_fn;
+ mdev->start_resync_timer.data = (unsigned long) mdev;
+ mdev->request_timer.function = request_timer_fn;
+ mdev->request_timer.data = (unsigned long) mdev;
+
+ init_waitqueue_head(&mdev->misc_wait);
+ init_waitqueue_head(&mdev->state_wait);
+ init_waitqueue_head(&mdev->net_cnt_wait);
+ init_waitqueue_head(&mdev->ee_wait);
+ init_waitqueue_head(&mdev->al_wait);
+ init_waitqueue_head(&mdev->seq_wait);
+
+ drbd_thread_init(mdev, &mdev->receiver, drbdd_init);
+ drbd_thread_init(mdev, &mdev->worker, drbd_worker);
+ drbd_thread_init(mdev, &mdev->asender, drbd_asender);
+
+ mdev->agreed_pro_version = PRO_VERSION_MAX;
+ mdev->write_ordering = WO_bdev_flush;
+ mdev->resync_wenr = LC_FREE;
+ mdev->peer_max_bio_size = DRBD_MAX_BIO_SIZE_SAFE;
+ mdev->local_max_bio_size = DRBD_MAX_BIO_SIZE_SAFE;
+}
+
+void drbd_mdev_cleanup(struct drbd_conf *mdev)
+{
+ int i;
+ if (mdev->receiver.t_state != None)
+ dev_err(DEV, "ASSERT FAILED: receiver t_state == %d expected 0.\n",
+ mdev->receiver.t_state);
+
+ /* no need to lock it, I'm the only thread alive */
+ if (atomic_read(&mdev->current_epoch->epoch_size) != 0)
+ dev_err(DEV, "epoch_size:%d\n", atomic_read(&mdev->current_epoch->epoch_size));
+ mdev->al_writ_cnt =
+ mdev->bm_writ_cnt =
+ mdev->read_cnt =
+ mdev->recv_cnt =
+ mdev->send_cnt =
+ mdev->writ_cnt =
+ mdev->p_size =
+ mdev->rs_start =
+ mdev->rs_total =
+ mdev->rs_failed = 0;
+ mdev->rs_last_events = 0;
+ mdev->rs_last_sect_ev = 0;
+ for (i = 0; i < DRBD_SYNC_MARKS; i++) {
+ mdev->rs_mark_left[i] = 0;
+ mdev->rs_mark_time[i] = 0;
+ }
+ D_ASSERT(mdev->net_conf == NULL);
+
+ drbd_set_my_capacity(mdev, 0);
+ if (mdev->bitmap) {
+ /* maybe never allocated. */
+ drbd_bm_resize(mdev, 0, 1);
+ drbd_bm_cleanup(mdev);
+ }
+
+ drbd_free_resources(mdev);
+ clear_bit(AL_SUSPENDED, &mdev->flags);
+
+ /*
+ * currently we drbd_init_ee only on module load, so
+ * we may do drbd_release_ee only on module unload!
+ */
+ D_ASSERT(list_empty(&mdev->active_ee));
+ D_ASSERT(list_empty(&mdev->sync_ee));
+ D_ASSERT(list_empty(&mdev->done_ee));
+ D_ASSERT(list_empty(&mdev->read_ee));
+ D_ASSERT(list_empty(&mdev->net_ee));
+ D_ASSERT(list_empty(&mdev->resync_reads));
+ D_ASSERT(list_empty(&mdev->data.work.q));
+ D_ASSERT(list_empty(&mdev->meta.work.q));
+ D_ASSERT(list_empty(&mdev->resync_work.list));
+ D_ASSERT(list_empty(&mdev->unplug_work.list));
+ D_ASSERT(list_empty(&mdev->go_diskless.list));
+
+ drbd_set_defaults(mdev);
+}
+
+
+static void drbd_destroy_mempools(void)
+{
+ struct page *page;
+
+ while (drbd_pp_pool) {
+ page = drbd_pp_pool;
+ drbd_pp_pool = (struct page *)page_private(page);
+ __free_page(page);
+ drbd_pp_vacant--;
+ }
+
+ /* D_ASSERT(atomic_read(&drbd_pp_vacant)==0); */
+
+ if (drbd_ee_mempool)
+ mempool_destroy(drbd_ee_mempool);
+ if (drbd_request_mempool)
+ mempool_destroy(drbd_request_mempool);
+ if (drbd_ee_cache)
+ kmem_cache_destroy(drbd_ee_cache);
+ if (drbd_request_cache)
+ kmem_cache_destroy(drbd_request_cache);
+ if (drbd_bm_ext_cache)
+ kmem_cache_destroy(drbd_bm_ext_cache);
+ if (drbd_al_ext_cache)
+ kmem_cache_destroy(drbd_al_ext_cache);
+
+ drbd_ee_mempool = NULL;
+ drbd_request_mempool = NULL;
+ drbd_ee_cache = NULL;
+ drbd_request_cache = NULL;
+ drbd_bm_ext_cache = NULL;
+ drbd_al_ext_cache = NULL;
+
+ return;
+}
+
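+/* Set up the slab caches, mempools and the private page pool; the pool is
+ * sized at (DRBD_MAX_BIO_SIZE/PAGE_SIZE) pages per configured minor.
+ * On failure, whatever was allocated so far is torn down again. */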
+static int drbd_create_mempools(void)
+{
+ struct page *page;
+ const int number = (DRBD_MAX_BIO_SIZE/PAGE_SIZE) * minor_count;
+ int i;
+
+ /* prepare our caches and mempools */
+ drbd_request_mempool = NULL;
+ drbd_ee_cache = NULL;
+ drbd_request_cache = NULL;
+ drbd_bm_ext_cache = NULL;
+ drbd_al_ext_cache = NULL;
+ drbd_pp_pool = NULL;
+
+ /* caches */
+ drbd_request_cache = kmem_cache_create(
+ "drbd_req", sizeof(struct drbd_request), 0, 0, NULL);
+ if (drbd_request_cache == NULL)
+ goto Enomem;
+
+ drbd_ee_cache = kmem_cache_create(
+ "drbd_ee", sizeof(struct drbd_epoch_entry), 0, 0, NULL);
+ if (drbd_ee_cache == NULL)
+ goto Enomem;
+
+ drbd_bm_ext_cache = kmem_cache_create(
+ "drbd_bm", sizeof(struct bm_extent), 0, 0, NULL);
+ if (drbd_bm_ext_cache == NULL)
+ goto Enomem;
+
+ drbd_al_ext_cache = kmem_cache_create(
+ "drbd_al", sizeof(struct lc_element), 0, 0, NULL);
+ if (drbd_al_ext_cache == NULL)
+ goto Enomem;
+
+ /* mempools */
+ drbd_request_mempool = mempool_create(number,
+ mempool_alloc_slab, mempool_free_slab, drbd_request_cache);
+ if (drbd_request_mempool == NULL)
+ goto Enomem;
+
+ drbd_ee_mempool = mempool_create(number,
+ mempool_alloc_slab, mempool_free_slab, drbd_ee_cache);
+ if (drbd_ee_mempool == NULL)
+ goto Enomem;
+
+ /* drbd's page pool */
+ spin_lock_init(&drbd_pp_lock);
+
+ for (i = 0; i < number; i++) {
+ page = alloc_page(GFP_HIGHUSER);
+ if (!page)
+ goto Enomem;
+ set_page_private(page, (unsigned long)drbd_pp_pool);
+ drbd_pp_pool = page;
+ }
+ drbd_pp_vacant = number;
+
+ return 0;
+
+Enomem:
+ drbd_destroy_mempools(); /* in case we allocated some */
+ return -ENOMEM;
+}
+
+static int drbd_notify_sys(struct notifier_block *this, unsigned long code,
+ void *unused)
+{
+ /* just so we have it. you never know what interesting things we
+ * might want to do here some day...
+ */
+
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block drbd_notifier = {
+ .notifier_call = drbd_notify_sys,
+};
+
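+/* Release all epoch entries still queued on this device and complain about
+ * every list that was unexpectedly non-empty. */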
+static void drbd_release_ee_lists(struct drbd_conf *mdev)
+{
+ int rr;
+
+ rr = drbd_release_ee(mdev, &mdev->active_ee);
+ if (rr)
+ dev_err(DEV, "%d EEs in active list found!\n", rr);
+
+ rr = drbd_release_ee(mdev, &mdev->sync_ee);
+ if (rr)
+ dev_err(DEV, "%d EEs in sync list found!\n", rr);
+
+ rr = drbd_release_ee(mdev, &mdev->read_ee);
+ if (rr)
+ dev_err(DEV, "%d EEs in read list found!\n", rr);
+
+ rr = drbd_release_ee(mdev, &mdev->done_ee);
+ if (rr)
+ dev_err(DEV, "%d EEs in done list found!\n", rr);
+
+ rr = drbd_release_ee(mdev, &mdev->net_ee);
+ if (rr)
+ dev_err(DEV, "%d EEs in net list found!\n", rr);
+}
+
+/* caution. no locking.
+ * currently only used from module cleanup code. */
+static void drbd_delete_device(unsigned int minor)
+{
+ struct drbd_conf *mdev = minor_to_mdev(minor);
+
+ if (!mdev)
+ return;
+
+ /* paranoia asserts */
+ if (mdev->open_cnt != 0)
+ dev_err(DEV, "open_cnt = %d in %s:%u", mdev->open_cnt,
+ __FILE__ , __LINE__);
+
+ ERR_IF (!list_empty(&mdev->data.work.q)) {
+ struct list_head *lp;
+ list_for_each(lp, &mdev->data.work.q) {
+ dev_err(DEV, "lp = %p\n", lp);
+ }
+ };
+ /* end paranoia asserts */
+
+ del_gendisk(mdev->vdisk);
+
+ /* cleanup stuff that may have been allocated during
+ * device (re-)configuration or state changes */
+
+ if (mdev->this_bdev)
+ bdput(mdev->this_bdev);
+
+ drbd_free_resources(mdev);
+
+ drbd_release_ee_lists(mdev);
+
+ /* should be freed on disconnect? */
+ kfree(mdev->ee_hash);
+ /*
+ mdev->ee_hash_s = 0;
+ mdev->ee_hash = NULL;
+ */
+
+ lc_destroy(mdev->act_log);
+ lc_destroy(mdev->resync);
+
+ kfree(mdev->p_uuid);
+ /* mdev->p_uuid = NULL; */
+
+ kfree(mdev->int_dig_out);
+ kfree(mdev->int_dig_in);
+ kfree(mdev->int_dig_vv);
+
+ /* cleanup the rest that has been
+ * allocated from drbd_new_device
+ * and actually free the mdev itself */
+ drbd_free_mdev(mdev);
+}
+
+static void drbd_cleanup(void)
+{
+ unsigned int i;
+
+ unregister_reboot_notifier(&drbd_notifier);
+
+	/* first remove proc,
+	 * drbdsetup uses its presence to detect
+	 * whether DRBD is loaded.
+	 * If we got stuck in proc removal,
+	 * but had netlink already deregistered,
+	 * some drbdsetup commands could wait forever
+	 * for an answer.
+	 */
+ if (drbd_proc)
+ remove_proc_entry("drbd", NULL);
+
+ drbd_nl_cleanup();
+
+ if (minor_table) {
+ i = minor_count;
+ while (i--)
+ drbd_delete_device(i);
+ drbd_destroy_mempools();
+ }
+
+ kfree(minor_table);
+
+ unregister_blkdev(DRBD_MAJOR, "drbd");
+
+ printk(KERN_INFO "drbd: module cleanup done.\n");
+}
+
+/**
+ * drbd_congested() - Callback for pdflush
+ * @congested_data: User data
+ * @bdi_bits: Bits pdflush is currently interested in
+ *
+ * Returns 1<<BDI_async_congested and/or 1<<BDI_sync_congested if we are congested.
+ */
+static int drbd_congested(void *congested_data, int bdi_bits)
+{
+ struct drbd_conf *mdev = congested_data;
+ struct request_queue *q;
+ char reason = '-';
+ int r = 0;
+
+ if (!may_inc_ap_bio(mdev)) {
+ /* DRBD has frozen IO */
+ r = bdi_bits;
+ reason = 'd';
+ goto out;
+ }
+
+ if (get_ldev(mdev)) {
+ q = bdev_get_queue(mdev->ldev->backing_bdev);
+ r = bdi_congested(&q->backing_dev_info, bdi_bits);
+ put_ldev(mdev);
+ if (r)
+ reason = 'b';
+ }
+
+ if (bdi_bits & (1 << BDI_async_congested) && test_bit(NET_CONGESTED, &mdev->flags)) {
+ r |= (1 << BDI_async_congested);
+ reason = reason == 'b' ? 'a' : 'n';
+ }
+
+out:
+ mdev->congestion_reason = reason;
+ return r;
+}
+
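+/* Allocate and initialize everything that makes up a minor device: the
+ * drbd_conf itself, its request queue, gendisk, meta-data IO page, bitmap,
+ * transfer log, read hash and initial epoch.
+ * Unwinds and returns NULL on any allocation failure. */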
+struct drbd_conf *drbd_new_device(unsigned int minor)
+{
+ struct drbd_conf *mdev;
+ struct gendisk *disk;
+ struct request_queue *q;
+
+ /* GFP_KERNEL, we are outside of all write-out paths */
+ mdev = kzalloc(sizeof(struct drbd_conf), GFP_KERNEL);
+ if (!mdev)
+ return NULL;
+ if (!zalloc_cpumask_var(&mdev->cpu_mask, GFP_KERNEL))
+ goto out_no_cpumask;
+
+ mdev->minor = minor;
+
+ drbd_init_set_defaults(mdev);
+
+ q = blk_alloc_queue(GFP_KERNEL);
+ if (!q)
+ goto out_no_q;
+ mdev->rq_queue = q;
+ q->queuedata = mdev;
+
+ disk = alloc_disk(1);
+ if (!disk)
+ goto out_no_disk;
+ mdev->vdisk = disk;
+
+ set_disk_ro(disk, true);
+
+ disk->queue = q;
+ disk->major = DRBD_MAJOR;
+ disk->first_minor = minor;
+ disk->fops = &drbd_ops;
+ sprintf(disk->disk_name, "drbd%d", minor);
+ disk->private_data = mdev;
+
+ mdev->this_bdev = bdget(MKDEV(DRBD_MAJOR, minor));
+ /* we have no partitions. we contain only ourselves. */
+ mdev->this_bdev->bd_contains = mdev->this_bdev;
+
+ q->backing_dev_info.congested_fn = drbd_congested;
+ q->backing_dev_info.congested_data = mdev;
+
+ blk_queue_make_request(q, drbd_make_request);
+	/* Setting max_hw_sectors to the odd value of 8 KiB here;
+	   this triggers a max_bio_size message upon first attach or connect */
+ blk_queue_max_hw_sectors(q, DRBD_MAX_BIO_SIZE_SAFE >> 8);
+ blk_queue_bounce_limit(q, BLK_BOUNCE_ANY);
+ blk_queue_merge_bvec(q, drbd_merge_bvec);
+ q->queue_lock = &mdev->req_lock;
+
+ mdev->md_io_page = alloc_page(GFP_KERNEL);
+ if (!mdev->md_io_page)
+ goto out_no_io_page;
+
+ if (drbd_bm_init(mdev))
+ goto out_no_bitmap;
+ /* no need to lock access, we are still initializing this minor device. */
+ if (!tl_init(mdev))
+ goto out_no_tl;
+
+ mdev->app_reads_hash = kzalloc(APP_R_HSIZE*sizeof(void *), GFP_KERNEL);
+ if (!mdev->app_reads_hash)
+ goto out_no_app_reads;
+
+ mdev->current_epoch = kzalloc(sizeof(struct drbd_epoch), GFP_KERNEL);
+ if (!mdev->current_epoch)
+ goto out_no_epoch;
+
+ INIT_LIST_HEAD(&mdev->current_epoch->list);
+ mdev->epochs = 1;
+
+ return mdev;
+
+/* out_whatever_else:
+ kfree(mdev->current_epoch); */
+out_no_epoch:
+ kfree(mdev->app_reads_hash);
+out_no_app_reads:
+ tl_cleanup(mdev);
+out_no_tl:
+ drbd_bm_cleanup(mdev);
+out_no_bitmap:
+ __free_page(mdev->md_io_page);
+out_no_io_page:
+ put_disk(disk);
+out_no_disk:
+ blk_cleanup_queue(q);
+out_no_q:
+ free_cpumask_var(mdev->cpu_mask);
+out_no_cpumask:
+ kfree(mdev);
+ return NULL;
+}
+
+/* counterpart of drbd_new_device.
+ * last part of drbd_delete_device. */
+void drbd_free_mdev(struct drbd_conf *mdev)
+{
+ kfree(mdev->current_epoch);
+ kfree(mdev->app_reads_hash);
+ tl_cleanup(mdev);
+ if (mdev->bitmap) /* should no longer be there. */
+ drbd_bm_cleanup(mdev);
+ __free_page(mdev->md_io_page);
+ put_disk(mdev->vdisk);
+ blk_cleanup_queue(mdev->rq_queue);
+ free_cpumask_var(mdev->cpu_mask);
+ drbd_free_tl_hash(mdev);
+ kfree(mdev);
+}
+
+
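+/* Module init: verify the on-wire handshake packet layout and minor_count,
+ * register the netlink interface, the block major and the reboot notifier,
+ * then allocate minor_table, the mempools and /proc/drbd. */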
+int __init drbd_init(void)
+{
+ int err;
+
+ if (sizeof(struct p_handshake) != 80) {
+ printk(KERN_ERR
+ "drbd: never change the size or layout "
+ "of the HandShake packet.\n");
+ return -EINVAL;
+ }
+
+ if (minor_count < DRBD_MINOR_COUNT_MIN || minor_count > DRBD_MINOR_COUNT_MAX) {
+ printk(KERN_ERR
+ "drbd: invalid minor_count (%d)\n", minor_count);
+#ifdef MODULE
+ return -EINVAL;
+#else
+ minor_count = 8;
+#endif
+ }
+
+ err = drbd_nl_init();
+ if (err)
+ return err;
+
+ err = register_blkdev(DRBD_MAJOR, "drbd");
+ if (err) {
+ printk(KERN_ERR
+ "drbd: unable to register block device major %d\n",
+ DRBD_MAJOR);
+ return err;
+ }
+
+ register_reboot_notifier(&drbd_notifier);
+
+ /*
+ * allocate all necessary structs
+ */
+ err = -ENOMEM;
+
+ init_waitqueue_head(&drbd_pp_wait);
+
+ drbd_proc = NULL; /* play safe for drbd_cleanup */
+ minor_table = kzalloc(sizeof(struct drbd_conf *)*minor_count,
+ GFP_KERNEL);
+ if (!minor_table)
+ goto Enomem;
+
+ err = drbd_create_mempools();
+ if (err)
+ goto Enomem;
+
+ drbd_proc = proc_create_data("drbd", S_IFREG | S_IRUGO , NULL, &drbd_proc_fops, NULL);
+ if (!drbd_proc) {
+ printk(KERN_ERR "drbd: unable to register proc file\n");
+ goto Enomem;
+ }
+
+ rwlock_init(&global_state_lock);
+
+ printk(KERN_INFO "drbd: initialized. "
+ "Version: " REL_VERSION " (api:%d/proto:%d-%d)\n",
+ API_VERSION, PRO_VERSION_MIN, PRO_VERSION_MAX);
+ printk(KERN_INFO "drbd: %s\n", drbd_buildtag());
+ printk(KERN_INFO "drbd: registered as block device major %d\n",
+ DRBD_MAJOR);
+ printk(KERN_INFO "drbd: minor_table @ 0x%p\n", minor_table);
+
+ return 0; /* Success! */
+
+Enomem:
+ drbd_cleanup();
+ if (err == -ENOMEM)
+ /* currently always the case */
+ printk(KERN_ERR "drbd: ran out of memory\n");
+ else
+ printk(KERN_ERR "drbd: initialization failure\n");
+ return err;
+}
+
+void drbd_free_bc(struct drbd_backing_dev *ldev)
+{
+ if (ldev == NULL)
+ return;
+
+ blkdev_put(ldev->backing_bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
+ blkdev_put(ldev->md_bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
+
+ kfree(ldev);
+}
+
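+/* Shut down and release the data and meta sockets, if present, each under
+ * its own mutex. */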
+void drbd_free_sock(struct drbd_conf *mdev)
+{
+ if (mdev->data.socket) {
+ mutex_lock(&mdev->data.mutex);
+ kernel_sock_shutdown(mdev->data.socket, SHUT_RDWR);
+ sock_release(mdev->data.socket);
+ mdev->data.socket = NULL;
+ mutex_unlock(&mdev->data.mutex);
+ }
+ if (mdev->meta.socket) {
+ mutex_lock(&mdev->meta.mutex);
+ kernel_sock_shutdown(mdev->meta.socket, SHUT_RDWR);
+ sock_release(mdev->meta.socket);
+ mdev->meta.socket = NULL;
+ mutex_unlock(&mdev->meta.mutex);
+ }
+}
+
+
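+/* Free the per-connection crypto transforms, close both sockets and drop the
+ * reference to the local backing device. */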
+void drbd_free_resources(struct drbd_conf *mdev)
+{
+ crypto_free_hash(mdev->csums_tfm);
+ mdev->csums_tfm = NULL;
+ crypto_free_hash(mdev->verify_tfm);
+ mdev->verify_tfm = NULL;
+ crypto_free_hash(mdev->cram_hmac_tfm);
+ mdev->cram_hmac_tfm = NULL;
+ crypto_free_hash(mdev->integrity_w_tfm);
+ mdev->integrity_w_tfm = NULL;
+ crypto_free_hash(mdev->integrity_r_tfm);
+ mdev->integrity_r_tfm = NULL;
+
+ drbd_free_sock(mdev);
+
+ __no_warn(local,
+ drbd_free_bc(mdev->ldev);
+ mdev->ldev = NULL;);
+}
+
+/* meta data management */
+
+struct meta_data_on_disk {
+ u64 la_size; /* last agreed size. */
+ u64 uuid[UI_SIZE]; /* UUIDs. */
+ u64 device_uuid;
+ u64 reserved_u64_1;
+ u32 flags; /* MDF */
+ u32 magic;
+ u32 md_size_sect;
+ u32 al_offset; /* offset to this block */
+ u32 al_nr_extents; /* important for restoring the AL */
+ /* `-- act_log->nr_elements <-- sync_conf.al_extents */
+ u32 bm_offset; /* offset to the bitmap, from here */
+ u32 bm_bytes_per_bit; /* BM_BLOCK_SIZE */
+ u32 la_peer_max_bio_size; /* last peer max_bio_size */
+ u32 reserved_u32[3];
+
+} __packed;
+
+/**
+ * drbd_md_sync() - Writes the meta data super block if the MD_DIRTY flag bit is set
+ * @mdev: DRBD device.
+ */
+void drbd_md_sync(struct drbd_conf *mdev)
+{
+ struct meta_data_on_disk *buffer;
+ sector_t sector;
+ int i;
+
+ del_timer(&mdev->md_sync_timer);
+ /* timer may be rearmed by drbd_md_mark_dirty() now. */
+ if (!test_and_clear_bit(MD_DIRTY, &mdev->flags))
+ return;
+
+	/* We use D_FAILED here, and not D_ATTACHING, because we try to write
+	 * metadata even if we detach due to a disk failure! */
+ if (!get_ldev_if_state(mdev, D_FAILED))
+ return;
+
+ mutex_lock(&mdev->md_io_mutex);
+ buffer = (struct meta_data_on_disk *)page_address(mdev->md_io_page);
+ memset(buffer, 0, 512);
+
+ buffer->la_size = cpu_to_be64(drbd_get_capacity(mdev->this_bdev));
+ for (i = UI_CURRENT; i < UI_SIZE; i++)
+ buffer->uuid[i] = cpu_to_be64(mdev->ldev->md.uuid[i]);
+ buffer->flags = cpu_to_be32(mdev->ldev->md.flags);
+ buffer->magic = cpu_to_be32(DRBD_MD_MAGIC);
+
+ buffer->md_size_sect = cpu_to_be32(mdev->ldev->md.md_size_sect);
+ buffer->al_offset = cpu_to_be32(mdev->ldev->md.al_offset);
+ buffer->al_nr_extents = cpu_to_be32(mdev->act_log->nr_elements);
+ buffer->bm_bytes_per_bit = cpu_to_be32(BM_BLOCK_SIZE);
+ buffer->device_uuid = cpu_to_be64(mdev->ldev->md.device_uuid);
+
+ buffer->bm_offset = cpu_to_be32(mdev->ldev->md.bm_offset);
+ buffer->la_peer_max_bio_size = cpu_to_be32(mdev->peer_max_bio_size);
+
+ D_ASSERT(drbd_md_ss__(mdev, mdev->ldev) == mdev->ldev->md.md_offset);
+ sector = mdev->ldev->md.md_offset;
+
+ if (!drbd_md_sync_page_io(mdev, mdev->ldev, sector, WRITE)) {
+ /* this was a try anyways ... */
+ dev_err(DEV, "meta data update failed!\n");
+ drbd_chk_io_error(mdev, 1, true);
+ }
+
+ /* Update mdev->ldev->md.la_size_sect,
+ * since we updated it on metadata. */
+ mdev->ldev->md.la_size_sect = drbd_get_capacity(mdev->this_bdev);
+
+ mutex_unlock(&mdev->md_io_mutex);
+ put_ldev(mdev);
+}
+
+/**
+ * drbd_md_read() - Reads in the meta data super block
+ * @mdev: DRBD device.
+ * @bdev: Device from which the meta data should be read in.
+ *
+ * Return 0 (NO_ERROR) on success, and an enum drbd_ret_code in case
+ * something goes wrong. Currently only: ERR_IO_MD_DISK, ERR_MD_INVALID.
+ */
+int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
+{
+ struct meta_data_on_disk *buffer;
+ int i, rv = NO_ERROR;
+
+ if (!get_ldev_if_state(mdev, D_ATTACHING))
+ return ERR_IO_MD_DISK;
+
+ mutex_lock(&mdev->md_io_mutex);
+ buffer = (struct meta_data_on_disk *)page_address(mdev->md_io_page);
+
+ if (!drbd_md_sync_page_io(mdev, bdev, bdev->md.md_offset, READ)) {
+ /* NOTE: can't do normal error processing here as this is
+ called BEFORE disk is attached */
+ dev_err(DEV, "Error while reading metadata.\n");
+ rv = ERR_IO_MD_DISK;
+ goto err;
+ }
+
+ if (be32_to_cpu(buffer->magic) != DRBD_MD_MAGIC) {
+ dev_err(DEV, "Error while reading metadata, magic not found.\n");
+ rv = ERR_MD_INVALID;
+ goto err;
+ }
+ if (be32_to_cpu(buffer->al_offset) != bdev->md.al_offset) {
+ dev_err(DEV, "unexpected al_offset: %d (expected %d)\n",
+ be32_to_cpu(buffer->al_offset), bdev->md.al_offset);
+ rv = ERR_MD_INVALID;
+ goto err;
+ }
+ if (be32_to_cpu(buffer->bm_offset) != bdev->md.bm_offset) {
+ dev_err(DEV, "unexpected bm_offset: %d (expected %d)\n",
+ be32_to_cpu(buffer->bm_offset), bdev->md.bm_offset);
+ rv = ERR_MD_INVALID;
+ goto err;
+ }
+ if (be32_to_cpu(buffer->md_size_sect) != bdev->md.md_size_sect) {
+ dev_err(DEV, "unexpected md_size: %u (expected %u)\n",
+ be32_to_cpu(buffer->md_size_sect), bdev->md.md_size_sect);
+ rv = ERR_MD_INVALID;
+ goto err;
+ }
+
+ if (be32_to_cpu(buffer->bm_bytes_per_bit) != BM_BLOCK_SIZE) {
+ dev_err(DEV, "unexpected bm_bytes_per_bit: %u (expected %u)\n",
+ be32_to_cpu(buffer->bm_bytes_per_bit), BM_BLOCK_SIZE);
+ rv = ERR_MD_INVALID;
+ goto err;
+ }
+
+ bdev->md.la_size_sect = be64_to_cpu(buffer->la_size);
+ for (i = UI_CURRENT; i < UI_SIZE; i++)
+ bdev->md.uuid[i] = be64_to_cpu(buffer->uuid[i]);
+ bdev->md.flags = be32_to_cpu(buffer->flags);
+ mdev->sync_conf.al_extents = be32_to_cpu(buffer->al_nr_extents);
+ bdev->md.device_uuid = be64_to_cpu(buffer->device_uuid);
+
+ spin_lock_irq(&mdev->req_lock);
+ if (mdev->state.conn < C_CONNECTED) {
+ int peer;
+ peer = be32_to_cpu(buffer->la_peer_max_bio_size);
+ peer = max_t(int, peer, DRBD_MAX_BIO_SIZE_SAFE);
+ mdev->peer_max_bio_size = peer;
+ }
+ spin_unlock_irq(&mdev->req_lock);
+
+ if (mdev->sync_conf.al_extents < 7)
+ mdev->sync_conf.al_extents = 127;
+
+ err:
+ mutex_unlock(&mdev->md_io_mutex);
+ put_ldev(mdev);
+
+ return rv;
+}
+
+/**
+ * drbd_md_mark_dirty() - Mark meta data super block as dirty
+ * @mdev: DRBD device.
+ *
+ * Call this function if you change anything that should be written to
+ * the meta-data super block. This function sets MD_DIRTY and starts a
+ * timer that ensures drbd_md_sync() gets called within five seconds.
+ */
+#ifdef DEBUG
+void drbd_md_mark_dirty_(struct drbd_conf *mdev, unsigned int line, const char *func)
+{
+ if (!test_and_set_bit(MD_DIRTY, &mdev->flags)) {
+ mod_timer(&mdev->md_sync_timer, jiffies + HZ);
+ mdev->last_md_mark_dirty.line = line;
+ mdev->last_md_mark_dirty.func = func;
+ }
+}
+#else
+void drbd_md_mark_dirty(struct drbd_conf *mdev)
+{
+ if (!test_and_set_bit(MD_DIRTY, &mdev->flags))
+ mod_timer(&mdev->md_sync_timer, jiffies + 5*HZ);
+}
+#endif
+
+static void drbd_uuid_move_history(struct drbd_conf *mdev) __must_hold(local)
+{
+ int i;
+
+ for (i = UI_HISTORY_START; i < UI_HISTORY_END; i++)
+ mdev->ldev->md.uuid[i+1] = mdev->ldev->md.uuid[i];
+}
+
+void _drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local)
+{
+ if (idx == UI_CURRENT) {
+ if (mdev->state.role == R_PRIMARY)
+ val |= 1;
+ else
+ val &= ~((u64)1);
+
+ drbd_set_ed_uuid(mdev, val);
+ }
+
+ mdev->ldev->md.uuid[idx] = val;
+ drbd_md_mark_dirty(mdev);
+}
+
+
+void drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local)
+{
+ if (mdev->ldev->md.uuid[idx]) {
+ drbd_uuid_move_history(mdev);
+ mdev->ldev->md.uuid[UI_HISTORY_START] = mdev->ldev->md.uuid[idx];
+ }
+ _drbd_uuid_set(mdev, idx, val);
+}
+
+/**
+ * drbd_uuid_new_current() - Creates a new current UUID
+ * @mdev: DRBD device.
+ *
+ * Creates a new current UUID, and rotates the old current UUID into
+ * the bitmap slot. Causes an incremental resync upon next connect.
+ */
+void drbd_uuid_new_current(struct drbd_conf *mdev) __must_hold(local)
+{
+ u64 val;
+ unsigned long long bm_uuid = mdev->ldev->md.uuid[UI_BITMAP];
+
+ if (bm_uuid)
+ dev_warn(DEV, "bm UUID was already set: %llX\n", bm_uuid);
+
+ mdev->ldev->md.uuid[UI_BITMAP] = mdev->ldev->md.uuid[UI_CURRENT];
+
+ get_random_bytes(&val, sizeof(u64));
+ _drbd_uuid_set(mdev, UI_CURRENT, val);
+ drbd_print_uuids(mdev, "new current UUID");
+ /* get it to stable storage _now_ */
+ drbd_md_sync(mdev);
+}
+
+void drbd_uuid_set_bm(struct drbd_conf *mdev, u64 val) __must_hold(local)
+{
+ if (mdev->ldev->md.uuid[UI_BITMAP] == 0 && val == 0)
+ return;
+
+ if (val == 0) {
+ drbd_uuid_move_history(mdev);
+ mdev->ldev->md.uuid[UI_HISTORY_START] = mdev->ldev->md.uuid[UI_BITMAP];
+ mdev->ldev->md.uuid[UI_BITMAP] = 0;
+ } else {
+ unsigned long long bm_uuid = mdev->ldev->md.uuid[UI_BITMAP];
+ if (bm_uuid)
+ dev_warn(DEV, "bm UUID was already set: %llX\n", bm_uuid);
+
+ mdev->ldev->md.uuid[UI_BITMAP] = val & ~((u64)1);
+ }
+ drbd_md_mark_dirty(mdev);
+}
+
+/**
+ * drbd_bmio_set_n_write() - io_fn for drbd_queue_bitmap_io() or drbd_bitmap_io()
+ * @mdev: DRBD device.
+ *
+ * Sets all bits in the bitmap and writes the whole bitmap to stable storage.
+ */
+int drbd_bmio_set_n_write(struct drbd_conf *mdev)
+{
+ int rv = -EIO;
+
+ if (get_ldev_if_state(mdev, D_ATTACHING)) {
+ drbd_md_set_flag(mdev, MDF_FULL_SYNC);
+ drbd_md_sync(mdev);
+ drbd_bm_set_all(mdev);
+
+ rv = drbd_bm_write(mdev);
+
+ if (!rv) {
+ drbd_md_clear_flag(mdev, MDF_FULL_SYNC);
+ drbd_md_sync(mdev);
+ }
+
+ put_ldev(mdev);
+ }
+
+ return rv;
+}
+
+/**
+ * drbd_bmio_clear_n_write() - io_fn for drbd_queue_bitmap_io() or drbd_bitmap_io()
+ * @mdev: DRBD device.
+ *
+ * Clears all bits in the bitmap and writes the whole bitmap to stable storage.
+ */
+int drbd_bmio_clear_n_write(struct drbd_conf *mdev)
+{
+ int rv = -EIO;
+
+ drbd_resume_al(mdev);
+ if (get_ldev_if_state(mdev, D_ATTACHING)) {
+ drbd_bm_clear_all(mdev);
+ rv = drbd_bm_write(mdev);
+ put_ldev(mdev);
+ }
+
+ return rv;
+}
+
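+/* Worker callback for queued bitmap IO: run io_fn with the bitmap locked,
+ * clear BITMAP_IO, wake up waiters and pass the result to the done callback. */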
+static int w_bitmap_io(struct drbd_conf *mdev, struct drbd_work *w, int unused)
+{
+ struct bm_io_work *work = container_of(w, struct bm_io_work, w);
+ int rv = -EIO;
+
+ D_ASSERT(atomic_read(&mdev->ap_bio_cnt) == 0);
+
+ if (get_ldev(mdev)) {
+ drbd_bm_lock(mdev, work->why, work->flags);
+ rv = work->io_fn(mdev);
+ drbd_bm_unlock(mdev);
+ put_ldev(mdev);
+ }
+
+ clear_bit(BITMAP_IO, &mdev->flags);
+ smp_mb__after_clear_bit();
+ wake_up(&mdev->misc_wait);
+
+ if (work->done)
+ work->done(mdev, rv);
+
+ clear_bit(BITMAP_IO_QUEUED, &mdev->flags);
+ work->why = NULL;
+ work->flags = 0;
+
+ return 1;
+}
+
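+/* Tear down everything that belongs to the local disk: the resync and
+ * activity-log LRU caches, the backing device reference and the temporary
+ * md IO page; finally clear GO_DISKLESS. */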
+void drbd_ldev_destroy(struct drbd_conf *mdev)
+{
+ lc_destroy(mdev->resync);
+ mdev->resync = NULL;
+ lc_destroy(mdev->act_log);
+ mdev->act_log = NULL;
+ __no_warn(local,
+ drbd_free_bc(mdev->ldev);
+ mdev->ldev = NULL;);
+
+ if (mdev->md_io_tmpp) {
+ __free_page(mdev->md_io_tmpp);
+ mdev->md_io_tmpp = NULL;
+ }
+ clear_bit(GO_DISKLESS, &mdev->flags);
+}
+
+static int w_go_diskless(struct drbd_conf *mdev, struct drbd_work *w, int unused)
+{
+ D_ASSERT(mdev->state.disk == D_FAILED);
+ /* we cannot assert local_cnt == 0 here, as get_ldev_if_state will
+ * inc/dec it frequently. Once we are D_DISKLESS, no one will touch
+ * the protected members anymore, though, so once put_ldev reaches zero
+ * again, it will be safe to free them. */
+ drbd_force_state(mdev, NS(disk, D_DISKLESS));
+ return 1;
+}
+
+void drbd_go_diskless(struct drbd_conf *mdev)
+{
+ D_ASSERT(mdev->state.disk == D_FAILED);
+ if (!test_and_set_bit(GO_DISKLESS, &mdev->flags))
+ drbd_queue_work(&mdev->data.work, &mdev->go_diskless);
+}
+
+/**
+ * drbd_queue_bitmap_io() - Queues an IO operation on the whole bitmap
+ * @mdev: DRBD device.
+ * @io_fn: IO callback to be called when bitmap IO is possible
+ * @done: callback to be called after the bitmap IO was performed
+ * @why: Descriptive text of the reason for doing the IO
+ *
+ * While IO on the bitmap is in progress we freeze application IO, thus
+ * ensuring that drbd_set_out_of_sync() cannot be called. This function MAY ONLY be
+ * called from worker context. It MUST NOT be used while a previous such
+ * work is still pending!
+ */
+void drbd_queue_bitmap_io(struct drbd_conf *mdev,
+ int (*io_fn)(struct drbd_conf *),
+ void (*done)(struct drbd_conf *, int),
+ char *why, enum bm_flag flags)
+{
+ D_ASSERT(current == mdev->worker.task);
+
+ D_ASSERT(!test_bit(BITMAP_IO_QUEUED, &mdev->flags));
+ D_ASSERT(!test_bit(BITMAP_IO, &mdev->flags));
+ D_ASSERT(list_empty(&mdev->bm_io_work.w.list));
+ if (mdev->bm_io_work.why)
+ dev_err(DEV, "FIXME going to queue '%s' but '%s' still pending?\n",
+ why, mdev->bm_io_work.why);
+
+ mdev->bm_io_work.io_fn = io_fn;
+ mdev->bm_io_work.done = done;
+ mdev->bm_io_work.why = why;
+ mdev->bm_io_work.flags = flags;
+
+ spin_lock_irq(&mdev->req_lock);
+ set_bit(BITMAP_IO, &mdev->flags);
+ if (atomic_read(&mdev->ap_bio_cnt) == 0) {
+ if (!test_and_set_bit(BITMAP_IO_QUEUED, &mdev->flags))
+ drbd_queue_work(&mdev->data.work, &mdev->bm_io_work.w);
+ }
+ spin_unlock_irq(&mdev->req_lock);
+}
+
+/**
+ * drbd_bitmap_io() - Does an IO operation on the whole bitmap
+ * @mdev: DRBD device.
+ * @io_fn: IO callback to be called when bitmap IO is possible
+ * @why: Descriptive text of the reason for doing the IO
+ *
+ * Freezes application IO while the actual IO operation runs. This
+ * function MAY NOT be called from worker context.
+ */
+int drbd_bitmap_io(struct drbd_conf *mdev, int (*io_fn)(struct drbd_conf *),
+ char *why, enum bm_flag flags)
+{
+ int rv;
+
+ D_ASSERT(current != mdev->worker.task);
+
+ if ((flags & BM_LOCKED_SET_ALLOWED) == 0)
+ drbd_suspend_io(mdev);
+
+ drbd_bm_lock(mdev, why, flags);
+ rv = io_fn(mdev);
+ drbd_bm_unlock(mdev);
+
+ if ((flags & BM_LOCKED_SET_ALLOWED) == 0)
+ drbd_resume_io(mdev);
+
+ return rv;
+}
+
+void drbd_md_set_flag(struct drbd_conf *mdev, int flag) __must_hold(local)
+{
+ if ((mdev->ldev->md.flags & flag) != flag) {
+ drbd_md_mark_dirty(mdev);
+ mdev->ldev->md.flags |= flag;
+ }
+}
+
+void drbd_md_clear_flag(struct drbd_conf *mdev, int flag) __must_hold(local)
+{
+ if ((mdev->ldev->md.flags & flag) != 0) {
+ drbd_md_mark_dirty(mdev);
+ mdev->ldev->md.flags &= ~flag;
+ }
+}
+int drbd_md_test_flag(struct drbd_backing_dev *bdev, int flag)
+{
+ return (bdev->md.flags & flag) != 0;
+}
+
+static void md_sync_timer_fn(unsigned long data)
+{
+ struct drbd_conf *mdev = (struct drbd_conf *) data;
+
+ drbd_queue_work_front(&mdev->data.work, &mdev->md_sync_work);
+}
+
+static int w_md_sync(struct drbd_conf *mdev, struct drbd_work *w, int unused)
+{
+ dev_warn(DEV, "md_sync_timer expired! Worker calls drbd_md_sync().\n");
+#ifdef DEBUG
+ dev_warn(DEV, "last md_mark_dirty: %s:%u\n",
+ mdev->last_md_mark_dirty.func, mdev->last_md_mark_dirty.line);
+#endif
+ drbd_md_sync(mdev);
+ return 1;
+}
+
+#ifdef CONFIG_DRBD_FAULT_INJECTION
+/* Fault insertion support including random number generator shamelessly
+ * stolen from kernel/rcutorture.c */
+struct fault_random_state {
+ unsigned long state;
+ unsigned long count;
+};
+
+#define FAULT_RANDOM_MULT 39916801 /* prime */
+#define FAULT_RANDOM_ADD 479001701 /* prime */
+#define FAULT_RANDOM_REFRESH 10000
+
+/*
+ * Crude but fast random-number generator. Uses a linear congruential
+ * generator, with occasional help from get_random_bytes().
+ */
+static unsigned long
+_drbd_fault_random(struct fault_random_state *rsp)
+{
+ long refresh;
+
+ if (!rsp->count--) {
+ get_random_bytes(&refresh, sizeof(refresh));
+ rsp->state += refresh;
+ rsp->count = FAULT_RANDOM_REFRESH;
+ }
+ rsp->state = rsp->state * FAULT_RANDOM_MULT + FAULT_RANDOM_ADD;
+ return swahw32(rsp->state);
+}
+
+static char *
+_drbd_fault_str(unsigned int type) {
+ static char *_faults[] = {
+ [DRBD_FAULT_MD_WR] = "Meta-data write",
+ [DRBD_FAULT_MD_RD] = "Meta-data read",
+ [DRBD_FAULT_RS_WR] = "Resync write",
+ [DRBD_FAULT_RS_RD] = "Resync read",
+ [DRBD_FAULT_DT_WR] = "Data write",
+ [DRBD_FAULT_DT_RD] = "Data read",
+ [DRBD_FAULT_DT_RA] = "Data read ahead",
+ [DRBD_FAULT_BM_ALLOC] = "BM allocation",
+ [DRBD_FAULT_AL_EE] = "EE allocation",
+ [DRBD_FAULT_RECEIVE] = "receive data corruption",
+ };
+
+ return (type < DRBD_FAULT_MAX) ? _faults[type] : "**Unknown**";
+}
+
+unsigned int
+_drbd_insert_fault(struct drbd_conf *mdev, unsigned int type)
+{
+ static struct fault_random_state rrs = {0, 0};
+
+ unsigned int ret = (
+ (fault_devs == 0 ||
+ ((1 << mdev_to_minor(mdev)) & fault_devs) != 0) &&
+ (((_drbd_fault_random(&rrs) % 100) + 1) <= fault_rate));
+
+ if (ret) {
+ fault_count++;
+
+ if (__ratelimit(&drbd_ratelimit_state))
+ dev_warn(DEV, "***Simulating %s failure\n",
+ _drbd_fault_str(type));
+ }
+
+ return ret;
+}
+#endif
+
+const char *drbd_buildtag(void)
+{
+	/* DRBD built from external sources has a reference here to the
+	   git hash of the source code. */
+
+ static char buildtag[38] = "\0uilt-in";
+
+ if (buildtag[0] == 0) {
+#ifdef CONFIG_MODULES
+ if (THIS_MODULE != NULL)
+ sprintf(buildtag, "srcversion: %-24s", THIS_MODULE->srcversion);
+ else
+#endif
+ buildtag[0] = 'b';
+ }
+
+ return buildtag;
+}
+
+module_init(drbd_init)
+module_exit(drbd_cleanup)
+
+EXPORT_SYMBOL(drbd_conn_str);
+EXPORT_SYMBOL(drbd_role_str);
+EXPORT_SYMBOL(drbd_disk_str);
+EXPORT_SYMBOL(drbd_set_st_err_str);
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
new file mode 100644
index 00000000..515bcd94
--- /dev/null
+++ b/drivers/block/drbd/drbd_nl.c
@@ -0,0 +1,2651 @@
+/*
+ drbd_nl.c
+
+ This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
+
+ Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
+ Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
+ Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
+
+ drbd is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ drbd is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with drbd; see the file COPYING. If not, write to
+ the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ */
+
+#include <linux/module.h>
+#include <linux/drbd.h>
+#include <linux/in.h>
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/slab.h>
+#include <linux/connector.h>
+#include <linux/blkpg.h>
+#include <linux/cpumask.h>
+#include "drbd_int.h"
+#include "drbd_req.h"
+#include "drbd_wrappers.h"
+#include <asm/unaligned.h>
+#include <linux/drbd_tag_magic.h>
+#include <linux/drbd_limits.h>
+#include <linux/compiler.h>
+#include <linux/kthread.h>
+
+static unsigned short *tl_add_blob(unsigned short *, enum drbd_tags, const void *, int);
+static unsigned short *tl_add_str(unsigned short *, enum drbd_tags, const char *);
+static unsigned short *tl_add_int(unsigned short *, enum drbd_tags, const void *);
+
+/* see get_sb_bdev and bd_claim */
+static char *drbd_m_holder = "Hands off! this is DRBD's meta data device.";
+
+/* Generate the tag_list to struct functions */
+#define NL_PACKET(name, number, fields) \
+static int name ## _from_tags(struct drbd_conf *mdev, \
+ unsigned short *tags, struct name *arg) __attribute__ ((unused)); \
+static int name ## _from_tags(struct drbd_conf *mdev, \
+ unsigned short *tags, struct name *arg) \
+{ \
+ int tag; \
+ int dlen; \
+ \
+ while ((tag = get_unaligned(tags++)) != TT_END) { \
+ dlen = get_unaligned(tags++); \
+ switch (tag_number(tag)) { \
+ fields \
+ default: \
+ if (tag & T_MANDATORY) { \
+ dev_err(DEV, "Unknown tag: %d\n", tag_number(tag)); \
+ return 0; \
+ } \
+ } \
+ tags = (unsigned short *)((char *)tags + dlen); \
+ } \
+ return 1; \
+}
+#define NL_INTEGER(pn, pr, member) \
+ case pn: /* D_ASSERT( tag_type(tag) == TT_INTEGER ); */ \
+ arg->member = get_unaligned((int *)(tags)); \
+ break;
+#define NL_INT64(pn, pr, member) \
+ case pn: /* D_ASSERT( tag_type(tag) == TT_INT64 ); */ \
+ arg->member = get_unaligned((u64 *)(tags)); \
+ break;
+#define NL_BIT(pn, pr, member) \
+ case pn: /* D_ASSERT( tag_type(tag) == TT_BIT ); */ \
+ arg->member = *(char *)(tags) ? 1 : 0; \
+ break;
+#define NL_STRING(pn, pr, member, len) \
+ case pn: /* D_ASSERT( tag_type(tag) == TT_STRING ); */ \
+ if (dlen > len) { \
+ dev_err(DEV, "arg too long: %s (%u wanted, max len: %u bytes)\n", \
+ #member, dlen, (unsigned int)len); \
+ return 0; \
+ } \
+ arg->member ## _len = dlen; \
+ memcpy(arg->member, tags, min_t(size_t, dlen, len)); \
+ break;
+#include "linux/drbd_nl.h"
+
+/* Generate the struct to tag_list functions */
+#define NL_PACKET(name, number, fields) \
+static unsigned short* \
+name ## _to_tags(struct drbd_conf *mdev, \
+ struct name *arg, unsigned short *tags) __attribute__ ((unused)); \
+static unsigned short* \
+name ## _to_tags(struct drbd_conf *mdev, \
+ struct name *arg, unsigned short *tags) \
+{ \
+ fields \
+ return tags; \
+}
+
+#define NL_INTEGER(pn, pr, member) \
+ put_unaligned(pn | pr | TT_INTEGER, tags++); \
+ put_unaligned(sizeof(int), tags++); \
+ put_unaligned(arg->member, (int *)tags); \
+ tags = (unsigned short *)((char *)tags+sizeof(int));
+#define NL_INT64(pn, pr, member) \
+ put_unaligned(pn | pr | TT_INT64, tags++); \
+ put_unaligned(sizeof(u64), tags++); \
+ put_unaligned(arg->member, (u64 *)tags); \
+ tags = (unsigned short *)((char *)tags+sizeof(u64));
+#define NL_BIT(pn, pr, member) \
+ put_unaligned(pn | pr | TT_BIT, tags++); \
+ put_unaligned(sizeof(char), tags++); \
+ *(char *)tags = arg->member; \
+ tags = (unsigned short *)((char *)tags+sizeof(char));
+#define NL_STRING(pn, pr, member, len) \
+ put_unaligned(pn | pr | TT_STRING, tags++); \
+ put_unaligned(arg->member ## _len, tags++); \
+ memcpy(tags, arg->member, arg->member ## _len); \
+ tags = (unsigned short *)((char *)tags + arg->member ## _len);
+#include "linux/drbd_nl.h"
+
+void drbd_bcast_ev_helper(struct drbd_conf *mdev, char *helper_name);
+void drbd_nl_send_reply(struct cn_msg *, int);
+
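+/* Run the configured user mode helper for @cmd, with the minor number as
+ * argument and the peer address exported in the environment.
+ * Returns the helper's exit status; negative errnos from
+ * call_usermodehelper() are ignored and mapped to 0. */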
+int drbd_khelper(struct drbd_conf *mdev, char *cmd)
+{
+ char *envp[] = { "HOME=/",
+ "TERM=linux",
+ "PATH=/sbin:/usr/sbin:/bin:/usr/bin",
+ NULL, /* Will be set to address family */
+ NULL, /* Will be set to address */
+ NULL };
+
+ char mb[12], af[20], ad[60], *afs;
+ char *argv[] = {usermode_helper, cmd, mb, NULL };
+ int ret;
+
+ snprintf(mb, 12, "minor-%d", mdev_to_minor(mdev));
+
+ if (get_net_conf(mdev)) {
+ switch (((struct sockaddr *)mdev->net_conf->peer_addr)->sa_family) {
+ case AF_INET6:
+ afs = "ipv6";
+ snprintf(ad, 60, "DRBD_PEER_ADDRESS=%pI6",
+ &((struct sockaddr_in6 *)mdev->net_conf->peer_addr)->sin6_addr);
+ break;
+ case AF_INET:
+ afs = "ipv4";
+ snprintf(ad, 60, "DRBD_PEER_ADDRESS=%pI4",
+ &((struct sockaddr_in *)mdev->net_conf->peer_addr)->sin_addr);
+ break;
+ default:
+ afs = "ssocks";
+ snprintf(ad, 60, "DRBD_PEER_ADDRESS=%pI4",
+ &((struct sockaddr_in *)mdev->net_conf->peer_addr)->sin_addr);
+ }
+ snprintf(af, 20, "DRBD_PEER_AF=%s", afs);
+ envp[3]=af;
+ envp[4]=ad;
+ put_net_conf(mdev);
+ }
+
+ /* The helper may take some time.
+ * write out any unsynced meta data changes now */
+ drbd_md_sync(mdev);
+
+ dev_info(DEV, "helper command: %s %s %s\n", usermode_helper, cmd, mb);
+
+ drbd_bcast_ev_helper(mdev, cmd);
+ ret = call_usermodehelper(usermode_helper, argv, envp, 1);
+ if (ret)
+ dev_warn(DEV, "helper command: %s %s %s exit code %u (0x%x)\n",
+ usermode_helper, cmd, mb,
+ (ret >> 8) & 0xff, ret);
+ else
+ dev_info(DEV, "helper command: %s %s %s exit code %u (0x%x)\n",
+ usermode_helper, cmd, mb,
+ (ret >> 8) & 0xff, ret);
+
+ if (ret < 0) /* Ignore any ERRNOs we got. */
+ ret = 0;
+
+ return ret;
+}
+
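+/* Invoke the fence-peer helper and translate its exit code into the peer
+ * disk state we may assume from now on (D_INCONSISTENT, D_OUTDATED or
+ * D_UNKNOWN). */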
+enum drbd_disk_state drbd_try_outdate_peer(struct drbd_conf *mdev)
+{
+ char *ex_to_string;
+ int r;
+ enum drbd_disk_state nps;
+ enum drbd_fencing_p fp;
+
+ D_ASSERT(mdev->state.pdsk == D_UNKNOWN);
+
+ if (get_ldev_if_state(mdev, D_CONSISTENT)) {
+ fp = mdev->ldev->dc.fencing;
+ put_ldev(mdev);
+ } else {
+ dev_warn(DEV, "Not fencing peer, I'm not even Consistent myself.\n");
+ nps = mdev->state.pdsk;
+ goto out;
+ }
+
+ r = drbd_khelper(mdev, "fence-peer");
+
+ switch ((r>>8) & 0xff) {
+ case 3: /* peer is inconsistent */
+ ex_to_string = "peer is inconsistent or worse";
+ nps = D_INCONSISTENT;
+ break;
+ case 4: /* peer got outdated, or was already outdated */
+ ex_to_string = "peer was fenced";
+ nps = D_OUTDATED;
+ break;
+ case 5: /* peer was down */
+ if (mdev->state.disk == D_UP_TO_DATE) {
+ /* we will(have) create(d) a new UUID anyways... */
+ ex_to_string = "peer is unreachable, assumed to be dead";
+ nps = D_OUTDATED;
+ } else {
+ ex_to_string = "peer unreachable, doing nothing since disk != UpToDate";
+ nps = mdev->state.pdsk;
+ }
+ break;
+ case 6: /* Peer is primary, voluntarily outdate myself.
+ * This is useful when an unconnected R_SECONDARY is asked to
+ * become R_PRIMARY, but finds the other peer being active. */
+ ex_to_string = "peer is active";
+ dev_warn(DEV, "Peer is primary, outdating myself.\n");
+ nps = D_UNKNOWN;
+ _drbd_request_state(mdev, NS(disk, D_OUTDATED), CS_WAIT_COMPLETE);
+ break;
+ case 7:
+ if (fp != FP_STONITH)
+ dev_err(DEV, "fence-peer() = 7 && fencing != Stonith !!!\n");
+ ex_to_string = "peer was stonithed";
+ nps = D_OUTDATED;
+ break;
+ default:
+ /* The script is broken ... */
+ nps = D_UNKNOWN;
+ dev_err(DEV, "fence-peer helper broken, returned %d\n", (r>>8)&0xff);
+ return nps;
+ }
+
+ dev_info(DEV, "fence-peer helper returned %d (%s)\n",
+ (r>>8) & 0xff, ex_to_string);
+
+out:
+ if (mdev->state.susp_fen && nps >= D_UNKNOWN) {
+ /* The handler was not successful... unfreeze here, the
+ state engine can not unfreeze... */
+ _drbd_request_state(mdev, NS(susp_fen, 0), CS_VERBOSE);
+ }
+
+ return nps;
+}
+
+static int _try_outdate_peer_async(void *data)
+{
+ struct drbd_conf *mdev = (struct drbd_conf *)data;
+ enum drbd_disk_state nps;
+ union drbd_state ns;
+
+ nps = drbd_try_outdate_peer(mdev);
+
+ /* Not using
+ drbd_request_state(mdev, NS(pdsk, nps));
+	   here, because we might have been able to re-establish the connection
+	   in the meantime. This can only partially be solved in the state
+	   engine's is_valid_state() and is_valid_state_transition()
+ functions.
+
+ nps can be D_INCONSISTENT, D_OUTDATED or D_UNKNOWN.
+ pdsk == D_INCONSISTENT while conn >= C_CONNECTED is valid,
+ therefore we have to have the pre state change check here.
+ */
+ spin_lock_irq(&mdev->req_lock);
+ ns = mdev->state;
+ if (ns.conn < C_WF_REPORT_PARAMS) {
+ ns.pdsk = nps;
+ _drbd_set_state(mdev, ns, CS_VERBOSE, NULL);
+ }
+ spin_unlock_irq(&mdev->req_lock);
+
+ return 0;
+}
+
+void drbd_try_outdate_peer_async(struct drbd_conf *mdev)
+{
+ struct task_struct *opa;
+
+ opa = kthread_run(_try_outdate_peer_async, mdev, "drbd%d_a_helper", mdev_to_minor(mdev));
+ if (IS_ERR(opa))
+ dev_err(DEV, "out of mem, failed to invoke fence-peer helper\n");
+}
+
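+/* Try to change the local role to @new_role, retrying a limited number of
+ * times while outdating or fencing the peer as needed; @force allows
+ * promotion despite inconsistent local data. */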
+enum drbd_state_rv
+drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force)
+{
+ const int max_tries = 4;
+ enum drbd_state_rv rv = SS_UNKNOWN_ERROR;
+ int try = 0;
+ int forced = 0;
+ union drbd_state mask, val;
+ enum drbd_disk_state nps;
+
+ if (new_role == R_PRIMARY)
+ request_ping(mdev); /* Detect a dead peer ASAP */
+
+ mutex_lock(&mdev->state_mutex);
+
+ mask.i = 0; mask.role = R_MASK;
+ val.i = 0; val.role = new_role;
+
+ while (try++ < max_tries) {
+ rv = _drbd_request_state(mdev, mask, val, CS_WAIT_COMPLETE);
+
+ /* in case we first succeeded to outdate,
+ * but now suddenly could establish a connection */
+ if (rv == SS_CW_FAILED_BY_PEER && mask.pdsk != 0) {
+ val.pdsk = 0;
+ mask.pdsk = 0;
+ continue;
+ }
+
+ if (rv == SS_NO_UP_TO_DATE_DISK && force &&
+ (mdev->state.disk < D_UP_TO_DATE &&
+ mdev->state.disk >= D_INCONSISTENT)) {
+ mask.disk = D_MASK;
+ val.disk = D_UP_TO_DATE;
+ forced = 1;
+ continue;
+ }
+
+ if (rv == SS_NO_UP_TO_DATE_DISK &&
+ mdev->state.disk == D_CONSISTENT && mask.pdsk == 0) {
+ D_ASSERT(mdev->state.pdsk == D_UNKNOWN);
+ nps = drbd_try_outdate_peer(mdev);
+
+ if (nps == D_OUTDATED || nps == D_INCONSISTENT) {
+ val.disk = D_UP_TO_DATE;
+ mask.disk = D_MASK;
+ }
+
+ val.pdsk = nps;
+ mask.pdsk = D_MASK;
+
+ continue;
+ }
+
+ if (rv == SS_NOTHING_TO_DO)
+ goto fail;
+ if (rv == SS_PRIMARY_NOP && mask.pdsk == 0) {
+ nps = drbd_try_outdate_peer(mdev);
+
+ if (force && nps > D_OUTDATED) {
+ dev_warn(DEV, "Forced into split brain situation!\n");
+ nps = D_OUTDATED;
+ }
+
+ mask.pdsk = D_MASK;
+ val.pdsk = nps;
+
+ continue;
+ }
+ if (rv == SS_TWO_PRIMARIES) {
+ /* Maybe the peer is detected as dead very soon...
+ retry at most once more in this case. */
+ schedule_timeout_interruptible((mdev->net_conf->ping_timeo+1)*HZ/10);
+ if (try < max_tries)
+ try = max_tries - 1;
+ continue;
+ }
+ if (rv < SS_SUCCESS) {
+ rv = _drbd_request_state(mdev, mask, val,
+ CS_VERBOSE + CS_WAIT_COMPLETE);
+ if (rv < SS_SUCCESS)
+ goto fail;
+ }
+ break;
+ }
+
+ if (rv < SS_SUCCESS)
+ goto fail;
+
+ if (forced)
+ dev_warn(DEV, "Forced to consider local data as UpToDate!\n");
+
+ /* Wait until nothing is on the fly :) */
+ wait_event(mdev->misc_wait, atomic_read(&mdev->ap_pending_cnt) == 0);
+
+ if (new_role == R_SECONDARY) {
+ set_disk_ro(mdev->vdisk, true);
+ if (get_ldev(mdev)) {
+ mdev->ldev->md.uuid[UI_CURRENT] &= ~(u64)1;
+ put_ldev(mdev);
+ }
+ } else {
+ if (get_net_conf(mdev)) {
+ mdev->net_conf->want_lose = 0;
+ put_net_conf(mdev);
+ }
+ set_disk_ro(mdev->vdisk, false);
+ if (get_ldev(mdev)) {
+ if (((mdev->state.conn < C_CONNECTED ||
+ mdev->state.pdsk <= D_FAILED)
+ && mdev->ldev->md.uuid[UI_BITMAP] == 0) || forced)
+ drbd_uuid_new_current(mdev);
+
+ mdev->ldev->md.uuid[UI_CURRENT] |= (u64)1;
+ put_ldev(mdev);
+ }
+ }
+
+	/* write-out of bitmap areas covered by the activity log
+	 * to stable storage was already done in the after-state-change work */
+
+ if (mdev->state.conn >= C_WF_REPORT_PARAMS) {
+ /* if this was forced, we should consider sync */
+ if (forced)
+ drbd_send_uuids(mdev);
+ drbd_send_state(mdev);
+ }
+
+ drbd_md_sync(mdev);
+
+ kobject_uevent(&disk_to_dev(mdev->vdisk)->kobj, KOBJ_CHANGE);
+ fail:
+ mutex_unlock(&mdev->state_mutex);
+ return rv;
+}
+
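+/* Return the device for @minor, creating, registering and adding its gendisk
+ * first if @create is set; returns NULL if the minor is out of range or
+ * device creation fails. */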
+static struct drbd_conf *ensure_mdev(int minor, int create)
+{
+ struct drbd_conf *mdev;
+
+ if (minor >= minor_count)
+ return NULL;
+
+ mdev = minor_to_mdev(minor);
+
+ if (!mdev && create) {
+ struct gendisk *disk = NULL;
+		mdev = drbd_new_device(minor);
+		if (!mdev)	/* out of memory: drbd_new_device() failed */
+			return NULL;
+
+ spin_lock_irq(&drbd_pp_lock);
+ if (minor_table[minor] == NULL) {
+ minor_table[minor] = mdev;
+ disk = mdev->vdisk;
+ mdev = NULL;
+ } /* else: we lost the race */
+ spin_unlock_irq(&drbd_pp_lock);
+
+ if (disk) /* we won the race above */
+ /* in case we ever add a drbd_delete_device(),
+ * don't forget the del_gendisk! */
+ add_disk(disk);
+ else /* we lost the race above */
+ drbd_free_mdev(mdev);
+
+ mdev = minor_to_mdev(minor);
+ }
+
+ return mdev;
+}
+
+static int drbd_nl_primary(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
+ struct drbd_nl_cfg_reply *reply)
+{
+ struct primary primary_args;
+
+ memset(&primary_args, 0, sizeof(struct primary));
+ if (!primary_from_tags(mdev, nlp->tag_list, &primary_args)) {
+ reply->ret_code = ERR_MANDATORY_TAG;
+ return 0;
+ }
+
+ reply->ret_code =
+ drbd_set_role(mdev, R_PRIMARY, primary_args.primary_force);
+
+ return 0;
+}
+
+static int drbd_nl_secondary(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
+ struct drbd_nl_cfg_reply *reply)
+{
+ reply->ret_code = drbd_set_role(mdev, R_SECONDARY, 0);
+
+ return 0;
+}
+
+/* initializes the md.*_offset members, so we are able to find
+ * the on disk meta data */
+static void drbd_md_set_sector_offsets(struct drbd_conf *mdev,
+ struct drbd_backing_dev *bdev)
+{
+ sector_t md_size_sect = 0;
+ switch (bdev->dc.meta_dev_idx) {
+ default:
+ /* v07 style fixed size indexed meta data */
+ bdev->md.md_size_sect = MD_RESERVED_SECT;
+ bdev->md.md_offset = drbd_md_ss__(mdev, bdev);
+ bdev->md.al_offset = MD_AL_OFFSET;
+ bdev->md.bm_offset = MD_BM_OFFSET;
+ break;
+ case DRBD_MD_INDEX_FLEX_EXT:
+ /* just occupy the full device; unit: sectors */
+ bdev->md.md_size_sect = drbd_get_capacity(bdev->md_bdev);
+ bdev->md.md_offset = 0;
+ bdev->md.al_offset = MD_AL_OFFSET;
+ bdev->md.bm_offset = MD_BM_OFFSET;
+ break;
+ case DRBD_MD_INDEX_INTERNAL:
+ case DRBD_MD_INDEX_FLEX_INT:
+ bdev->md.md_offset = drbd_md_ss__(mdev, bdev);
+ /* al size is still fixed */
+ bdev->md.al_offset = -MD_AL_MAX_SIZE;
+		/* we need (slightly less than) ~ this many bitmap sectors: */
+ md_size_sect = drbd_get_capacity(bdev->backing_bdev);
+ md_size_sect = ALIGN(md_size_sect, BM_SECT_PER_EXT);
+ md_size_sect = BM_SECT_TO_EXT(md_size_sect);
+ md_size_sect = ALIGN(md_size_sect, 8);
+
+ /* plus the "drbd meta data super block",
+ * and the activity log; */
+ md_size_sect += MD_BM_OFFSET;
+
+ bdev->md.md_size_sect = md_size_sect;
+ /* bitmap offset is adjusted by 'super' block size */
+ bdev->md.bm_offset = -md_size_sect + MD_AL_OFFSET;
+ break;
+ }
+}
+
+/* input size is expected to be in KB */
+char *ppsize(char *buf, unsigned long long size)
+{
+ /* Needs 9 bytes at max including trailing NUL:
+ * -1ULL ==> "16384 EB" */
+ static char units[] = { 'K', 'M', 'G', 'T', 'P', 'E' };
+ int base = 0;
+ while (size >= 10000 && base < sizeof(units)-1) {
+ /* shift + round */
+ size = (size >> 10) + !!(size & (1<<9));
+ base++;
+ }
+ sprintf(buf, "%u %cB", (unsigned)size, units[base]);
+
+ return buf;
+}
+
+/* there is still a theoretical deadlock when called from receiver
+ * on a D_INCONSISTENT R_PRIMARY:
+ * remote READ does inc_ap_bio, receiver would need to receive answer
+ * packet from remote to dec_ap_bio again.
+ * receiver receive_sizes(), comes here,
+ * waits for ap_bio_cnt == 0. -> deadlock.
+ * but this cannot happen, actually, because:
+ * R_PRIMARY D_INCONSISTENT, and peer's disk is unreachable
+ * (not connected, or bad/no disk on peer):
+ * see drbd_fail_request_early, ap_bio_cnt is zero.
+ * R_PRIMARY D_INCONSISTENT, and C_SYNC_TARGET:
+ * peer may not initiate a resize.
+ */
+void drbd_suspend_io(struct drbd_conf *mdev)
+{
+ set_bit(SUSPEND_IO, &mdev->flags);
+ if (is_susp(mdev->state))
+ return;
+ wait_event(mdev->misc_wait, !atomic_read(&mdev->ap_bio_cnt));
+}
+
+void drbd_resume_io(struct drbd_conf *mdev)
+{
+ clear_bit(SUSPEND_IO, &mdev->flags);
+ wake_up(&mdev->misc_wait);
+}
+
+/**
+ * drbd_determine_dev_size() - Sets the right device size obeying all constraints
+ * @mdev: DRBD device.
+ *
+ * Returns 0 on success, negative return values indicate errors.
+ * You should call drbd_md_sync() after calling this function.
+ */
+enum determine_dev_size drbd_determine_dev_size(struct drbd_conf *mdev, enum dds_flags flags) __must_hold(local)
+{
+ sector_t prev_first_sect, prev_size; /* previous meta location */
+ sector_t la_size;
+ sector_t size;
+ char ppb[10];
+
+ int md_moved, la_size_changed;
+ enum determine_dev_size rv = unchanged;
+
+ /* race:
+ * application request passes inc_ap_bio,
+ * but then cannot get an AL-reference.
+ * this function later may wait on ap_bio_cnt == 0. -> deadlock.
+ *
+ * to avoid that:
+ * Suspend IO right here.
+ * still lock the act_log to not trigger ASSERTs there.
+ */
+ drbd_suspend_io(mdev);
+
+ /* no wait necessary anymore, actually we could assert that */
+ wait_event(mdev->al_wait, lc_try_lock(mdev->act_log));
+
+ prev_first_sect = drbd_md_first_sector(mdev->ldev);
+ prev_size = mdev->ldev->md.md_size_sect;
+ la_size = mdev->ldev->md.la_size_sect;
+
+ /* TODO: should only be some assert here, not (re)init... */
+ drbd_md_set_sector_offsets(mdev, mdev->ldev);
+
+ size = drbd_new_dev_size(mdev, mdev->ldev, flags & DDSF_FORCED);
+
+ if (drbd_get_capacity(mdev->this_bdev) != size ||
+ drbd_bm_capacity(mdev) != size) {
+ int err;
+ err = drbd_bm_resize(mdev, size, !(flags & DDSF_NO_RESYNC));
+ if (unlikely(err)) {
+ /* currently there is only one error: ENOMEM! */
+ size = drbd_bm_capacity(mdev)>>1;
+ if (size == 0) {
+ dev_err(DEV, "OUT OF MEMORY! "
+ "Could not allocate bitmap!\n");
+ } else {
+ dev_err(DEV, "BM resizing failed. "
+ "Leaving size unchanged at size = %lu KB\n",
+ (unsigned long)size);
+ }
+ rv = dev_size_error;
+ }
+ /* racy, see comments above. */
+ drbd_set_my_capacity(mdev, size);
+ mdev->ldev->md.la_size_sect = size;
+ dev_info(DEV, "size = %s (%llu KB)\n", ppsize(ppb, size>>1),
+ (unsigned long long)size>>1);
+ }
+ if (rv == dev_size_error)
+ goto out;
+
+ la_size_changed = (la_size != mdev->ldev->md.la_size_sect);
+
+ md_moved = prev_first_sect != drbd_md_first_sector(mdev->ldev)
+ || prev_size != mdev->ldev->md.md_size_sect;
+
+ if (la_size_changed || md_moved) {
+ int err;
+
+ drbd_al_shrink(mdev); /* All extents inactive. */
+ dev_info(DEV, "Writing the whole bitmap, %s\n",
+ la_size_changed && md_moved ? "size changed and md moved" :
+ la_size_changed ? "size changed" : "md moved");
+ /* next line implicitly does drbd_suspend_io()+drbd_resume_io() */
+ err = drbd_bitmap_io(mdev, &drbd_bm_write,
+ "size changed", BM_LOCKED_MASK);
+ if (err) {
+ rv = dev_size_error;
+ goto out;
+ }
+ drbd_md_mark_dirty(mdev);
+ }
+
+ if (size > la_size)
+ rv = grew;
+ if (size < la_size)
+ rv = shrunk;
+out:
+ lc_unlock(mdev->act_log);
+ wake_up(&mdev->al_wait);
+ drbd_resume_io(mdev);
+
+ return rv;
+}
+
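+/* Compute the usable device size as the minimum of the local capacity and
+ * the peer's reported size, falling back to the last agreed size while
+ * disconnected, and capped by any size the user requested. */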
+sector_t
+drbd_new_dev_size(struct drbd_conf *mdev, struct drbd_backing_dev *bdev, int assume_peer_has_space)
+{
+ sector_t p_size = mdev->p_size; /* partner's disk size. */
+ sector_t la_size = bdev->md.la_size_sect; /* last agreed size. */
+ sector_t m_size; /* my size */
+ sector_t u_size = bdev->dc.disk_size; /* size requested by user. */
+ sector_t size = 0;
+
+ m_size = drbd_get_max_capacity(bdev);
+
+ if (mdev->state.conn < C_CONNECTED && assume_peer_has_space) {
+ dev_warn(DEV, "Resize while not connected was forced by the user!\n");
+ p_size = m_size;
+ }
+
+ if (p_size && m_size) {
+ size = min_t(sector_t, p_size, m_size);
+ } else {
+ if (la_size) {
+ size = la_size;
+ if (m_size && m_size < size)
+ size = m_size;
+ if (p_size && p_size < size)
+ size = p_size;
+ } else {
+ if (m_size)
+ size = m_size;
+ if (p_size)
+ size = p_size;
+ }
+ }
+
+ if (size == 0)
+ dev_err(DEV, "Both nodes diskless!\n");
+
+ if (u_size) {
+ if (u_size > size)
+ dev_err(DEV, "Requested disk size is too big (%lu > %lu)\n",
+ (unsigned long)u_size>>1, (unsigned long)size>>1);
+ else
+ size = u_size;
+ }
+
+ return size;
+}
+
+/**
+ * drbd_check_al_size() - Ensures that the AL is of the right size
+ * @mdev: DRBD device.
+ *
+ * Returns -EBUSY if current al lru is still used, -ENOMEM when allocation
+ * failed, and 0 on success. You should call drbd_md_sync() after you called
+ * this function.
+ */
+static int drbd_check_al_size(struct drbd_conf *mdev)
+{
+ struct lru_cache *n, *t;
+ struct lc_element *e;
+ unsigned int in_use;
+ int i;
+
+ ERR_IF(mdev->sync_conf.al_extents < 7)
+ mdev->sync_conf.al_extents = 127;
+
+ if (mdev->act_log &&
+ mdev->act_log->nr_elements == mdev->sync_conf.al_extents)
+ return 0;
+
+ in_use = 0;
+ t = mdev->act_log;
+ n = lc_create("act_log", drbd_al_ext_cache,
+ mdev->sync_conf.al_extents, sizeof(struct lc_element), 0);
+
+ if (n == NULL) {
+ dev_err(DEV, "Cannot allocate act_log lru!\n");
+ return -ENOMEM;
+ }
+ spin_lock_irq(&mdev->al_lock);
+ if (t) {
+ for (i = 0; i < t->nr_elements; i++) {
+ e = lc_element_by_index(t, i);
+ if (e->refcnt)
+ dev_err(DEV, "refcnt(%d)==%d\n",
+ e->lc_number, e->refcnt);
+ in_use += e->refcnt;
+ }
+ }
+ if (!in_use)
+ mdev->act_log = n;
+ spin_unlock_irq(&mdev->al_lock);
+ if (in_use) {
+ dev_err(DEV, "Activity log still in use!\n");
+ lc_destroy(n);
+ return -EBUSY;
+ } else {
+ if (t)
+ lc_destroy(t);
+ }
+	drbd_md_mark_dirty(mdev); /* we changed mdev->act_log->nr_elements */
+ return 0;
+}
+
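+/* Apply @max_bio_size to our request queue; if a local disk is attached,
+ * cap it by the backing queue's limits and inherit its readahead setting. */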
+static void drbd_setup_queue_param(struct drbd_conf *mdev, unsigned int max_bio_size)
+{
+ struct request_queue * const q = mdev->rq_queue;
+ int max_hw_sectors = max_bio_size >> 9;
+ int max_segments = 0;
+
+ if (get_ldev_if_state(mdev, D_ATTACHING)) {
+ struct request_queue * const b = mdev->ldev->backing_bdev->bd_disk->queue;
+
+ max_hw_sectors = min(queue_max_hw_sectors(b), max_bio_size >> 9);
+ max_segments = mdev->ldev->dc.max_bio_bvecs;
+ put_ldev(mdev);
+ }
+
+ blk_queue_logical_block_size(q, 512);
+ blk_queue_max_hw_sectors(q, max_hw_sectors);
+ /* This is the workaround for "bio would need to, but cannot, be split" */
+ blk_queue_max_segments(q, max_segments ? max_segments : BLK_MAX_SEGMENTS);
+ blk_queue_segment_boundary(q, PAGE_CACHE_SIZE-1);
+
+ if (get_ldev_if_state(mdev, D_ATTACHING)) {
+ struct request_queue * const b = mdev->ldev->backing_bdev->bd_disk->queue;
+
+ blk_queue_stack_limits(q, b);
+
+ if (q->backing_dev_info.ra_pages != b->backing_dev_info.ra_pages) {
+ dev_info(DEV, "Adjusting my ra_pages to backing device's (%lu -> %lu)\n",
+ q->backing_dev_info.ra_pages,
+ b->backing_dev_info.ra_pages);
+ q->backing_dev_info.ra_pages = b->backing_dev_info.ra_pages;
+ }
+ put_ldev(mdev);
+ }
+}
+
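+/* Recompute the effective max BIO size as the minimum of what the local
+ * backing device and the peer (depending on the agreed protocol version)
+ * can handle, and apply it to our request queue. */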
+void drbd_reconsider_max_bio_size(struct drbd_conf *mdev)
+{
+ int now, new, local, peer;
+
+ now = queue_max_hw_sectors(mdev->rq_queue) << 9;
+	local = mdev->local_max_bio_size; /* possibly just the last known value, from volatile memory */
+	peer = mdev->peer_max_bio_size; /* possibly just the last known value, from meta data */
+
+ if (get_ldev_if_state(mdev, D_ATTACHING)) {
+ local = queue_max_hw_sectors(mdev->ldev->backing_bdev->bd_disk->queue) << 9;
+ mdev->local_max_bio_size = local;
+ put_ldev(mdev);
+ }
+
+	/* We may ignore peer limits if the peer is modern enough.
+	   From 8.3.8 onwards the peer can use multiple
+	   BIOs for a single peer_request. */
+ if (mdev->state.conn >= C_CONNECTED) {
+ if (mdev->agreed_pro_version < 94)
+ peer = mdev->peer_max_bio_size;
+ else if (mdev->agreed_pro_version == 94)
+ peer = DRBD_MAX_SIZE_H80_PACKET;
+ else /* drbd 8.3.8 onwards */
+ peer = DRBD_MAX_BIO_SIZE;
+ }
+
+ new = min_t(int, local, peer);
+
+ if (mdev->state.role == R_PRIMARY && new < now)
+ dev_err(DEV, "ASSERT FAILED new < now; (%d < %d)\n", new, now);
+
+ if (new != now)
+ dev_info(DEV, "max BIO size = %u\n", new);
+
+ drbd_setup_queue_param(mdev, new);
+}
+
+/* serialize deconfig (worker exiting, doing cleanup)
+ * and reconfig (drbdsetup disk, drbdsetup net)
+ *
+ * Wait for a potentially exiting worker, then restart it,
+ * or start a new one. Flush any pending work, there may still be an
+ * after_state_change queued.
+ */
+static void drbd_reconfig_start(struct drbd_conf *mdev)
+{
+ wait_event(mdev->state_wait, !test_and_set_bit(CONFIG_PENDING, &mdev->flags));
+ wait_event(mdev->state_wait, !test_bit(DEVICE_DYING, &mdev->flags));
+ drbd_thread_start(&mdev->worker);
+ drbd_flush_workqueue(mdev);
+}
+
+/* if still unconfigured, stops worker again.
+ * if configured now, clears CONFIG_PENDING.
+ * wakes potential waiters */
+static void drbd_reconfig_done(struct drbd_conf *mdev)
+{
+ spin_lock_irq(&mdev->req_lock);
+ if (mdev->state.disk == D_DISKLESS &&
+ mdev->state.conn == C_STANDALONE &&
+ mdev->state.role == R_SECONDARY) {
+ set_bit(DEVICE_DYING, &mdev->flags);
+ drbd_thread_stop_nowait(&mdev->worker);
+ } else
+ clear_bit(CONFIG_PENDING, &mdev->flags);
+ spin_unlock_irq(&mdev->req_lock);
+ wake_up(&mdev->state_wait);
+}
+
+/* Make sure IO is suspended before calling this function. */
+static void drbd_suspend_al(struct drbd_conf *mdev)
+{
+ int s = 0;
+
+ if (lc_try_lock(mdev->act_log)) {
+ drbd_al_shrink(mdev);
+ lc_unlock(mdev->act_log);
+ } else {
+ dev_warn(DEV, "Failed to lock al in drbd_suspend_al()\n");
+ return;
+ }
+
+ spin_lock_irq(&mdev->req_lock);
+ if (mdev->state.conn < C_CONNECTED)
+ s = !test_and_set_bit(AL_SUSPENDED, &mdev->flags);
+
+ spin_unlock_irq(&mdev->req_lock);
+
+ if (s)
+ dev_info(DEV, "Suspended AL updates\n");
+}
+
+/* always returns 0;
+ * the interesting return code is in reply->ret_code */
+static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
+ struct drbd_nl_cfg_reply *reply)
+{
+ enum drbd_ret_code retcode;
+ enum determine_dev_size dd;
+ sector_t max_possible_sectors;
+ sector_t min_md_device_sectors;
+ struct drbd_backing_dev *nbc = NULL; /* new_backing_conf */
+ struct block_device *bdev;
+ struct lru_cache *resync_lru = NULL;
+ union drbd_state ns, os;
+ enum drbd_state_rv rv;
+ int cp_discovered = 0;
+ int logical_block_size;
+
+ drbd_reconfig_start(mdev);
+
+ /* if you want to reconfigure, please tear down first */
+ if (mdev->state.disk > D_DISKLESS) {
+ retcode = ERR_DISK_CONFIGURED;
+ goto fail;
+ }
+ /* It may just now have detached because of IO error. Make sure
+ * drbd_ldev_destroy is done already, we may end up here very fast,
+ * e.g. if someone calls attach from the on-io-error handler,
+ * to realize a "hot spare" feature (not that I'd recommend that) */
+ wait_event(mdev->misc_wait, !atomic_read(&mdev->local_cnt));
+
+ /* allocation not in the IO path, cqueue thread context */
+ nbc = kzalloc(sizeof(struct drbd_backing_dev), GFP_KERNEL);
+ if (!nbc) {
+ retcode = ERR_NOMEM;
+ goto fail;
+ }
+
+ nbc->dc.disk_size = DRBD_DISK_SIZE_SECT_DEF;
+ nbc->dc.on_io_error = DRBD_ON_IO_ERROR_DEF;
+ nbc->dc.fencing = DRBD_FENCING_DEF;
+ nbc->dc.max_bio_bvecs = DRBD_MAX_BIO_BVECS_DEF;
+
+ if (!disk_conf_from_tags(mdev, nlp->tag_list, &nbc->dc)) {
+ retcode = ERR_MANDATORY_TAG;
+ goto fail;
+ }
+
+ if (nbc->dc.meta_dev_idx < DRBD_MD_INDEX_FLEX_INT) {
+ retcode = ERR_MD_IDX_INVALID;
+ goto fail;
+ }
+
+ if (get_net_conf(mdev)) {
+ int prot = mdev->net_conf->wire_protocol;
+ put_net_conf(mdev);
+ if (nbc->dc.fencing == FP_STONITH && prot == DRBD_PROT_A) {
+ retcode = ERR_STONITH_AND_PROT_A;
+ goto fail;
+ }
+ }
+
+ bdev = blkdev_get_by_path(nbc->dc.backing_dev,
+ FMODE_READ | FMODE_WRITE | FMODE_EXCL, mdev);
+ if (IS_ERR(bdev)) {
+ dev_err(DEV, "open(\"%s\") failed with %ld\n", nbc->dc.backing_dev,
+ PTR_ERR(bdev));
+ retcode = ERR_OPEN_DISK;
+ goto fail;
+ }
+ nbc->backing_bdev = bdev;
+
+ /*
+ * meta_dev_idx >= 0: external fixed size, possibly multiple
+ * drbd sharing one meta device. TODO in that case, paranoia
+ * check that [md_bdev, meta_dev_idx] is not yet used by some
+ * other drbd minor! (if you use drbd.conf + drbdadm, that
+ * should check it for you already; but if you don't, or
+ * someone fooled it, we need to double check here)
+ */
+ bdev = blkdev_get_by_path(nbc->dc.meta_dev,
+ FMODE_READ | FMODE_WRITE | FMODE_EXCL,
+ (nbc->dc.meta_dev_idx < 0) ?
+ (void *)mdev : (void *)drbd_m_holder);
+ if (IS_ERR(bdev)) {
+ dev_err(DEV, "open(\"%s\") failed with %ld\n", nbc->dc.meta_dev,
+ PTR_ERR(bdev));
+ retcode = ERR_OPEN_MD_DISK;
+ goto fail;
+ }
+ nbc->md_bdev = bdev;
+
+ if ((nbc->backing_bdev == nbc->md_bdev) !=
+ (nbc->dc.meta_dev_idx == DRBD_MD_INDEX_INTERNAL ||
+ nbc->dc.meta_dev_idx == DRBD_MD_INDEX_FLEX_INT)) {
+ retcode = ERR_MD_IDX_INVALID;
+ goto fail;
+ }
+
+ resync_lru = lc_create("resync", drbd_bm_ext_cache,
+ 61, sizeof(struct bm_extent),
+ offsetof(struct bm_extent, lce));
+ if (!resync_lru) {
+ retcode = ERR_NOMEM;
+ goto fail;
+ }
+
+ /* RT - for drbd_get_max_capacity() DRBD_MD_INDEX_FLEX_INT */
+ drbd_md_set_sector_offsets(mdev, nbc);
+
+ if (drbd_get_max_capacity(nbc) < nbc->dc.disk_size) {
+ dev_err(DEV, "max capacity %llu smaller than disk size %llu\n",
+ (unsigned long long) drbd_get_max_capacity(nbc),
+ (unsigned long long) nbc->dc.disk_size);
+ retcode = ERR_DISK_TO_SMALL;
+ goto fail;
+ }
+
+ if (nbc->dc.meta_dev_idx < 0) {
+ max_possible_sectors = DRBD_MAX_SECTORS_FLEX;
+ /* at least one MB, otherwise it does not make sense */
+ min_md_device_sectors = (2<<10);
+ } else {
+ max_possible_sectors = DRBD_MAX_SECTORS;
+ min_md_device_sectors = MD_RESERVED_SECT * (nbc->dc.meta_dev_idx + 1);
+ }
+
+ if (drbd_get_capacity(nbc->md_bdev) < min_md_device_sectors) {
+ retcode = ERR_MD_DISK_TO_SMALL;
+ dev_warn(DEV, "refusing attach: md-device too small, "
+ "at least %llu sectors needed for this meta-disk type\n",
+ (unsigned long long) min_md_device_sectors);
+ goto fail;
+ }
+
+ /* Make sure the new disk is big enough
+ * (we may currently be R_PRIMARY with no local disk...) */
+ if (drbd_get_max_capacity(nbc) <
+ drbd_get_capacity(mdev->this_bdev)) {
+ retcode = ERR_DISK_TO_SMALL;
+ goto fail;
+ }
+
+ nbc->known_size = drbd_get_capacity(nbc->backing_bdev);
+
+ if (nbc->known_size > max_possible_sectors) {
+ dev_warn(DEV, "==> truncating very big lower level device "
+ "to currently maximum possible %llu sectors <==\n",
+ (unsigned long long) max_possible_sectors);
+ if (nbc->dc.meta_dev_idx >= 0)
+ dev_warn(DEV, "==>> using internal or flexible "
+ "meta data may help <<==\n");
+ }
+
+ drbd_suspend_io(mdev);
+ /* also wait for the last barrier ack. */
+ wait_event(mdev->misc_wait, !atomic_read(&mdev->ap_pending_cnt) || is_susp(mdev->state));
+ /* and for any other previously queued work */
+ drbd_flush_workqueue(mdev);
+
+ rv = _drbd_request_state(mdev, NS(disk, D_ATTACHING), CS_VERBOSE);
+ retcode = rv; /* FIXME: Type mismatch. */
+ drbd_resume_io(mdev);
+ if (rv < SS_SUCCESS)
+ goto fail;
+
+ if (!get_ldev_if_state(mdev, D_ATTACHING))
+ goto force_diskless;
+
+ drbd_md_set_sector_offsets(mdev, nbc);
+
+ /* allocate a second IO page if logical_block_size != 512 */
+ logical_block_size = bdev_logical_block_size(nbc->md_bdev);
+ if (logical_block_size == 0)
+ logical_block_size = MD_SECTOR_SIZE;
+
+ if (logical_block_size != MD_SECTOR_SIZE) {
+ if (!mdev->md_io_tmpp) {
+ struct page *page = alloc_page(GFP_NOIO);
+ if (!page)
+ goto force_diskless_dec;
+
+ dev_warn(DEV, "Meta data's bdev logical_block_size = %d != %d\n",
+ logical_block_size, MD_SECTOR_SIZE);
+ dev_warn(DEV, "Workaround engaged (has performance impact).\n");
+
+ mdev->md_io_tmpp = page;
+ }
+ }
+
+ if (!mdev->bitmap) {
+ if (drbd_bm_init(mdev)) {
+ retcode = ERR_NOMEM;
+ goto force_diskless_dec;
+ }
+ }
+
+ retcode = drbd_md_read(mdev, nbc);
+ if (retcode != NO_ERROR)
+ goto force_diskless_dec;
+
+ if (mdev->state.conn < C_CONNECTED &&
+ mdev->state.role == R_PRIMARY &&
+ (mdev->ed_uuid & ~((u64)1)) != (nbc->md.uuid[UI_CURRENT] & ~((u64)1))) {
+ dev_err(DEV, "Can only attach to data with current UUID=%016llX\n",
+ (unsigned long long)mdev->ed_uuid);
+ retcode = ERR_DATA_NOT_CURRENT;
+ goto force_diskless_dec;
+ }
+
+ /* Since we are diskless, fix the activity log first... */
+ if (drbd_check_al_size(mdev)) {
+ retcode = ERR_NOMEM;
+ goto force_diskless_dec;
+ }
+
+ /* Prevent shrinking of consistent devices! */
+ if (drbd_md_test_flag(nbc, MDF_CONSISTENT) &&
+ drbd_new_dev_size(mdev, nbc, 0) < nbc->md.la_size_sect) {
+ dev_warn(DEV, "refusing to truncate a consistent device\n");
+ retcode = ERR_DISK_TO_SMALL;
+ goto force_diskless_dec;
+ }
+
+ if (!drbd_al_read_log(mdev, nbc)) {
+ retcode = ERR_IO_MD_DISK;
+ goto force_diskless_dec;
+ }
+
+ /* Reset the "barriers don't work" bits here, then force meta data to
+ * be written, to ensure we determine if barriers are supported. */
+ if (nbc->dc.no_md_flush)
+ set_bit(MD_NO_FUA, &mdev->flags);
+ else
+ clear_bit(MD_NO_FUA, &mdev->flags);
+
+ /* Point of no return reached.
+ * Devices and memory are no longer released by error cleanup below.
+ * Now mdev takes over responsibility, and the state engine should
+ * clean it up somewhere. */
+ D_ASSERT(mdev->ldev == NULL);
+ mdev->ldev = nbc;
+ mdev->resync = resync_lru;
+ nbc = NULL;
+ resync_lru = NULL;
+
+ mdev->write_ordering = WO_bdev_flush;
+ drbd_bump_write_ordering(mdev, WO_bdev_flush);
+
+ if (drbd_md_test_flag(mdev->ldev, MDF_CRASHED_PRIMARY))
+ set_bit(CRASHED_PRIMARY, &mdev->flags);
+ else
+ clear_bit(CRASHED_PRIMARY, &mdev->flags);
+
+ if (drbd_md_test_flag(mdev->ldev, MDF_PRIMARY_IND) &&
+ !(mdev->state.role == R_PRIMARY && mdev->state.susp_nod)) {
+ set_bit(CRASHED_PRIMARY, &mdev->flags);
+ cp_discovered = 1;
+ }
+
+ mdev->send_cnt = 0;
+ mdev->recv_cnt = 0;
+ mdev->read_cnt = 0;
+ mdev->writ_cnt = 0;
+
+ drbd_reconsider_max_bio_size(mdev);
+
+ /* If I am currently not R_PRIMARY,
+ * but meta data primary indicator is set,
+ * I just now recover from a hard crash,
+ * and have been R_PRIMARY before that crash.
+ *
+ * Now, if I had no connection before that crash
+ * (have been degraded R_PRIMARY), chances are that
+ * I won't find my peer now either.
+ *
+ * In that case, and _only_ in that case,
+ * we use the degr-wfc-timeout instead of the default,
+ * so we can automatically recover from a crash of a
+ * degraded but active "cluster" after a certain timeout.
+ */
+ clear_bit(USE_DEGR_WFC_T, &mdev->flags);
+ if (mdev->state.role != R_PRIMARY &&
+ drbd_md_test_flag(mdev->ldev, MDF_PRIMARY_IND) &&
+ !drbd_md_test_flag(mdev->ldev, MDF_CONNECTED_IND))
+ set_bit(USE_DEGR_WFC_T, &mdev->flags);
+
+ dd = drbd_determine_dev_size(mdev, 0);
+ if (dd == dev_size_error) {
+ retcode = ERR_NOMEM_BITMAP;
+ goto force_diskless_dec;
+ } else if (dd == grew)
+ set_bit(RESYNC_AFTER_NEG, &mdev->flags);
+
+ if (drbd_md_test_flag(mdev->ldev, MDF_FULL_SYNC)) {
+ dev_info(DEV, "Assuming that all blocks are out of sync "
+ "(aka FullSync)\n");
+ if (drbd_bitmap_io(mdev, &drbd_bmio_set_n_write,
+ "set_n_write from attaching", BM_LOCKED_MASK)) {
+ retcode = ERR_IO_MD_DISK;
+ goto force_diskless_dec;
+ }
+ } else {
+ if (drbd_bitmap_io(mdev, &drbd_bm_read,
+ "read from attaching", BM_LOCKED_MASK) < 0) {
+ retcode = ERR_IO_MD_DISK;
+ goto force_diskless_dec;
+ }
+ }
+
+ if (cp_discovered) {
+ drbd_al_apply_to_bm(mdev);
+ if (drbd_bitmap_io(mdev, &drbd_bm_write,
+ "crashed primary apply AL", BM_LOCKED_MASK)) {
+ retcode = ERR_IO_MD_DISK;
+ goto force_diskless_dec;
+ }
+ }
+
+ if (_drbd_bm_total_weight(mdev) == drbd_bm_bits(mdev))
+ drbd_suspend_al(mdev); /* IO is still suspended here... */
+
+ spin_lock_irq(&mdev->req_lock);
+ os = mdev->state;
+ ns.i = os.i;
+ /* If MDF_CONSISTENT is not set, go into D_INCONSISTENT disk state;
+ otherwise check MDF_WAS_UP_TO_DATE:
+ if MDF_WAS_UP_TO_DATE is not set, go into D_OUTDATED disk state,
+ otherwise into D_CONSISTENT state.
+ */
+ if (drbd_md_test_flag(mdev->ldev, MDF_CONSISTENT)) {
+ if (drbd_md_test_flag(mdev->ldev, MDF_WAS_UP_TO_DATE))
+ ns.disk = D_CONSISTENT;
+ else
+ ns.disk = D_OUTDATED;
+ } else {
+ ns.disk = D_INCONSISTENT;
+ }
+
+ if (drbd_md_test_flag(mdev->ldev, MDF_PEER_OUT_DATED))
+ ns.pdsk = D_OUTDATED;
+
+ if ( ns.disk == D_CONSISTENT &&
+ (ns.pdsk == D_OUTDATED || mdev->ldev->dc.fencing == FP_DONT_CARE))
+ ns.disk = D_UP_TO_DATE;
+
+ /* All tests on MDF_PRIMARY_IND, MDF_CONNECTED_IND,
+ MDF_CONSISTENT and MDF_WAS_UP_TO_DATE must happen before
+ this point, because drbd_request_state() modifies these
+ flags. */
+
+ /* In case we are C_CONNECTED, postpone any decision on the new disk
+ state until after the negotiation phase. */
+ if (mdev->state.conn == C_CONNECTED) {
+ mdev->new_state_tmp.i = ns.i;
+ ns.i = os.i;
+ ns.disk = D_NEGOTIATING;
+
+ /* We expect to receive up-to-date UUIDs soon.
+ To avoid a race in receive_state, free p_uuid while
+ holding req_lock. I.e. atomic with the state change */
+ kfree(mdev->p_uuid);
+ mdev->p_uuid = NULL;
+ }
+
+ rv = _drbd_set_state(mdev, ns, CS_VERBOSE, NULL);
+ ns = mdev->state;
+ spin_unlock_irq(&mdev->req_lock);
+
+ if (rv < SS_SUCCESS)
+ goto force_diskless_dec;
+
+ if (mdev->state.role == R_PRIMARY)
+ mdev->ldev->md.uuid[UI_CURRENT] |= (u64)1;
+ else
+ mdev->ldev->md.uuid[UI_CURRENT] &= ~(u64)1;
+
+ drbd_md_mark_dirty(mdev);
+ drbd_md_sync(mdev);
+
+ kobject_uevent(&disk_to_dev(mdev->vdisk)->kobj, KOBJ_CHANGE);
+ put_ldev(mdev);
+ reply->ret_code = retcode;
+ drbd_reconfig_done(mdev);
+ return 0;
+
+ force_diskless_dec:
+ put_ldev(mdev);
+ force_diskless:
+ drbd_force_state(mdev, NS(disk, D_FAILED));
+ drbd_md_sync(mdev);
+ fail:
+ if (nbc) {
+ if (nbc->backing_bdev)
+ blkdev_put(nbc->backing_bdev,
+ FMODE_READ | FMODE_WRITE | FMODE_EXCL);
+ if (nbc->md_bdev)
+ blkdev_put(nbc->md_bdev,
+ FMODE_READ | FMODE_WRITE | FMODE_EXCL);
+ kfree(nbc);
+ }
+ lc_destroy(resync_lru);
+
+ reply->ret_code = retcode;
+ drbd_reconfig_done(mdev);
+ return 0;
+}
+
+/* Detaching the disk is a process in multiple stages. First we need to lock
+ * out application IO, in-flight IO, IO stuck in drbd_al_begin_io.
+ * Then we transition to D_DISKLESS, and wait for put_ldev() to return all
+ * internal references as well.
+ * Only then have we finally detached. */
+static int drbd_nl_detach(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
+ struct drbd_nl_cfg_reply *reply)
+{
+ enum drbd_ret_code retcode;
+ int ret;
+ drbd_suspend_io(mdev); /* so no-one is stuck in drbd_al_begin_io */
+ retcode = drbd_request_state(mdev, NS(disk, D_FAILED));
+ /* D_FAILED will transition to DISKLESS. */
+ ret = wait_event_interruptible(mdev->misc_wait,
+ mdev->state.disk != D_FAILED);
+ drbd_resume_io(mdev);
+ if ((int)retcode == (int)SS_IS_DISKLESS)
+ retcode = SS_NOTHING_TO_DO;
+ if (ret)
+ retcode = ERR_INTR;
+ reply->ret_code = retcode;
+ return 0;
+}
+
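+/* Configure the replication link: validate the settings, allocate hash
+ * tables and crypto transforms as needed, then install the new net_conf
+ * under req_lock. Always returns 0; the result is in reply->ret_code. */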
+static int drbd_nl_net_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
+ struct drbd_nl_cfg_reply *reply)
+{
+ int i, ns;
+ enum drbd_ret_code retcode;
+ struct net_conf *new_conf = NULL;
+ struct crypto_hash *tfm = NULL;
+ struct crypto_hash *integrity_w_tfm = NULL;
+ struct crypto_hash *integrity_r_tfm = NULL;
+ struct hlist_head *new_tl_hash = NULL;
+ struct hlist_head *new_ee_hash = NULL;
+ struct drbd_conf *odev;
+ char hmac_name[CRYPTO_MAX_ALG_NAME];
+ void *int_dig_out = NULL;
+ void *int_dig_in = NULL;
+ void *int_dig_vv = NULL;
+ struct sockaddr *new_my_addr, *new_peer_addr, *taken_addr;
+
+ drbd_reconfig_start(mdev);
+
+ if (mdev->state.conn > C_STANDALONE) {
+ retcode = ERR_NET_CONFIGURED;
+ goto fail;
+ }
+
+ /* allocation not in the IO path, cqueue thread context */
+ new_conf = kzalloc(sizeof(struct net_conf), GFP_KERNEL);
+ if (!new_conf) {
+ retcode = ERR_NOMEM;
+ goto fail;
+ }
+
+ new_conf->timeout = DRBD_TIMEOUT_DEF;
+ new_conf->try_connect_int = DRBD_CONNECT_INT_DEF;
+ new_conf->ping_int = DRBD_PING_INT_DEF;
+ new_conf->max_epoch_size = DRBD_MAX_EPOCH_SIZE_DEF;
+ new_conf->max_buffers = DRBD_MAX_BUFFERS_DEF;
+ new_conf->unplug_watermark = DRBD_UNPLUG_WATERMARK_DEF;
+ new_conf->sndbuf_size = DRBD_SNDBUF_SIZE_DEF;
+ new_conf->rcvbuf_size = DRBD_RCVBUF_SIZE_DEF;
+ new_conf->ko_count = DRBD_KO_COUNT_DEF;
+ new_conf->after_sb_0p = DRBD_AFTER_SB_0P_DEF;
+ new_conf->after_sb_1p = DRBD_AFTER_SB_1P_DEF;
+ new_conf->after_sb_2p = DRBD_AFTER_SB_2P_DEF;
+ new_conf->want_lose = 0;
+ new_conf->two_primaries = 0;
+ new_conf->wire_protocol = DRBD_PROT_C;
+ new_conf->ping_timeo = DRBD_PING_TIMEO_DEF;
+ new_conf->rr_conflict = DRBD_RR_CONFLICT_DEF;
+ new_conf->on_congestion = DRBD_ON_CONGESTION_DEF;
+ new_conf->cong_extents = DRBD_CONG_EXTENTS_DEF;
+
+ if (!net_conf_from_tags(mdev, nlp->tag_list, new_conf)) {
+ retcode = ERR_MANDATORY_TAG;
+ goto fail;
+ }
+
+ if (new_conf->two_primaries
+ && (new_conf->wire_protocol != DRBD_PROT_C)) {
+ retcode = ERR_NOT_PROTO_C;
+ goto fail;
+ }
+
+ if (get_ldev(mdev)) {
+ enum drbd_fencing_p fp = mdev->ldev->dc.fencing;
+ put_ldev(mdev);
+ if (new_conf->wire_protocol == DRBD_PROT_A && fp == FP_STONITH) {
+ retcode = ERR_STONITH_AND_PROT_A;
+ goto fail;
+ }
+ }
+
+ if (new_conf->on_congestion != OC_BLOCK && new_conf->wire_protocol != DRBD_PROT_A) {
+ retcode = ERR_CONG_NOT_PROTO_A;
+ goto fail;
+ }
+
+ if (mdev->state.role == R_PRIMARY && new_conf->want_lose) {
+ retcode = ERR_DISCARD;
+ goto fail;
+ }
+
+ retcode = NO_ERROR;
+
+ new_my_addr = (struct sockaddr *)&new_conf->my_addr;
+ new_peer_addr = (struct sockaddr *)&new_conf->peer_addr;
+ for (i = 0; i < minor_count; i++) {
+ odev = minor_to_mdev(i);
+ if (!odev || odev == mdev)
+ continue;
+ if (get_net_conf(odev)) {
+ taken_addr = (struct sockaddr *)&odev->net_conf->my_addr;
+ if (new_conf->my_addr_len == odev->net_conf->my_addr_len &&
+ !memcmp(new_my_addr, taken_addr, new_conf->my_addr_len))
+ retcode = ERR_LOCAL_ADDR;
+
+ taken_addr = (struct sockaddr *)&odev->net_conf->peer_addr;
+ if (new_conf->peer_addr_len == odev->net_conf->peer_addr_len &&
+ !memcmp(new_peer_addr, taken_addr, new_conf->peer_addr_len))
+ retcode = ERR_PEER_ADDR;
+
+ put_net_conf(odev);
+ if (retcode != NO_ERROR)
+ goto fail;
+ }
+ }
+
+ if (new_conf->cram_hmac_alg[0] != 0) {
+ snprintf(hmac_name, CRYPTO_MAX_ALG_NAME, "hmac(%s)",
+ new_conf->cram_hmac_alg);
+ tfm = crypto_alloc_hash(hmac_name, 0, CRYPTO_ALG_ASYNC);
+ if (IS_ERR(tfm)) {
+ tfm = NULL;
+ retcode = ERR_AUTH_ALG;
+ goto fail;
+ }
+
+ if (!drbd_crypto_is_hash(crypto_hash_tfm(tfm))) {
+ retcode = ERR_AUTH_ALG_ND;
+ goto fail;
+ }
+ }
+
+ if (new_conf->integrity_alg[0]) {
+ integrity_w_tfm = crypto_alloc_hash(new_conf->integrity_alg, 0, CRYPTO_ALG_ASYNC);
+ if (IS_ERR(integrity_w_tfm)) {
+ integrity_w_tfm = NULL;
+ retcode = ERR_INTEGRITY_ALG;
+ goto fail;
+ }
+
+ if (!drbd_crypto_is_hash(crypto_hash_tfm(integrity_w_tfm))) {
+ retcode = ERR_INTEGRITY_ALG_ND;
+ goto fail;
+ }
+
+ integrity_r_tfm = crypto_alloc_hash(new_conf->integrity_alg, 0, CRYPTO_ALG_ASYNC);
+ if (IS_ERR(integrity_r_tfm)) {
+ integrity_r_tfm = NULL;
+ retcode = ERR_INTEGRITY_ALG;
+ goto fail;
+ }
+ }
+
+ ns = new_conf->max_epoch_size/8;
+ if (mdev->tl_hash_s != ns) {
+ new_tl_hash = kzalloc(ns*sizeof(void *), GFP_KERNEL);
+ if (!new_tl_hash) {
+ retcode = ERR_NOMEM;
+ goto fail;
+ }
+ }
+
+ ns = new_conf->max_buffers/8;
+ if (new_conf->two_primaries && (mdev->ee_hash_s != ns)) {
+ new_ee_hash = kzalloc(ns*sizeof(void *), GFP_KERNEL);
+ if (!new_ee_hash) {
+ retcode = ERR_NOMEM;
+ goto fail;
+ }
+ }
+
+ ((char *)new_conf->shared_secret)[SHARED_SECRET_MAX-1] = 0;
+
+ if (integrity_w_tfm) {
+ i = crypto_hash_digestsize(integrity_w_tfm);
+ int_dig_out = kmalloc(i, GFP_KERNEL);
+ if (!int_dig_out) {
+ retcode = ERR_NOMEM;
+ goto fail;
+ }
+ int_dig_in = kmalloc(i, GFP_KERNEL);
+ if (!int_dig_in) {
+ retcode = ERR_NOMEM;
+ goto fail;
+ }
+ int_dig_vv = kmalloc(i, GFP_KERNEL);
+ if (!int_dig_vv) {
+ retcode = ERR_NOMEM;
+ goto fail;
+ }
+ }
+
+ if (!mdev->bitmap) {
+ if (drbd_bm_init(mdev)) {
+ retcode = ERR_NOMEM;
+ goto fail;
+ }
+ }
+
+ drbd_flush_workqueue(mdev);
+ spin_lock_irq(&mdev->req_lock);
+ if (mdev->net_conf != NULL) {
+ retcode = ERR_NET_CONFIGURED;
+ spin_unlock_irq(&mdev->req_lock);
+ goto fail;
+ }
+ mdev->net_conf = new_conf;
+
+ mdev->send_cnt = 0;
+ mdev->recv_cnt = 0;
+
+ if (new_tl_hash) {
+ kfree(mdev->tl_hash);
+ mdev->tl_hash_s = mdev->net_conf->max_epoch_size/8;
+ mdev->tl_hash = new_tl_hash;
+ }
+
+ if (new_ee_hash) {
+ kfree(mdev->ee_hash);
+ mdev->ee_hash_s = mdev->net_conf->max_buffers/8;
+ mdev->ee_hash = new_ee_hash;
+ }
+
+ crypto_free_hash(mdev->cram_hmac_tfm);
+ mdev->cram_hmac_tfm = tfm;
+
+ crypto_free_hash(mdev->integrity_w_tfm);
+ mdev->integrity_w_tfm = integrity_w_tfm;
+
+ crypto_free_hash(mdev->integrity_r_tfm);
+ mdev->integrity_r_tfm = integrity_r_tfm;
+
+ kfree(mdev->int_dig_out);
+ kfree(mdev->int_dig_in);
+ kfree(mdev->int_dig_vv);
+ mdev->int_dig_out = int_dig_out;
+ mdev->int_dig_in = int_dig_in;
+ mdev->int_dig_vv = int_dig_vv;
+ retcode = _drbd_set_state(_NS(mdev, conn, C_UNCONNECTED), CS_VERBOSE, NULL);
+ spin_unlock_irq(&mdev->req_lock);
+
+ kobject_uevent(&disk_to_dev(mdev->vdisk)->kobj, KOBJ_CHANGE);
+ reply->ret_code = retcode;
+ drbd_reconfig_done(mdev);
+ return 0;
+
+fail:
+ kfree(int_dig_out);
+ kfree(int_dig_in);
+ kfree(int_dig_vv);
+ crypto_free_hash(tfm);
+ crypto_free_hash(integrity_w_tfm);
+ crypto_free_hash(integrity_r_tfm);
+ kfree(new_tl_hash);
+ kfree(new_ee_hash);
+ kfree(new_conf);
+
+ reply->ret_code = retcode;
+ drbd_reconfig_done(mdev);
+ return 0;
+}
+
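+/* Tear down the connection. With the force flag we go straight to
+ * C_DISCONNECTING; otherwise we request an ordered disconnect, possibly
+ * outdating our own disk or the peer's disk on the way. */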
+static int drbd_nl_disconnect(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
+ struct drbd_nl_cfg_reply *reply)
+{
+ int retcode;
+ struct disconnect dc;
+
+ memset(&dc, 0, sizeof(struct disconnect));
+ if (!disconnect_from_tags(mdev, nlp->tag_list, &dc)) {
+ retcode = ERR_MANDATORY_TAG;
+ goto fail;
+ }
+
+ if (dc.force) {
+ spin_lock_irq(&mdev->req_lock);
+ if (mdev->state.conn >= C_WF_CONNECTION)
+ _drbd_set_state(_NS(mdev, conn, C_DISCONNECTING), CS_HARD, NULL);
+ spin_unlock_irq(&mdev->req_lock);
+ goto done;
+ }
+
+ retcode = _drbd_request_state(mdev, NS(conn, C_DISCONNECTING), CS_ORDERED);
+
+ if (retcode == SS_NOTHING_TO_DO)
+ goto done;
+ else if (retcode == SS_ALREADY_STANDALONE)
+ goto done;
+ else if (retcode == SS_PRIMARY_NOP) {
+ /* Our state checking code wants to see the peer outdated. */
+ retcode = drbd_request_state(mdev, NS2(conn, C_DISCONNECTING,
+ pdsk, D_OUTDATED));
+ } else if (retcode == SS_CW_FAILED_BY_PEER) {
+ /* The peer probably wants to see us outdated. */
+ retcode = _drbd_request_state(mdev, NS2(conn, C_DISCONNECTING,
+ disk, D_OUTDATED),
+ CS_ORDERED);
+ if (retcode == SS_IS_DISKLESS || retcode == SS_LOWER_THAN_OUTDATED) {
+ drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
+ retcode = SS_SUCCESS;
+ }
+ }
+
+ if (retcode < SS_SUCCESS)
+ goto fail;
+
+ if (wait_event_interruptible(mdev->state_wait,
+ mdev->state.conn != C_DISCONNECTING)) {
+ /* Do not test for mdev->state.conn == C_STANDALONE, since
+ someone else might connect us in the meantime! */
+ retcode = ERR_INTR;
+ goto fail;
+ }
+
+ done:
+ retcode = NO_ERROR;
+ fail:
+ drbd_md_sync(mdev);
+ reply->ret_code = retcode;
+ return 0;
+}
+
+void resync_after_online_grow(struct drbd_conf *mdev)
+{
+ int iass; /* I am sync source */
+
+ dev_info(DEV, "Resync of new storage after online grow\n");
+ if (mdev->state.role != mdev->state.peer)
+ iass = (mdev->state.role == R_PRIMARY);
+ else
+ iass = test_bit(DISCARD_CONCURRENT, &mdev->flags);
+
+ if (iass)
+ drbd_start_resync(mdev, C_SYNC_SOURCE);
+ else
+ _drbd_request_state(mdev, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE + CS_SERIALIZE);
+}
+
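+/* Resize the device. Refused while a resync is running, while both nodes
+ * are Secondary, or without a local disk. */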
+static int drbd_nl_resize(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
+ struct drbd_nl_cfg_reply *reply)
+{
+ struct resize rs;
+ int retcode = NO_ERROR;
+ enum determine_dev_size dd;
+ enum dds_flags ddsf;
+
+ memset(&rs, 0, sizeof(struct resize));
+ if (!resize_from_tags(mdev, nlp->tag_list, &rs)) {
+ retcode = ERR_MANDATORY_TAG;
+ goto fail;
+ }
+
+ if (mdev->state.conn > C_CONNECTED) {
+ retcode = ERR_RESIZE_RESYNC;
+ goto fail;
+ }
+
+ if (mdev->state.role == R_SECONDARY &&
+ mdev->state.peer == R_SECONDARY) {
+ retcode = ERR_NO_PRIMARY;
+ goto fail;
+ }
+
+ if (!get_ldev(mdev)) {
+ retcode = ERR_NO_DISK;
+ goto fail;
+ }
+
+ if (rs.no_resync && mdev->agreed_pro_version < 93) {
+ retcode = ERR_NEED_APV_93;
+ goto fail;
+ }
+
+ if (mdev->ldev->known_size != drbd_get_capacity(mdev->ldev->backing_bdev))
+ mdev->ldev->known_size = drbd_get_capacity(mdev->ldev->backing_bdev);
+
+ mdev->ldev->dc.disk_size = (sector_t)rs.resize_size;
+ ddsf = (rs.resize_force ? DDSF_FORCED : 0) | (rs.no_resync ? DDSF_NO_RESYNC : 0);
+ dd = drbd_determine_dev_size(mdev, ddsf);
+ drbd_md_sync(mdev);
+ put_ldev(mdev);
+ if (dd == dev_size_error) {
+ retcode = ERR_NOMEM_BITMAP;
+ goto fail;
+ }
+
+ if (mdev->state.conn == C_CONNECTED) {
+ if (dd == grew)
+ set_bit(RESIZE_PENDING, &mdev->flags);
+
+ drbd_send_uuids(mdev);
+ drbd_send_sizes(mdev, 1, ddsf);
+ }
+
+ fail:
+ reply->ret_code = retcode;
+ return 0;
+}
+
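+/* Update the syncer configuration (rate, al-extents, verify/csums
+ * algorithms, cpu mask, resync controller settings). New crypto transforms
+ * and the resync plan buffer are validated and allocated first, then
+ * swapped in under peer_seq_lock. */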
+static int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
+ struct drbd_nl_cfg_reply *reply)
+{
+ int retcode = NO_ERROR;
+ int err;
+ int ovr; /* online verify running */
+ int rsr; /* re-sync running */
+ struct crypto_hash *verify_tfm = NULL;
+ struct crypto_hash *csums_tfm = NULL;
+ struct syncer_conf sc;
+ cpumask_var_t new_cpu_mask;
+ int *rs_plan_s = NULL;
+ int fifo_size;
+
+ if (!zalloc_cpumask_var(&new_cpu_mask, GFP_KERNEL)) {
+ retcode = ERR_NOMEM;
+ goto fail;
+ }
+
+ if (nlp->flags & DRBD_NL_SET_DEFAULTS) {
+ memset(&sc, 0, sizeof(struct syncer_conf));
+ sc.rate = DRBD_RATE_DEF;
+ sc.after = DRBD_AFTER_DEF;
+ sc.al_extents = DRBD_AL_EXTENTS_DEF;
+ sc.on_no_data = DRBD_ON_NO_DATA_DEF;
+ sc.c_plan_ahead = DRBD_C_PLAN_AHEAD_DEF;
+ sc.c_delay_target = DRBD_C_DELAY_TARGET_DEF;
+ sc.c_fill_target = DRBD_C_FILL_TARGET_DEF;
+ sc.c_max_rate = DRBD_C_MAX_RATE_DEF;
+ sc.c_min_rate = DRBD_C_MIN_RATE_DEF;
+ } else
+ memcpy(&sc, &mdev->sync_conf, sizeof(struct syncer_conf));
+
+ if (!syncer_conf_from_tags(mdev, nlp->tag_list, &sc)) {
+ retcode = ERR_MANDATORY_TAG;
+ goto fail;
+ }
+
+ /* re-sync running */
+ rsr = ( mdev->state.conn == C_SYNC_SOURCE ||
+ mdev->state.conn == C_SYNC_TARGET ||
+ mdev->state.conn == C_PAUSED_SYNC_S ||
+ mdev->state.conn == C_PAUSED_SYNC_T );
+
+ if (rsr && strcmp(sc.csums_alg, mdev->sync_conf.csums_alg)) {
+ retcode = ERR_CSUMS_RESYNC_RUNNING;
+ goto fail;
+ }
+
+ if (!rsr && sc.csums_alg[0]) {
+ csums_tfm = crypto_alloc_hash(sc.csums_alg, 0, CRYPTO_ALG_ASYNC);
+ if (IS_ERR(csums_tfm)) {
+ csums_tfm = NULL;
+ retcode = ERR_CSUMS_ALG;
+ goto fail;
+ }
+
+ if (!drbd_crypto_is_hash(crypto_hash_tfm(csums_tfm))) {
+ retcode = ERR_CSUMS_ALG_ND;
+ goto fail;
+ }
+ }
+
+ /* online verify running */
+ ovr = (mdev->state.conn == C_VERIFY_S || mdev->state.conn == C_VERIFY_T);
+
+ if (ovr) {
+ if (strcmp(sc.verify_alg, mdev->sync_conf.verify_alg)) {
+ retcode = ERR_VERIFY_RUNNING;
+ goto fail;
+ }
+ }
+
+ if (!ovr && sc.verify_alg[0]) {
+ verify_tfm = crypto_alloc_hash(sc.verify_alg, 0, CRYPTO_ALG_ASYNC);
+ if (IS_ERR(verify_tfm)) {
+ verify_tfm = NULL;
+ retcode = ERR_VERIFY_ALG;
+ goto fail;
+ }
+
+ if (!drbd_crypto_is_hash(crypto_hash_tfm(verify_tfm))) {
+ retcode = ERR_VERIFY_ALG_ND;
+ goto fail;
+ }
+ }
+
+ /* silently ignore cpu mask on UP kernel */
+ if (nr_cpu_ids > 1 && sc.cpu_mask[0] != 0) {
+ err = __bitmap_parse(sc.cpu_mask, 32, 0,
+ cpumask_bits(new_cpu_mask), nr_cpu_ids);
+ if (err) {
+ dev_warn(DEV, "__bitmap_parse() failed with %d\n", err);
+ retcode = ERR_CPU_MASK_PARSE;
+ goto fail;
+ }
+ }
+
+ ERR_IF (sc.rate < 1) sc.rate = 1;
+ ERR_IF (sc.al_extents < 7) sc.al_extents = 127; /* arbitrary minimum */
+#define AL_MAX ((MD_AL_MAX_SIZE-1) * AL_EXTENTS_PT)
+ if (sc.al_extents > AL_MAX) {
+ dev_err(DEV, "sc.al_extents > %d\n", AL_MAX);
+ sc.al_extents = AL_MAX;
+ }
+#undef AL_MAX
+
+ /* to avoid spurious errors when configuring minors before configuring
+ * the minors they depend on: if necessary, first create the minor we
+ * depend on */
+ if (sc.after >= 0)
+ ensure_mdev(sc.after, 1);
+
+ /* most sanity checks done, try to assign the new sync-after
+ * dependency. need to hold the global lock in there,
+ * to avoid a race in the dependency loop check. */
+ retcode = drbd_alter_sa(mdev, sc.after);
+ if (retcode != NO_ERROR)
+ goto fail;
+
+ fifo_size = (sc.c_plan_ahead * 10 * SLEEP_TIME) / HZ;
+ if (fifo_size != mdev->rs_plan_s.size && fifo_size > 0) {
+ rs_plan_s = kzalloc(sizeof(int) * fifo_size, GFP_KERNEL);
+ if (!rs_plan_s) {
+ dev_err(DEV, "kmalloc of fifo_buffer failed");
+ retcode = ERR_NOMEM;
+ goto fail;
+ }
+ }
+
+ /* ok, assign the rest of it as well.
+ * lock against receive_SyncParam() */
+ spin_lock(&mdev->peer_seq_lock);
+ mdev->sync_conf = sc;
+
+ if (!rsr) {
+ crypto_free_hash(mdev->csums_tfm);
+ mdev->csums_tfm = csums_tfm;
+ csums_tfm = NULL;
+ }
+
+ if (!ovr) {
+ crypto_free_hash(mdev->verify_tfm);
+ mdev->verify_tfm = verify_tfm;
+ verify_tfm = NULL;
+ }
+
+ if (fifo_size != mdev->rs_plan_s.size) {
+ kfree(mdev->rs_plan_s.values);
+ mdev->rs_plan_s.values = rs_plan_s;
+ mdev->rs_plan_s.size = fifo_size;
+ mdev->rs_planed = 0;
+ rs_plan_s = NULL;
+ }
+
+ spin_unlock(&mdev->peer_seq_lock);
+
+ if (get_ldev(mdev)) {
+ wait_event(mdev->al_wait, lc_try_lock(mdev->act_log));
+ drbd_al_shrink(mdev);
+ err = drbd_check_al_size(mdev);
+ lc_unlock(mdev->act_log);
+ wake_up(&mdev->al_wait);
+
+ put_ldev(mdev);
+ drbd_md_sync(mdev);
+
+ if (err) {
+ retcode = ERR_NOMEM;
+ goto fail;
+ }
+ }
+
+ if (mdev->state.conn >= C_CONNECTED)
+ drbd_send_sync_param(mdev, &sc);
+
+ if (!cpumask_equal(mdev->cpu_mask, new_cpu_mask)) {
+ cpumask_copy(mdev->cpu_mask, new_cpu_mask);
+ drbd_calc_cpu_mask(mdev);
+ mdev->receiver.reset_cpu_mask = 1;
+ mdev->asender.reset_cpu_mask = 1;
+ mdev->worker.reset_cpu_mask = 1;
+ }
+
+ kobject_uevent(&disk_to_dev(mdev->vdisk)->kobj, KOBJ_CHANGE);
+fail:
+ kfree(rs_plan_s);
+ free_cpumask_var(new_cpu_mask);
+ crypto_free_hash(csums_tfm);
+ crypto_free_hash(verify_tfm);
+ reply->ret_code = retcode;
+ return 0;
+}
+
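+/* Invalidate the local disk and resync from the peer
+ * (this node becomes sync target). */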
+static int drbd_nl_invalidate(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
+ struct drbd_nl_cfg_reply *reply)
+{
+ int retcode;
+
+ /* If there is still bitmap IO pending, probably because of a previous
+ * resync just being finished, wait for it before requesting a new resync. */
+ wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags));
+
+ retcode = _drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_T), CS_ORDERED);
+
+ if (retcode < SS_SUCCESS && retcode != SS_NEED_CONNECTION)
+ retcode = drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_T));
+
+ while (retcode == SS_NEED_CONNECTION) {
+ spin_lock_irq(&mdev->req_lock);
+ if (mdev->state.conn < C_CONNECTED)
+ retcode = _drbd_set_state(_NS(mdev, disk, D_INCONSISTENT), CS_VERBOSE, NULL);
+ spin_unlock_irq(&mdev->req_lock);
+
+ if (retcode != SS_NEED_CONNECTION)
+ break;
+
+ retcode = drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_T));
+ }
+
+ reply->ret_code = retcode;
+ return 0;
+}
+
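+/* Bitmap IO callback used by drbd_nl_invalidate_peer() below:
+ * set the bitmap via drbd_bmio_set_n_write(), then suspend AL updates. */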
+static int drbd_bmio_set_susp_al(struct drbd_conf *mdev)
+{
+ int rv;
+
+ rv = drbd_bmio_set_n_write(mdev);
+ drbd_suspend_al(mdev);
+ return rv;
+}
+
+static int drbd_nl_invalidate_peer(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
+ struct drbd_nl_cfg_reply *reply)
+{
+ int retcode;
+
+ /* If there is still bitmap IO pending, probably because of a previous
+ * resync just being finished, wait for it before requesting a new resync. */
+ wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags));
+
+ retcode = _drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_S), CS_ORDERED);
+
+ if (retcode < SS_SUCCESS) {
+ if (retcode == SS_NEED_CONNECTION && mdev->state.role == R_PRIMARY) {
+ /* The peer will get a resync upon connect anyway. Just make that
+ into a full resync. */
+ retcode = drbd_request_state(mdev, NS(pdsk, D_INCONSISTENT));
+ if (retcode >= SS_SUCCESS) {
+ if (drbd_bitmap_io(mdev, &drbd_bmio_set_susp_al,
+ "set_n_write from invalidate_peer",
+ BM_LOCKED_SET_ALLOWED))
+ retcode = ERR_IO_MD_DISK;
+ }
+ } else
+ retcode = drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_S));
+ }
+
+ reply->ret_code = retcode;
+ return 0;
+}
+
+static int drbd_nl_pause_sync(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
+ struct drbd_nl_cfg_reply *reply)
+{
+ int retcode = NO_ERROR;
+
+ if (drbd_request_state(mdev, NS(user_isp, 1)) == SS_NOTHING_TO_DO)
+ retcode = ERR_PAUSE_IS_SET;
+
+ reply->ret_code = retcode;
+ return 0;
+}
+
+static int drbd_nl_resume_sync(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
+ struct drbd_nl_cfg_reply *reply)
+{
+ int retcode = NO_ERROR;
+ union drbd_state s;
+
+ if (drbd_request_state(mdev, NS(user_isp, 0)) == SS_NOTHING_TO_DO) {
+ s = mdev->state;
+ if (s.conn == C_PAUSED_SYNC_S || s.conn == C_PAUSED_SYNC_T) {
+ retcode = s.aftr_isp ? ERR_PIC_AFTER_DEP :
+ s.peer_isp ? ERR_PIC_PEER_DEP : ERR_PAUSE_IS_CLEAR;
+ } else {
+ retcode = ERR_PAUSE_IS_CLEAR;
+ }
+ }
+
+ reply->ret_code = retcode;
+ return 0;
+}
+
+static int drbd_nl_suspend_io(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
+ struct drbd_nl_cfg_reply *reply)
+{
+ reply->ret_code = drbd_request_state(mdev, NS(susp, 1));
+
+ return 0;
+}
+
+static int drbd_nl_resume_io(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
+ struct drbd_nl_cfg_reply *reply)
+{
+ if (test_bit(NEW_CUR_UUID, &mdev->flags)) {
+ drbd_uuid_new_current(mdev);
+ clear_bit(NEW_CUR_UUID, &mdev->flags);
+ }
+ drbd_suspend_io(mdev);
+ reply->ret_code = drbd_request_state(mdev, NS3(susp, 0, susp_nod, 0, susp_fen, 0));
+ if (reply->ret_code == SS_SUCCESS) {
+ if (mdev->state.conn < C_CONNECTED)
+ tl_clear(mdev);
+ if (mdev->state.disk == D_DISKLESS || mdev->state.disk == D_FAILED)
+ tl_restart(mdev, fail_frozen_disk_io);
+ }
+ drbd_resume_io(mdev);
+
+ return 0;
+}
+
+static int drbd_nl_outdate(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
+ struct drbd_nl_cfg_reply *reply)
+{
+ reply->ret_code = drbd_request_state(mdev, NS(disk, D_OUTDATED));
+ return 0;
+}
+
+static int drbd_nl_get_config(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
+ struct drbd_nl_cfg_reply *reply)
+{
+ unsigned short *tl;
+
+ tl = reply->tag_list;
+
+ if (get_ldev(mdev)) {
+ tl = disk_conf_to_tags(mdev, &mdev->ldev->dc, tl);
+ put_ldev(mdev);
+ }
+
+ if (get_net_conf(mdev)) {
+ tl = net_conf_to_tags(mdev, mdev->net_conf, tl);
+ put_net_conf(mdev);
+ }
+ tl = syncer_conf_to_tags(mdev, &mdev->sync_conf, tl);
+
+ put_unaligned(TT_END, tl++); /* Close the tag list */
+
+ return (int)((char *)tl - (char *)reply->tag_list);
+}
+
+static int drbd_nl_get_state(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
+ struct drbd_nl_cfg_reply *reply)
+{
+ unsigned short *tl = reply->tag_list;
+ union drbd_state s = mdev->state;
+ unsigned long rs_left;
+ unsigned int res;
+
+ tl = get_state_to_tags(mdev, (struct get_state *)&s, tl);
+
+ /* no local ref, no bitmap, no syncer progress. */
+ if (s.conn >= C_SYNC_SOURCE && s.conn <= C_PAUSED_SYNC_T) {
+ if (get_ldev(mdev)) {
+ drbd_get_syncer_progress(mdev, &rs_left, &res);
+ tl = tl_add_int(tl, T_sync_progress, &res);
+ put_ldev(mdev);
+ }
+ }
+ put_unaligned(TT_END, tl++); /* Close the tag list */
+
+ return (int)((char *)tl - (char *)reply->tag_list);
+}
+
+static int drbd_nl_get_uuids(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
+ struct drbd_nl_cfg_reply *reply)
+{
+ unsigned short *tl;
+
+ tl = reply->tag_list;
+
+ if (get_ldev(mdev)) {
+ tl = tl_add_blob(tl, T_uuids, mdev->ldev->md.uuid, UI_SIZE*sizeof(u64));
+ tl = tl_add_int(tl, T_uuids_flags, &mdev->ldev->md.flags);
+ put_ldev(mdev);
+ }
+ put_unaligned(TT_END, tl++); /* Close the tag list */
+
+ return (int)((char *)tl - (char *)reply->tag_list);
+}
+
+/**
+ * drbd_nl_get_timeout_flag() - Used by drbdsetup to find out which timeout value to use
+ * @mdev: DRBD device.
+ * @nlp: Netlink/connector packet from drbdsetup
+ * @reply: Reply packet for drbdsetup
+ */
+static int drbd_nl_get_timeout_flag(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
+ struct drbd_nl_cfg_reply *reply)
+{
+ unsigned short *tl;
+ char rv;
+
+ tl = reply->tag_list;
+
+ rv = mdev->state.pdsk == D_OUTDATED ? UT_PEER_OUTDATED :
+ test_bit(USE_DEGR_WFC_T, &mdev->flags) ? UT_DEGRADED : UT_DEFAULT;
+
+ tl = tl_add_blob(tl, T_use_degraded, &rv, sizeof(rv));
+ put_unaligned(TT_END, tl++); /* Close the tag list */
+
+ return (int)((char *)tl - (char *)reply->tag_list);
+}
+
+static int drbd_nl_start_ov(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
+ struct drbd_nl_cfg_reply *reply)
+{
+ /* default to resume from last known position, if possible */
+ struct start_ov args =
+ { .start_sector = mdev->ov_start_sector };
+
+ if (!start_ov_from_tags(mdev, nlp->tag_list, &args)) {
+ reply->ret_code = ERR_MANDATORY_TAG;
+ return 0;
+ }
+
+ /* If there is still bitmap IO pending, e.g. previous resync or verify
+ * just being finished, wait for it before requesting a new resync. */
+ wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags));
+
+ /* w_make_ov_request expects position to be aligned */
+ mdev->ov_start_sector = args.start_sector & ~BM_SECT_PER_BIT;
+ reply->ret_code = drbd_request_state(mdev, NS(conn, C_VERIFY_S));
+ return 0;
+}
+
+
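+/* Generate a new current UUID. With clear_bm set, also clear the bitmap;
+ * on a connected device that still has its just-created UUID, this skips
+ * the initial full sync. */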
+static int drbd_nl_new_c_uuid(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
+ struct drbd_nl_cfg_reply *reply)
+{
+ int retcode = NO_ERROR;
+ int skip_initial_sync = 0;
+ int err;
+
+ struct new_c_uuid args;
+
+ memset(&args, 0, sizeof(struct new_c_uuid));
+ if (!new_c_uuid_from_tags(mdev, nlp->tag_list, &args)) {
+ reply->ret_code = ERR_MANDATORY_TAG;
+ return 0;
+ }
+
+ mutex_lock(&mdev->state_mutex); /* Protects us against serialized state changes. */
+
+ if (!get_ldev(mdev)) {
+ retcode = ERR_NO_DISK;
+ goto out;
+ }
+
+ /* this is "skip initial sync", assume to be clean */
+ if (mdev->state.conn == C_CONNECTED && mdev->agreed_pro_version >= 90 &&
+ mdev->ldev->md.uuid[UI_CURRENT] == UUID_JUST_CREATED && args.clear_bm) {
+ dev_info(DEV, "Preparing to skip initial sync\n");
+ skip_initial_sync = 1;
+ } else if (mdev->state.conn != C_STANDALONE) {
+ retcode = ERR_CONNECTED;
+ goto out_dec;
+ }
+
+ drbd_uuid_set(mdev, UI_BITMAP, 0); /* Rotate UI_BITMAP to History 1, etc... */
+ drbd_uuid_new_current(mdev); /* New current, previous to UI_BITMAP */
+
+ if (args.clear_bm) {
+ err = drbd_bitmap_io(mdev, &drbd_bmio_clear_n_write,
+ "clear_n_write from new_c_uuid", BM_LOCKED_MASK);
+ if (err) {
+ dev_err(DEV, "Writing bitmap failed with %d\n",err);
+ retcode = ERR_IO_MD_DISK;
+ }
+ if (skip_initial_sync) {
+ drbd_send_uuids_skip_initial_sync(mdev);
+ _drbd_uuid_set(mdev, UI_BITMAP, 0);
+ drbd_print_uuids(mdev, "cleared bitmap UUID");
+ spin_lock_irq(&mdev->req_lock);
+ _drbd_set_state(_NS2(mdev, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE),
+ CS_VERBOSE, NULL);
+ spin_unlock_irq(&mdev->req_lock);
+ }
+ }
+
+ drbd_md_sync(mdev);
+out_dec:
+ put_ldev(mdev);
+out:
+ mutex_unlock(&mdev->state_mutex);
+
+ reply->ret_code = retcode;
+ return 0;
+}
+
+struct cn_handler_struct {
+ int (*function)(struct drbd_conf *,
+ struct drbd_nl_cfg_req *,
+ struct drbd_nl_cfg_reply *);
+ int reply_body_size;
+};
+
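+/* Dispatch table: maps connector packet types to their handlers.
+ * reply_body_size is the extra space reserved for the handler's tag-list
+ * reply, in addition to the basic reply header. */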
+static struct cn_handler_struct cnd_table[] = {
+ [ P_primary ] = { &drbd_nl_primary, 0 },
+ [ P_secondary ] = { &drbd_nl_secondary, 0 },
+ [ P_disk_conf ] = { &drbd_nl_disk_conf, 0 },
+ [ P_detach ] = { &drbd_nl_detach, 0 },
+ [ P_net_conf ] = { &drbd_nl_net_conf, 0 },
+ [ P_disconnect ] = { &drbd_nl_disconnect, 0 },
+ [ P_resize ] = { &drbd_nl_resize, 0 },
+ [ P_syncer_conf ] = { &drbd_nl_syncer_conf, 0 },
+ [ P_invalidate ] = { &drbd_nl_invalidate, 0 },
+ [ P_invalidate_peer ] = { &drbd_nl_invalidate_peer, 0 },
+ [ P_pause_sync ] = { &drbd_nl_pause_sync, 0 },
+ [ P_resume_sync ] = { &drbd_nl_resume_sync, 0 },
+ [ P_suspend_io ] = { &drbd_nl_suspend_io, 0 },
+ [ P_resume_io ] = { &drbd_nl_resume_io, 0 },
+ [ P_outdate ] = { &drbd_nl_outdate, 0 },
+ [ P_get_config ] = { &drbd_nl_get_config,
+ sizeof(struct syncer_conf_tag_len_struct) +
+ sizeof(struct disk_conf_tag_len_struct) +
+ sizeof(struct net_conf_tag_len_struct) },
+ [ P_get_state ] = { &drbd_nl_get_state,
+ sizeof(struct get_state_tag_len_struct) +
+ sizeof(struct sync_progress_tag_len_struct) },
+ [ P_get_uuids ] = { &drbd_nl_get_uuids,
+ sizeof(struct get_uuids_tag_len_struct) },
+ [ P_get_timeout_flag ] = { &drbd_nl_get_timeout_flag,
+ sizeof(struct get_timeout_flag_tag_len_struct)},
+ [ P_start_ov ] = { &drbd_nl_start_ov, 0 },
+ [ P_new_c_uuid ] = { &drbd_nl_new_c_uuid, 0 },
+};
+
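+/* Entry point for configuration requests received via the connector:
+ * look up (or create) the minor, dispatch to the handler from cnd_table,
+ * and send the reply back to userspace. */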
+static void drbd_connector_callback(struct cn_msg *req, struct netlink_skb_parms *nsp)
+{
+ struct drbd_nl_cfg_req *nlp = (struct drbd_nl_cfg_req *)req->data;
+ struct cn_handler_struct *cm;
+ struct cn_msg *cn_reply;
+ struct drbd_nl_cfg_reply *reply;
+ struct drbd_conf *mdev;
+ int retcode, rr;
+ int reply_size = sizeof(struct cn_msg)
+ + sizeof(struct drbd_nl_cfg_reply)
+ + sizeof(short int);
+
+ if (!try_module_get(THIS_MODULE)) {
+ printk(KERN_ERR "drbd: try_module_get() failed!\n");
+ return;
+ }
+
+ if (!cap_raised(current_cap(), CAP_SYS_ADMIN)) {
+ retcode = ERR_PERM;
+ goto fail;
+ }
+
+ mdev = ensure_mdev(nlp->drbd_minor,
+ (nlp->flags & DRBD_NL_CREATE_DEVICE));
+ if (!mdev) {
+ retcode = ERR_MINOR_INVALID;
+ goto fail;
+ }
+
+ if (nlp->packet_type >= P_nl_after_last_packet ||
+ nlp->packet_type == P_return_code_only) {
+ retcode = ERR_PACKET_NR;
+ goto fail;
+ }
+
+ cm = cnd_table + nlp->packet_type;
+
+ /* This may happen if packet number is 0: */
+ if (cm->function == NULL) {
+ retcode = ERR_PACKET_NR;
+ goto fail;
+ }
+
+ reply_size += cm->reply_body_size;
+
+ /* allocation not in the IO path, cqueue thread context */
+ cn_reply = kzalloc(reply_size, GFP_KERNEL);
+ if (!cn_reply) {
+ retcode = ERR_NOMEM;
+ goto fail;
+ }
+ reply = (struct drbd_nl_cfg_reply *) cn_reply->data;
+
+ reply->packet_type =
+ cm->reply_body_size ? nlp->packet_type : P_return_code_only;
+ reply->minor = nlp->drbd_minor;
+ reply->ret_code = NO_ERROR; /* Might be modified by cm->function. */
+ /* reply->tag_list; might be modified by cm->function. */
+
+ rr = cm->function(mdev, nlp, reply);
+
+ cn_reply->id = req->id;
+ cn_reply->seq = req->seq;
+ cn_reply->ack = req->ack + 1;
+ cn_reply->len = sizeof(struct drbd_nl_cfg_reply) + rr;
+ cn_reply->flags = 0;
+
+ rr = cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_KERNEL);
+ if (rr && rr != -ESRCH)
+ printk(KERN_INFO "drbd: cn_netlink_send()=%d\n", rr);
+
+ kfree(cn_reply);
+ module_put(THIS_MODULE);
+ return;
+ fail:
+ drbd_nl_send_reply(req, retcode);
+ module_put(THIS_MODULE);
+}
+
+static atomic_t drbd_nl_seq = ATOMIC_INIT(2); /* two. */
+
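+/* Helpers to build the reply tag list: each entry is <tag><length><payload>,
+ * and the list is closed with TT_END. */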
+static unsigned short *
+__tl_add_blob(unsigned short *tl, enum drbd_tags tag, const void *data,
+ unsigned short len, int nul_terminated)
+{
+ unsigned short l = tag_descriptions[tag_number(tag)].max_len;
+ len = (len < l) ? len : l;
+ put_unaligned(tag, tl++);
+ put_unaligned(len, tl++);
+ memcpy(tl, data, len);
+ tl = (unsigned short*)((char*)tl + len);
+ if (nul_terminated)
+ *((char*)tl - 1) = 0;
+ return tl;
+}
+
+static unsigned short *
+tl_add_blob(unsigned short *tl, enum drbd_tags tag, const void *data, int len)
+{
+ return __tl_add_blob(tl, tag, data, len, 0);
+}
+
+static unsigned short *
+tl_add_str(unsigned short *tl, enum drbd_tags tag, const char *str)
+{
+ return __tl_add_blob(tl, tag, str, strlen(str)+1, 0);
+}
+
+static unsigned short *
+tl_add_int(unsigned short *tl, enum drbd_tags tag, const void *val)
+{
+ put_unaligned(tag, tl++);
+ switch(tag_type(tag)) {
+ case TT_INTEGER:
+ put_unaligned(sizeof(int), tl++);
+ put_unaligned(*(int *)val, (int *)tl);
+ tl = (unsigned short*)((char*)tl+sizeof(int));
+ break;
+ case TT_INT64:
+ put_unaligned(sizeof(u64), tl++);
+ put_unaligned(*(u64 *)val, (u64 *)tl);
+ tl = (unsigned short*)((char*)tl+sizeof(u64));
+ break;
+ default:
+ /* someone did something stupid. */
+ ;
+ }
+ return tl;
+}
+
+void drbd_bcast_state(struct drbd_conf *mdev, union drbd_state state)
+{
+ char buffer[sizeof(struct cn_msg)+
+ sizeof(struct drbd_nl_cfg_reply)+
+ sizeof(struct get_state_tag_len_struct)+
+ sizeof(short int)];
+ struct cn_msg *cn_reply = (struct cn_msg *) buffer;
+ struct drbd_nl_cfg_reply *reply =
+ (struct drbd_nl_cfg_reply *)cn_reply->data;
+ unsigned short *tl = reply->tag_list;
+
+ /* dev_warn(DEV, "drbd_bcast_state() got called\n"); */
+
+ tl = get_state_to_tags(mdev, (struct get_state *)&state, tl);
+
+ put_unaligned(TT_END, tl++); /* Close the tag list */
+
+ cn_reply->id.idx = CN_IDX_DRBD;
+ cn_reply->id.val = CN_VAL_DRBD;
+
+ cn_reply->seq = atomic_add_return(1, &drbd_nl_seq);
+ cn_reply->ack = 0; /* not used here. */
+ cn_reply->len = sizeof(struct drbd_nl_cfg_reply) +
+ (int)((char *)tl - (char *)reply->tag_list);
+ cn_reply->flags = 0;
+
+ reply->packet_type = P_get_state;
+ reply->minor = mdev_to_minor(mdev);
+ reply->ret_code = NO_ERROR;
+
+ cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_NOIO);
+}
+
+void drbd_bcast_ev_helper(struct drbd_conf *mdev, char *helper_name)
+{
+ char buffer[sizeof(struct cn_msg)+
+ sizeof(struct drbd_nl_cfg_reply)+
+ sizeof(struct call_helper_tag_len_struct)+
+ sizeof(short int)];
+ struct cn_msg *cn_reply = (struct cn_msg *) buffer;
+ struct drbd_nl_cfg_reply *reply =
+ (struct drbd_nl_cfg_reply *)cn_reply->data;
+ unsigned short *tl = reply->tag_list;
+
+ /* dev_warn(DEV, "drbd_bcast_state() got called\n"); */
+
+ tl = tl_add_str(tl, T_helper, helper_name);
+ put_unaligned(TT_END, tl++); /* Close the tag list */
+
+ cn_reply->id.idx = CN_IDX_DRBD;
+ cn_reply->id.val = CN_VAL_DRBD;
+
+ cn_reply->seq = atomic_add_return(1, &drbd_nl_seq);
+ cn_reply->ack = 0; /* not used here. */
+ cn_reply->len = sizeof(struct drbd_nl_cfg_reply) +
+ (int)((char *)tl - (char *)reply->tag_list);
+ cn_reply->flags = 0;
+
+ reply->packet_type = P_call_helper;
+ reply->minor = mdev_to_minor(mdev);
+ reply->ret_code = NO_ERROR;
+
+ cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_NOIO);
+}
+
+void drbd_bcast_ee(struct drbd_conf *mdev,
+ const char *reason, const int dgs,
+ const char* seen_hash, const char* calc_hash,
+ const struct drbd_epoch_entry* e)
+{
+ struct cn_msg *cn_reply;
+ struct drbd_nl_cfg_reply *reply;
+ unsigned short *tl;
+ struct page *page;
+ unsigned len;
+
+ if (!e)
+ return;
+ if (!reason || !reason[0])
+ return;
+
+ /* apparently we have to memcpy twice, first to prepare the data for the
+ * struct cn_msg, then within cn_netlink_send from the cn_msg to the
+ * netlink skb. */
+ /* receiver thread context, which is not in the writeout path (of this node),
+ * but may be in the writeout path of the _other_ node.
+ * GFP_NOIO to avoid potential "distributed deadlock". */
+ cn_reply = kzalloc(
+ sizeof(struct cn_msg)+
+ sizeof(struct drbd_nl_cfg_reply)+
+ sizeof(struct dump_ee_tag_len_struct)+
+ sizeof(short int),
+ GFP_NOIO);
+
+ if (!cn_reply) {
+ dev_err(DEV, "could not kmalloc buffer for drbd_bcast_ee, sector %llu, size %u\n",
+ (unsigned long long)e->sector, e->size);
+ return;
+ }
+
+ reply = (struct drbd_nl_cfg_reply*)cn_reply->data;
+ tl = reply->tag_list;
+
+ tl = tl_add_str(tl, T_dump_ee_reason, reason);
+ tl = tl_add_blob(tl, T_seen_digest, seen_hash, dgs);
+ tl = tl_add_blob(tl, T_calc_digest, calc_hash, dgs);
+ tl = tl_add_int(tl, T_ee_sector, &e->sector);
+ tl = tl_add_int(tl, T_ee_block_id, &e->block_id);
+
+ /* dump the first 32k */
+ len = min_t(unsigned, e->size, 32 << 10);
+ put_unaligned(T_ee_data, tl++);
+ put_unaligned(len, tl++);
+
+ page = e->pages;
+ page_chain_for_each(page) {
+ void *d = kmap_atomic(page, KM_USER0);
+ unsigned l = min_t(unsigned, len, PAGE_SIZE);
+ memcpy(tl, d, l);
+ kunmap_atomic(d, KM_USER0);
+ tl = (unsigned short*)((char*)tl + l);
+ len -= l;
+ if (len == 0)
+ break;
+ }
+ put_unaligned(TT_END, tl++); /* Close the tag list */
+
+ cn_reply->id.idx = CN_IDX_DRBD;
+ cn_reply->id.val = CN_VAL_DRBD;
+
+ cn_reply->seq = atomic_add_return(1, &drbd_nl_seq);
+ cn_reply->ack = 0; /* not used here. */
+ cn_reply->len = sizeof(struct drbd_nl_cfg_reply) +
+ (int)((char*)tl - (char*)reply->tag_list);
+ cn_reply->flags = 0;
+
+ reply->packet_type = P_dump_ee;
+ reply->minor = mdev_to_minor(mdev);
+ reply->ret_code = NO_ERROR;
+
+ cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_NOIO);
+ kfree(cn_reply);
+}
+
+void drbd_bcast_sync_progress(struct drbd_conf *mdev)
+{
+ char buffer[sizeof(struct cn_msg)+
+ sizeof(struct drbd_nl_cfg_reply)+
+ sizeof(struct sync_progress_tag_len_struct)+
+ sizeof(short int)];
+ struct cn_msg *cn_reply = (struct cn_msg *) buffer;
+ struct drbd_nl_cfg_reply *reply =
+ (struct drbd_nl_cfg_reply *)cn_reply->data;
+ unsigned short *tl = reply->tag_list;
+ unsigned long rs_left;
+ unsigned int res;
+
+ /* no local ref, no bitmap, no syncer progress, no broadcast. */
+ if (!get_ldev(mdev))
+ return;
+ drbd_get_syncer_progress(mdev, &rs_left, &res);
+ put_ldev(mdev);
+
+ tl = tl_add_int(tl, T_sync_progress, &res);
+ put_unaligned(TT_END, tl++); /* Close the tag list */
+
+ cn_reply->id.idx = CN_IDX_DRBD;
+ cn_reply->id.val = CN_VAL_DRBD;
+
+ cn_reply->seq = atomic_add_return(1, &drbd_nl_seq);
+ cn_reply->ack = 0; /* not used here. */
+ cn_reply->len = sizeof(struct drbd_nl_cfg_reply) +
+ (int)((char *)tl - (char *)reply->tag_list);
+ cn_reply->flags = 0;
+
+ reply->packet_type = P_sync_progress;
+ reply->minor = mdev_to_minor(mdev);
+ reply->ret_code = NO_ERROR;
+
+ cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_NOIO);
+}
+
+int __init drbd_nl_init(void)
+{
+ static struct cb_id cn_id_drbd;
+ int err, try = 10;
+
+ cn_id_drbd.val = CN_VAL_DRBD;
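+ /* If registration at this connector index fails, step to the next
+ * index (cn_idx + CN_IDX_STEP) and retry a few more times. */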
+ do {
+ cn_id_drbd.idx = cn_idx;
+ err = cn_add_callback(&cn_id_drbd, "cn_drbd", &drbd_connector_callback);
+ if (!err)
+ break;
+ cn_idx = (cn_idx + CN_IDX_STEP);
+ } while (try--);
+
+ if (err) {
+ printk(KERN_ERR "drbd: cn_drbd failed to register\n");
+ return err;
+ }
+
+ return 0;
+}
+
+void drbd_nl_cleanup(void)
+{
+ static struct cb_id cn_id_drbd;
+
+ cn_id_drbd.idx = cn_idx;
+ cn_id_drbd.val = CN_VAL_DRBD;
+
+ cn_del_callback(&cn_id_drbd);
+}
+
+void drbd_nl_send_reply(struct cn_msg *req, int ret_code)
+{
+ char buffer[sizeof(struct cn_msg)+sizeof(struct drbd_nl_cfg_reply)];
+ struct cn_msg *cn_reply = (struct cn_msg *) buffer;
+ struct drbd_nl_cfg_reply *reply =
+ (struct drbd_nl_cfg_reply *)cn_reply->data;
+ int rr;
+
+ memset(buffer, 0, sizeof(buffer));
+ cn_reply->id = req->id;
+
+ cn_reply->seq = req->seq;
+ cn_reply->ack = req->ack + 1;
+ cn_reply->len = sizeof(struct drbd_nl_cfg_reply);
+ cn_reply->flags = 0;
+
+ reply->packet_type = P_return_code_only;
+ reply->minor = ((struct drbd_nl_cfg_req *)req->data)->drbd_minor;
+ reply->ret_code = ret_code;
+
+ rr = cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_NOIO);
+ if (rr && rr != -ESRCH)
+ printk(KERN_INFO "drbd: cn_netlink_send()=%d\n", rr);
+}
+
diff --git a/drivers/block/drbd/drbd_proc.c b/drivers/block/drbd/drbd_proc.c
new file mode 100644
index 00000000..2959cdfb
--- /dev/null
+++ b/drivers/block/drbd/drbd_proc.c
@@ -0,0 +1,319 @@
+/*
+ drbd_proc.c
+
+ This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
+
+ Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
+ Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
+ Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
+
+ drbd is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ drbd is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with drbd; see the file COPYING. If not, write to
+ the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ */
+
+#include <linux/module.h>
+
+#include <asm/uaccess.h>
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/drbd.h>
+#include "drbd_int.h"
+
+static int drbd_proc_open(struct inode *inode, struct file *file);
+static int drbd_proc_release(struct inode *inode, struct file *file);
+
+
+struct proc_dir_entry *drbd_proc;
+const struct file_operations drbd_proc_fops = {
+ .owner = THIS_MODULE,
+ .open = drbd_proc_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = drbd_proc_release,
+};
+
+void seq_printf_with_thousands_grouping(struct seq_file *seq, long v)
+{
+ /* v is in kB/sec. We don't expect TiByte/sec yet. */
+ if (unlikely(v >= 1000000)) {
+ /* cool: > GiByte/s */
+ seq_printf(seq, "%ld,", v / 1000000);
+ v /= 1000000;
+ seq_printf(seq, "%03ld,%03ld", v/1000, v % 1000);
+ } else if (likely(v >= 1000))
+ seq_printf(seq, "%ld,%03ld", v/1000, v % 1000);
+ else
+ seq_printf(seq, "%ld", v);
+}
+
+/*lge
+ * progress bars shamelessly adapted from driver/md/md.c
+ * output looks like
+ * [=====>..............] 33.5% (23456/123456)
+ * finish: 2:20:20 speed: 6,345 (6,456) K/sec
+ */
+static void drbd_syncer_progress(struct drbd_conf *mdev, struct seq_file *seq)
+{
+ unsigned long db, dt, dbdt, rt, rs_left;
+ unsigned int res;
+ int i, x, y;
+ int stalled = 0;
+
+ drbd_get_syncer_progress(mdev, &rs_left, &res);
+
+ x = res/50;
+ y = 20-x;
+ seq_printf(seq, "\t[");
+ for (i = 1; i < x; i++)
+ seq_printf(seq, "=");
+ seq_printf(seq, ">");
+ for (i = 0; i < y; i++)
+ seq_printf(seq, ".");
+ seq_printf(seq, "] ");
+
+ if (mdev->state.conn == C_VERIFY_S || mdev->state.conn == C_VERIFY_T)
+ seq_printf(seq, "verified:");
+ else
+ seq_printf(seq, "sync'ed:");
+ seq_printf(seq, "%3u.%u%% ", res / 10, res % 10);
+
+ /* if more than a few GB, display in MB */
+ if (mdev->rs_total > (4UL << (30 - BM_BLOCK_SHIFT)))
+ seq_printf(seq, "(%lu/%lu)M",
+ (unsigned long) Bit2KB(rs_left >> 10),
+ (unsigned long) Bit2KB(mdev->rs_total >> 10));
+ else
+ seq_printf(seq, "(%lu/%lu)K\n\t",
+ (unsigned long) Bit2KB(rs_left),
+ (unsigned long) Bit2KB(mdev->rs_total));
+
+ /* see drivers/md/md.c
+ * We do not want to overflow, so the order of operands and
+ * the * 100 / 100 trick are important. We do a +1 to be
+ * safe against division by zero. We only estimate anyway.
+ *
+ * dt: time from mark until now
+ * db: blocks written from mark until now
+ * rt: remaining time
+ */
+ /* Rolling marks. last_mark+1 may just now be modified. last_mark+2 is
+ * at least (DRBD_SYNC_MARKS-2)*DRBD_SYNC_MARK_STEP old, and has at
+ * least DRBD_SYNC_MARK_STEP time before it will be modified. */
+ /* ------------------------ ~18s average ------------------------ */
+ i = (mdev->rs_last_mark + 2) % DRBD_SYNC_MARKS;
+ dt = (jiffies - mdev->rs_mark_time[i]) / HZ;
+ if (dt > (DRBD_SYNC_MARK_STEP * DRBD_SYNC_MARKS))
+ stalled = 1;
+
+ if (!dt)
+ dt++;
+ db = mdev->rs_mark_left[i] - rs_left;
+ rt = (dt * (rs_left / (db/100+1)))/100; /* seconds */
+
+ seq_printf(seq, "finish: %lu:%02lu:%02lu",
+ rt / 3600, (rt % 3600) / 60, rt % 60);
+
+ dbdt = Bit2KB(db/dt);
+ seq_printf(seq, " speed: ");
+ seq_printf_with_thousands_grouping(seq, dbdt);
+ seq_printf(seq, " (");
+ /* ------------------------- ~3s average ------------------------ */
+ if (proc_details >= 1) {
+ /* this is what drbd_rs_should_slow_down() uses */
+ i = (mdev->rs_last_mark + DRBD_SYNC_MARKS-1) % DRBD_SYNC_MARKS;
+ dt = (jiffies - mdev->rs_mark_time[i]) / HZ;
+ if (!dt)
+ dt++;
+ db = mdev->rs_mark_left[i] - rs_left;
+ dbdt = Bit2KB(db/dt);
+ seq_printf_with_thousands_grouping(seq, dbdt);
+ seq_printf(seq, " -- ");
+ }
+
+ /* --------------------- long term average ---------------------- */
+ /* mean speed since syncer started
+ * we do account for PausedSync periods */
+ dt = (jiffies - mdev->rs_start - mdev->rs_paused) / HZ;
+ if (dt == 0)
+ dt = 1;
+ db = mdev->rs_total - rs_left;
+ dbdt = Bit2KB(db/dt);
+ seq_printf_with_thousands_grouping(seq, dbdt);
+ seq_printf(seq, ")");
+
+ if (mdev->state.conn == C_SYNC_TARGET ||
+ mdev->state.conn == C_VERIFY_S) {
+ seq_printf(seq, " want: ");
+ seq_printf_with_thousands_grouping(seq, mdev->c_sync_rate);
+ }
+ seq_printf(seq, " K/sec%s\n", stalled ? " (stalled)" : "");
+
+ if (proc_details >= 1) {
+ /* 64 bit:
+ * we convert to sectors in the display below. */
+ unsigned long bm_bits = drbd_bm_bits(mdev);
+ unsigned long bit_pos;
+ if (mdev->state.conn == C_VERIFY_S ||
+ mdev->state.conn == C_VERIFY_T)
+ bit_pos = bm_bits - mdev->ov_left;
+ else
+ bit_pos = mdev->bm_resync_fo;
+ /* Total sectors may be slightly off for oddly
+ * sized devices. So what. */
+ seq_printf(seq,
+ "\t%3d%% sector pos: %llu/%llu\n",
+ (int)(bit_pos / (bm_bits/100+1)),
+ (unsigned long long)bit_pos * BM_SECT_PER_BIT,
+ (unsigned long long)bm_bits * BM_SECT_PER_BIT);
+ }
+}
+
+static void resync_dump_detail(struct seq_file *seq, struct lc_element *e)
+{
+ struct bm_extent *bme = lc_entry(e, struct bm_extent, lce);
+
+ seq_printf(seq, "%5d %s %s\n", bme->rs_left,
+ bme->flags & BME_NO_WRITES ? "NO_WRITES" : "---------",
+ bme->flags & BME_LOCKED ? "LOCKED" : "------"
+ );
+}
+
+static int drbd_seq_show(struct seq_file *seq, void *v)
+{
+ int i, hole = 0;
+ const char *sn;
+ struct drbd_conf *mdev;
+
+ static char write_ordering_chars[] = {
+ [WO_none] = 'n',
+ [WO_drain_io] = 'd',
+ [WO_bdev_flush] = 'f',
+ };
+
+ seq_printf(seq, "version: " REL_VERSION " (api:%d/proto:%d-%d)\n%s\n",
+ API_VERSION, PRO_VERSION_MIN, PRO_VERSION_MAX, drbd_buildtag());
+
+ /*
+ cs .. connection state
+ ro .. node role (local/remote)
+ ds .. disk state (local/remote)
+ protocol
+ various flags
+ ns .. network send
+ nr .. network receive
+ dw .. disk write
+ dr .. disk read
+ al .. activity log write count
+ bm .. bitmap update write count
+ pe .. pending (waiting for ack or data reply)
+ ua .. unack'd (still need to send ack or data reply)
+ ap .. application requests accepted, but not yet completed
+ ep .. number of epochs currently "on the fly", P_BARRIER_ACK pending
+ wo .. write ordering mode currently in use
+ oos .. known out-of-sync kB
+ */
+
+ for (i = 0; i < minor_count; i++) {
+ mdev = minor_to_mdev(i);
+ if (!mdev) {
+ hole = 1;
+ continue;
+ }
+ if (hole) {
+ hole = 0;
+ seq_printf(seq, "\n");
+ }
+
+ sn = drbd_conn_str(mdev->state.conn);
+
+ if (mdev->state.conn == C_STANDALONE &&
+ mdev->state.disk == D_DISKLESS &&
+ mdev->state.role == R_SECONDARY) {
+ seq_printf(seq, "%2d: cs:Unconfigured\n", i);
+ } else {
+ seq_printf(seq,
+ "%2d: cs:%s ro:%s/%s ds:%s/%s %c %c%c%c%c%c%c\n"
+ " ns:%u nr:%u dw:%u dr:%u al:%u bm:%u "
+ "lo:%d pe:%d ua:%d ap:%d ep:%d wo:%c",
+ i, sn,
+ drbd_role_str(mdev->state.role),
+ drbd_role_str(mdev->state.peer),
+ drbd_disk_str(mdev->state.disk),
+ drbd_disk_str(mdev->state.pdsk),
+ (mdev->net_conf == NULL ? ' ' :
+ (mdev->net_conf->wire_protocol - DRBD_PROT_A+'A')),
+ is_susp(mdev->state) ? 's' : 'r',
+ mdev->state.aftr_isp ? 'a' : '-',
+ mdev->state.peer_isp ? 'p' : '-',
+ mdev->state.user_isp ? 'u' : '-',
+ mdev->congestion_reason ?: '-',
+ test_bit(AL_SUSPENDED, &mdev->flags) ? 's' : '-',
+ mdev->send_cnt/2,
+ mdev->recv_cnt/2,
+ mdev->writ_cnt/2,
+ mdev->read_cnt/2,
+ mdev->al_writ_cnt,
+ mdev->bm_writ_cnt,
+ atomic_read(&mdev->local_cnt),
+ atomic_read(&mdev->ap_pending_cnt) +
+ atomic_read(&mdev->rs_pending_cnt),
+ atomic_read(&mdev->unacked_cnt),
+ atomic_read(&mdev->ap_bio_cnt),
+ mdev->epochs,
+ write_ordering_chars[mdev->write_ordering]
+ );
+ seq_printf(seq, " oos:%llu\n",
+ Bit2KB((unsigned long long)
+ drbd_bm_total_weight(mdev)));
+ }
+ if (mdev->state.conn == C_SYNC_SOURCE ||
+ mdev->state.conn == C_SYNC_TARGET ||
+ mdev->state.conn == C_VERIFY_S ||
+ mdev->state.conn == C_VERIFY_T)
+ drbd_syncer_progress(mdev, seq);
+
+ if (proc_details >= 1 && get_ldev_if_state(mdev, D_FAILED)) {
+ lc_seq_printf_stats(seq, mdev->resync);
+ lc_seq_printf_stats(seq, mdev->act_log);
+ put_ldev(mdev);
+ }
+
+ if (proc_details >= 2) {
+ if (mdev->resync) {
+ lc_seq_dump_details(seq, mdev->resync, "rs_left",
+ resync_dump_detail);
+ }
+ }
+ }
+
+ return 0;
+}
+
+static int drbd_proc_open(struct inode *inode, struct file *file)
+{
+ if (try_module_get(THIS_MODULE))
+ return single_open(file, drbd_seq_show, PDE(inode)->data);
+ return -ENODEV;
+}
+
+static int drbd_proc_release(struct inode *inode, struct file *file)
+{
+ module_put(THIS_MODULE);
+ return single_release(inode, file);
+}
+
+/* PROC FS stuff end */
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
new file mode 100644
index 00000000..43beaca5
--- /dev/null
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -0,0 +1,4678 @@
+/*
+ drbd_receiver.c
+
+ This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
+
+ Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
+ Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
+ Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
+
+ drbd is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ drbd is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with drbd; see the file COPYING. If not, write to
+ the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+
+#include <linux/module.h>
+
+#include <asm/uaccess.h>
+#include <net/sock.h>
+
+#include <linux/drbd.h>
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/in.h>
+#include <linux/mm.h>
+#include <linux/memcontrol.h>
+#include <linux/mm_inline.h>
+#include <linux/slab.h>
+#include <linux/pkt_sched.h>
+#define __KERNEL_SYSCALLS__
+#include <linux/unistd.h>
+#include <linux/vmalloc.h>
+#include <linux/random.h>
+#include <linux/string.h>
+#include <linux/scatterlist.h>
+#include "drbd_int.h"
+#include "drbd_req.h"
+
+#include "drbd_vli.h"
+
+enum finish_epoch {
+ FE_STILL_LIVE,
+ FE_DESTROYED,
+ FE_RECYCLED,
+};
+
+static int drbd_do_handshake(struct drbd_conf *mdev);
+static int drbd_do_auth(struct drbd_conf *mdev);
+
+static enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *, struct drbd_epoch *, enum epoch_event);
+static int e_end_block(struct drbd_conf *, struct drbd_work *, int);
+
+
+#define GFP_TRY (__GFP_HIGHMEM | __GFP_NOWARN)
+
+/*
+ * some helper functions to deal with single linked page lists,
+ * page->private being our "next" pointer.
+ */
+
+/* If at least n pages are linked at head, get n pages off.
+ * Otherwise, don't modify head, and return NULL.
+ * Locking is the responsibility of the caller.
+ */
+static struct page *page_chain_del(struct page **head, int n)
+{
+ struct page *page;
+ struct page *tmp;
+
+ BUG_ON(!n);
+ BUG_ON(!head);
+
+ page = *head;
+
+ if (!page)
+ return NULL;
+
+ while (page) {
+ tmp = page_chain_next(page);
+ if (--n == 0)
+ break; /* found sufficient pages */
+ if (tmp == NULL)
+ /* insufficient pages, don't use any of them. */
+ return NULL;
+ page = tmp;
+ }
+
+ /* add end of list marker for the returned list */
+ set_page_private(page, 0);
+ /* actual return value, and adjustment of head */
+ page = *head;
+ *head = tmp;
+ return page;
+}
+
+/* may be used outside of locks to find the tail of a (usually short)
+ * "private" page chain, before adding it back to a global chain head
+ * with page_chain_add() under a spinlock. */
+static struct page *page_chain_tail(struct page *page, int *len)
+{
+ struct page *tmp;
+ int i = 1;
+ while ((tmp = page_chain_next(page)))
+ ++i, page = tmp;
+ if (len)
+ *len = i;
+ return page;
+}
+
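+/* Drop our reference on each page of the chain;
+ * returns the number of pages in the chain. */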
+static int page_chain_free(struct page *page)
+{
+ struct page *tmp;
+ int i = 0;
+ page_chain_for_each_safe(page, tmp) {
+ put_page(page);
+ ++i;
+ }
+ return i;
+}
+
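+/* Prepend the chain chain_first..chain_last to *head; as noted above,
+ * the caller serializes access to *head (see page_chain_tail()). */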
+static void page_chain_add(struct page **head,
+ struct page *chain_first, struct page *chain_last)
+{
+#if 1
+ struct page *tmp;
+ tmp = page_chain_tail(chain_first, NULL);
+ BUG_ON(tmp != chain_last);
+#endif
+
+ /* add chain to head */
+ set_page_private(chain_last, (unsigned long)*head);
+ *head = chain_first;
+}
+
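+/* Take @number pages off the global drbd_pp_pool if enough are vacant,
+ * otherwise try to allocate fresh ones; a partial allocation is given
+ * back to the pool and NULL is returned. */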
+static struct page *drbd_pp_first_pages_or_try_alloc(struct drbd_conf *mdev, int number)
+{
+ struct page *page = NULL;
+ struct page *tmp = NULL;
+ int i = 0;
+
+ /* Yes, testing drbd_pp_vacant outside the lock is racy.
+ * So what. It saves a spin_lock. */
+ if (drbd_pp_vacant >= number) {
+ spin_lock(&drbd_pp_lock);
+ page = page_chain_del(&drbd_pp_pool, number);
+ if (page)
+ drbd_pp_vacant -= number;
+ spin_unlock(&drbd_pp_lock);
+ if (page)
+ return page;
+ }
+
+ /* GFP_TRY, because we must not cause arbitrary write-out: in a DRBD
+ * "criss-cross" setup, that might cause write-out on some other DRBD,
+ * which in turn might block on the other node at this very place. */
+ for (i = 0; i < number; i++) {
+ tmp = alloc_page(GFP_TRY);
+ if (!tmp)
+ break;
+ set_page_private(tmp, (unsigned long)page);
+ page = tmp;
+ }
+
+ if (i == number)
+ return page;
+
+ /* Not enough pages immediately available this time.
+ * No need to jump around here, drbd_pp_alloc will retry this
+ * function "soon". */
+ if (page) {
+ tmp = page_chain_tail(page, NULL);
+ spin_lock(&drbd_pp_lock);
+ page_chain_add(&drbd_pp_pool, page, tmp);
+ drbd_pp_vacant += i;
+ spin_unlock(&drbd_pp_lock);
+ }
+ return NULL;
+}
+
+static void reclaim_net_ee(struct drbd_conf *mdev, struct list_head *to_be_freed)
+{
+ struct drbd_epoch_entry *e;
+ struct list_head *le, *tle;
+
+ /* The EEs are always appended to the end of the list. Since
+ they are sent in order over the wire, they have to finish
+ in order. As soon as we see the first unfinished entry we can
+ stop examining the list... */
+
+ list_for_each_safe(le, tle, &mdev->net_ee) {
+ e = list_entry(le, struct drbd_epoch_entry, w.list);
+ if (drbd_ee_has_active_page(e))
+ break;
+ list_move(le, to_be_freed);
+ }
+}
+
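+/* Collect finished entries from net_ee under the req_lock,
+ * then free them outside of the lock. */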
+static void drbd_kick_lo_and_reclaim_net(struct drbd_conf *mdev)
+{
+ LIST_HEAD(reclaimed);
+ struct drbd_epoch_entry *e, *t;
+
+ spin_lock_irq(&mdev->req_lock);
+ reclaim_net_ee(mdev, &reclaimed);
+ spin_unlock_irq(&mdev->req_lock);
+
+ list_for_each_entry_safe(e, t, &reclaimed, w.list)
+ drbd_free_net_ee(mdev, e);
+}
+
+/**
+ * drbd_pp_alloc() - Returns @number pages, retries forever (or until signalled)
+ * @mdev: DRBD device.
+ * @number: number of pages requested
+ * @retry: whether to retry, if not enough pages are available right now
+ *
+ * Tries to allocate number pages, first from our own page pool, then from
+ * the kernel, unless this allocation would exceed the max_buffers setting.
+ * Possibly retry until DRBD frees sufficient pages somewhere else.
+ *
+ * Returns a page chain linked via page->private.
+ */
+static struct page *drbd_pp_alloc(struct drbd_conf *mdev, unsigned number, bool retry)
+{
+ struct page *page = NULL;
+ DEFINE_WAIT(wait);
+
+ /* Yes, we may run up to @number over max_buffers. If we
+ * follow it strictly, the admin will get it wrong anyways. */
+ if (atomic_read(&mdev->pp_in_use) < mdev->net_conf->max_buffers)
+ page = drbd_pp_first_pages_or_try_alloc(mdev, number);
+
+ while (page == NULL) {
+ prepare_to_wait(&drbd_pp_wait, &wait, TASK_INTERRUPTIBLE);
+
+ drbd_kick_lo_and_reclaim_net(mdev);
+
+ if (atomic_read(&mdev->pp_in_use) < mdev->net_conf->max_buffers) {
+ page = drbd_pp_first_pages_or_try_alloc(mdev, number);
+ if (page)
+ break;
+ }
+
+ if (!retry)
+ break;
+
+ if (signal_pending(current)) {
+ dev_warn(DEV, "drbd_pp_alloc interrupted!\n");
+ break;
+ }
+
+ schedule();
+ }
+ finish_wait(&drbd_pp_wait, &wait);
+
+ if (page)
+ atomic_add(number, &mdev->pp_in_use);
+ return page;
+}
+
+/* Must not be used from irq, as that may deadlock: see drbd_pp_alloc.
+ * Is also used from inside another spin_lock_irq(&mdev->req_lock);
+ * Either links the page chain back to the global pool,
+ * or returns all pages to the system. */
+static void drbd_pp_free(struct drbd_conf *mdev, struct page *page, int is_net)
+{
+ atomic_t *a = is_net ? &mdev->pp_in_use_by_net : &mdev->pp_in_use;
+ int i;
+
+ if (drbd_pp_vacant > (DRBD_MAX_BIO_SIZE/PAGE_SIZE)*minor_count)
+ i = page_chain_free(page);
+ else {
+ struct page *tmp;
+ tmp = page_chain_tail(page, &i);
+ spin_lock(&drbd_pp_lock);
+ page_chain_add(&drbd_pp_pool, page, tmp);
+ drbd_pp_vacant += i;
+ spin_unlock(&drbd_pp_lock);
+ }
+ i = atomic_sub_return(i, a);
+ if (i < 0)
+ dev_warn(DEV, "ASSERTION FAILED: %s: %d < 0\n",
+ is_net ? "pp_in_use_by_net" : "pp_in_use", i);
+ wake_up(&drbd_pp_wait);
+}
+
+/*
+You need to hold the req_lock:
+ _drbd_wait_ee_list_empty()
+
+You must not have the req_lock:
+ drbd_free_ee()
+ drbd_alloc_ee()
+ drbd_init_ee()
+ drbd_release_ee()
+ drbd_ee_fix_bhs()
+ drbd_process_done_ee()
+ drbd_clear_done_ee()
+ drbd_wait_ee_list_empty()
+*/
+
+struct drbd_epoch_entry *drbd_alloc_ee(struct drbd_conf *mdev,
+ u64 id,
+ sector_t sector,
+ unsigned int data_size,
+ gfp_t gfp_mask) __must_hold(local)
+{
+ struct drbd_epoch_entry *e;
+ struct page *page;
+ unsigned nr_pages = (data_size + PAGE_SIZE -1) >> PAGE_SHIFT;
+
+ if (drbd_insert_fault(mdev, DRBD_FAULT_AL_EE))
+ return NULL;
+
+ e = mempool_alloc(drbd_ee_mempool, gfp_mask & ~__GFP_HIGHMEM);
+ if (!e) {
+ if (!(gfp_mask & __GFP_NOWARN))
+ dev_err(DEV, "alloc_ee: Allocation of an EE failed\n");
+ return NULL;
+ }
+
+ page = drbd_pp_alloc(mdev, nr_pages, (gfp_mask & __GFP_WAIT));
+ if (!page)
+ goto fail;
+
+ INIT_HLIST_NODE(&e->collision);
+ e->epoch = NULL;
+ e->mdev = mdev;
+ e->pages = page;
+ atomic_set(&e->pending_bios, 0);
+ e->size = data_size;
+ e->flags = 0;
+ e->sector = sector;
+ e->block_id = id;
+
+ return e;
+
+ fail:
+ mempool_free(e, drbd_ee_mempool);
+ return NULL;
+}
+
+void drbd_free_some_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e, int is_net)
+{
+ if (e->flags & EE_HAS_DIGEST)
+ kfree(e->digest);
+ drbd_pp_free(mdev, e->pages, is_net);
+ D_ASSERT(atomic_read(&e->pending_bios) == 0);
+ D_ASSERT(hlist_unhashed(&e->collision));
+ mempool_free(e, drbd_ee_mempool);
+}
+
+int drbd_release_ee(struct drbd_conf *mdev, struct list_head *list)
+{
+ LIST_HEAD(work_list);
+ struct drbd_epoch_entry *e, *t;
+ int count = 0;
+ int is_net = list == &mdev->net_ee;
+
+ spin_lock_irq(&mdev->req_lock);
+ list_splice_init(list, &work_list);
+ spin_unlock_irq(&mdev->req_lock);
+
+ list_for_each_entry_safe(e, t, &work_list, w.list) {
+ drbd_free_some_ee(mdev, e, is_net);
+ count++;
+ }
+ return count;
+}
+
+
+/*
+ * This function is called from _asender only_
+ * but see also comments in _req_mod(,barrier_acked)
+ * and receive_Barrier.
+ *
+ * Move entries from net_ee to done_ee, if ready.
+ * Grab done_ee, call all callbacks, free the entries.
+ * The callbacks typically send out ACKs.
+ */
+static int drbd_process_done_ee(struct drbd_conf *mdev)
+{
+ LIST_HEAD(work_list);
+ LIST_HEAD(reclaimed);
+ struct drbd_epoch_entry *e, *t;
+ int ok = (mdev->state.conn >= C_WF_REPORT_PARAMS);
+
+ spin_lock_irq(&mdev->req_lock);
+ reclaim_net_ee(mdev, &reclaimed);
+ list_splice_init(&mdev->done_ee, &work_list);
+ spin_unlock_irq(&mdev->req_lock);
+
+ list_for_each_entry_safe(e, t, &reclaimed, w.list)
+ drbd_free_net_ee(mdev, e);
+
+ /* possible callbacks here:
+ * e_end_block, and e_end_resync_block, e_send_discard_ack.
+ * all ignore the last argument.
+ */
+ list_for_each_entry_safe(e, t, &work_list, w.list) {
+ /* list_del not necessary, next/prev members not touched */
+ ok = e->w.cb(mdev, &e->w, !ok) && ok;
+ drbd_free_ee(mdev, e);
+ }
+ wake_up(&mdev->ee_wait);
+
+ return ok;
+}
+
+void _drbd_wait_ee_list_empty(struct drbd_conf *mdev, struct list_head *head)
+{
+ DEFINE_WAIT(wait);
+
+ /* avoids spin_lock/unlock
+ * and calling prepare_to_wait in the fast path */
+ while (!list_empty(head)) {
+ prepare_to_wait(&mdev->ee_wait, &wait, TASK_UNINTERRUPTIBLE);
+ spin_unlock_irq(&mdev->req_lock);
+ io_schedule();
+ finish_wait(&mdev->ee_wait, &wait);
+ spin_lock_irq(&mdev->req_lock);
+ }
+}
+
+void drbd_wait_ee_list_empty(struct drbd_conf *mdev, struct list_head *head)
+{
+ spin_lock_irq(&mdev->req_lock);
+ _drbd_wait_ee_list_empty(mdev, head);
+ spin_unlock_irq(&mdev->req_lock);
+}
+
+/* see also kernel_accept, which is only present since 2.6.18;
+ * also, we want to log exactly which part of it failed */
+static int drbd_accept(struct drbd_conf *mdev, const char **what,
+ struct socket *sock, struct socket **newsock)
+{
+ struct sock *sk = sock->sk;
+ int err = 0;
+
+ *what = "listen";
+ err = sock->ops->listen(sock, 5);
+ if (err < 0)
+ goto out;
+
+ *what = "sock_create_lite";
+ err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
+ newsock);
+ if (err < 0)
+ goto out;
+
+ *what = "accept";
+ err = sock->ops->accept(sock, *newsock, 0);
+ if (err < 0) {
+ sock_release(*newsock);
+ *newsock = NULL;
+ goto out;
+ }
+ (*newsock)->ops = sock->ops;
+
+out:
+ return err;
+}
+
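+/* Receive up to @size bytes from an arbitrary socket in a single
+ * sock_recvmsg() call; used e.g. for the first-packet exchange and
+ * for socket liveness checks. */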
+static int drbd_recv_short(struct drbd_conf *mdev, struct socket *sock,
+ void *buf, size_t size, int flags)
+{
+ mm_segment_t oldfs;
+ struct kvec iov = {
+ .iov_base = buf,
+ .iov_len = size,
+ };
+ struct msghdr msg = {
+ .msg_iovlen = 1,
+ .msg_iov = (struct iovec *)&iov,
+ .msg_flags = (flags ? flags : MSG_WAITALL | MSG_NOSIGNAL)
+ };
+ int rv;
+
+ oldfs = get_fs();
+ set_fs(KERNEL_DS);
+ rv = sock_recvmsg(sock, &msg, size, msg.msg_flags);
+ set_fs(oldfs);
+
+ return rv;
+}
+
+static int drbd_recv(struct drbd_conf *mdev, void *buf, size_t size)
+{
+ mm_segment_t oldfs;
+ struct kvec iov = {
+ .iov_base = buf,
+ .iov_len = size,
+ };
+ struct msghdr msg = {
+ .msg_iovlen = 1,
+ .msg_iov = (struct iovec *)&iov,
+ .msg_flags = MSG_WAITALL | MSG_NOSIGNAL
+ };
+ int rv;
+
+ oldfs = get_fs();
+ set_fs(KERNEL_DS);
+
+ for (;;) {
+ rv = sock_recvmsg(mdev->data.socket, &msg, size, msg.msg_flags);
+ if (rv == size)
+ break;
+
+ /* Note:
+ * ECONNRESET other side closed the connection
+ * ERESTARTSYS (on sock) we got a signal
+ */
+
+ if (rv < 0) {
+ if (rv == -ECONNRESET)
+ dev_info(DEV, "sock was reset by peer\n");
+ else if (rv != -ERESTARTSYS)
+ dev_err(DEV, "sock_recvmsg returned %d\n", rv);
+ break;
+ } else if (rv == 0) {
+ dev_info(DEV, "sock was shut down by peer\n");
+ break;
+ } else {
+ /* signal came in, or peer/link went down,
+ * after we read a partial message
+ */
+ /* D_ASSERT(signal_pending(current)); */
+ break;
+ }
+ }
+
+ set_fs(oldfs);
+
+ if (rv != size)
+ drbd_force_state(mdev, NS(conn, C_BROKEN_PIPE));
+
+ return rv;
+}
+
+/* quoting tcp(7):
+ * On individual connections, the socket buffer size must be set prior to the
+ * listen(2) or connect(2) calls in order to have it take effect.
+ * This is our wrapper to do so.
+ */
+static void drbd_setbufsize(struct socket *sock, unsigned int snd,
+ unsigned int rcv)
+{
+ /* open coded SO_SNDBUF, SO_RCVBUF */
+ if (snd) {
+ sock->sk->sk_sndbuf = snd;
+ sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
+ }
+ if (rcv) {
+ sock->sk->sk_rcvbuf = rcv;
+ sock->sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
+ }
+}
+
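+/* Active side of connection setup: bind to the configured local address
+ * and try to connect() to the peer; returns the socket or NULL. */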
+static struct socket *drbd_try_connect(struct drbd_conf *mdev)
+{
+ const char *what;
+ struct socket *sock;
+ struct sockaddr_in6 src_in6;
+ int err;
+ int disconnect_on_error = 1;
+
+ if (!get_net_conf(mdev))
+ return NULL;
+
+ what = "sock_create_kern";
+ err = sock_create_kern(((struct sockaddr *)mdev->net_conf->my_addr)->sa_family,
+ SOCK_STREAM, IPPROTO_TCP, &sock);
+ if (err < 0) {
+ sock = NULL;
+ goto out;
+ }
+
+ sock->sk->sk_rcvtimeo =
+ sock->sk->sk_sndtimeo = mdev->net_conf->try_connect_int*HZ;
+ drbd_setbufsize(sock, mdev->net_conf->sndbuf_size,
+ mdev->net_conf->rcvbuf_size);
+
+ /* explicitly bind to the configured IP as source IP
+ * for the outgoing connections.
+ * This is needed for multihomed hosts and to be
+ * able to use lo: interfaces for drbd.
+ * Make sure to use 0 as port number, so linux selects
+ * a free one dynamically.
+ */
+ memcpy(&src_in6, mdev->net_conf->my_addr,
+ min_t(int, mdev->net_conf->my_addr_len, sizeof(src_in6)));
+ if (((struct sockaddr *)mdev->net_conf->my_addr)->sa_family == AF_INET6)
+ src_in6.sin6_port = 0;
+ else
+ ((struct sockaddr_in *)&src_in6)->sin_port = 0; /* AF_INET & AF_SCI */
+
+ what = "bind before connect";
+ err = sock->ops->bind(sock,
+ (struct sockaddr *) &src_in6,
+ mdev->net_conf->my_addr_len);
+ if (err < 0)
+ goto out;
+
+ /* connect may fail, peer not yet available.
+ * stay C_WF_CONNECTION, don't go Disconnecting! */
+ disconnect_on_error = 0;
+ what = "connect";
+ err = sock->ops->connect(sock,
+ (struct sockaddr *)mdev->net_conf->peer_addr,
+ mdev->net_conf->peer_addr_len, 0);
+
+out:
+ if (err < 0) {
+ if (sock) {
+ sock_release(sock);
+ sock = NULL;
+ }
+ switch (-err) {
+ /* timeout, busy, signal pending */
+ case ETIMEDOUT: case EAGAIN: case EINPROGRESS:
+ case EINTR: case ERESTARTSYS:
+ /* peer not (yet) available, network problem */
+ case ECONNREFUSED: case ENETUNREACH:
+ case EHOSTDOWN: case EHOSTUNREACH:
+ disconnect_on_error = 0;
+ break;
+ default:
+ dev_err(DEV, "%s failed, err = %d\n", what, err);
+ }
+ if (disconnect_on_error)
+ drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
+ }
+ put_net_conf(mdev);
+ return sock;
+}
+
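+/* Passive side of connection setup: listen on the configured local address
+ * and accept one incoming connection, with a randomly jittered timeout. */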
+static struct socket *drbd_wait_for_connect(struct drbd_conf *mdev)
+{
+ int timeo, err;
+ struct socket *s_estab = NULL, *s_listen;
+ const char *what;
+
+ if (!get_net_conf(mdev))
+ return NULL;
+
+ what = "sock_create_kern";
+ err = sock_create_kern(((struct sockaddr *)mdev->net_conf->my_addr)->sa_family,
+ SOCK_STREAM, IPPROTO_TCP, &s_listen);
+ if (err) {
+ s_listen = NULL;
+ goto out;
+ }
+
+ timeo = mdev->net_conf->try_connect_int * HZ;
+ timeo += (random32() & 1) ? timeo / 7 : -timeo / 7; /* 28.5% random jitter */
+
+ s_listen->sk->sk_reuse = 1; /* SO_REUSEADDR */
+ s_listen->sk->sk_rcvtimeo = timeo;
+ s_listen->sk->sk_sndtimeo = timeo;
+ drbd_setbufsize(s_listen, mdev->net_conf->sndbuf_size,
+ mdev->net_conf->rcvbuf_size);
+
+ what = "bind before listen";
+ err = s_listen->ops->bind(s_listen,
+ (struct sockaddr *) mdev->net_conf->my_addr,
+ mdev->net_conf->my_addr_len);
+ if (err < 0)
+ goto out;
+
+ err = drbd_accept(mdev, &what, s_listen, &s_estab);
+
+out:
+ if (s_listen)
+ sock_release(s_listen);
+ if (err < 0) {
+ if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) {
+ dev_err(DEV, "%s failed, err = %d\n", what, err);
+ drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
+ }
+ }
+ put_net_conf(mdev);
+
+ return s_estab;
+}
+
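+/* Send the "first packet" on a freshly connected socket; its command
+ * (P_HAND_SHAKE_S or P_HAND_SHAKE_M) tells the peer whether this socket
+ * is meant to become the data or the meta socket. */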
+static int drbd_send_fp(struct drbd_conf *mdev,
+ struct socket *sock, enum drbd_packets cmd)
+{
+ struct p_header80 *h = &mdev->data.sbuf.header.h80;
+
+ return _drbd_send_cmd(mdev, sock, cmd, h, sizeof(*h), 0);
+}
+
+static enum drbd_packets drbd_recv_fp(struct drbd_conf *mdev, struct socket *sock)
+{
+ struct p_header80 *h = &mdev->data.rbuf.header.h80;
+ int rr;
+
+ rr = drbd_recv_short(mdev, sock, h, sizeof(*h), 0);
+
+ if (rr == sizeof(*h) && h->magic == BE_DRBD_MAGIC)
+ return be16_to_cpu(h->command);
+
+ return 0xffff;
+}
+
+/**
+ * drbd_socket_okay() - Free the socket if its connection is not okay
+ * @mdev: DRBD device.
+ * @sock: pointer to the pointer to the socket.
+ */
+static int drbd_socket_okay(struct drbd_conf *mdev, struct socket **sock)
+{
+ int rr;
+ char tb[4];
+
+ if (!*sock)
+ return false;
+
+ rr = drbd_recv_short(mdev, *sock, tb, 4, MSG_DONTWAIT | MSG_PEEK);
+
+ if (rr > 0 || rr == -EAGAIN) {
+ return true;
+ } else {
+ sock_release(*sock);
+ *sock = NULL;
+ return false;
+ }
+}
+
+/*
+ * return values:
+ * 1 yes, we have a valid connection
+ * 0 oops, did not work out, please try again
+ * -1 peer talks different language,
+ * no point in trying again, please go standalone.
+ * -2 We do not have a network config...
+ */
+static int drbd_connect(struct drbd_conf *mdev)
+{
+ struct socket *s, *sock, *msock;
+ int try, h, ok;
+
+ D_ASSERT(!mdev->data.socket);
+
+ if (drbd_request_state(mdev, NS(conn, C_WF_CONNECTION)) < SS_SUCCESS)
+ return -2;
+
+ clear_bit(DISCARD_CONCURRENT, &mdev->flags);
+
+ sock = NULL;
+ msock = NULL;
+
+ do {
+ for (try = 0;;) {
+ /* 3 tries, this should take less than a second! */
+ s = drbd_try_connect(mdev);
+ if (s || ++try >= 3)
+ break;
+ /* give the other side time to call bind() & listen() */
+ schedule_timeout_interruptible(HZ / 10);
+ }
+
+ if (s) {
+ if (!sock) {
+ drbd_send_fp(mdev, s, P_HAND_SHAKE_S);
+ sock = s;
+ s = NULL;
+ } else if (!msock) {
+ drbd_send_fp(mdev, s, P_HAND_SHAKE_M);
+ msock = s;
+ s = NULL;
+ } else {
+ dev_err(DEV, "Logic error in drbd_connect()\n");
+ goto out_release_sockets;
+ }
+ }
+
+ if (sock && msock) {
+ schedule_timeout_interruptible(mdev->net_conf->ping_timeo*HZ/10);
+ ok = drbd_socket_okay(mdev, &sock);
+ ok = drbd_socket_okay(mdev, &msock) && ok;
+ if (ok)
+ break;
+ }
+
+retry:
+ s = drbd_wait_for_connect(mdev);
+ if (s) {
+ try = drbd_recv_fp(mdev, s);
+ drbd_socket_okay(mdev, &sock);
+ drbd_socket_okay(mdev, &msock);
+ switch (try) {
+ case P_HAND_SHAKE_S:
+ if (sock) {
+ dev_warn(DEV, "initial packet S crossed\n");
+ sock_release(sock);
+ }
+ sock = s;
+ break;
+ case P_HAND_SHAKE_M:
+ if (msock) {
+ dev_warn(DEV, "initial packet M crossed\n");
+ sock_release(msock);
+ }
+ msock = s;
+ set_bit(DISCARD_CONCURRENT, &mdev->flags);
+ break;
+ default:
+ dev_warn(DEV, "Error receiving initial packet\n");
+ sock_release(s);
+ if (random32() & 1)
+ goto retry;
+ }
+ }
+
+ if (mdev->state.conn <= C_DISCONNECTING)
+ goto out_release_sockets;
+ if (signal_pending(current)) {
+ flush_signals(current);
+ smp_rmb();
+ if (get_t_state(&mdev->receiver) == Exiting)
+ goto out_release_sockets;
+ }
+
+ if (sock && msock) {
+ ok = drbd_socket_okay(mdev, &sock);
+ ok = drbd_socket_okay(mdev, &msock) && ok;
+ if (ok)
+ break;
+ }
+ } while (1);
+
+ msock->sk->sk_reuse = 1; /* SO_REUSEADDR */
+ sock->sk->sk_reuse = 1; /* SO_REUSEADDR */
+
+ sock->sk->sk_allocation = GFP_NOIO;
+ msock->sk->sk_allocation = GFP_NOIO;
+
+ sock->sk->sk_priority = TC_PRIO_INTERACTIVE_BULK;
+ msock->sk->sk_priority = TC_PRIO_INTERACTIVE;
+
+ /* NOT YET ...
+ * sock->sk->sk_sndtimeo = mdev->net_conf->timeout*HZ/10;
+ * sock->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
+ * first set it to the P_HAND_SHAKE timeout,
+ * which we set to 4x the configured ping_timeout. */
+ sock->sk->sk_sndtimeo =
+ sock->sk->sk_rcvtimeo = mdev->net_conf->ping_timeo*4*HZ/10;
+
+ msock->sk->sk_sndtimeo = mdev->net_conf->timeout*HZ/10;
+ msock->sk->sk_rcvtimeo = mdev->net_conf->ping_int*HZ;
+
+ /* we don't want delays.
+ * we use TCP_CORK where appropriate, though */
+ drbd_tcp_nodelay(sock);
+ drbd_tcp_nodelay(msock);
+
+ mdev->data.socket = sock;
+ mdev->meta.socket = msock;
+ mdev->last_received = jiffies;
+
+ D_ASSERT(mdev->asender.task == NULL);
+
+ h = drbd_do_handshake(mdev);
+ if (h <= 0)
+ return h;
+
+ if (mdev->cram_hmac_tfm) {
+ /* drbd_request_state(mdev, NS(conn, WFAuth)); */
+ switch (drbd_do_auth(mdev)) {
+ case -1:
+ dev_err(DEV, "Authentication of peer failed\n");
+ return -1;
+ case 0:
+ dev_err(DEV, "Authentication of peer failed, trying again.\n");
+ return 0;
+ }
+ }
+
+ if (drbd_request_state(mdev, NS(conn, C_WF_REPORT_PARAMS)) < SS_SUCCESS)
+ return 0;
+
+ sock->sk->sk_sndtimeo = mdev->net_conf->timeout*HZ/10;
+ sock->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
+
+ atomic_set(&mdev->packet_seq, 0);
+ mdev->peer_seq = 0;
+
+ drbd_thread_start(&mdev->asender);
+
+ if (drbd_send_protocol(mdev) == -1)
+ return -1;
+ drbd_send_sync_param(mdev, &mdev->sync_conf);
+ drbd_send_sizes(mdev, 0, 0);
+ drbd_send_uuids(mdev);
+ drbd_send_state(mdev);
+ clear_bit(USE_DEGR_WFC_T, &mdev->flags);
+ clear_bit(RESIZE_PENDING, &mdev->flags);
+ mod_timer(&mdev->request_timer, jiffies + HZ); /* just start it here. */
+
+ return 1;
+
+out_release_sockets:
+ if (sock)
+ sock_release(sock);
+ if (msock)
+ sock_release(msock);
+ return -1;
+}
+
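+/* Receive and decode a packet header, accepting both the p_header80 and
+ * the p_header95 on-wire layouts; returns false on short read or bad magic. */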
+static int drbd_recv_header(struct drbd_conf *mdev, enum drbd_packets *cmd, unsigned int *packet_size)
+{
+ union p_header *h = &mdev->data.rbuf.header;
+ int r;
+
+ r = drbd_recv(mdev, h, sizeof(*h));
+ if (unlikely(r != sizeof(*h))) {
+ if (!signal_pending(current))
+ dev_warn(DEV, "short read expecting header on sock: r=%d\n", r);
+ return false;
+ }
+
+ if (likely(h->h80.magic == BE_DRBD_MAGIC)) {
+ *cmd = be16_to_cpu(h->h80.command);
+ *packet_size = be16_to_cpu(h->h80.length);
+ } else if (h->h95.magic == BE_DRBD_MAGIC_BIG) {
+ *cmd = be16_to_cpu(h->h95.command);
+ *packet_size = be32_to_cpu(h->h95.length);
+ } else {
+ dev_err(DEV, "magic?? on data m: 0x%08x c: %d l: %d\n",
+ be32_to_cpu(h->h80.magic),
+ be16_to_cpu(h->h80.command),
+ be16_to_cpu(h->h80.length));
+ return false;
+ }
+ mdev->last_received = jiffies;
+
+ return true;
+}
+
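+/* Flush the backing device if the current write ordering method asks for it;
+ * on failure, fall back to WO_drain_io. */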
+static void drbd_flush(struct drbd_conf *mdev)
+{
+ int rv;
+
+ if (mdev->write_ordering >= WO_bdev_flush && get_ldev(mdev)) {
+ rv = blkdev_issue_flush(mdev->ldev->backing_bdev, GFP_KERNEL,
+ NULL);
+ if (rv) {
+ dev_err(DEV, "local disk flush failed with status %d\n", rv);
+ /* would rather check on EOPNOTSUPP, but that is not reliable.
+ * don't try again for ANY return value != 0
+ * if (rv == -EOPNOTSUPP) */
+ drbd_bump_write_ordering(mdev, WO_drain_io);
+ }
+ put_ldev(mdev);
+ }
+}
+
+/**
+ * drbd_may_finish_epoch() - Applies an epoch_event to the epoch's state, possibly finishing it.
+ * @mdev: DRBD device.
+ * @epoch: Epoch object.
+ * @ev: Epoch event.
+ */
+static enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *mdev,
+ struct drbd_epoch *epoch,
+ enum epoch_event ev)
+{
+ int epoch_size;
+ struct drbd_epoch *next_epoch;
+ enum finish_epoch rv = FE_STILL_LIVE;
+
+ spin_lock(&mdev->epoch_lock);
+ do {
+ next_epoch = NULL;
+
+ epoch_size = atomic_read(&epoch->epoch_size);
+
+ switch (ev & ~EV_CLEANUP) {
+ case EV_PUT:
+ atomic_dec(&epoch->active);
+ break;
+ case EV_GOT_BARRIER_NR:
+ set_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags);
+ break;
+ case EV_BECAME_LAST:
+ /* nothing to do*/
+ break;
+ }
+
+ if (epoch_size != 0 &&
+ atomic_read(&epoch->active) == 0 &&
+ test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags)) {
+ if (!(ev & EV_CLEANUP)) {
+ spin_unlock(&mdev->epoch_lock);
+ drbd_send_b_ack(mdev, epoch->barrier_nr, epoch_size);
+ spin_lock(&mdev->epoch_lock);
+ }
+ dec_unacked(mdev);
+
+ if (mdev->current_epoch != epoch) {
+ next_epoch = list_entry(epoch->list.next, struct drbd_epoch, list);
+ list_del(&epoch->list);
+ ev = EV_BECAME_LAST | (ev & EV_CLEANUP);
+ mdev->epochs--;
+ kfree(epoch);
+
+ if (rv == FE_STILL_LIVE)
+ rv = FE_DESTROYED;
+ } else {
+ epoch->flags = 0;
+ atomic_set(&epoch->epoch_size, 0);
+ /* atomic_set(&epoch->active, 0); is already zero */
+ if (rv == FE_STILL_LIVE)
+ rv = FE_RECYCLED;
+ wake_up(&mdev->ee_wait);
+ }
+ }
+
+ if (!next_epoch)
+ break;
+
+ epoch = next_epoch;
+ } while (1);
+
+ spin_unlock(&mdev->epoch_lock);
+
+ return rv;
+}
+
+/**
+ * drbd_bump_write_ordering() - Fall back to another write ordering method
+ * @mdev: DRBD device.
+ * @wo: Write ordering method to try.
+ */
+void drbd_bump_write_ordering(struct drbd_conf *mdev, enum write_ordering_e wo) __must_hold(local)
+{
+ enum write_ordering_e pwo;
+ static char *write_ordering_str[] = {
+ [WO_none] = "none",
+ [WO_drain_io] = "drain",
+ [WO_bdev_flush] = "flush",
+ };
+
+ pwo = mdev->write_ordering;
+ wo = min(pwo, wo);
+ if (wo == WO_bdev_flush && mdev->ldev->dc.no_disk_flush)
+ wo = WO_drain_io;
+ if (wo == WO_drain_io && mdev->ldev->dc.no_disk_drain)
+ wo = WO_none;
+ mdev->write_ordering = wo;
+ if (pwo != mdev->write_ordering || wo == WO_bdev_flush)
+ dev_info(DEV, "Method to ensure write ordering: %s\n", write_ordering_str[mdev->write_ordering]);
+}
+
+/**
+ * drbd_submit_ee() - Submit the pages of an epoch entry as one or more bios
+ * @mdev: DRBD device.
+ * @e: epoch entry
+ * @rw: flag field, see bio->bi_rw
+ *
+ * May spread the pages to multiple bios,
+ * depending on bio_add_page restrictions.
+ *
+ * Returns 0 if all bios have been submitted,
+ * -ENOMEM if we could not allocate enough bios,
+ * -ENOSPC (any better suggestion?) if we have not been able to bio_add_page a
+ * single page to an empty bio (which should never happen and likely indicates
+ * that the lower level IO stack is in some way broken). This has been observed
+ * on certain Xen deployments.
+ */
+/* TODO allocate from our own bio_set. */
+int drbd_submit_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e,
+ const unsigned rw, const int fault_type)
+{
+ struct bio *bios = NULL;
+ struct bio *bio;
+ struct page *page = e->pages;
+ sector_t sector = e->sector;
+ unsigned ds = e->size;
+ unsigned n_bios = 0;
+ unsigned nr_pages = (ds + PAGE_SIZE -1) >> PAGE_SHIFT;
+ int err = -ENOMEM;
+
+ /* In most cases, we will only need one bio. But in case the lower
+ * level restrictions happen to be different at this offset on this
+ * side than those of the sending peer, we may need to submit the
+ * request in more than one bio. */
+next_bio:
+ bio = bio_alloc(GFP_NOIO, nr_pages);
+ if (!bio) {
+ dev_err(DEV, "submit_ee: Allocation of a bio failed\n");
+ goto fail;
+ }
+ /* > e->sector, unless this is the first bio */
+ bio->bi_sector = sector;
+ bio->bi_bdev = mdev->ldev->backing_bdev;
+ bio->bi_rw = rw;
+ bio->bi_private = e;
+ bio->bi_end_io = drbd_endio_sec;
+
+ bio->bi_next = bios;
+ bios = bio;
+ ++n_bios;
+
+ page_chain_for_each(page) {
+ unsigned len = min_t(unsigned, ds, PAGE_SIZE);
+ if (!bio_add_page(bio, page, len, 0)) {
+ /* A single page must always be possible!
+ * But in case it fails anyways,
+ * we deal with it, and complain (below). */
+ if (bio->bi_vcnt == 0) {
+ dev_err(DEV,
+ "bio_add_page failed for len=%u, "
+ "bi_vcnt=0 (bi_sector=%llu)\n",
+ len, (unsigned long long)bio->bi_sector);
+ err = -ENOSPC;
+ goto fail;
+ }
+ goto next_bio;
+ }
+ ds -= len;
+ sector += len >> 9;
+ --nr_pages;
+ }
+ D_ASSERT(page == NULL);
+ D_ASSERT(ds == 0);
+
+ atomic_set(&e->pending_bios, n_bios);
+ do {
+ bio = bios;
+ bios = bios->bi_next;
+ bio->bi_next = NULL;
+
+ drbd_generic_make_request(mdev, fault_type, bio);
+ } while (bios);
+ return 0;
+
+fail:
+ while (bios) {
+ bio = bios;
+ bios = bios->bi_next;
+ bio_put(bio);
+ }
+ return err;
+}
+
+static int receive_Barrier(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size)
+{
+ int rv;
+ struct p_barrier *p = &mdev->data.rbuf.barrier;
+ struct drbd_epoch *epoch;
+
+ inc_unacked(mdev);
+
+ mdev->current_epoch->barrier_nr = p->barrier;
+ rv = drbd_may_finish_epoch(mdev, mdev->current_epoch, EV_GOT_BARRIER_NR);
+
+ /* P_BARRIER_ACK may imply that the corresponding extent is dropped from
+ * the activity log, which means it would not be resynced in case the
+ * R_PRIMARY crashes now.
+ * Therefore we must send the barrier_ack after the barrier request was
+ * completed. */
+ switch (mdev->write_ordering) {
+ case WO_none:
+ if (rv == FE_RECYCLED)
+ return true;
+
+ /* receiver context, in the writeout path of the other node.
+ * avoid potential distributed deadlock */
+ epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
+ if (epoch)
+ break;
+ else
+ dev_warn(DEV, "Allocation of an epoch failed, slowing down\n");
+ /* Fall through */
+
+ case WO_bdev_flush:
+ case WO_drain_io:
+ drbd_wait_ee_list_empty(mdev, &mdev->active_ee);
+ drbd_flush(mdev);
+
+ if (atomic_read(&mdev->current_epoch->epoch_size)) {
+ epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
+ if (epoch)
+ break;
+ }
+
+ epoch = mdev->current_epoch;
+ wait_event(mdev->ee_wait, atomic_read(&epoch->epoch_size) == 0);
+
+ D_ASSERT(atomic_read(&epoch->active) == 0);
+ D_ASSERT(epoch->flags == 0);
+
+ return true;
+ default:
+ dev_err(DEV, "Strangeness in mdev->write_ordering %d\n", mdev->write_ordering);
+ return false;
+ }
+
+ epoch->flags = 0;
+ atomic_set(&epoch->epoch_size, 0);
+ atomic_set(&epoch->active, 0);
+
+ spin_lock(&mdev->epoch_lock);
+ if (atomic_read(&mdev->current_epoch->epoch_size)) {
+ list_add(&epoch->list, &mdev->current_epoch->list);
+ mdev->current_epoch = epoch;
+ mdev->epochs++;
+ } else {
+ /* The current_epoch got recycled while we allocated this one... */
+ kfree(epoch);
+ }
+ spin_unlock(&mdev->epoch_lock);
+
+ return true;
+}
+
+/* used from receive_RSDataReply (recv_resync_read)
+ * and from receive_Data */
+static struct drbd_epoch_entry *
+read_in_block(struct drbd_conf *mdev, u64 id, sector_t sector, int data_size) __must_hold(local)
+{
+ const sector_t capacity = drbd_get_capacity(mdev->this_bdev);
+ struct drbd_epoch_entry *e;
+ struct page *page;
+ int dgs, ds, rr;
+ void *dig_in = mdev->int_dig_in;
+ void *dig_vv = mdev->int_dig_vv;
+ unsigned long *data;
+
+ dgs = (mdev->agreed_pro_version >= 87 && mdev->integrity_r_tfm) ?
+ crypto_hash_digestsize(mdev->integrity_r_tfm) : 0;
+
+ if (dgs) {
+ rr = drbd_recv(mdev, dig_in, dgs);
+ if (rr != dgs) {
+ if (!signal_pending(current))
+ dev_warn(DEV,
+ "short read receiving data digest: read %d expected %d\n",
+ rr, dgs);
+ return NULL;
+ }
+ }
+
+ data_size -= dgs;
+
+ ERR_IF(data_size == 0) return NULL;
+ ERR_IF(data_size & 0x1ff) return NULL;
+ ERR_IF(data_size > DRBD_MAX_BIO_SIZE) return NULL;
+
+ /* even though we trust our peer,
+ * we sometimes have to double check. */
+ if (sector + (data_size>>9) > capacity) {
+ dev_err(DEV, "request from peer beyond end of local disk: "
+ "capacity: %llus < sector: %llus + size: %u\n",
+ (unsigned long long)capacity,
+ (unsigned long long)sector, data_size);
+ return NULL;
+ }
+
+ /* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
+ * "criss-cross" setup, that might cause write-out on some other DRBD,
+ * which in turn might block on the other node at this very place. */
+ e = drbd_alloc_ee(mdev, id, sector, data_size, GFP_NOIO);
+ if (!e)
+ return NULL;
+
+ ds = data_size;
+ page = e->pages;
+ page_chain_for_each(page) {
+ unsigned len = min_t(int, ds, PAGE_SIZE);
+ data = kmap(page);
+ rr = drbd_recv(mdev, data, len);
+ if (drbd_insert_fault(mdev, DRBD_FAULT_RECEIVE)) {
+ dev_err(DEV, "Fault injection: Corrupting data on receive\n");
+ data[0] = data[0] ^ (unsigned long)-1;
+ }
+ kunmap(page);
+ if (rr != len) {
+ drbd_free_ee(mdev, e);
+ if (!signal_pending(current))
+ dev_warn(DEV, "short read receiving data: read %d expected %d\n",
+ rr, len);
+ return NULL;
+ }
+ ds -= rr;
+ }
+
+ if (dgs) {
+ drbd_csum_ee(mdev, mdev->integrity_r_tfm, e, dig_vv);
+ if (memcmp(dig_in, dig_vv, dgs)) {
+ dev_err(DEV, "Digest integrity check FAILED: %llus +%u\n",
+ (unsigned long long)sector, data_size);
+ drbd_bcast_ee(mdev, "digest failed",
+ dgs, dig_in, dig_vv, e);
+ drbd_free_ee(mdev, e);
+ return NULL;
+ }
+ }
+ mdev->recv_cnt += data_size>>9;
+ return e;
+}
+
+/* drbd_drain_block() just takes a data block
+ * out of the socket input buffer, and discards it.
+ */
+static int drbd_drain_block(struct drbd_conf *mdev, int data_size)
+{
+ struct page *page;
+ int rr, rv = 1;
+ void *data;
+
+ if (!data_size)
+ return true;
+
+ page = drbd_pp_alloc(mdev, 1, 1);
+
+ data = kmap(page);
+ while (data_size) {
+ rr = drbd_recv(mdev, data, min_t(int, data_size, PAGE_SIZE));
+ if (rr != min_t(int, data_size, PAGE_SIZE)) {
+ rv = 0;
+ if (!signal_pending(current))
+ dev_warn(DEV,
+ "short read receiving data: read %d expected %d\n",
+ rr, min_t(int, data_size, PAGE_SIZE));
+ break;
+ }
+ data_size -= rr;
+ }
+ kunmap(page);
+ drbd_pp_free(mdev, page, 0);
+ return rv;
+}
+
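+/* Receive the payload of a data reply directly into the bio of the
+ * original (disk-less) read request, verifying the data digest if used. */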
+static int recv_dless_read(struct drbd_conf *mdev, struct drbd_request *req,
+ sector_t sector, int data_size)
+{
+ struct bio_vec *bvec;
+ struct bio *bio;
+ int dgs, rr, i, expect;
+ void *dig_in = mdev->int_dig_in;
+ void *dig_vv = mdev->int_dig_vv;
+
+ dgs = (mdev->agreed_pro_version >= 87 && mdev->integrity_r_tfm) ?
+ crypto_hash_digestsize(mdev->integrity_r_tfm) : 0;
+
+ if (dgs) {
+ rr = drbd_recv(mdev, dig_in, dgs);
+ if (rr != dgs) {
+ if (!signal_pending(current))
+ dev_warn(DEV,
+ "short read receiving data reply digest: read %d expected %d\n",
+ rr, dgs);
+ return 0;
+ }
+ }
+
+ data_size -= dgs;
+
+ /* optimistically update recv_cnt. if receiving fails below,
+ * we disconnect anyways, and counters will be reset. */
+ mdev->recv_cnt += data_size>>9;
+
+ bio = req->master_bio;
+ D_ASSERT(sector == bio->bi_sector);
+
+ bio_for_each_segment(bvec, bio, i) {
+ expect = min_t(int, data_size, bvec->bv_len);
+ rr = drbd_recv(mdev,
+ kmap(bvec->bv_page)+bvec->bv_offset,
+ expect);
+ kunmap(bvec->bv_page);
+ if (rr != expect) {
+ if (!signal_pending(current))
+ dev_warn(DEV, "short read receiving data reply: "
+ "read %d expected %d\n",
+ rr, expect);
+ return 0;
+ }
+ data_size -= rr;
+ }
+
+ if (dgs) {
+ drbd_csum_bio(mdev, mdev->integrity_r_tfm, bio, dig_vv);
+ if (memcmp(dig_in, dig_vv, dgs)) {
+ dev_err(DEV, "Digest integrity check FAILED. Broken NICs?\n");
+ return 0;
+ }
+ }
+
+ D_ASSERT(data_size == 0);
+ return 1;
+}
+
+/* e_end_resync_block() is called via
+ * drbd_process_done_ee() by asender only */
+static int e_end_resync_block(struct drbd_conf *mdev, struct drbd_work *w, int unused)
+{
+ struct drbd_epoch_entry *e = (struct drbd_epoch_entry *)w;
+ sector_t sector = e->sector;
+ int ok;
+
+ D_ASSERT(hlist_unhashed(&e->collision));
+
+ if (likely((e->flags & EE_WAS_ERROR) == 0)) {
+ drbd_set_in_sync(mdev, sector, e->size);
+ ok = drbd_send_ack(mdev, P_RS_WRITE_ACK, e);
+ } else {
+ /* Record failure to sync */
+ drbd_rs_failed_io(mdev, sector, e->size);
+
+ ok = drbd_send_ack(mdev, P_NEG_ACK, e);
+ }
+ dec_unacked(mdev);
+
+ return ok;
+}
+
+static int recv_resync_read(struct drbd_conf *mdev, sector_t sector, int data_size) __releases(local)
+{
+ struct drbd_epoch_entry *e;
+
+ e = read_in_block(mdev, ID_SYNCER, sector, data_size);
+ if (!e)
+ goto fail;
+
+ dec_rs_pending(mdev);
+
+ inc_unacked(mdev);
+ /* corresponding dec_unacked() in e_end_resync_block()
+ * respective _drbd_clear_done_ee */
+
+ e->w.cb = e_end_resync_block;
+
+ spin_lock_irq(&mdev->req_lock);
+ list_add(&e->w.list, &mdev->sync_ee);
+ spin_unlock_irq(&mdev->req_lock);
+
+ atomic_add(data_size >> 9, &mdev->rs_sect_ev);
+ if (drbd_submit_ee(mdev, e, WRITE, DRBD_FAULT_RS_WR) == 0)
+ return true;
+
+ /* don't care for the reason here */
+ dev_err(DEV, "submit failed, triggering re-connect\n");
+ spin_lock_irq(&mdev->req_lock);
+ list_del(&e->w.list);
+ spin_unlock_irq(&mdev->req_lock);
+
+ drbd_free_ee(mdev, e);
+fail:
+ put_ldev(mdev);
+ return false;
+}
+
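+/* Data reply: answer to a read request we sent to the peer earlier. */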
+static int receive_DataReply(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size)
+{
+ struct drbd_request *req;
+ sector_t sector;
+ int ok;
+ struct p_data *p = &mdev->data.rbuf.data;
+
+ sector = be64_to_cpu(p->sector);
+
+ spin_lock_irq(&mdev->req_lock);
+ req = _ar_id_to_req(mdev, p->block_id, sector);
+ spin_unlock_irq(&mdev->req_lock);
+ if (unlikely(!req)) {
+ dev_err(DEV, "Got a corrupt block_id/sector pair(1).\n");
+ return false;
+ }
+
+ /* hlist_del(&req->collision) is done in _req_may_be_done, to avoid
+ * special casing it there for the various failure cases.
+ * still no race with drbd_fail_pending_reads */
+ ok = recv_dless_read(mdev, req, sector, data_size);
+
+ if (ok)
+ req_mod(req, data_received);
+ /* else: nothing. handled from drbd_disconnect...
+ * I don't think we may complete this just yet
+ * in case we are "on-disconnect: freeze" */
+
+ return ok;
+}
+
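+/* Resync data reply from the peer: submitted to the local disk via
+ * recv_resync_read(), or drained if we have no usable local disk. */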
+static int receive_RSDataReply(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size)
+{
+ sector_t sector;
+ int ok;
+ struct p_data *p = &mdev->data.rbuf.data;
+
+ sector = be64_to_cpu(p->sector);
+ D_ASSERT(p->block_id == ID_SYNCER);
+
+ if (get_ldev(mdev)) {
+ /* data is submitted to disk within recv_resync_read.
+ * corresponding put_ldev done below on error,
+ * or in drbd_endio_write_sec. */
+ ok = recv_resync_read(mdev, sector, data_size);
+ } else {
+ if (__ratelimit(&drbd_ratelimit_state))
+ dev_err(DEV, "Can not write resync data to local disk.\n");
+
+ ok = drbd_drain_block(mdev, data_size);
+
+ drbd_send_ack_dp(mdev, P_NEG_ACK, p, data_size);
+ }
+
+ atomic_add(data_size >> 9, &mdev->rs_sect_in);
+
+ return ok;
+}
+
+/* e_end_block() is called via drbd_process_done_ee().
+ * this means this function only runs in the asender thread
+ */
+static int e_end_block(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
+{
+ struct drbd_epoch_entry *e = (struct drbd_epoch_entry *)w;
+ sector_t sector = e->sector;
+ int ok = 1, pcmd;
+
+ if (mdev->net_conf->wire_protocol == DRBD_PROT_C) {
+ if (likely((e->flags & EE_WAS_ERROR) == 0)) {
+ pcmd = (mdev->state.conn >= C_SYNC_SOURCE &&
+ mdev->state.conn <= C_PAUSED_SYNC_T &&
+ e->flags & EE_MAY_SET_IN_SYNC) ?
+ P_RS_WRITE_ACK : P_WRITE_ACK;
+ ok &= drbd_send_ack(mdev, pcmd, e);
+ if (pcmd == P_RS_WRITE_ACK)
+ drbd_set_in_sync(mdev, sector, e->size);
+ } else {
+ ok = drbd_send_ack(mdev, P_NEG_ACK, e);
+ /* we expect it to be marked out of sync anyways...
+ * maybe assert this? */
+ }
+ dec_unacked(mdev);
+ }
+ /* we delete from the conflict detection hash _after_ we sent out the
+ * P_WRITE_ACK / P_NEG_ACK, to get the sequence number right. */
+ if (mdev->net_conf->two_primaries) {
+ spin_lock_irq(&mdev->req_lock);
+ D_ASSERT(!hlist_unhashed(&e->collision));
+ hlist_del_init(&e->collision);
+ spin_unlock_irq(&mdev->req_lock);
+ } else {
+ D_ASSERT(hlist_unhashed(&e->collision));
+ }
+
+ drbd_may_finish_epoch(mdev, e->epoch, EV_PUT + (cancel ? EV_CLEANUP : 0));
+
+ return ok;
+}
+
+static int e_send_discard_ack(struct drbd_conf *mdev, struct drbd_work *w, int unused)
+{
+ struct drbd_epoch_entry *e = (struct drbd_epoch_entry *)w;
+ int ok = 1;
+
+ D_ASSERT(mdev->net_conf->wire_protocol == DRBD_PROT_C);
+ ok = drbd_send_ack(mdev, P_DISCARD_ACK, e);
+
+ spin_lock_irq(&mdev->req_lock);
+ D_ASSERT(!hlist_unhashed(&e->collision));
+ hlist_del_init(&e->collision);
+ spin_unlock_irq(&mdev->req_lock);
+
+ dec_unacked(mdev);
+
+ return ok;
+}
+
+/* Called from receive_Data.
+ * Synchronize packets on sock with packets on msock.
+ *
+ * This is here so even when a P_DATA packet traveling via sock overtook an Ack
+ * packet traveling on msock, they are still processed in the order they have
+ * been sent.
+ *
+ * Note: we don't care for Ack packets overtaking P_DATA packets.
+ *
+ * In case packet_seq is larger than mdev->peer_seq number, there are
+ * outstanding packets on the msock. We wait for them to arrive.
+ * In case we are the logically next packet, we update mdev->peer_seq
+ * ourselves. Correctly handles 32bit wrap around.
+ *
+ * Assume we have a 10 GBit connection, that is about 1<<30 byte per second,
+ * about 1<<21 sectors per second. So "worst" case, we have 1<<3 == 8 seconds
+ * for the 24bit wrap (historical atomic_t guarantee on some archs), and we have
+ * 1<<9 == 512 seconds aka ages for the 32bit wrap around...
+ *
+ * returns 0 if we may process the packet,
+ * -ERESTARTSYS if we were interrupted (by disconnect signal). */
+static int drbd_wait_peer_seq(struct drbd_conf *mdev, const u32 packet_seq)
+{
+ DEFINE_WAIT(wait);
+ unsigned int p_seq;
+ long timeout;
+ int ret = 0;
+ spin_lock(&mdev->peer_seq_lock);
+ for (;;) {
+ prepare_to_wait(&mdev->seq_wait, &wait, TASK_INTERRUPTIBLE);
+ if (seq_le(packet_seq, mdev->peer_seq+1))
+ break;
+ if (signal_pending(current)) {
+ ret = -ERESTARTSYS;
+ break;
+ }
+ p_seq = mdev->peer_seq;
+ spin_unlock(&mdev->peer_seq_lock);
+ timeout = schedule_timeout(30*HZ);
+ spin_lock(&mdev->peer_seq_lock);
+ if (timeout == 0 && p_seq == mdev->peer_seq) {
+ ret = -ETIMEDOUT;
+ dev_err(DEV, "ASSERT FAILED waited 30 seconds for sequence update, forcing reconnect\n");
+ break;
+ }
+ }
+ finish_wait(&mdev->seq_wait, &wait);
+ if (mdev->peer_seq+1 == packet_seq)
+ mdev->peer_seq++;
+ spin_unlock(&mdev->peer_seq_lock);
+ return ret;
+}
+
+/* see also bio_flags_to_wire()
+ * DRBD_REQ_*, because we need to semantically map the flags to data packet
+ * flags and back. We may replicate to other kernel versions. */
+static unsigned long wire_flags_to_bio(struct drbd_conf *mdev, u32 dpf)
+{
+ return (dpf & DP_RW_SYNC ? REQ_SYNC : 0) |
+ (dpf & DP_FUA ? REQ_FUA : 0) |
+ (dpf & DP_FLUSH ? REQ_FLUSH : 0) |
+ (dpf & DP_DISCARD ? REQ_DISCARD : 0);
+}
+
+/* mirrored write */
+static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size)
+{
+ sector_t sector;
+ struct drbd_epoch_entry *e;
+ struct p_data *p = &mdev->data.rbuf.data;
+ int rw = WRITE;
+ u32 dp_flags;
+
+ if (!get_ldev(mdev)) {
+ spin_lock(&mdev->peer_seq_lock);
+ if (mdev->peer_seq+1 == be32_to_cpu(p->seq_num))
+ mdev->peer_seq++;
+ spin_unlock(&mdev->peer_seq_lock);
+
+ drbd_send_ack_dp(mdev, P_NEG_ACK, p, data_size);
+ atomic_inc(&mdev->current_epoch->epoch_size);
+ return drbd_drain_block(mdev, data_size);
+ }
+
+ /* get_ldev(mdev) successful.
+ * Corresponding put_ldev done either below (on various errors),
+ * or in drbd_endio_write_sec, if we successfully submit the data at
+ * the end of this function. */
+
+ sector = be64_to_cpu(p->sector);
+ e = read_in_block(mdev, p->block_id, sector, data_size);
+ if (!e) {
+ put_ldev(mdev);
+ return false;
+ }
+
+ e->w.cb = e_end_block;
+
+ dp_flags = be32_to_cpu(p->dp_flags);
+ rw |= wire_flags_to_bio(mdev, dp_flags);
+
+ if (dp_flags & DP_MAY_SET_IN_SYNC)
+ e->flags |= EE_MAY_SET_IN_SYNC;
+
+ spin_lock(&mdev->epoch_lock);
+ e->epoch = mdev->current_epoch;
+ atomic_inc(&e->epoch->epoch_size);
+ atomic_inc(&e->epoch->active);
+ spin_unlock(&mdev->epoch_lock);
+
+ /* I'm the receiver, I do hold a net_cnt reference. */
+ if (!mdev->net_conf->two_primaries) {
+ spin_lock_irq(&mdev->req_lock);
+ } else {
+ /* don't get the req_lock yet,
+ * we may sleep in drbd_wait_peer_seq */
+ const int size = e->size;
+ const int discard = test_bit(DISCARD_CONCURRENT, &mdev->flags);
+ DEFINE_WAIT(wait);
+ struct drbd_request *i;
+ struct hlist_node *n;
+ struct hlist_head *slot;
+ int first;
+
+ D_ASSERT(mdev->net_conf->wire_protocol == DRBD_PROT_C);
+ BUG_ON(mdev->ee_hash == NULL);
+ BUG_ON(mdev->tl_hash == NULL);
+
+ /* conflict detection and handling:
+ * 1. wait on the sequence number,
+ * in case this data packet overtook ACK packets.
+ * 2. check our hash tables for conflicting requests.
+ * we only need to walk the tl_hash, since an ee can not
+ * have a conflict with an other ee: on the submitting
+ * node, the corresponding req had already been conflicting,
+ * and a conflicting req is never sent.
+ *
+ * Note: for two_primaries, we are protocol C,
+ * so there cannot be any request that is DONE
+ * but still on the transfer log.
+ *
+ * unconditionally add to the ee_hash.
+ *
+ * if no conflicting request is found:
+ * submit.
+ *
+ * if any conflicting request is found
+ * that has not yet been acked,
+ * AND I have the "discard concurrent writes" flag:
+ * queue (via done_ee) the P_DISCARD_ACK; OUT.
+ *
+ * if any conflicting request is found:
+ * block the receiver, waiting on misc_wait
+ * until no more conflicting requests are there,
+ * or we get interrupted (disconnect).
+ *
+ * we do not just write after local io completion of those
+ * requests, but only after req is done completely, i.e.
+ * we wait for the P_DISCARD_ACK to arrive!
+ *
+ * then proceed normally, i.e. submit.
+ */
+ if (drbd_wait_peer_seq(mdev, be32_to_cpu(p->seq_num)))
+ goto out_interrupted;
+
+ spin_lock_irq(&mdev->req_lock);
+
+ hlist_add_head(&e->collision, ee_hash_slot(mdev, sector));
+
+#define OVERLAPS overlaps(i->sector, i->size, sector, size)
+ slot = tl_hash_slot(mdev, sector);
+ first = 1;
+ for (;;) {
+ int have_unacked = 0;
+ int have_conflict = 0;
+ prepare_to_wait(&mdev->misc_wait, &wait,
+ TASK_INTERRUPTIBLE);
+ hlist_for_each_entry(i, n, slot, collision) {
+ if (OVERLAPS) {
+ /* only ALERT on first iteration,
+ * we may be woken up early... */
+ if (first)
+ dev_alert(DEV, "%s[%u] Concurrent local write detected!"
+ " new: %llus +%u; pending: %llus +%u\n",
+ current->comm, current->pid,
+ (unsigned long long)sector, size,
+ (unsigned long long)i->sector, i->size);
+ if (i->rq_state & RQ_NET_PENDING)
+ ++have_unacked;
+ ++have_conflict;
+ }
+ }
+#undef OVERLAPS
+ if (!have_conflict)
+ break;
+
+ /* Discard Ack only for the _first_ iteration */
+ if (first && discard && have_unacked) {
+ dev_alert(DEV, "Concurrent write! [DISCARD BY FLAG] sec=%llus\n",
+ (unsigned long long)sector);
+ inc_unacked(mdev);
+ e->w.cb = e_send_discard_ack;
+ list_add_tail(&e->w.list, &mdev->done_ee);
+
+ spin_unlock_irq(&mdev->req_lock);
+
+ /* we could probably send that P_DISCARD_ACK ourselves,
+ * but I don't like the receiver using the msock */
+
+ put_ldev(mdev);
+ wake_asender(mdev);
+ finish_wait(&mdev->misc_wait, &wait);
+ return true;
+ }
+
+ if (signal_pending(current)) {
+ hlist_del_init(&e->collision);
+
+ spin_unlock_irq(&mdev->req_lock);
+
+ finish_wait(&mdev->misc_wait, &wait);
+ goto out_interrupted;
+ }
+
+ spin_unlock_irq(&mdev->req_lock);
+ if (first) {
+ first = 0;
+ dev_alert(DEV, "Concurrent write! [W AFTERWARDS] "
+ "sec=%llus\n", (unsigned long long)sector);
+ } else if (discard) {
+ /* we had none on the first iteration.
+ * there must be none now. */
+ D_ASSERT(have_unacked == 0);
+ }
+ schedule();
+ spin_lock_irq(&mdev->req_lock);
+ }
+ finish_wait(&mdev->misc_wait, &wait);
+ }
+
+ list_add(&e->w.list, &mdev->active_ee);
+ spin_unlock_irq(&mdev->req_lock);
+
+ switch (mdev->net_conf->wire_protocol) {
+ case DRBD_PROT_C:
+ inc_unacked(mdev);
+ /* corresponding dec_unacked() in e_end_block()
+ * respective _drbd_clear_done_ee */
+ break;
+ case DRBD_PROT_B:
+ /* I really don't like it that the receiver thread
+ * sends on the msock, but anyways */
+ drbd_send_ack(mdev, P_RECV_ACK, e);
+ break;
+ case DRBD_PROT_A:
+ /* nothing to do */
+ break;
+ }
+
+ if (mdev->state.pdsk < D_INCONSISTENT) {
+ /* In case we have the only disk of the cluster, */
+ drbd_set_out_of_sync(mdev, e->sector, e->size);
+ e->flags |= EE_CALL_AL_COMPLETE_IO;
+ e->flags &= ~EE_MAY_SET_IN_SYNC;
+ drbd_al_begin_io(mdev, e->sector);
+ }
+
+ if (drbd_submit_ee(mdev, e, rw, DRBD_FAULT_DT_WR) == 0)
+ return true;
+
+ /* don't care for the reason here */
+ dev_err(DEV, "submit failed, triggering re-connect\n");
+ spin_lock_irq(&mdev->req_lock);
+ list_del(&e->w.list);
+ hlist_del_init(&e->collision);
+ spin_unlock_irq(&mdev->req_lock);
+ if (e->flags & EE_CALL_AL_COMPLETE_IO)
+ drbd_al_complete_io(mdev, e->sector);
+
+out_interrupted:
+ drbd_may_finish_epoch(mdev, e->epoch, EV_PUT + EV_CLEANUP);
+ put_ldev(mdev);
+ drbd_free_ee(mdev, e);
+ return false;
+}
+
+/* We may throttle resync, if the lower device seems to be busy,
+ * and current sync rate is above c_min_rate.
+ *
+ * To decide whether or not the lower device is busy, we use a scheme similar
+ * to MD RAID is_mddev_idle(): if the partition stats reveal "significant"
+ * (more than 64 sectors) of activity we cannot account for with our own resync
+ * activity, it obviously is "busy".
+ *
+ * The current sync rate used here uses only the most recent two step marks,
+ * to have a short time average so we can react faster.
+ */
+int drbd_rs_should_slow_down(struct drbd_conf *mdev, sector_t sector)
+{
+ struct gendisk *disk = mdev->ldev->backing_bdev->bd_contains->bd_disk;
+ unsigned long db, dt, dbdt;
+ struct lc_element *tmp;
+ int curr_events;
+ int throttle = 0;
+
+ /* feature disabled? */
+ if (mdev->sync_conf.c_min_rate == 0)
+ return 0;
+
+ spin_lock_irq(&mdev->al_lock);
+ tmp = lc_find(mdev->resync, BM_SECT_TO_EXT(sector));
+ if (tmp) {
+ struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce);
+ if (test_bit(BME_PRIORITY, &bm_ext->flags)) {
+ spin_unlock_irq(&mdev->al_lock);
+ return 0;
+ }
+ /* Do not slow down if app IO is already waiting for this extent */
+ }
+ spin_unlock_irq(&mdev->al_lock);
+
+ curr_events = (int)part_stat_read(&disk->part0, sectors[0]) +
+ (int)part_stat_read(&disk->part0, sectors[1]) -
+ atomic_read(&mdev->rs_sect_ev);
+
+ if (!mdev->rs_last_events || curr_events - mdev->rs_last_events > 64) {
+ unsigned long rs_left;
+ int i;
+
+ mdev->rs_last_events = curr_events;
+
+ /* sync speed average over the last 2*DRBD_SYNC_MARK_STEP,
+ * approx. */
+ i = (mdev->rs_last_mark + DRBD_SYNC_MARKS-1) % DRBD_SYNC_MARKS;
+
+ if (mdev->state.conn == C_VERIFY_S || mdev->state.conn == C_VERIFY_T)
+ rs_left = mdev->ov_left;
+ else
+ rs_left = drbd_bm_total_weight(mdev) - mdev->rs_failed;
+
+ dt = ((long)jiffies - (long)mdev->rs_mark_time[i]) / HZ;
+ if (!dt)
+ dt++;
+ db = mdev->rs_mark_left[i] - rs_left;
+ dbdt = Bit2KB(db/dt);
+
+ if (dbdt > mdev->sync_conf.c_min_rate)
+ throttle = 1;
+ }
+ return throttle;
+}
+
+
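+/* Common handler for P_DATA_REQUEST, P_RS_DATA_REQUEST, P_CSUM_RS_REQUEST,
+ * P_OV_REQUEST and P_OV_REPLY: read the requested block from the local disk
+ * and let the worker callback set up below send the reply. */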
+static int receive_DataRequest(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int digest_size)
+{
+ sector_t sector;
+ const sector_t capacity = drbd_get_capacity(mdev->this_bdev);
+ struct drbd_epoch_entry *e;
+ struct digest_info *di = NULL;
+ int size, verb;
+ unsigned int fault_type;
+ struct p_block_req *p = &mdev->data.rbuf.block_req;
+
+ sector = be64_to_cpu(p->sector);
+ size = be32_to_cpu(p->blksize);
+
+ if (size <= 0 || (size & 0x1ff) != 0 || size > DRBD_MAX_BIO_SIZE) {
+ dev_err(DEV, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
+ (unsigned long long)sector, size);
+ return false;
+ }
+ if (sector + (size>>9) > capacity) {
+ dev_err(DEV, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
+ (unsigned long long)sector, size);
+ return false;
+ }
+
+ if (!get_ldev_if_state(mdev, D_UP_TO_DATE)) {
+ verb = 1;
+ switch (cmd) {
+ case P_DATA_REQUEST:
+ drbd_send_ack_rp(mdev, P_NEG_DREPLY, p);
+ break;
+ case P_RS_DATA_REQUEST:
+ case P_CSUM_RS_REQUEST:
+ case P_OV_REQUEST:
+ drbd_send_ack_rp(mdev, P_NEG_RS_DREPLY , p);
+ break;
+ case P_OV_REPLY:
+ verb = 0;
+ dec_rs_pending(mdev);
+ drbd_send_ack_ex(mdev, P_OV_RESULT, sector, size, ID_IN_SYNC);
+ break;
+ default:
+ dev_err(DEV, "unexpected command (%s) in receive_DataRequest\n",
+ cmdname(cmd));
+ }
+ if (verb && __ratelimit(&drbd_ratelimit_state))
+ dev_err(DEV, "Can not satisfy peer's read request, "
+ "no local data.\n");
+
+ /* drain possibly payload */
+ return drbd_drain_block(mdev, digest_size);
+ }
+
+ /* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
+ * "criss-cross" setup, that might cause write-out on some other DRBD,
+ * which in turn might block on the other node at this very place. */
+ e = drbd_alloc_ee(mdev, p->block_id, sector, size, GFP_NOIO);
+ if (!e) {
+ put_ldev(mdev);
+ return false;
+ }
+
+ switch (cmd) {
+ case P_DATA_REQUEST:
+ e->w.cb = w_e_end_data_req;
+ fault_type = DRBD_FAULT_DT_RD;
+ /* application IO, don't drbd_rs_begin_io */
+ goto submit;
+
+ case P_RS_DATA_REQUEST:
+ e->w.cb = w_e_end_rsdata_req;
+ fault_type = DRBD_FAULT_RS_RD;
+ /* used in the sector offset progress display */
+ mdev->bm_resync_fo = BM_SECT_TO_BIT(sector);
+ break;
+
+ case P_OV_REPLY:
+ case P_CSUM_RS_REQUEST:
+ fault_type = DRBD_FAULT_RS_RD;
+ di = kmalloc(sizeof(*di) + digest_size, GFP_NOIO);
+ if (!di)
+ goto out_free_e;
+
+ di->digest_size = digest_size;
+ di->digest = (((char *)di)+sizeof(struct digest_info));
+
+ e->digest = di;
+ e->flags |= EE_HAS_DIGEST;
+
+ if (drbd_recv(mdev, di->digest, digest_size) != digest_size)
+ goto out_free_e;
+
+ if (cmd == P_CSUM_RS_REQUEST) {
+ D_ASSERT(mdev->agreed_pro_version >= 89);
+ e->w.cb = w_e_end_csum_rs_req;
+ /* used in the sector offset progress display */
+ mdev->bm_resync_fo = BM_SECT_TO_BIT(sector);
+ } else if (cmd == P_OV_REPLY) {
+ /* track progress, we may need to throttle */
+ atomic_add(size >> 9, &mdev->rs_sect_in);
+ e->w.cb = w_e_end_ov_reply;
+ dec_rs_pending(mdev);
+ /* drbd_rs_begin_io done when we sent this request,
+ * but accounting still needs to be done. */
+ goto submit_for_resync;
+ }
+ break;
+
+ case P_OV_REQUEST:
+ if (mdev->ov_start_sector == ~(sector_t)0 &&
+ mdev->agreed_pro_version >= 90) {
+ unsigned long now = jiffies;
+ int i;
+ mdev->ov_start_sector = sector;
+ mdev->ov_position = sector;
+ mdev->ov_left = drbd_bm_bits(mdev) - BM_SECT_TO_BIT(sector);
+ mdev->rs_total = mdev->ov_left;
+ for (i = 0; i < DRBD_SYNC_MARKS; i++) {
+ mdev->rs_mark_left[i] = mdev->ov_left;
+ mdev->rs_mark_time[i] = now;
+ }
+ dev_info(DEV, "Online Verify start sector: %llu\n",
+ (unsigned long long)sector);
+ }
+ e->w.cb = w_e_end_ov_req;
+ fault_type = DRBD_FAULT_RS_RD;
+ break;
+
+ default:
+ dev_err(DEV, "unexpected command (%s) in receive_DataRequest\n",
+ cmdname(cmd));
+ fault_type = DRBD_FAULT_MAX;
+ goto out_free_e;
+ }
+
+ /* Throttle, drbd_rs_begin_io and submit should become asynchronous
+ * wrt the receiver, but it is not as straightforward as it may seem.
+ * Various places in the resync start and stop logic assume resync
+ * requests are processed in order, requeuing this on the worker thread
+ * introduces a bunch of new code for synchronization between threads.
+ *
+ * Unlimited throttling before drbd_rs_begin_io may stall the resync
+ * "forever", throttling after drbd_rs_begin_io will lock that extent
+ * for application writes for the same time. For now, just throttle
+ * here, where the rest of the code expects the receiver to sleep for
+ * a while, anyways.
+ */
+
+ /* Throttle before drbd_rs_begin_io, as that locks out application IO;
+ * this defers syncer requests for some time, before letting at least
+ * one request through. The resync controller on the receiving side
+ * will adapt to the incoming rate accordingly.
+ *
+ * We cannot throttle here if remote is Primary/SyncTarget:
+ * we would also throttle its application reads.
+ * In that case, throttling is done on the SyncTarget only.
+ */
+ if (mdev->state.peer != R_PRIMARY && drbd_rs_should_slow_down(mdev, sector))
+ schedule_timeout_uninterruptible(HZ/10);
+ if (drbd_rs_begin_io(mdev, sector))
+ goto out_free_e;
+
+submit_for_resync:
+ atomic_add(size >> 9, &mdev->rs_sect_ev);
+
+submit:
+ inc_unacked(mdev);
+ spin_lock_irq(&mdev->req_lock);
+ list_add_tail(&e->w.list, &mdev->read_ee);
+ spin_unlock_irq(&mdev->req_lock);
+
+ if (drbd_submit_ee(mdev, e, READ, fault_type) == 0)
+ return true;
+
+ /* don't care for the reason here */
+ dev_err(DEV, "submit failed, triggering re-connect\n");
+ spin_lock_irq(&mdev->req_lock);
+ list_del(&e->w.list);
+ spin_unlock_irq(&mdev->req_lock);
+ /* no drbd_rs_complete_io(), we are dropping the connection anyways */
+
+out_free_e:
+ put_ldev(mdev);
+ drbd_free_ee(mdev, e);
+ return false;
+}
+
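+/* After-split-brain recovery policy for the case of zero primaries.
+ * Return value convention (shared with the 1p and 2p variants below):
+ * 1 means the peer discards its modifications (we become sync source),
+ * -1 means we discard ours (we become sync target),
+ * -100 means no automatic decision was possible. */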
+static int drbd_asb_recover_0p(struct drbd_conf *mdev) __must_hold(local)
+{
+ int self, peer, rv = -100;
+ unsigned long ch_self, ch_peer;
+
+ self = mdev->ldev->md.uuid[UI_BITMAP] & 1;
+ peer = mdev->p_uuid[UI_BITMAP] & 1;
+
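+ /* number of changed blocks on each side: ours as last communicated to
+ * the peer (comm_bm_set), the peer's as received in the UI_SIZE slot
+ * of its UUID packet */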
+ ch_peer = mdev->p_uuid[UI_SIZE];
+ ch_self = mdev->comm_bm_set;
+
+ switch (mdev->net_conf->after_sb_0p) {
+ case ASB_CONSENSUS:
+ case ASB_DISCARD_SECONDARY:
+ case ASB_CALL_HELPER:
+ dev_err(DEV, "Configuration error.\n");
+ break;
+ case ASB_DISCONNECT:
+ break;
+ case ASB_DISCARD_YOUNGER_PRI:
+ if (self == 0 && peer == 1) {
+ rv = -1;
+ break;
+ }
+ if (self == 1 && peer == 0) {
+ rv = 1;
+ break;
+ }
+ /* Else fall through to one of the other strategies... */
+ case ASB_DISCARD_OLDER_PRI:
+ if (self == 0 && peer == 1) {
+ rv = 1;
+ break;
+ }
+ if (self == 1 && peer == 0) {
+ rv = -1;
+ break;
+ }
+ /* Else fall through to one of the other strategies... */
+ dev_warn(DEV, "Discard younger/older primary did not find a decision\n"
+ "Using discard-least-changes instead\n");
+ case ASB_DISCARD_ZERO_CHG:
+ if (ch_peer == 0 && ch_self == 0) {
+ rv = test_bit(DISCARD_CONCURRENT, &mdev->flags)
+ ? -1 : 1;
+ break;
+ } else {
+ if (ch_peer == 0) { rv = 1; break; }
+ if (ch_self == 0) { rv = -1; break; }
+ }
+ if (mdev->net_conf->after_sb_0p == ASB_DISCARD_ZERO_CHG)
+ break;
+ case ASB_DISCARD_LEAST_CHG:
+ if (ch_self < ch_peer)
+ rv = -1;
+ else if (ch_self > ch_peer)
+ rv = 1;
+ else /* ( ch_self == ch_peer ) */
+ /* Well, then use something else. */
+ rv = test_bit(DISCARD_CONCURRENT, &mdev->flags)
+ ? -1 : 1;
+ break;
+ case ASB_DISCARD_LOCAL:
+ rv = -1;
+ break;
+ case ASB_DISCARD_REMOTE:
+ rv = 1;
+ }
+
+ return rv;
+}
+
+static int drbd_asb_recover_1p(struct drbd_conf *mdev) __must_hold(local)
+{
+ int hg, rv = -100;
+
+ switch (mdev->net_conf->after_sb_1p) {
+ case ASB_DISCARD_YOUNGER_PRI:
+ case ASB_DISCARD_OLDER_PRI:
+ case ASB_DISCARD_LEAST_CHG:
+ case ASB_DISCARD_LOCAL:
+ case ASB_DISCARD_REMOTE:
+ dev_err(DEV, "Configuration error.\n");
+ break;
+ case ASB_DISCONNECT:
+ break;
+ case ASB_CONSENSUS:
+ hg = drbd_asb_recover_0p(mdev);
+ if (hg == -1 && mdev->state.role == R_SECONDARY)
+ rv = hg;
+ if (hg == 1 && mdev->state.role == R_PRIMARY)
+ rv = hg;
+ break;
+ case ASB_VIOLENTLY:
+ rv = drbd_asb_recover_0p(mdev);
+ break;
+ case ASB_DISCARD_SECONDARY:
+ return mdev->state.role == R_PRIMARY ? 1 : -1;
+ case ASB_CALL_HELPER:
+ hg = drbd_asb_recover_0p(mdev);
+ if (hg == -1 && mdev->state.role == R_PRIMARY) {
+ enum drbd_state_rv rv2;
+
+ drbd_set_role(mdev, R_SECONDARY, 0);
+ /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
+ * we might be here in C_WF_REPORT_PARAMS which is transient.
+ * we do not need to wait for the after state change work either. */
+ rv2 = drbd_change_state(mdev, CS_VERBOSE, NS(role, R_SECONDARY));
+ if (rv2 != SS_SUCCESS) {
+ drbd_khelper(mdev, "pri-lost-after-sb");
+ } else {
+ dev_warn(DEV, "Successfully gave up primary role.\n");
+ rv = hg;
+ }
+ } else
+ rv = hg;
+ }
+
+ return rv;
+}
+
+static int drbd_asb_recover_2p(struct drbd_conf *mdev) __must_hold(local)
+{
+ int hg, rv = -100;
+
+ switch (mdev->net_conf->after_sb_2p) {
+ case ASB_DISCARD_YOUNGER_PRI:
+ case ASB_DISCARD_OLDER_PRI:
+ case ASB_DISCARD_LEAST_CHG:
+ case ASB_DISCARD_LOCAL:
+ case ASB_DISCARD_REMOTE:
+ case ASB_CONSENSUS:
+ case ASB_DISCARD_SECONDARY:
+ dev_err(DEV, "Configuration error.\n");
+ break;
+ case ASB_VIOLENTLY:
+ rv = drbd_asb_recover_0p(mdev);
+ break;
+ case ASB_DISCONNECT:
+ break;
+ case ASB_CALL_HELPER:
+ hg = drbd_asb_recover_0p(mdev);
+ if (hg == -1) {
+ enum drbd_state_rv rv2;
+
+ /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
+ * we might be here in C_WF_REPORT_PARAMS which is transient.
+ * we do not need to wait for the after state change work either. */
+ rv2 = drbd_change_state(mdev, CS_VERBOSE, NS(role, R_SECONDARY));
+ if (rv2 != SS_SUCCESS) {
+ drbd_khelper(mdev, "pri-lost-after-sb");
+ } else {
+ dev_warn(DEV, "Successfully gave up primary role.\n");
+ rv = hg;
+ }
+ } else
+ rv = hg;
+ }
+
+ return rv;
+}
+
+static void drbd_uuid_dump(struct drbd_conf *mdev, char *text, u64 *uuid,
+ u64 bits, u64 flags)
+{
+ if (!uuid) {
+ dev_info(DEV, "%s uuid info vanished while I was looking!\n", text);
+ return;
+ }
+ dev_info(DEV, "%s %016llX:%016llX:%016llX:%016llX bits:%llu flags:%llX\n",
+ text,
+ (unsigned long long)uuid[UI_CURRENT],
+ (unsigned long long)uuid[UI_BITMAP],
+ (unsigned long long)uuid[UI_HISTORY_START],
+ (unsigned long long)uuid[UI_HISTORY_END],
+ (unsigned long long)bits,
+ (unsigned long long)flags);
+}
+
+/*
+ 100 after split brain try auto recover
+ 2 C_SYNC_SOURCE set BitMap
+ 1 C_SYNC_SOURCE use BitMap
+ 0 no Sync
+ -1 C_SYNC_TARGET use BitMap
+ -2 C_SYNC_TARGET set BitMap
+ -100 after split brain, disconnect
+-1000 unrelated data
+-1091 requires proto 91
+-1096 requires proto 96
+ */
+static int drbd_uuid_compare(struct drbd_conf *mdev, int *rule_nr) __must_hold(local)
+{
+ u64 self, peer;
+ int i, j;
+
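+ /* Bit 0 of a UUID is a role flag ("node was primary"), not part of the
+ * data generation identity, so mask it out before comparing. */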
+ self = mdev->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
+ peer = mdev->p_uuid[UI_CURRENT] & ~((u64)1);
+
+ *rule_nr = 10;
+ if (self == UUID_JUST_CREATED && peer == UUID_JUST_CREATED)
+ return 0;
+
+ *rule_nr = 20;
+ if ((self == UUID_JUST_CREATED || self == (u64)0) &&
+ peer != UUID_JUST_CREATED)
+ return -2;
+
+ *rule_nr = 30;
+ if (self != UUID_JUST_CREATED &&
+ (peer == UUID_JUST_CREATED || peer == (u64)0))
+ return 2;
+
+ if (self == peer) {
+ int rct, dc; /* roles at crash time */
+
+ if (mdev->p_uuid[UI_BITMAP] == (u64)0 && mdev->ldev->md.uuid[UI_BITMAP] != (u64)0) {
+
+ if (mdev->agreed_pro_version < 91)
+ return -1091;
+
+ if ((mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1)) &&
+ (mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START + 1] & ~((u64)1))) {
+ dev_info(DEV, "was SyncSource, missed the resync finished event, corrected myself:\n");
+ drbd_uuid_set_bm(mdev, 0UL);
+
+ drbd_uuid_dump(mdev, "self", mdev->ldev->md.uuid,
+ mdev->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(mdev) : 0, 0);
+ *rule_nr = 34;
+ } else {
+ dev_info(DEV, "was SyncSource (peer failed to write sync_uuid)\n");
+ *rule_nr = 36;
+ }
+
+ return 1;
+ }
+
+ if (mdev->ldev->md.uuid[UI_BITMAP] == (u64)0 && mdev->p_uuid[UI_BITMAP] != (u64)0) {
+
+ if (mdev->agreed_pro_version < 91)
+ return -1091;
+
+ if ((mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (mdev->p_uuid[UI_BITMAP] & ~((u64)1)) &&
+ (mdev->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1))) {
+ dev_info(DEV, "was SyncTarget, peer missed the resync finished event, corrected peer:\n");
+
+ mdev->p_uuid[UI_HISTORY_START + 1] = mdev->p_uuid[UI_HISTORY_START];
+ mdev->p_uuid[UI_HISTORY_START] = mdev->p_uuid[UI_BITMAP];
+ mdev->p_uuid[UI_BITMAP] = 0UL;
+
+ drbd_uuid_dump(mdev, "peer", mdev->p_uuid, mdev->p_uuid[UI_SIZE], mdev->p_uuid[UI_FLAGS]);
+ *rule_nr = 35;
+ } else {
+ dev_info(DEV, "was SyncTarget (failed to write sync_uuid)\n");
+ *rule_nr = 37;
+ }
+
+ return -1;
+ }
+
+ /* Common power [off|failure] */
+ rct = (test_bit(CRASHED_PRIMARY, &mdev->flags) ? 1 : 0) +
+ (mdev->p_uuid[UI_FLAGS] & 2);
+ /* lowest bit is set when we were primary,
+ * next bit (weight 2) is set when peer was primary */
+ *rule_nr = 40;
+
+ switch (rct) {
+ case 0: /* !self_pri && !peer_pri */ return 0;
+ case 1: /* self_pri && !peer_pri */ return 1;
+ case 2: /* !self_pri && peer_pri */ return -1;
+ case 3: /* self_pri && peer_pri */
+ dc = test_bit(DISCARD_CONCURRENT, &mdev->flags);
+ return dc ? -1 : 1;
+ }
+ }
+
+ *rule_nr = 50;
+ peer = mdev->p_uuid[UI_BITMAP] & ~((u64)1);
+ if (self == peer)
+ return -1;
+
+ *rule_nr = 51;
+ peer = mdev->p_uuid[UI_HISTORY_START] & ~((u64)1);
+ if (self == peer) {
+ if (mdev->agreed_pro_version < 96 ?
+ (mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) ==
+ (mdev->p_uuid[UI_HISTORY_START + 1] & ~((u64)1)) :
+ peer + UUID_NEW_BM_OFFSET == (mdev->p_uuid[UI_BITMAP] & ~((u64)1))) {
+ /* The last P_SYNC_UUID did not get through. Undo the modifications that
+ the peer's last start of resync as sync source made to its UUIDs. */
+
+ if (mdev->agreed_pro_version < 91)
+ return -1091;
+
+ mdev->p_uuid[UI_BITMAP] = mdev->p_uuid[UI_HISTORY_START];
+ mdev->p_uuid[UI_HISTORY_START] = mdev->p_uuid[UI_HISTORY_START + 1];
+
+ dev_info(DEV, "Did not got last syncUUID packet, corrected:\n");
+ drbd_uuid_dump(mdev, "peer", mdev->p_uuid, mdev->p_uuid[UI_SIZE], mdev->p_uuid[UI_FLAGS]);
+
+ return -1;
+ }
+ }
+
+ *rule_nr = 60;
+ self = mdev->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
+ for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
+ peer = mdev->p_uuid[i] & ~((u64)1);
+ if (self == peer)
+ return -2;
+ }
+
+ *rule_nr = 70;
+ self = mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
+ peer = mdev->p_uuid[UI_CURRENT] & ~((u64)1);
+ if (self == peer)
+ return 1;
+
+ *rule_nr = 71;
+ self = mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1);
+ if (self == peer) {
+ if (mdev->agreed_pro_version < 96 ?
+ (mdev->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) ==
+ (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1)) :
+ self + UUID_NEW_BM_OFFSET == (mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1))) {
+ /* The last P_SYNC_UUID did not get through. Undo the modifications that
+ our last start of resync as sync source made to our UUIDs. */
+
+ if (mdev->agreed_pro_version < 91)
+ return -1091;
+
+ _drbd_uuid_set(mdev, UI_BITMAP, mdev->ldev->md.uuid[UI_HISTORY_START]);
+ _drbd_uuid_set(mdev, UI_HISTORY_START, mdev->ldev->md.uuid[UI_HISTORY_START + 1]);
+
+ dev_info(DEV, "Last syncUUID did not get through, corrected:\n");
+ drbd_uuid_dump(mdev, "self", mdev->ldev->md.uuid,
+ mdev->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(mdev) : 0, 0);
+
+ return 1;
+ }
+ }
+
+
+ *rule_nr = 80;
+ peer = mdev->p_uuid[UI_CURRENT] & ~((u64)1);
+ for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
+ self = mdev->ldev->md.uuid[i] & ~((u64)1);
+ if (self == peer)
+ return 2;
+ }
+
+ *rule_nr = 90;
+ self = mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
+ peer = mdev->p_uuid[UI_BITMAP] & ~((u64)1);
+ if (self == peer && self != ((u64)0))
+ return 100;
+
+ *rule_nr = 100;
+ for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
+ self = mdev->ldev->md.uuid[i] & ~((u64)1);
+ for (j = UI_HISTORY_START; j <= UI_HISTORY_END; j++) {
+ peer = mdev->p_uuid[j] & ~((u64)1);
+ if (self == peer)
+ return -100;
+ }
+ }
+
+ return -1000;
+}
+
+/* drbd_sync_handshake() returns the new conn state on success, or
+ C_MASK (-1) on failure.
+ */
+static enum drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_role peer_role,
+ enum drbd_disk_state peer_disk) __must_hold(local)
+{
+ int hg, rule_nr;
+ enum drbd_conns rv = C_MASK;
+ enum drbd_disk_state mydisk;
+
+ mydisk = mdev->state.disk;
+ if (mydisk == D_NEGOTIATING)
+ mydisk = mdev->new_state_tmp.disk;
+
+ dev_info(DEV, "drbd_sync_handshake:\n");
+ drbd_uuid_dump(mdev, "self", mdev->ldev->md.uuid, mdev->comm_bm_set, 0);
+ drbd_uuid_dump(mdev, "peer", mdev->p_uuid,
+ mdev->p_uuid[UI_SIZE], mdev->p_uuid[UI_FLAGS]);
+
+ hg = drbd_uuid_compare(mdev, &rule_nr);
+
+ dev_info(DEV, "uuid_compare()=%d by rule %d\n", hg, rule_nr);
+
+ if (hg == -1000) {
+ dev_alert(DEV, "Unrelated data, aborting!\n");
+ return C_MASK;
+ }
+ if (hg < -1000) {
+ dev_alert(DEV, "To resolve this both sides have to support at least protocol %d\n", -hg - 1000);
+ return C_MASK;
+ }
+
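+ /* If exactly one side is Inconsistent, the disk states alone decide the
+ * sync direction; keep it a full sync if the UUID comparison already
+ * asked for one (|hg| == 2) or ended in unresolved split brain (-100). */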
+ if ((mydisk == D_INCONSISTENT && peer_disk > D_INCONSISTENT) ||
+ (peer_disk == D_INCONSISTENT && mydisk > D_INCONSISTENT)) {
+ int f = (hg == -100) || abs(hg) == 2;
+ hg = mydisk > D_INCONSISTENT ? 1 : -1;
+ if (f)
+ hg = hg*2;
+ dev_info(DEV, "Becoming sync %s due to disk states.\n",
+ hg > 0 ? "source" : "target");
+ }
+
+ if (abs(hg) == 100)
+ drbd_khelper(mdev, "initial-split-brain");
+
+ if (hg == 100 || (hg == -100 && mdev->net_conf->always_asbp)) {
+ int pcount = (mdev->state.role == R_PRIMARY)
+ + (peer_role == R_PRIMARY);
+ int forced = (hg == -100);
+
+ switch (pcount) {
+ case 0:
+ hg = drbd_asb_recover_0p(mdev);
+ break;
+ case 1:
+ hg = drbd_asb_recover_1p(mdev);
+ break;
+ case 2:
+ hg = drbd_asb_recover_2p(mdev);
+ break;
+ }
+ if (abs(hg) < 100) {
+ dev_warn(DEV, "Split-Brain detected, %d primaries, "
+ "automatically solved. Sync from %s node\n",
+ pcount, (hg < 0) ? "peer" : "this");
+ if (forced) {
+ dev_warn(DEV, "Doing a full sync, since"
+ " UUIDs where ambiguous.\n");
+ hg = hg*2;
+ }
+ }
+ }
+
+ if (hg == -100) {
+ if (mdev->net_conf->want_lose && !(mdev->p_uuid[UI_FLAGS]&1))
+ hg = -1;
+ if (!mdev->net_conf->want_lose && (mdev->p_uuid[UI_FLAGS]&1))
+ hg = 1;
+
+ if (abs(hg) < 100)
+ dev_warn(DEV, "Split-Brain detected, manually solved. "
+ "Sync from %s node\n",
+ (hg < 0) ? "peer" : "this");
+ }
+
+ if (hg == -100) {
+ /* FIXME this log message is not correct if we end up here
+ * after an attempted attach on a diskless node.
+ * We just refuse to attach -- well, we drop the "connection"
+ * to that disk, in a way... */
+ dev_alert(DEV, "Split-Brain detected but unresolved, dropping connection!\n");
+ drbd_khelper(mdev, "split-brain");
+ return C_MASK;
+ }
+
+ if (hg > 0 && mydisk <= D_INCONSISTENT) {
+ dev_err(DEV, "I shall become SyncSource, but I am inconsistent!\n");
+ return C_MASK;
+ }
+
+ if (hg < 0 && /* by intention we do not use mydisk here. */
+ mdev->state.role == R_PRIMARY && mdev->state.disk >= D_CONSISTENT) {
+ switch (mdev->net_conf->rr_conflict) {
+ case ASB_CALL_HELPER:
+ drbd_khelper(mdev, "pri-lost");
+ /* fall through */
+ case ASB_DISCONNECT:
+ dev_err(DEV, "I shall become SyncTarget, but I am primary!\n");
+ return C_MASK;
+ case ASB_VIOLENTLY:
+ dev_warn(DEV, "Becoming SyncTarget, violating the stable-data"
+ "assumption\n");
+ }
+ }
+
+ if (mdev->net_conf->dry_run || test_bit(CONN_DRY_RUN, &mdev->flags)) {
+ if (hg == 0)
+ dev_info(DEV, "dry-run connect: No resync, would become Connected immediately.\n");
+ else
+ dev_info(DEV, "dry-run connect: Would become %s, doing a %s resync.",
+ drbd_conn_str(hg > 0 ? C_SYNC_SOURCE : C_SYNC_TARGET),
+ abs(hg) >= 2 ? "full" : "bit-map based");
+ return C_MASK;
+ }
+
+ if (abs(hg) >= 2) {
+ dev_info(DEV, "Writing the whole bitmap, full sync required after drbd_sync_handshake.\n");
+ if (drbd_bitmap_io(mdev, &drbd_bmio_set_n_write, "set_n_write from sync_handshake",
+ BM_LOCKED_SET_ALLOWED))
+ return C_MASK;
+ }
+
+ if (hg > 0) { /* become sync source. */
+ rv = C_WF_BITMAP_S;
+ } else if (hg < 0) { /* become sync target */
+ rv = C_WF_BITMAP_T;
+ } else {
+ rv = C_CONNECTED;
+ if (drbd_bm_total_weight(mdev)) {
+ dev_info(DEV, "No resync, but %lu bits in bitmap!\n",
+ drbd_bm_total_weight(mdev));
+ }
+ }
+
+ return rv;
+}
+
+/* returns 1 if invalid */
+static int cmp_after_sb(enum drbd_after_sb_p peer, enum drbd_after_sb_p self)
+{
+ /* ASB_DISCARD_REMOTE - ASB_DISCARD_LOCAL is valid */
+ if ((peer == ASB_DISCARD_REMOTE && self == ASB_DISCARD_LOCAL) ||
+ (self == ASB_DISCARD_REMOTE && peer == ASB_DISCARD_LOCAL))
+ return 0;
+
+ /* any other things with ASB_DISCARD_REMOTE or ASB_DISCARD_LOCAL are invalid */
+ if (peer == ASB_DISCARD_REMOTE || peer == ASB_DISCARD_LOCAL ||
+ self == ASB_DISCARD_REMOTE || self == ASB_DISCARD_LOCAL)
+ return 1;
+
+ /* everything else is valid if they are equal on both sides. */
+ if (peer == self)
+ return 0;
+
+ /* everything else is invalid. */
+ return 1;
+}
+
+static int receive_protocol(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size)
+{
+ struct p_protocol *p = &mdev->data.rbuf.protocol;
+ int p_proto, p_after_sb_0p, p_after_sb_1p, p_after_sb_2p;
+ int p_want_lose, p_two_primaries, cf;
+ char p_integrity_alg[SHARED_SECRET_MAX] = "";
+
+ p_proto = be32_to_cpu(p->protocol);
+ p_after_sb_0p = be32_to_cpu(p->after_sb_0p);
+ p_after_sb_1p = be32_to_cpu(p->after_sb_1p);
+ p_after_sb_2p = be32_to_cpu(p->after_sb_2p);
+ p_two_primaries = be32_to_cpu(p->two_primaries);
+ cf = be32_to_cpu(p->conn_flags);
+ p_want_lose = cf & CF_WANT_LOSE;
+
+ clear_bit(CONN_DRY_RUN, &mdev->flags);
+
+ if (cf & CF_DRY_RUN)
+ set_bit(CONN_DRY_RUN, &mdev->flags);
+
+ if (p_proto != mdev->net_conf->wire_protocol) {
+ dev_err(DEV, "incompatible communication protocols\n");
+ goto disconnect;
+ }
+
+ if (cmp_after_sb(p_after_sb_0p, mdev->net_conf->after_sb_0p)) {
+ dev_err(DEV, "incompatible after-sb-0pri settings\n");
+ goto disconnect;
+ }
+
+ if (cmp_after_sb(p_after_sb_1p, mdev->net_conf->after_sb_1p)) {
+ dev_err(DEV, "incompatible after-sb-1pri settings\n");
+ goto disconnect;
+ }
+
+ if (cmp_after_sb(p_after_sb_2p, mdev->net_conf->after_sb_2p)) {
+ dev_err(DEV, "incompatible after-sb-2pri settings\n");
+ goto disconnect;
+ }
+
+ if (p_want_lose && mdev->net_conf->want_lose) {
+ dev_err(DEV, "both sides have the 'want_lose' flag set\n");
+ goto disconnect;
+ }
+
+ if (p_two_primaries != mdev->net_conf->two_primaries) {
+ dev_err(DEV, "incompatible setting of the two-primaries options\n");
+ goto disconnect;
+ }
+
+ if (mdev->agreed_pro_version >= 87) {
+ unsigned char *my_alg = mdev->net_conf->integrity_alg;
+
+ if (drbd_recv(mdev, p_integrity_alg, data_size) != data_size)
+ return false;
+
+ p_integrity_alg[SHARED_SECRET_MAX-1] = 0;
+ if (strcmp(p_integrity_alg, my_alg)) {
+ dev_err(DEV, "incompatible setting of the data-integrity-alg\n");
+ goto disconnect;
+ }
+ dev_info(DEV, "data-integrity-alg: %s\n",
+ my_alg[0] ? my_alg : (unsigned char *)"<not-used>");
+ }
+
+ return true;
+
+disconnect:
+ drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
+ return false;
+}
+
+/* helper function
+ * input: alg name, feature name
+ * return: NULL (alg name was "")
+ * ERR_PTR(error) if something goes wrong
+ * or the crypto hash ptr, if it worked out ok. */
+struct crypto_hash *drbd_crypto_alloc_digest_safe(const struct drbd_conf *mdev,
+ const char *alg, const char *name)
+{
+ struct crypto_hash *tfm;
+
+ if (!alg[0])
+ return NULL;
+
+ tfm = crypto_alloc_hash(alg, 0, CRYPTO_ALG_ASYNC);
+ if (IS_ERR(tfm)) {
+ dev_err(DEV, "Can not allocate \"%s\" as %s (reason: %ld)\n",
+ alg, name, PTR_ERR(tfm));
+ return tfm;
+ }
+ if (!drbd_crypto_is_hash(crypto_hash_tfm(tfm))) {
+ crypto_free_hash(tfm);
+ dev_err(DEV, "\"%s\" is not a digest (%s)\n", alg, name);
+ return ERR_PTR(-EINVAL);
+ }
+ return tfm;
+}
+
+static int receive_SyncParam(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int packet_size)
+{
+ int ok = true;
+ struct p_rs_param_95 *p = &mdev->data.rbuf.rs_param_95;
+ unsigned int header_size, data_size, exp_max_sz;
+ struct crypto_hash *verify_tfm = NULL;
+ struct crypto_hash *csums_tfm = NULL;
+ const int apv = mdev->agreed_pro_version;
+ int *rs_plan_s = NULL;
+ int fifo_size = 0;
+
+ exp_max_sz = apv <= 87 ? sizeof(struct p_rs_param)
+ : apv == 88 ? sizeof(struct p_rs_param)
+ + SHARED_SECRET_MAX
+ : apv <= 94 ? sizeof(struct p_rs_param_89)
+ : /* apv >= 95 */ sizeof(struct p_rs_param_95);
+
+ if (packet_size > exp_max_sz) {
+ dev_err(DEV, "SyncParam packet too long: received %u, expected <= %u bytes\n",
+ packet_size, exp_max_sz);
+ return false;
+ }
+
+ if (apv <= 88) {
+ header_size = sizeof(struct p_rs_param) - sizeof(struct p_header80);
+ data_size = packet_size - header_size;
+ } else if (apv <= 94) {
+ header_size = sizeof(struct p_rs_param_89) - sizeof(struct p_header80);
+ data_size = packet_size - header_size;
+ D_ASSERT(data_size == 0);
+ } else {
+ header_size = sizeof(struct p_rs_param_95) - sizeof(struct p_header80);
+ data_size = packet_size - header_size;
+ D_ASSERT(data_size == 0);
+ }
+
+ /* initialize verify_alg and csums_alg */
+ memset(p->verify_alg, 0, 2 * SHARED_SECRET_MAX);
+
+ if (drbd_recv(mdev, &p->head.payload, header_size) != header_size)
+ return false;
+
+ mdev->sync_conf.rate = be32_to_cpu(p->rate);
+
+ if (apv >= 88) {
+ if (apv == 88) {
+ if (data_size > SHARED_SECRET_MAX) {
+ dev_err(DEV, "verify-alg too long, "
+ "peer wants %u, accepting only %u byte\n",
+ data_size, SHARED_SECRET_MAX);
+ return false;
+ }
+
+ if (drbd_recv(mdev, p->verify_alg, data_size) != data_size)
+ return false;
+
+ /* we expect NUL terminated string */
+ /* but just in case someone tries to be evil */
+ D_ASSERT(p->verify_alg[data_size-1] == 0);
+ p->verify_alg[data_size-1] = 0;
+
+ } else /* apv >= 89 */ {
+ /* we still expect NUL terminated strings */
+ /* but just in case someone tries to be evil */
+ D_ASSERT(p->verify_alg[SHARED_SECRET_MAX-1] == 0);
+ D_ASSERT(p->csums_alg[SHARED_SECRET_MAX-1] == 0);
+ p->verify_alg[SHARED_SECRET_MAX-1] = 0;
+ p->csums_alg[SHARED_SECRET_MAX-1] = 0;
+ }
+
+ if (strcmp(mdev->sync_conf.verify_alg, p->verify_alg)) {
+ if (mdev->state.conn == C_WF_REPORT_PARAMS) {
+ dev_err(DEV, "Different verify-alg settings. me=\"%s\" peer=\"%s\"\n",
+ mdev->sync_conf.verify_alg, p->verify_alg);
+ goto disconnect;
+ }
+ verify_tfm = drbd_crypto_alloc_digest_safe(mdev,
+ p->verify_alg, "verify-alg");
+ if (IS_ERR(verify_tfm)) {
+ verify_tfm = NULL;
+ goto disconnect;
+ }
+ }
+
+ if (apv >= 89 && strcmp(mdev->sync_conf.csums_alg, p->csums_alg)) {
+ if (mdev->state.conn == C_WF_REPORT_PARAMS) {
+ dev_err(DEV, "Different csums-alg settings. me=\"%s\" peer=\"%s\"\n",
+ mdev->sync_conf.csums_alg, p->csums_alg);
+ goto disconnect;
+ }
+ csums_tfm = drbd_crypto_alloc_digest_safe(mdev,
+ p->csums_alg, "csums-alg");
+ if (IS_ERR(csums_tfm)) {
+ csums_tfm = NULL;
+ goto disconnect;
+ }
+ }
+
+ if (apv > 94) {
+ mdev->sync_conf.rate = be32_to_cpu(p->rate);
+ mdev->sync_conf.c_plan_ahead = be32_to_cpu(p->c_plan_ahead);
+ mdev->sync_conf.c_delay_target = be32_to_cpu(p->c_delay_target);
+ mdev->sync_conf.c_fill_target = be32_to_cpu(p->c_fill_target);
+ mdev->sync_conf.c_max_rate = be32_to_cpu(p->c_max_rate);
+
+ fifo_size = (mdev->sync_conf.c_plan_ahead * 10 * SLEEP_TIME) / HZ;
+ if (fifo_size != mdev->rs_plan_s.size && fifo_size > 0) {
+ rs_plan_s = kzalloc(sizeof(int) * fifo_size, GFP_KERNEL);
+ if (!rs_plan_s) {
+ dev_err(DEV, "kmalloc of fifo_buffer failed");
+ goto disconnect;
+ }
+ }
+ }
+
+ spin_lock(&mdev->peer_seq_lock);
+ /* lock against drbd_nl_syncer_conf() */
+ if (verify_tfm) {
+ strcpy(mdev->sync_conf.verify_alg, p->verify_alg);
+ mdev->sync_conf.verify_alg_len = strlen(p->verify_alg) + 1;
+ crypto_free_hash(mdev->verify_tfm);
+ mdev->verify_tfm = verify_tfm;
+ dev_info(DEV, "using verify-alg: \"%s\"\n", p->verify_alg);
+ }
+ if (csums_tfm) {
+ strcpy(mdev->sync_conf.csums_alg, p->csums_alg);
+ mdev->sync_conf.csums_alg_len = strlen(p->csums_alg) + 1;
+ crypto_free_hash(mdev->csums_tfm);
+ mdev->csums_tfm = csums_tfm;
+ dev_info(DEV, "using csums-alg: \"%s\"\n", p->csums_alg);
+ }
+ if (fifo_size != mdev->rs_plan_s.size) {
+ kfree(mdev->rs_plan_s.values);
+ mdev->rs_plan_s.values = rs_plan_s;
+ mdev->rs_plan_s.size = fifo_size;
+ mdev->rs_planed = 0;
+ }
+ spin_unlock(&mdev->peer_seq_lock);
+ }
+
+ return ok;
+disconnect:
+ /* just for completeness: actually not needed,
+ * as this is not reached if csums_tfm was ok. */
+ crypto_free_hash(csums_tfm);
+ /* but free the verify_tfm again, if csums_tfm did not work out */
+ crypto_free_hash(verify_tfm);
+ drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
+ return false;
+}
+
+/* warn if the arguments differ by more than 12.5% */
+static void warn_if_differ_considerably(struct drbd_conf *mdev,
+ const char *s, sector_t a, sector_t b)
+{
+ sector_t d;
+ if (a == 0 || b == 0)
+ return;
+ d = (a > b) ? (a - b) : (b - a);
+ if (d > (a>>3) || d > (b>>3))
+ dev_warn(DEV, "Considerable difference in %s: %llus vs. %llus\n", s,
+ (unsigned long long)a, (unsigned long long)b);
+}
+
+static int receive_sizes(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size)
+{
+ struct p_sizes *p = &mdev->data.rbuf.sizes;
+ enum determine_dev_size dd = unchanged;
+ sector_t p_size, p_usize, my_usize;
+ int ldsc = 0; /* local disk size changed */
+ enum dds_flags ddsf;
+
+ p_size = be64_to_cpu(p->d_size);
+ p_usize = be64_to_cpu(p->u_size);
+
+ if (p_size == 0 && mdev->state.disk == D_DISKLESS) {
+ dev_err(DEV, "some backing storage is needed\n");
+ drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
+ return false;
+ }
+
+ /* just store the peer's disk size for now.
+ * we still need to figure out whether we accept that. */
+ mdev->p_size = p_size;
+
+ if (get_ldev(mdev)) {
+ warn_if_differ_considerably(mdev, "lower level device sizes",
+ p_size, drbd_get_max_capacity(mdev->ldev));
+ warn_if_differ_considerably(mdev, "user requested size",
+ p_usize, mdev->ldev->dc.disk_size);
+
+ /* if this is the first connect, or an otherwise expected
+ * param exchange, choose the minimum */
+ if (mdev->state.conn == C_WF_REPORT_PARAMS)
+ p_usize = min_not_zero((sector_t)mdev->ldev->dc.disk_size,
+ p_usize);
+
+ my_usize = mdev->ldev->dc.disk_size;
+
+ if (mdev->ldev->dc.disk_size != p_usize) {
+ mdev->ldev->dc.disk_size = p_usize;
+ dev_info(DEV, "Peer sets u_size to %lu sectors\n",
+ (unsigned long)mdev->ldev->dc.disk_size);
+ }
+
+ /* Never shrink a device with usable data during connect.
+ But allow online shrinking if we are connected. */
+ if (drbd_new_dev_size(mdev, mdev->ldev, 0) <
+ drbd_get_capacity(mdev->this_bdev) &&
+ mdev->state.disk >= D_OUTDATED &&
+ mdev->state.conn < C_CONNECTED) {
+ dev_err(DEV, "The peer's disk size is too small!\n");
+ drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
+ mdev->ldev->dc.disk_size = my_usize;
+ put_ldev(mdev);
+ return false;
+ }
+ put_ldev(mdev);
+ }
+
+ ddsf = be16_to_cpu(p->dds_flags);
+ if (get_ldev(mdev)) {
+ dd = drbd_determine_dev_size(mdev, ddsf);
+ put_ldev(mdev);
+ if (dd == dev_size_error)
+ return false;
+ drbd_md_sync(mdev);
+ } else {
+ /* I am diskless, need to accept the peer's size. */
+ drbd_set_my_capacity(mdev, p_size);
+ }
+
+ mdev->peer_max_bio_size = be32_to_cpu(p->max_bio_size);
+ drbd_reconsider_max_bio_size(mdev);
+
+ if (get_ldev(mdev)) {
+ if (mdev->ldev->known_size != drbd_get_capacity(mdev->ldev->backing_bdev)) {
+ mdev->ldev->known_size = drbd_get_capacity(mdev->ldev->backing_bdev);
+ ldsc = 1;
+ }
+
+ put_ldev(mdev);
+ }
+
+ if (mdev->state.conn > C_WF_REPORT_PARAMS) {
+ if (be64_to_cpu(p->c_size) !=
+ drbd_get_capacity(mdev->this_bdev) || ldsc) {
+ /* we have different sizes, probably peer
+ * needs to know my new size... */
+ drbd_send_sizes(mdev, 0, ddsf);
+ }
+ if (test_and_clear_bit(RESIZE_PENDING, &mdev->flags) ||
+ (dd == grew && mdev->state.conn == C_CONNECTED)) {
+ if (mdev->state.pdsk >= D_INCONSISTENT &&
+ mdev->state.disk >= D_INCONSISTENT) {
+ if (ddsf & DDSF_NO_RESYNC)
+ dev_info(DEV, "Resync of new storage suppressed with --assume-clean\n");
+ else
+ resync_after_online_grow(mdev);
+ } else
+ set_bit(RESYNC_AFTER_NEG, &mdev->flags);
+ }
+ }
+
+ return true;
+}
+
+static int receive_uuids(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size)
+{
+ struct p_uuids *p = &mdev->data.rbuf.uuids;
+ u64 *p_uuid;
+ int i, updated_uuids = 0;
+
+ p_uuid = kmalloc(sizeof(u64)*UI_EXTENDED_SIZE, GFP_NOIO);
+ if (!p_uuid) {
+ /* cannot store the peer's UUIDs; treat as receive error */
+ dev_err(DEV, "kmalloc of p_uuid failed\n");
+ return false;
+ }
+
+ for (i = UI_CURRENT; i < UI_EXTENDED_SIZE; i++)
+ p_uuid[i] = be64_to_cpu(p->uuid[i]);
+
+ kfree(mdev->p_uuid);
+ mdev->p_uuid = p_uuid;
+
+ if (mdev->state.conn < C_CONNECTED &&
+ mdev->state.disk < D_INCONSISTENT &&
+ mdev->state.role == R_PRIMARY &&
+ (mdev->ed_uuid & ~((u64)1)) != (p_uuid[UI_CURRENT] & ~((u64)1))) {
+ dev_err(DEV, "Can only connect to data with current UUID=%016llX\n",
+ (unsigned long long)mdev->ed_uuid);
+ drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
+ return false;
+ }
+
+ if (get_ldev(mdev)) {
+ int skip_initial_sync =
+ mdev->state.conn == C_CONNECTED &&
+ mdev->agreed_pro_version >= 90 &&
+ mdev->ldev->md.uuid[UI_CURRENT] == UUID_JUST_CREATED &&
+ (p_uuid[UI_FLAGS] & 8);
+ if (skip_initial_sync) {
+ dev_info(DEV, "Accepted new current UUID, preparing to skip initial sync\n");
+ drbd_bitmap_io(mdev, &drbd_bmio_clear_n_write,
+ "clear_n_write from receive_uuids",
+ BM_LOCKED_TEST_ALLOWED);
+ _drbd_uuid_set(mdev, UI_CURRENT, p_uuid[UI_CURRENT]);
+ _drbd_uuid_set(mdev, UI_BITMAP, 0);
+ _drbd_set_state(_NS2(mdev, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE),
+ CS_VERBOSE, NULL);
+ drbd_md_sync(mdev);
+ updated_uuids = 1;
+ }
+ put_ldev(mdev);
+ } else if (mdev->state.disk < D_INCONSISTENT &&
+ mdev->state.role == R_PRIMARY) {
+ /* I am a diskless primary, the peer just created a new current UUID
+ for me. */
+ updated_uuids = drbd_set_ed_uuid(mdev, p_uuid[UI_CURRENT]);
+ }
+
+ /* Before we test for the disk state, we should wait until a possibly
+ ongoing cluster wide state change has finished. That is important if
+ we are primary and are detaching from our disk: we need to see the
+ new disk state... */
+ wait_event(mdev->misc_wait, !test_bit(CLUSTER_ST_CHANGE, &mdev->flags));
+ if (mdev->state.conn >= C_CONNECTED && mdev->state.disk < D_INCONSISTENT)
+ updated_uuids |= drbd_set_ed_uuid(mdev, p_uuid[UI_CURRENT]);
+
+ if (updated_uuids)
+ drbd_print_uuids(mdev, "receiver updated UUIDs to");
+
+ return true;
+}
+
+/**
+ * convert_state() - Converts the peer's view of the cluster state to our point of view
+ * @ps: The state as seen by the peer.
+ */
+static union drbd_state convert_state(union drbd_state ps)
+{
+ union drbd_state ms;
+
+ static enum drbd_conns c_tab[] = {
+ [C_CONNECTED] = C_CONNECTED,
+
+ [C_STARTING_SYNC_S] = C_STARTING_SYNC_T,
+ [C_STARTING_SYNC_T] = C_STARTING_SYNC_S,
+ [C_DISCONNECTING] = C_TEAR_DOWN, /* C_NETWORK_FAILURE, */
+ [C_VERIFY_S] = C_VERIFY_T,
+ [C_MASK] = C_MASK,
+ };
+
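+ /* mirror the peer's view: its role/disk become our peer/pdsk, and
+ * asymmetric connection states map to their counterpart */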
+ ms.i = ps.i;
+
+ ms.conn = c_tab[ps.conn];
+ ms.peer = ps.role;
+ ms.role = ps.peer;
+ ms.pdsk = ps.disk;
+ ms.disk = ps.pdsk;
+ ms.peer_isp = (ps.aftr_isp | ps.user_isp);
+
+ return ms;
+}
+
+static int receive_req_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size)
+{
+ struct p_req_state *p = &mdev->data.rbuf.req_state;
+ union drbd_state mask, val;
+ enum drbd_state_rv rv;
+
+ mask.i = be32_to_cpu(p->mask);
+ val.i = be32_to_cpu(p->val);
+
+ if (test_bit(DISCARD_CONCURRENT, &mdev->flags) &&
+ test_bit(CLUSTER_ST_CHANGE, &mdev->flags)) {
+ drbd_send_sr_reply(mdev, SS_CONCURRENT_ST_CHG);
+ return true;
+ }
+
+ mask = convert_state(mask);
+ val = convert_state(val);
+
+ rv = drbd_change_state(mdev, CS_VERBOSE, mask, val);
+
+ drbd_send_sr_reply(mdev, rv);
+ drbd_md_sync(mdev);
+
+ return true;
+}
+
+static int receive_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size)
+{
+ struct p_state *p = &mdev->data.rbuf.state;
+ union drbd_state os, ns, peer_state;
+ enum drbd_disk_state real_peer_disk;
+ enum chg_state_flags cs_flags;
+ int rv;
+
+ peer_state.i = be32_to_cpu(p->state);
+
+ real_peer_disk = peer_state.disk;
+ if (peer_state.disk == D_NEGOTIATING) {
+ real_peer_disk = mdev->p_uuid[UI_FLAGS] & 4 ? D_INCONSISTENT : D_CONSISTENT;
+ dev_info(DEV, "real peer disk state = %s\n", drbd_disk_str(real_peer_disk));
+ }
+
+ spin_lock_irq(&mdev->req_lock);
+ retry:
+ os = ns = mdev->state;
+ spin_unlock_irq(&mdev->req_lock);
+
+ /* peer says his disk is uptodate, while we think it is inconsistent,
+ * and this happens while we think we have a sync going on. */
+ if (os.pdsk == D_INCONSISTENT && real_peer_disk == D_UP_TO_DATE &&
+ os.conn > C_CONNECTED && os.disk == D_UP_TO_DATE) {
+ /* If we are (becoming) SyncSource, but peer is still in sync
+ * preparation, ignore its uptodate-ness to avoid flapping, it
+ * will change to inconsistent once the peer reaches active
+ * syncing states.
+ * It may have changed syncer-paused flags, however, so we
+ * cannot ignore this completely. */
+ if (peer_state.conn > C_CONNECTED &&
+ peer_state.conn < C_SYNC_SOURCE)
+ real_peer_disk = D_INCONSISTENT;
+
+ /* if peer_state changes to connected at the same time,
+ * it explicitly notifies us that it finished resync.
+ * Maybe we should finish it up, too? */
+ else if (os.conn >= C_SYNC_SOURCE &&
+ peer_state.conn == C_CONNECTED) {
+ if (drbd_bm_total_weight(mdev) <= mdev->rs_failed)
+ drbd_resync_finished(mdev);
+ return true;
+ }
+ }
+
+ /* peer says his disk is inconsistent, while we think it is uptodate,
+ * and this happens while the peer still thinks we have a sync going on,
+ * but we think we are already done with the sync.
+ * We ignore this to avoid flapping pdsk.
+ * This should not happen, if the peer is a recent version of drbd. */
+ if (os.pdsk == D_UP_TO_DATE && real_peer_disk == D_INCONSISTENT &&
+ os.conn == C_CONNECTED && peer_state.conn > C_SYNC_SOURCE)
+ real_peer_disk = D_UP_TO_DATE;
+
+ if (ns.conn == C_WF_REPORT_PARAMS)
+ ns.conn = C_CONNECTED;
+
+ if (peer_state.conn == C_AHEAD)
+ ns.conn = C_BEHIND;
+
+ if (mdev->p_uuid && peer_state.disk >= D_NEGOTIATING &&
+ get_ldev_if_state(mdev, D_NEGOTIATING)) {
+ int cr; /* consider resync */
+
+ /* if we established a new connection */
+ cr = (os.conn < C_CONNECTED);
+ /* if we had an established connection
+ * and one of the nodes newly attaches a disk */
+ cr |= (os.conn == C_CONNECTED &&
+ (peer_state.disk == D_NEGOTIATING ||
+ os.disk == D_NEGOTIATING));
+ /* if we have both been inconsistent, and the peer has been
+ * forced to be UpToDate with --overwrite-data */
+ cr |= test_bit(CONSIDER_RESYNC, &mdev->flags);
+ /* if we had been plain connected, and the admin requested to
+ * start a sync by "invalidate" or "invalidate-remote" */
+ cr |= (os.conn == C_CONNECTED &&
+ (peer_state.conn >= C_STARTING_SYNC_S &&
+ peer_state.conn <= C_WF_BITMAP_T));
+
+ if (cr)
+ ns.conn = drbd_sync_handshake(mdev, peer_state.role, real_peer_disk);
+
+ put_ldev(mdev);
+ if (ns.conn == C_MASK) {
+ ns.conn = C_CONNECTED;
+ if (mdev->state.disk == D_NEGOTIATING) {
+ drbd_force_state(mdev, NS(disk, D_FAILED));
+ } else if (peer_state.disk == D_NEGOTIATING) {
+ dev_err(DEV, "Disk attach process on the peer node was aborted.\n");
+ peer_state.disk = D_DISKLESS;
+ real_peer_disk = D_DISKLESS;
+ } else {
+ if (test_and_clear_bit(CONN_DRY_RUN, &mdev->flags))
+ return false;
+ D_ASSERT(os.conn == C_WF_REPORT_PARAMS);
+ drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
+ return false;
+ }
+ }
+ }
+
+ spin_lock_irq(&mdev->req_lock);
+ if (mdev->state.i != os.i)
+ goto retry;
+ clear_bit(CONSIDER_RESYNC, &mdev->flags);
+ ns.peer = peer_state.role;
+ ns.pdsk = real_peer_disk;
+ ns.peer_isp = (peer_state.aftr_isp | peer_state.user_isp);
+ if ((ns.conn == C_CONNECTED || ns.conn == C_WF_BITMAP_S) && ns.disk == D_NEGOTIATING)
+ ns.disk = mdev->new_state_tmp.disk;
+ cs_flags = CS_VERBOSE + (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED ? 0 : CS_HARD);
+ if (ns.pdsk == D_CONSISTENT && is_susp(ns) && ns.conn == C_CONNECTED && os.conn < C_CONNECTED &&
+ test_bit(NEW_CUR_UUID, &mdev->flags)) {
+ /* Do not allow tl_restart(resend) for a rebooted peer. We can only allow this
+ for temporary network outages! */
+ spin_unlock_irq(&mdev->req_lock);
+ dev_err(DEV, "Aborting Connect, can not thaw IO with an only Consistent peer\n");
+ tl_clear(mdev);
+ drbd_uuid_new_current(mdev);
+ clear_bit(NEW_CUR_UUID, &mdev->flags);
+ drbd_force_state(mdev, NS2(conn, C_PROTOCOL_ERROR, susp, 0));
+ return false;
+ }
+ rv = _drbd_set_state(mdev, ns, cs_flags, NULL);
+ ns = mdev->state;
+ spin_unlock_irq(&mdev->req_lock);
+
+ if (rv < SS_SUCCESS) {
+ drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
+ return false;
+ }
+
+ if (os.conn > C_WF_REPORT_PARAMS) {
+ if (ns.conn > C_CONNECTED && peer_state.conn <= C_CONNECTED &&
+ peer_state.disk != D_NEGOTIATING ) {
+ /* we want resync, peer has not yet decided to sync... */
+ /* Nowadays only used when forcing a node into primary role and
+ setting its disk to UpToDate with that */
+ drbd_send_uuids(mdev);
+ drbd_send_state(mdev);
+ }
+ }
+
+ mdev->net_conf->want_lose = 0;
+
+ drbd_md_sync(mdev); /* update connected indicator, la_size, ... */
+
+ return true;
+}
+
+static int receive_sync_uuid(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size)
+{
+ struct p_rs_uuid *p = &mdev->data.rbuf.rs_uuid;
+
+ wait_event(mdev->misc_wait,
+ mdev->state.conn == C_WF_SYNC_UUID ||
+ mdev->state.conn == C_BEHIND ||
+ mdev->state.conn < C_CONNECTED ||
+ mdev->state.disk < D_NEGOTIATING);
+
+ /* D_ASSERT( mdev->state.conn == C_WF_SYNC_UUID ); */
+
+ /* Here the _drbd_uuid_ functions are right, current should
+ _not_ be rotated into the history */
+ if (get_ldev_if_state(mdev, D_NEGOTIATING)) {
+ _drbd_uuid_set(mdev, UI_CURRENT, be64_to_cpu(p->uuid));
+ _drbd_uuid_set(mdev, UI_BITMAP, 0UL);
+
+ drbd_print_uuids(mdev, "updated sync uuid");
+ drbd_start_resync(mdev, C_SYNC_TARGET);
+
+ put_ldev(mdev);
+ } else
+ dev_err(DEV, "Ignoring SyncUUID packet!\n");
+
+ return true;
+}
+
+/**
+ * receive_bitmap_plain
+ *
+ * Return 0 when done, 1 when another iteration is needed, and a negative error
+ * code upon failure.
+ */
+static int
+receive_bitmap_plain(struct drbd_conf *mdev, unsigned int data_size,
+ unsigned long *buffer, struct bm_xfer_ctx *c)
+{
+ unsigned num_words = min_t(size_t, BM_PACKET_WORDS, c->bm_words - c->word_offset);
+ unsigned want = num_words * sizeof(long);
+ int err;
+
+ if (want != data_size) {
+ dev_err(DEV, "%s:want (%u) != data_size (%u)\n", __func__, want, data_size);
+ return -EIO;
+ }
+ if (want == 0)
+ return 0;
+ err = drbd_recv(mdev, buffer, want);
+ if (err != want) {
+ if (err >= 0)
+ err = -EIO;
+ return err;
+ }
+
+ drbd_bm_merge_lel(mdev, c->word_offset, num_words, buffer);
+
+ c->word_offset += num_words;
+ c->bit_offset = c->word_offset * BITS_PER_LONG;
+ if (c->bit_offset > c->bm_bits)
+ c->bit_offset = c->bm_bits;
+
+ return 1;
+}
+
+/**
+ * recv_bm_rle_bits
+ *
+ * Return 0 when done, 1 when another iteration is needed, and a negative error
+ * code upon failure.
+ */
+static int
+recv_bm_rle_bits(struct drbd_conf *mdev,
+ struct p_compressed_bm *p,
+ struct bm_xfer_ctx *c)
+{
+ struct bitstream bs;
+ u64 look_ahead;
+ u64 rl;
+ u64 tmp;
+ unsigned long s = c->bit_offset;
+ unsigned long e;
+ int len = be16_to_cpu(p->head.length) - (sizeof(*p) - sizeof(p->head));
+ int toggle = DCBP_get_start(p);
+ int have;
+ int bits;
+
+ bitstream_init(&bs, p->code, len, DCBP_get_pad_bits(p));
+
+ bits = bitstream_get_bits(&bs, &look_ahead, 64);
+ if (bits < 0)
+ return -EIO;
+
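+ /* each VLI-decoded value is a run length; runs alternate between clear
+ * and set bits, starting with the state given by DCBP_get_start(), and
+ * only the "set" runs need to touch the bitmap */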
+ for (have = bits; have > 0; s += rl, toggle = !toggle) {
+ bits = vli_decode_bits(&rl, look_ahead);
+ if (bits <= 0)
+ return -EIO;
+
+ if (toggle) {
+ e = s + rl -1;
+ if (e >= c->bm_bits) {
+ dev_err(DEV, "bitmap overflow (e:%lu) while decoding bm RLE packet\n", e);
+ return -EIO;
+ }
+ _drbd_bm_set_bits(mdev, s, e);
+ }
+
+ if (have < bits) {
+ dev_err(DEV, "bitmap decoding error: h:%d b:%d la:0x%08llx l:%u/%u\n",
+ have, bits, look_ahead,
+ (unsigned int)(bs.cur.b - p->code),
+ (unsigned int)bs.buf_len);
+ return -EIO;
+ }
+ look_ahead >>= bits;
+ have -= bits;
+
+ bits = bitstream_get_bits(&bs, &tmp, 64 - have);
+ if (bits < 0)
+ return -EIO;
+ look_ahead |= tmp << have;
+ have += bits;
+ }
+
+ c->bit_offset = s;
+ bm_xfer_ctx_bit_to_word_offset(c);
+
+ return (s != c->bm_bits);
+}
+
+/**
+ * decode_bitmap_c
+ *
+ * Return 0 when done, 1 when another iteration is needed, and a negative error
+ * code upon failure.
+ */
+static int
+decode_bitmap_c(struct drbd_conf *mdev,
+ struct p_compressed_bm *p,
+ struct bm_xfer_ctx *c)
+{
+ if (DCBP_get_code(p) == RLE_VLI_Bits)
+ return recv_bm_rle_bits(mdev, p, c);
+
+ /* other variants had been implemented for evaluation,
+ * but have been dropped as this one turned out to be "best"
+ * during all our tests. */
+
+ dev_err(DEV, "receive_bitmap_c: unknown encoding %u\n", p->encoding);
+ drbd_force_state(mdev, NS(conn, C_PROTOCOL_ERROR));
+ return -EIO;
+}
+
+void INFO_bm_xfer_stats(struct drbd_conf *mdev,
+ const char *direction, struct bm_xfer_ctx *c)
+{
+ /* what would it take to transfer it "plaintext" */
+ unsigned plain = sizeof(struct p_header80) *
+ ((c->bm_words+BM_PACKET_WORDS-1)/BM_PACKET_WORDS+1)
+ + c->bm_words * sizeof(long);
+ unsigned total = c->bytes[0] + c->bytes[1];
+ unsigned r;
+
+ /* total cannot be zero, but just in case: */
+ if (total == 0)
+ return;
+
+ /* don't report if not compressed */
+ if (total >= plain)
+ return;
+
+ /* total < plain. check for overflow, still */
+ r = (total > UINT_MAX/1000) ? (total / (plain/1000))
+ : (1000 * total / plain);
+
+ if (r > 1000)
+ r = 1000;
+
+ r = 1000 - r;
+ dev_info(DEV, "%s bitmap stats [Bytes(packets)]: plain %u(%u), RLE %u(%u), "
+ "total %u; compression: %u.%u%%\n",
+ direction,
+ c->bytes[1], c->packets[1],
+ c->bytes[0], c->packets[0],
+ total, r/10, r % 10);
+}
+
+/* Since we are processing the bitfield from lower addresses to higher,
+ it does not matter whether we process it in 32 bit chunks or 64 bit
+ chunks, as long as it is little endian. (Think of it as a byte stream,
+ beginning with the lowest byte...) If we used big endian,
+ we would have to process it from the highest address to the lowest,
+ in order to be agnostic to the 32 vs 64 bit issue.
+
+ returns 0 on failure, 1 if we successfully received it. */
+static int receive_bitmap(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size)
+{
+ struct bm_xfer_ctx c;
+ void *buffer;
+ int err;
+ int ok = false;
+ struct p_header80 *h = &mdev->data.rbuf.header.h80;
+
+ drbd_bm_lock(mdev, "receive bitmap", BM_LOCKED_SET_ALLOWED);
+ /* you are supposed to send additional out-of-sync information
+ * if you actually set bits during this phase */
+
+ /* maybe we should use some per thread scratch page,
+ * and allocate that during initial device creation? */
+ buffer = (unsigned long *) __get_free_page(GFP_NOIO);
+ if (!buffer) {
+ dev_err(DEV, "failed to allocate one page buffer in %s\n", __func__);
+ goto out;
+ }
+
+ c = (struct bm_xfer_ctx) {
+ .bm_bits = drbd_bm_bits(mdev),
+ .bm_words = drbd_bm_words(mdev),
+ };
+
+ for(;;) {
+ if (cmd == P_BITMAP) {
+ err = receive_bitmap_plain(mdev, data_size, buffer, &c);
+ } else if (cmd == P_COMPRESSED_BITMAP) {
+ /* MAYBE: sanity check that we speak proto >= 90,
+ * and the feature is enabled! */
+ struct p_compressed_bm *p;
+
+ if (data_size > BM_PACKET_PAYLOAD_BYTES) {
+ dev_err(DEV, "ReportCBitmap packet too large\n");
+ goto out;
+ }
+ /* use the page buffer */
+ p = buffer;
+ memcpy(p, h, sizeof(*h));
+ if (drbd_recv(mdev, p->head.payload, data_size) != data_size)
+ goto out;
+ if (data_size <= (sizeof(*p) - sizeof(p->head))) {
+ dev_err(DEV, "ReportCBitmap packet too small (l:%u)\n", data_size);
+ goto out;
+ }
+ err = decode_bitmap_c(mdev, p, &c);
+ } else {
+ dev_warn(DEV, "receive_bitmap: cmd neither ReportBitMap nor ReportCBitMap (is 0x%x)", cmd);
+ goto out;
+ }
+
+ c.packets[cmd == P_BITMAP]++;
+ c.bytes[cmd == P_BITMAP] += sizeof(struct p_header80) + data_size;
+
+ if (err <= 0) {
+ if (err < 0)
+ goto out;
+ break;
+ }
+ if (!drbd_recv_header(mdev, &cmd, &data_size))
+ goto out;
+ }
+
+ INFO_bm_xfer_stats(mdev, "receive", &c);
+
+ if (mdev->state.conn == C_WF_BITMAP_T) {
+ enum drbd_state_rv rv;
+
+ ok = !drbd_send_bitmap(mdev);
+ if (!ok)
+ goto out;
+ /* Omit CS_ORDERED with this state transition to avoid deadlocks. */
+ rv = _drbd_request_state(mdev, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE);
+ D_ASSERT(rv == SS_SUCCESS);
+ } else if (mdev->state.conn != C_WF_BITMAP_S) {
+ /* admin may have requested C_DISCONNECTING,
+ * other threads may have noticed network errors */
+ dev_info(DEV, "unexpected cstate (%s) in receive_bitmap\n",
+ drbd_conn_str(mdev->state.conn));
+ }
+
+ ok = true;
+ out:
+ drbd_bm_unlock(mdev);
+ if (ok && mdev->state.conn == C_WF_BITMAP_S)
+ drbd_start_resync(mdev, C_SYNC_SOURCE);
+ free_page((unsigned long) buffer);
+ return ok;
+}
+
+static int receive_skip(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size)
+{
+ /* TODO zero copy sink :) */
+ static char sink[128];
+ int size, want, r;
+
+ dev_warn(DEV, "skipping unknown optional packet type %d, l: %d!\n",
+ cmd, data_size);
+
+ size = data_size;
+ while (size > 0) {
+ want = min_t(int, size, sizeof(sink));
+ r = drbd_recv(mdev, sink, want);
+ ERR_IF(r <= 0) break;
+ size -= r;
+ }
+ return size == 0;
+}
+
+static int receive_UnplugRemote(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size)
+{
+ /* Make sure we've acked all the TCP data associated
+ * with the data requests being unplugged */
+ drbd_tcp_quickack(mdev->data.socket);
+
+ return true;
+}
+
+static int receive_out_of_sync(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size)
+{
+ struct p_block_desc *p = &mdev->data.rbuf.block_desc;
+
+ switch (mdev->state.conn) {
+ case C_WF_SYNC_UUID:
+ case C_WF_BITMAP_T:
+ case C_BEHIND:
+ break;
+ default:
+ dev_err(DEV, "ASSERT FAILED cstate = %s, expected: WFSyncUUID|WFBitMapT|Behind\n",
+ drbd_conn_str(mdev->state.conn));
+ }
+
+ drbd_set_out_of_sync(mdev, be64_to_cpu(p->sector), be32_to_cpu(p->blksize));
+
+ return true;
+}
+
+typedef int (*drbd_cmd_handler_f)(struct drbd_conf *, enum drbd_packets cmd, unsigned int to_receive);
+
+struct data_cmd {
+ int expect_payload;
+ size_t pkt_size;
+ drbd_cmd_handler_f function;
+};
+
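+/* Receiver dispatch table: for each packet type, whether a payload beyond
+ * the fixed sub-header is allowed, the size of that fixed part, and the
+ * handler to call for it. */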
+static struct data_cmd drbd_cmd_handler[] = {
+ [P_DATA] = { 1, sizeof(struct p_data), receive_Data },
+ [P_DATA_REPLY] = { 1, sizeof(struct p_data), receive_DataReply },
+ [P_RS_DATA_REPLY] = { 1, sizeof(struct p_data), receive_RSDataReply } ,
+ [P_BARRIER] = { 0, sizeof(struct p_barrier), receive_Barrier } ,
+ [P_BITMAP] = { 1, sizeof(struct p_header80), receive_bitmap } ,
+ [P_COMPRESSED_BITMAP] = { 1, sizeof(struct p_header80), receive_bitmap } ,
+ [P_UNPLUG_REMOTE] = { 0, sizeof(struct p_header80), receive_UnplugRemote },
+ [P_DATA_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest },
+ [P_RS_DATA_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest },
+ [P_SYNC_PARAM] = { 1, sizeof(struct p_header80), receive_SyncParam },
+ [P_SYNC_PARAM89] = { 1, sizeof(struct p_header80), receive_SyncParam },
+ [P_PROTOCOL] = { 1, sizeof(struct p_protocol), receive_protocol },
+ [P_UUIDS] = { 0, sizeof(struct p_uuids), receive_uuids },
+ [P_SIZES] = { 0, sizeof(struct p_sizes), receive_sizes },
+ [P_STATE] = { 0, sizeof(struct p_state), receive_state },
+ [P_STATE_CHG_REQ] = { 0, sizeof(struct p_req_state), receive_req_state },
+ [P_SYNC_UUID] = { 0, sizeof(struct p_rs_uuid), receive_sync_uuid },
+ [P_OV_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest },
+ [P_OV_REPLY] = { 1, sizeof(struct p_block_req), receive_DataRequest },
+ [P_CSUM_RS_REQUEST] = { 1, sizeof(struct p_block_req), receive_DataRequest },
+ [P_DELAY_PROBE] = { 0, sizeof(struct p_delay_probe93), receive_skip },
+ [P_OUT_OF_SYNC] = { 0, sizeof(struct p_block_desc), receive_out_of_sync },
+ /* anything missing from this table is in
+ * the asender_tbl, see get_asender_cmd */
+ [P_MAX_CMD] = { 0, 0, NULL },
+};
+
+/* All handler functions that expect a sub-header get that sub-header in
+ mdev->data.rbuf.header.head.payload.
+
+ Usually the callback can find the usual p_header in
+ mdev->data.rbuf.header.head, but it may not rely on that,
+ since there is also p_header95.
+ */
+
+static void drbdd(struct drbd_conf *mdev)
+{
+ union p_header *header = &mdev->data.rbuf.header;
+ unsigned int packet_size;
+ enum drbd_packets cmd;
+ size_t shs; /* sub header size */
+ int rv;
+
+ while (get_t_state(&mdev->receiver) == Running) {
+ drbd_thread_current_set_cpu(mdev);
+ if (!drbd_recv_header(mdev, &cmd, &packet_size))
+ goto err_out;
+
+ if (unlikely(cmd >= P_MAX_CMD || !drbd_cmd_handler[cmd].function)) {
+ dev_err(DEV, "unknown packet type %d, l: %d!\n", cmd, packet_size);
+ goto err_out;
+ }
+
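+ /* receive the fixed sub-header of this packet type (its struct size
+ * minus the common header already read); the handler then consumes
+ * any remaining payload bytes itself */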
+ shs = drbd_cmd_handler[cmd].pkt_size - sizeof(union p_header);
+ if (packet_size - shs > 0 && !drbd_cmd_handler[cmd].expect_payload) {
+ dev_err(DEV, "No payload expected %s l:%d\n", cmdname(cmd), packet_size);
+ goto err_out;
+ }
+
+ if (shs) {
+ rv = drbd_recv(mdev, &header->h80.payload, shs);
+ if (unlikely(rv != shs)) {
+ if (!signal_pending(current))
+ dev_warn(DEV, "short read while reading sub header: rv=%d\n", rv);
+ goto err_out;
+ }
+ }
+
+ rv = drbd_cmd_handler[cmd].function(mdev, cmd, packet_size - shs);
+
+ if (unlikely(!rv)) {
+ dev_err(DEV, "error receiving %s, l: %d!\n",
+ cmdname(cmd), packet_size);
+ goto err_out;
+ }
+ }
+
+ if (0) {
+ err_out:
+ drbd_force_state(mdev, NS(conn, C_PROTOCOL_ERROR));
+ }
+ /* If we leave here, we probably want to update at least the
+ * "Connected" indicator on stable storage. Do so explicitly here. */
+ drbd_md_sync(mdev);
+}
+
+void drbd_flush_workqueue(struct drbd_conf *mdev)
+{
+ struct drbd_wq_barrier barr;
+
+ barr.w.cb = w_prev_work_done;
+ init_completion(&barr.done);
+ drbd_queue_work(&mdev->data.work, &barr.w);
+ wait_for_completion(&barr.done);
+}
+
+void drbd_free_tl_hash(struct drbd_conf *mdev)
+{
+ struct hlist_head *h;
+
+ spin_lock_irq(&mdev->req_lock);
+
+ if (!mdev->tl_hash || mdev->state.conn != C_STANDALONE) {
+ spin_unlock_irq(&mdev->req_lock);
+ return;
+ }
+ /* paranoia code */
+ for (h = mdev->ee_hash; h < mdev->ee_hash + mdev->ee_hash_s; h++)
+ if (h->first)
+ dev_err(DEV, "ASSERT FAILED ee_hash[%u].first == %p, expected NULL\n",
+ (int)(h - mdev->ee_hash), h->first);
+ kfree(mdev->ee_hash);
+ mdev->ee_hash = NULL;
+ mdev->ee_hash_s = 0;
+
+ /* paranoia code */
+ for (h = mdev->tl_hash; h < mdev->tl_hash + mdev->tl_hash_s; h++)
+ if (h->first)
+ dev_err(DEV, "ASSERT FAILED tl_hash[%u] == %p, expected NULL\n",
+ (int)(h - mdev->tl_hash), h->first);
+ kfree(mdev->tl_hash);
+ mdev->tl_hash = NULL;
+ mdev->tl_hash_s = 0;
+ spin_unlock_irq(&mdev->req_lock);
+}
+
+static void drbd_disconnect(struct drbd_conf *mdev)
+{
+ enum drbd_fencing_p fp;
+ union drbd_state os, ns;
+ int rv = SS_UNKNOWN_ERROR;
+ unsigned int i;
+
+ if (mdev->state.conn == C_STANDALONE)
+ return;
+
+ /* asender does not clean up anything. it must not interfere, either */
+ drbd_thread_stop(&mdev->asender);
+ drbd_free_sock(mdev);
+
+ /* wait for current activity to cease. */
+ spin_lock_irq(&mdev->req_lock);
+ _drbd_wait_ee_list_empty(mdev, &mdev->active_ee);
+ _drbd_wait_ee_list_empty(mdev, &mdev->sync_ee);
+ _drbd_wait_ee_list_empty(mdev, &mdev->read_ee);
+ spin_unlock_irq(&mdev->req_lock);
+
+ /* We do not have data structures that would allow us to
+ * get the rs_pending_cnt down to 0 again.
+ * * On C_SYNC_TARGET we do not have any data structures describing
+ * the pending RSDataRequest's we have sent.
+ * * On C_SYNC_SOURCE there is no data structure that tracks
+ * the P_RS_DATA_REPLY blocks that we sent to the SyncTarget.
+ * And no, it is not the sum of the reference counts in the
+ * resync_LRU. The resync_LRU tracks the whole operation including
+ * the disk-IO, while the rs_pending_cnt only tracks the blocks
+ * on the fly. */
+ drbd_rs_cancel_all(mdev);
+ mdev->rs_total = 0;
+ mdev->rs_failed = 0;
+ atomic_set(&mdev->rs_pending_cnt, 0);
+ wake_up(&mdev->misc_wait);
+
+ del_timer(&mdev->request_timer);
+
+ /* make sure syncer is stopped and w_resume_next_sg queued */
+ del_timer_sync(&mdev->resync_timer);
+ resync_timer_fn((unsigned long)mdev);
+
+ /* wait for all w_e_end_data_req, w_e_end_rsdata_req, w_send_barrier,
+ * w_make_resync_request etc. which may still be on the worker queue
+ * to be "canceled" */
+ drbd_flush_workqueue(mdev);
+
+ /* This also does reclaim_net_ee(). If we do this too early, we might
+ * miss some resync ee and pages.*/
+ drbd_process_done_ee(mdev);
+
+ kfree(mdev->p_uuid);
+ mdev->p_uuid = NULL;
+
+ if (!is_susp(mdev->state))
+ tl_clear(mdev);
+
+ dev_info(DEV, "Connection closed\n");
+
+ drbd_md_sync(mdev);
+
+ fp = FP_DONT_CARE;
+ if (get_ldev(mdev)) {
+ fp = mdev->ldev->dc.fencing;
+ put_ldev(mdev);
+ }
+
+ if (mdev->state.role == R_PRIMARY && fp >= FP_RESOURCE && mdev->state.pdsk >= D_UNKNOWN)
+ drbd_try_outdate_peer_async(mdev);
+
+ spin_lock_irq(&mdev->req_lock);
+ os = mdev->state;
+ if (os.conn >= C_UNCONNECTED) {
+ /* Do not restart in case we are C_DISCONNECTING */
+ ns = os;
+ ns.conn = C_UNCONNECTED;
+ rv = _drbd_set_state(mdev, ns, CS_VERBOSE, NULL);
+ }
+ spin_unlock_irq(&mdev->req_lock);
+
+ if (os.conn == C_DISCONNECTING) {
+ wait_event(mdev->net_cnt_wait, atomic_read(&mdev->net_cnt) == 0);
+
+ crypto_free_hash(mdev->cram_hmac_tfm);
+ mdev->cram_hmac_tfm = NULL;
+
+ kfree(mdev->net_conf);
+ mdev->net_conf = NULL;
+ drbd_request_state(mdev, NS(conn, C_STANDALONE));
+ }
+
+ /* serialize with bitmap writeout triggered by the state change,
+ * if any. */
+ wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags));
+
+ /* tcp_close and release of sendpage pages can be deferred. I don't
+ * want to use SO_LINGER, because apparently it can be deferred for
+ * more than 20 seconds (longest time I checked).
+ *
+ * Actually we don't care for exactly when the network stack does its
+ * put_page(), but release our reference on these pages right here.
+ */
+ i = drbd_release_ee(mdev, &mdev->net_ee);
+ if (i)
+ dev_info(DEV, "net_ee not empty, killed %u entries\n", i);
+ i = atomic_read(&mdev->pp_in_use_by_net);
+ if (i)
+ dev_info(DEV, "pp_in_use_by_net = %d, expected 0\n", i);
+ i = atomic_read(&mdev->pp_in_use);
+ if (i)
+ dev_info(DEV, "pp_in_use = %d, expected 0\n", i);
+
+ D_ASSERT(list_empty(&mdev->read_ee));
+ D_ASSERT(list_empty(&mdev->active_ee));
+ D_ASSERT(list_empty(&mdev->sync_ee));
+ D_ASSERT(list_empty(&mdev->done_ee));
+
+ /* ok, no more ee's on the fly, it is safe to reset the epoch_size */
+ atomic_set(&mdev->current_epoch->epoch_size, 0);
+ D_ASSERT(list_empty(&mdev->current_epoch->list));
+}
+
+/*
+ * We support PRO_VERSION_MIN to PRO_VERSION_MAX. The protocol version
+ * we can agree on is stored in agreed_pro_version.
+ *
+ * feature flags and the reserved array should be enough room for future
+ * enhancements of the handshake protocol, and possible plugins...
+ *
+ * for now, they are expected to be zero, but ignored.
+ */
+static int drbd_send_handshake(struct drbd_conf *mdev)
+{
+ /* ASSERT current == mdev->receiver ... */
+ struct p_handshake *p = &mdev->data.sbuf.handshake;
+ int ok;
+
+ if (mutex_lock_interruptible(&mdev->data.mutex)) {
+ dev_err(DEV, "interrupted during initial handshake\n");
+ return 0; /* interrupted. not ok. */
+ }
+
+ if (mdev->data.socket == NULL) {
+ mutex_unlock(&mdev->data.mutex);
+ return 0;
+ }
+
+ memset(p, 0, sizeof(*p));
+ p->protocol_min = cpu_to_be32(PRO_VERSION_MIN);
+ p->protocol_max = cpu_to_be32(PRO_VERSION_MAX);
+ ok = _drbd_send_cmd( mdev, mdev->data.socket, P_HAND_SHAKE,
+ (struct p_header80 *)p, sizeof(*p), 0 );
+ mutex_unlock(&mdev->data.mutex);
+ return ok;
+}
+
+/*
+ * return values:
+ * 1 yes, we have a valid connection
+ * 0 oops, did not work out, please try again
+ * -1 peer talks different language,
+ * no point in trying again, please go standalone.
+ */
+static int drbd_do_handshake(struct drbd_conf *mdev)
+{
+ /* ASSERT current == mdev->receiver ... */
+ struct p_handshake *p = &mdev->data.rbuf.handshake;
+ const int expect = sizeof(struct p_handshake) - sizeof(struct p_header80);
+ unsigned int length;
+ enum drbd_packets cmd;
+ int rv;
+
+ rv = drbd_send_handshake(mdev);
+ if (!rv)
+ return 0;
+
+ rv = drbd_recv_header(mdev, &cmd, &length);
+ if (!rv)
+ return 0;
+
+ if (cmd != P_HAND_SHAKE) {
+ dev_err(DEV, "expected HandShake packet, received: %s (0x%04x)\n",
+ cmdname(cmd), cmd);
+ return -1;
+ }
+
+ if (length != expect) {
+ dev_err(DEV, "expected HandShake length: %u, received: %u\n",
+ expect, length);
+ return -1;
+ }
+
+ rv = drbd_recv(mdev, &p->head.payload, expect);
+
+ if (rv != expect) {
+ if (!signal_pending(current))
+ dev_warn(DEV, "short read receiving handshake packet: l=%u\n", rv);
+ return 0;
+ }
+
+ p->protocol_min = be32_to_cpu(p->protocol_min);
+ p->protocol_max = be32_to_cpu(p->protocol_max);
+ if (p->protocol_max == 0)
+ p->protocol_max = p->protocol_min;
+
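+ /* the advertised version ranges must overlap; we then agree on the
+ * highest protocol version both sides support */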
+ if (PRO_VERSION_MAX < p->protocol_min ||
+ PRO_VERSION_MIN > p->protocol_max)
+ goto incompat;
+
+ mdev->agreed_pro_version = min_t(int, PRO_VERSION_MAX, p->protocol_max);
+
+ dev_info(DEV, "Handshake successful: "
+ "Agreed network protocol version %d\n", mdev->agreed_pro_version);
+
+ return 1;
+
+ incompat:
+ dev_err(DEV, "incompatible DRBD dialects: "
+ "I support %d-%d, peer supports %d-%d\n",
+ PRO_VERSION_MIN, PRO_VERSION_MAX,
+ p->protocol_min, p->protocol_max);
+ return -1;
+}
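The negotiation above is just an intersection of two version ranges, keeping the highest version both sides support. A minimal standalone sketch of that logic (function name and values are illustrative, not the driver's):

#include <stdio.h>

/* Each side advertises a [min, max] protocol range; the agreed version is
 * the highest version contained in both ranges, or -1 if they are disjoint. */
static int negotiate_version(int my_min, int my_max, int peer_min, int peer_max)
{
	if (my_max < peer_min || my_min > peer_max)
		return -1;			/* incompatible dialects */
	return my_max < peer_max ? my_max : peer_max;
}

int main(void)
{
	printf("%d\n", negotiate_version(86, 96, 88, 100));	/* prints 96 */
	printf("%d\n", negotiate_version(86, 96, 97, 100));	/* prints -1 */
	return 0;
}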
+
+#if !defined(CONFIG_CRYPTO_HMAC) && !defined(CONFIG_CRYPTO_HMAC_MODULE)
+static int drbd_do_auth(struct drbd_conf *mdev)
+{
+ dev_err(DEV, "This kernel was build without CONFIG_CRYPTO_HMAC.\n");
+ dev_err(DEV, "You need to disable 'cram-hmac-alg' in drbd.conf.\n");
+ return -1;
+}
+#else
+#define CHALLENGE_LEN 64
+
+/* Return value:
+ 1 - auth succeeded,
+ 0 - failed, try again (network error),
+ -1 - auth failed, don't try again.
+*/
+
+static int drbd_do_auth(struct drbd_conf *mdev)
+{
+ char my_challenge[CHALLENGE_LEN]; /* 64 Bytes... */
+ struct scatterlist sg;
+ char *response = NULL;
+ char *right_response = NULL;
+ char *peers_ch = NULL;
+ unsigned int key_len = strlen(mdev->net_conf->shared_secret);
+ unsigned int resp_size;
+ struct hash_desc desc;
+ enum drbd_packets cmd;
+ unsigned int length;
+ int rv;
+
+ desc.tfm = mdev->cram_hmac_tfm;
+ desc.flags = 0;
+
+ rv = crypto_hash_setkey(mdev->cram_hmac_tfm,
+ (u8 *)mdev->net_conf->shared_secret, key_len);
+ if (rv) {
+ dev_err(DEV, "crypto_hash_setkey() failed with %d\n", rv);
+ rv = -1;
+ goto fail;
+ }
+
+ get_random_bytes(my_challenge, CHALLENGE_LEN);
+
+ rv = drbd_send_cmd2(mdev, P_AUTH_CHALLENGE, my_challenge, CHALLENGE_LEN);
+ if (!rv)
+ goto fail;
+
+ rv = drbd_recv_header(mdev, &cmd, &length);
+ if (!rv)
+ goto fail;
+
+ if (cmd != P_AUTH_CHALLENGE) {
+ dev_err(DEV, "expected AuthChallenge packet, received: %s (0x%04x)\n",
+ cmdname(cmd), cmd);
+ rv = 0;
+ goto fail;
+ }
+
+ if (length > CHALLENGE_LEN * 2) {
+ dev_err(DEV, "expected AuthChallenge payload too big.\n");
+ rv = -1;
+ goto fail;
+ }
+
+ peers_ch = kmalloc(length, GFP_NOIO);
+ if (peers_ch == NULL) {
+ dev_err(DEV, "kmalloc of peers_ch failed\n");
+ rv = -1;
+ goto fail;
+ }
+
+ rv = drbd_recv(mdev, peers_ch, length);
+
+ if (rv != length) {
+ if (!signal_pending(current))
+ dev_warn(DEV, "short read AuthChallenge: l=%u\n", rv);
+ rv = 0;
+ goto fail;
+ }
+
+ resp_size = crypto_hash_digestsize(mdev->cram_hmac_tfm);
+ response = kmalloc(resp_size, GFP_NOIO);
+ if (response == NULL) {
+ dev_err(DEV, "kmalloc of response failed\n");
+ rv = -1;
+ goto fail;
+ }
+
+ sg_init_table(&sg, 1);
+ sg_set_buf(&sg, peers_ch, length);
+
+ rv = crypto_hash_digest(&desc, &sg, sg.length, response);
+ if (rv) {
+ dev_err(DEV, "crypto_hash_digest() failed with %d\n", rv);
+ rv = -1;
+ goto fail;
+ }
+
+ rv = drbd_send_cmd2(mdev, P_AUTH_RESPONSE, response, resp_size);
+ if (!rv)
+ goto fail;
+
+ rv = drbd_recv_header(mdev, &cmd, &length);
+ if (!rv)
+ goto fail;
+
+ if (cmd != P_AUTH_RESPONSE) {
+ dev_err(DEV, "expected AuthResponse packet, received: %s (0x%04x)\n",
+ cmdname(cmd), cmd);
+ rv = 0;
+ goto fail;
+ }
+
+ if (length != resp_size) {
+ dev_err(DEV, "expected AuthResponse payload of wrong size\n");
+ rv = 0;
+ goto fail;
+ }
+
+ rv = drbd_recv(mdev, response, resp_size);
+
+ if (rv != resp_size) {
+ if (!signal_pending(current))
+ dev_warn(DEV, "short read receiving AuthResponse: l=%u\n", rv);
+ rv = 0;
+ goto fail;
+ }
+
+ right_response = kmalloc(resp_size, GFP_NOIO);
+ if (right_response == NULL) {
+ dev_err(DEV, "kmalloc of right_response failed\n");
+ rv = -1;
+ goto fail;
+ }
+
+ sg_set_buf(&sg, my_challenge, CHALLENGE_LEN);
+
+ rv = crypto_hash_digest(&desc, &sg, sg.length, right_response);
+ if (rv) {
+ dev_err(DEV, "crypto_hash_digest() failed with %d\n", rv);
+ rv = -1;
+ goto fail;
+ }
+
+ rv = !memcmp(response, right_response, resp_size);
+
+ if (rv)
+ dev_info(DEV, "Peer authenticated using %d bytes of '%s' HMAC\n",
+ resp_size, mdev->net_conf->cram_hmac_alg);
+ else
+ rv = -1;
+
+ fail:
+ kfree(peers_ch);
+ kfree(response);
+ kfree(right_response);
+
+ return rv;
+}
+#endif
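The exchange above is a symmetric challenge/response: each node sends a random challenge, answers the peer's challenge with an HMAC keyed by the shared secret, and verifies the peer's answer against its own computation. A userspace sketch of one direction of that check, assuming OpenSSL is available; SHA-1 is chosen only for illustration, the driver uses whatever cram-hmac-alg is configured:

#include <stdio.h>
#include <string.h>
#include <openssl/evp.h>
#include <openssl/hmac.h>
#include <openssl/rand.h>

#define CHALLENGE_LEN 64

int main(void)
{
	const char *secret = "shared-secret";	/* stand-in for the configured shared secret */
	unsigned char challenge[CHALLENGE_LEN];
	unsigned char response[EVP_MAX_MD_SIZE], expected[EVP_MAX_MD_SIZE];
	unsigned int rlen, elen;

	/* node A picks a random challenge and sends it to node B */
	RAND_bytes(challenge, CHALLENGE_LEN);

	/* node B answers with HMAC(secret, challenge) */
	HMAC(EVP_sha1(), secret, strlen(secret),
	     challenge, CHALLENGE_LEN, response, &rlen);

	/* node A recomputes the HMAC over its own challenge and compares */
	HMAC(EVP_sha1(), secret, strlen(secret),
	     challenge, CHALLENGE_LEN, expected, &elen);

	printf("auth %s\n", rlen == elen && !memcmp(response, expected, rlen)
			    ? "succeeds" : "fails");
	return 0;
}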
+
+int drbdd_init(struct drbd_thread *thi)
+{
+ struct drbd_conf *mdev = thi->mdev;
+ unsigned int minor = mdev_to_minor(mdev);
+ int h;
+
+ sprintf(current->comm, "drbd%d_receiver", minor);
+
+ dev_info(DEV, "receiver (re)started\n");
+
+ do {
+ h = drbd_connect(mdev);
+ if (h == 0) {
+ drbd_disconnect(mdev);
+ schedule_timeout_interruptible(HZ);
+ }
+ if (h == -1) {
+ dev_warn(DEV, "Discarding network configuration.\n");
+ drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
+ }
+ } while (h == 0);
+
+ if (h > 0) {
+ if (get_net_conf(mdev)) {
+ drbdd(mdev);
+ put_net_conf(mdev);
+ }
+ }
+
+ drbd_disconnect(mdev);
+
+ dev_info(DEV, "receiver terminated\n");
+ return 0;
+}
+
+/* ********* acknowledge sender ******** */
+
+static int got_RqSReply(struct drbd_conf *mdev, struct p_header80 *h)
+{
+ struct p_req_state_reply *p = (struct p_req_state_reply *)h;
+
+ int retcode = be32_to_cpu(p->retcode);
+
+ if (retcode >= SS_SUCCESS) {
+ set_bit(CL_ST_CHG_SUCCESS, &mdev->flags);
+ } else {
+ set_bit(CL_ST_CHG_FAIL, &mdev->flags);
+ dev_err(DEV, "Requested state change failed by peer: %s (%d)\n",
+ drbd_set_st_err_str(retcode), retcode);
+ }
+ wake_up(&mdev->state_wait);
+
+ return true;
+}
+
+static int got_Ping(struct drbd_conf *mdev, struct p_header80 *h)
+{
+ return drbd_send_ping_ack(mdev);
+}
+
+static int got_PingAck(struct drbd_conf *mdev, struct p_header80 *h)
+{
+ /* restore idle timeout */
+ mdev->meta.socket->sk->sk_rcvtimeo = mdev->net_conf->ping_int*HZ;
+ if (!test_and_set_bit(GOT_PING_ACK, &mdev->flags))
+ wake_up(&mdev->misc_wait);
+
+ return true;
+}
+
+static int got_IsInSync(struct drbd_conf *mdev, struct p_header80 *h)
+{
+ struct p_block_ack *p = (struct p_block_ack *)h;
+ sector_t sector = be64_to_cpu(p->sector);
+ int blksize = be32_to_cpu(p->blksize);
+
+ D_ASSERT(mdev->agreed_pro_version >= 89);
+
+ update_peer_seq(mdev, be32_to_cpu(p->seq_num));
+
+ if (get_ldev(mdev)) {
+ drbd_rs_complete_io(mdev, sector);
+ drbd_set_in_sync(mdev, sector, blksize);
+ /* rs_same_csums is supposed to count in units of BM_BLOCK_SIZE */
+ mdev->rs_same_csum += (blksize >> BM_BLOCK_SHIFT);
+ put_ldev(mdev);
+ }
+ dec_rs_pending(mdev);
+ atomic_add(blksize >> 9, &mdev->rs_sect_in);
+
+ return true;
+}
+
+/* when we receive the ACK for a write request,
+ * verify that we actually know about it */
+static struct drbd_request *_ack_id_to_req(struct drbd_conf *mdev,
+ u64 id, sector_t sector)
+{
+ struct hlist_head *slot = tl_hash_slot(mdev, sector);
+ struct hlist_node *n;
+ struct drbd_request *req;
+
+ hlist_for_each_entry(req, n, slot, collision) {
+ if ((unsigned long)req == (unsigned long)id) {
+ if (req->sector != sector) {
+ dev_err(DEV, "_ack_id_to_req: found req %p but it has "
+ "wrong sector (%llus versus %llus)\n", req,
+ (unsigned long long)req->sector,
+ (unsigned long long)sector);
+ break;
+ }
+ return req;
+ }
+ }
+ return NULL;
+}
+
+typedef struct drbd_request *(req_validator_fn)
+ (struct drbd_conf *mdev, u64 id, sector_t sector);
+
+static int validate_req_change_req_state(struct drbd_conf *mdev,
+ u64 id, sector_t sector, req_validator_fn validator,
+ const char *func, enum drbd_req_event what)
+{
+ struct drbd_request *req;
+ struct bio_and_error m;
+
+ spin_lock_irq(&mdev->req_lock);
+ req = validator(mdev, id, sector);
+ if (unlikely(!req)) {
+ spin_unlock_irq(&mdev->req_lock);
+
+ dev_err(DEV, "%s: failed to find req %p, sector %llus\n", func,
+ (void *)(unsigned long)id, (unsigned long long)sector);
+ return false;
+ }
+ __req_mod(req, what, &m);
+ spin_unlock_irq(&mdev->req_lock);
+
+ if (m.bio)
+ complete_master_bio(mdev, &m);
+ return true;
+}
+
+static int got_BlockAck(struct drbd_conf *mdev, struct p_header80 *h)
+{
+ struct p_block_ack *p = (struct p_block_ack *)h;
+ sector_t sector = be64_to_cpu(p->sector);
+ int blksize = be32_to_cpu(p->blksize);
+ enum drbd_req_event what;
+
+ update_peer_seq(mdev, be32_to_cpu(p->seq_num));
+
+ if (is_syncer_block_id(p->block_id)) {
+ drbd_set_in_sync(mdev, sector, blksize);
+ dec_rs_pending(mdev);
+ return true;
+ }
+ switch (be16_to_cpu(h->command)) {
+ case P_RS_WRITE_ACK:
+ D_ASSERT(mdev->net_conf->wire_protocol == DRBD_PROT_C);
+ what = write_acked_by_peer_and_sis;
+ break;
+ case P_WRITE_ACK:
+ D_ASSERT(mdev->net_conf->wire_protocol == DRBD_PROT_C);
+ what = write_acked_by_peer;
+ break;
+ case P_RECV_ACK:
+ D_ASSERT(mdev->net_conf->wire_protocol == DRBD_PROT_B);
+ what = recv_acked_by_peer;
+ break;
+ case P_DISCARD_ACK:
+ D_ASSERT(mdev->net_conf->wire_protocol == DRBD_PROT_C);
+ what = conflict_discarded_by_peer;
+ break;
+ default:
+ D_ASSERT(0);
+ return false;
+ }
+
+ return validate_req_change_req_state(mdev, p->block_id, sector,
+ _ack_id_to_req, __func__ , what);
+}
+
+static int got_NegAck(struct drbd_conf *mdev, struct p_header80 *h)
+{
+ struct p_block_ack *p = (struct p_block_ack *)h;
+ sector_t sector = be64_to_cpu(p->sector);
+ int size = be32_to_cpu(p->blksize);
+ struct drbd_request *req;
+ struct bio_and_error m;
+
+ update_peer_seq(mdev, be32_to_cpu(p->seq_num));
+
+ if (is_syncer_block_id(p->block_id)) {
+ dec_rs_pending(mdev);
+ drbd_rs_failed_io(mdev, sector, size);
+ return true;
+ }
+
+ spin_lock_irq(&mdev->req_lock);
+ req = _ack_id_to_req(mdev, p->block_id, sector);
+ if (!req) {
+ spin_unlock_irq(&mdev->req_lock);
+ if (mdev->net_conf->wire_protocol == DRBD_PROT_A ||
+ mdev->net_conf->wire_protocol == DRBD_PROT_B) {
+ /* Protocol A has no P_WRITE_ACKs, but has P_NEG_ACKs.
+ The master bio might already be completed, therefore the
+ request is no longer in the collision hash.
+ => Do not try to validate block_id as request. */
+ /* In Protocol B we might already have got a P_RECV_ACK
+ but then get a P_NEG_ACK afterwards. */
+ drbd_set_out_of_sync(mdev, sector, size);
+ return true;
+ } else {
+ dev_err(DEV, "%s: failed to find req %p, sector %llus\n", __func__,
+ (void *)(unsigned long)p->block_id, (unsigned long long)sector);
+ return false;
+ }
+ }
+ __req_mod(req, neg_acked, &m);
+ spin_unlock_irq(&mdev->req_lock);
+
+ if (m.bio)
+ complete_master_bio(mdev, &m);
+ return true;
+}
+
+static int got_NegDReply(struct drbd_conf *mdev, struct p_header80 *h)
+{
+ struct p_block_ack *p = (struct p_block_ack *)h;
+ sector_t sector = be64_to_cpu(p->sector);
+
+ update_peer_seq(mdev, be32_to_cpu(p->seq_num));
+ dev_err(DEV, "Got NegDReply; Sector %llus, len %u; Fail original request.\n",
+ (unsigned long long)sector, be32_to_cpu(p->blksize));
+
+ return validate_req_change_req_state(mdev, p->block_id, sector,
+ _ar_id_to_req, __func__ , neg_acked);
+}
+
+static int got_NegRSDReply(struct drbd_conf *mdev, struct p_header80 *h)
+{
+ sector_t sector;
+ int size;
+ struct p_block_ack *p = (struct p_block_ack *)h;
+
+ sector = be64_to_cpu(p->sector);
+ size = be32_to_cpu(p->blksize);
+
+ update_peer_seq(mdev, be32_to_cpu(p->seq_num));
+
+ dec_rs_pending(mdev);
+
+ if (get_ldev_if_state(mdev, D_FAILED)) {
+ drbd_rs_complete_io(mdev, sector);
+ switch (be16_to_cpu(h->command)) {
+ case P_NEG_RS_DREPLY:
+ drbd_rs_failed_io(mdev, sector, size);
+ case P_RS_CANCEL:
+ break;
+ default:
+ D_ASSERT(0);
+ put_ldev(mdev);
+ return false;
+ }
+ put_ldev(mdev);
+ }
+
+ return true;
+}
+
+static int got_BarrierAck(struct drbd_conf *mdev, struct p_header80 *h)
+{
+ struct p_barrier_ack *p = (struct p_barrier_ack *)h;
+
+ tl_release(mdev, p->barrier, be32_to_cpu(p->set_size));
+
+ if (mdev->state.conn == C_AHEAD &&
+ atomic_read(&mdev->ap_in_flight) == 0 &&
+ !test_and_set_bit(AHEAD_TO_SYNC_SOURCE, &mdev->current_epoch->flags)) {
+ mdev->start_resync_timer.expires = jiffies + HZ;
+ add_timer(&mdev->start_resync_timer);
+ }
+
+ return true;
+}
+
+static int got_OVResult(struct drbd_conf *mdev, struct p_header80 *h)
+{
+ struct p_block_ack *p = (struct p_block_ack *)h;
+ struct drbd_work *w;
+ sector_t sector;
+ int size;
+
+ sector = be64_to_cpu(p->sector);
+ size = be32_to_cpu(p->blksize);
+
+ update_peer_seq(mdev, be32_to_cpu(p->seq_num));
+
+ if (be64_to_cpu(p->block_id) == ID_OUT_OF_SYNC)
+ drbd_ov_oos_found(mdev, sector, size);
+ else
+ ov_oos_print(mdev);
+
+ if (!get_ldev(mdev))
+ return true;
+
+ drbd_rs_complete_io(mdev, sector);
+ dec_rs_pending(mdev);
+
+ --mdev->ov_left;
+
+ /* let's advance progress step marks only for every other megabyte */
+ if ((mdev->ov_left & 0x200) == 0x200)
+ drbd_advance_rs_marks(mdev, mdev->ov_left);
+
+ if (mdev->ov_left == 0) {
+ w = kmalloc(sizeof(*w), GFP_NOIO);
+ if (w) {
+ w->cb = w_ov_finished;
+ drbd_queue_work_front(&mdev->data.work, w);
+ } else {
+ dev_err(DEV, "kmalloc(w) failed.");
+ ov_oos_print(mdev);
+ drbd_resync_finished(mdev);
+ }
+ }
+ put_ldev(mdev);
+ return true;
+}
+
+static int got_skip(struct drbd_conf *mdev, struct p_header80 *h)
+{
+ return true;
+}
+
+struct asender_cmd {
+ size_t pkt_size;
+ int (*process)(struct drbd_conf *mdev, struct p_header80 *h);
+};
+
+static struct asender_cmd *get_asender_cmd(int cmd)
+{
+ static struct asender_cmd asender_tbl[] = {
+ /* anything missing from this table is in
+ * the drbd_cmd_handler (drbd_default_handler) table,
+ * see the beginning of drbdd() */
+ [P_PING] = { sizeof(struct p_header80), got_Ping },
+ [P_PING_ACK] = { sizeof(struct p_header80), got_PingAck },
+ [P_RECV_ACK] = { sizeof(struct p_block_ack), got_BlockAck },
+ [P_WRITE_ACK] = { sizeof(struct p_block_ack), got_BlockAck },
+ [P_RS_WRITE_ACK] = { sizeof(struct p_block_ack), got_BlockAck },
+ [P_DISCARD_ACK] = { sizeof(struct p_block_ack), got_BlockAck },
+ [P_NEG_ACK] = { sizeof(struct p_block_ack), got_NegAck },
+ [P_NEG_DREPLY] = { sizeof(struct p_block_ack), got_NegDReply },
+ [P_NEG_RS_DREPLY] = { sizeof(struct p_block_ack), got_NegRSDReply},
+ [P_OV_RESULT] = { sizeof(struct p_block_ack), got_OVResult },
+ [P_BARRIER_ACK] = { sizeof(struct p_barrier_ack), got_BarrierAck },
+ [P_STATE_CHG_REPLY] = { sizeof(struct p_req_state_reply), got_RqSReply },
+ [P_RS_IS_IN_SYNC] = { sizeof(struct p_block_ack), got_IsInSync },
+ [P_DELAY_PROBE] = { sizeof(struct p_delay_probe93), got_skip },
+ [P_RS_CANCEL] = { sizeof(struct p_block_ack), got_NegRSDReply},
+ [P_MAX_CMD] = { 0, NULL },
+ };
+ if (cmd > P_MAX_CMD || asender_tbl[cmd].process == NULL)
+ return NULL;
+ return &asender_tbl[cmd];
+}
+
+int drbd_asender(struct drbd_thread *thi)
+{
+ struct drbd_conf *mdev = thi->mdev;
+ struct p_header80 *h = &mdev->meta.rbuf.header.h80;
+ struct asender_cmd *cmd = NULL;
+
+ int rv, len;
+ void *buf = h;
+ int received = 0;
+ int expect = sizeof(struct p_header80);
+ int empty;
+ int ping_timeout_active = 0;
+
+ sprintf(current->comm, "drbd%d_asender", mdev_to_minor(mdev));
+
+ current->policy = SCHED_RR; /* Make this a realtime task! */
+ current->rt_priority = 2; /* more important than all other tasks */
+
+ while (get_t_state(thi) == Running) {
+ drbd_thread_current_set_cpu(mdev);
+ if (test_and_clear_bit(SEND_PING, &mdev->flags)) {
+ ERR_IF(!drbd_send_ping(mdev)) goto reconnect;
+ mdev->meta.socket->sk->sk_rcvtimeo =
+ mdev->net_conf->ping_timeo*HZ/10;
+ ping_timeout_active = 1;
+ }
+
+ /* conditionally cork;
+ * it may hurt latency if we cork without much to send */
+ if (!mdev->net_conf->no_cork &&
+ 3 < atomic_read(&mdev->unacked_cnt))
+ drbd_tcp_cork(mdev->meta.socket);
+ while (1) {
+ clear_bit(SIGNAL_ASENDER, &mdev->flags);
+ flush_signals(current);
+ if (!drbd_process_done_ee(mdev))
+ goto reconnect;
+ /* to avoid race with newly queued ACKs */
+ set_bit(SIGNAL_ASENDER, &mdev->flags);
+ spin_lock_irq(&mdev->req_lock);
+ empty = list_empty(&mdev->done_ee);
+ spin_unlock_irq(&mdev->req_lock);
+ /* new ack may have been queued right here,
+ * but then there is also a signal pending,
+ * and we start over... */
+ if (empty)
+ break;
+ }
+ /* but unconditionally uncork unless disabled */
+ if (!mdev->net_conf->no_cork)
+ drbd_tcp_uncork(mdev->meta.socket);
+
+ /* short circuit, recv_msg would return EINTR anyways. */
+ if (signal_pending(current))
+ continue;
+
+ rv = drbd_recv_short(mdev, mdev->meta.socket,
+ buf, expect-received, 0);
+ clear_bit(SIGNAL_ASENDER, &mdev->flags);
+
+ flush_signals(current);
+
+ /* Note:
+ * -EINTR (on meta) we got a signal
+ * -EAGAIN (on meta) rcvtimeo expired
+ * -ECONNRESET other side closed the connection
+ * -ERESTARTSYS (on data) we got a signal
+ * rv < 0 other than above: unexpected error!
+ * rv == expected: full header or command
+ * rv < expected: "woken" by signal during receive
+ * rv == 0 : "connection shut down by peer"
+ */
+ if (likely(rv > 0)) {
+ received += rv;
+ buf += rv;
+ } else if (rv == 0) {
+ dev_err(DEV, "meta connection shut down by peer.\n");
+ goto reconnect;
+ } else if (rv == -EAGAIN) {
+ /* If the data socket received something meanwhile,
+ * that is good enough: peer is still alive. */
+ if (time_after(mdev->last_received,
+ jiffies - mdev->meta.socket->sk->sk_rcvtimeo))
+ continue;
+ if (ping_timeout_active) {
+ dev_err(DEV, "PingAck did not arrive in time.\n");
+ goto reconnect;
+ }
+ set_bit(SEND_PING, &mdev->flags);
+ continue;
+ } else if (rv == -EINTR) {
+ continue;
+ } else {
+ dev_err(DEV, "sock_recvmsg returned %d\n", rv);
+ goto reconnect;
+ }
+
+ if (received == expect && cmd == NULL) {
+ if (unlikely(h->magic != BE_DRBD_MAGIC)) {
+ dev_err(DEV, "magic?? on meta m: 0x%08x c: %d l: %d\n",
+ be32_to_cpu(h->magic),
+ be16_to_cpu(h->command),
+ be16_to_cpu(h->length));
+ goto reconnect;
+ }
+ cmd = get_asender_cmd(be16_to_cpu(h->command));
+ len = be16_to_cpu(h->length);
+ if (unlikely(cmd == NULL)) {
+ dev_err(DEV, "unknown command?? on meta m: 0x%08x c: %d l: %d\n",
+ be32_to_cpu(h->magic),
+ be16_to_cpu(h->command),
+ be16_to_cpu(h->length));
+ goto disconnect;
+ }
+ expect = cmd->pkt_size;
+ ERR_IF(len != expect-sizeof(struct p_header80))
+ goto reconnect;
+ }
+ if (received == expect) {
+ mdev->last_received = jiffies;
+ D_ASSERT(cmd != NULL);
+ if (!cmd->process(mdev, h))
+ goto reconnect;
+
+ /* the idle_timeout (ping-int)
+ * has been restored in got_PingAck() */
+ if (cmd == get_asender_cmd(P_PING_ACK))
+ ping_timeout_active = 0;
+
+ buf = h;
+ received = 0;
+ expect = sizeof(struct p_header80);
+ cmd = NULL;
+ }
+ }
+
+ if (0) {
+reconnect:
+ drbd_force_state(mdev, NS(conn, C_NETWORK_FAILURE));
+ drbd_md_sync(mdev);
+ }
+ if (0) {
+disconnect:
+ drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
+ drbd_md_sync(mdev);
+ }
+ clear_bit(SIGNAL_ASENDER, &mdev->flags);
+
+ D_ASSERT(mdev->state.conn < C_CONNECTED);
+ dev_info(DEV, "asender terminated\n");
+
+ return 0;
+}
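The cork/uncork pair above batches many small ACK packets into fewer TCP segments while done_ee is drained. With plain userspace sockets the same batching uses the TCP_CORK option; a sketch of such a helper (a fragment, assuming an already-connected TCP socket, not the driver's drbd_tcp_cork()):

#include <netinet/in.h>
#include <netinet/tcp.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

/* Batch several small writes on a connected TCP socket into fewer
 * segments: cork, queue the writes, then uncork to flush what is pending. */
void send_batched(int fd, const char *msgs[], int n)
{
	int on = 1, off = 0, i;

	setsockopt(fd, IPPROTO_TCP, TCP_CORK, &on, sizeof(on));
	for (i = 0; i < n; i++)
		write(fd, msgs[i], strlen(msgs[i]));
	setsockopt(fd, IPPROTO_TCP, TCP_CORK, &off, sizeof(off));
}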
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
new file mode 100644
index 00000000..3424d675
--- /dev/null
+++ b/drivers/block/drbd/drbd_req.c
@@ -0,0 +1,1231 @@
+/*
+ drbd_req.c
+
+ This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
+
+ Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
+ Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
+ Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
+
+ drbd is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ drbd is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with drbd; see the file COPYING. If not, write to
+ the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ */
+
+#include <linux/module.h>
+
+#include <linux/slab.h>
+#include <linux/drbd.h>
+#include "drbd_int.h"
+#include "drbd_req.h"
+
+
+/* Update disk stats at start of I/O request */
+static void _drbd_start_io_acct(struct drbd_conf *mdev, struct drbd_request *req, struct bio *bio)
+{
+ const int rw = bio_data_dir(bio);
+ int cpu;
+ cpu = part_stat_lock();
+ part_stat_inc(cpu, &mdev->vdisk->part0, ios[rw]);
+ part_stat_add(cpu, &mdev->vdisk->part0, sectors[rw], bio_sectors(bio));
+ part_inc_in_flight(&mdev->vdisk->part0, rw);
+ part_stat_unlock();
+}
+
+/* Update disk stats when completing request upwards */
+static void _drbd_end_io_acct(struct drbd_conf *mdev, struct drbd_request *req)
+{
+ int rw = bio_data_dir(req->master_bio);
+ unsigned long duration = jiffies - req->start_time;
+ int cpu;
+ cpu = part_stat_lock();
+ part_stat_add(cpu, &mdev->vdisk->part0, ticks[rw], duration);
+ part_round_stats(cpu, &mdev->vdisk->part0);
+ part_dec_in_flight(&mdev->vdisk->part0, rw);
+ part_stat_unlock();
+}
+
+static void _req_is_done(struct drbd_conf *mdev, struct drbd_request *req, const int rw)
+{
+ const unsigned long s = req->rq_state;
+
+ /* remove it from the transfer log.
+ * well, only if it had been there in the first
+ * place... if it had not (local only or conflicting
+ * and never sent), it should still be "empty" as
+ * initialized in drbd_req_new(), so we can list_del() it
+ * here unconditionally */
+ list_del(&req->tl_requests);
+
+ /* if it was a write, we may have to set the corresponding
+ * bit(s) out-of-sync first. If it had a local part, we need to
+ * release the reference to the activity log. */
+ if (rw == WRITE) {
+ /* Set out-of-sync unless both OK flags are set
+ * (local only or remote failed).
+ * Other places where we set out-of-sync:
+ * READ with local io-error */
+ if (!(s & RQ_NET_OK) || !(s & RQ_LOCAL_OK))
+ drbd_set_out_of_sync(mdev, req->sector, req->size);
+
+ if ((s & RQ_NET_OK) && (s & RQ_LOCAL_OK) && (s & RQ_NET_SIS))
+ drbd_set_in_sync(mdev, req->sector, req->size);
+
+ /* one might be tempted to move the drbd_al_complete_io
+ * to the local io completion callback drbd_endio_pri.
+ * but, if this was a mirror write, we may only
+ * drbd_al_complete_io after this is RQ_NET_DONE,
+ * otherwise the extent could be dropped from the al
+ * before it has actually been written on the peer.
+ * if we crash before our peer knows about the request,
+ * but after the extent has been dropped from the al,
+ * we would forget to resync the corresponding extent.
+ */
+ if (s & RQ_LOCAL_MASK) {
+ if (get_ldev_if_state(mdev, D_FAILED)) {
+ if (s & RQ_IN_ACT_LOG)
+ drbd_al_complete_io(mdev, req->sector);
+ put_ldev(mdev);
+ } else if (__ratelimit(&drbd_ratelimit_state)) {
+ dev_warn(DEV, "Should have called drbd_al_complete_io(, %llu), "
+ "but my Disk seems to have failed :(\n",
+ (unsigned long long) req->sector);
+ }
+ }
+ }
+
+ drbd_req_free(req);
+}
+
+static void queue_barrier(struct drbd_conf *mdev)
+{
+ struct drbd_tl_epoch *b;
+
+ /* We are within the req_lock. Once we queued the barrier for sending,
+ * we set the CREATE_BARRIER bit. It is cleared as soon as a new
+ * barrier/epoch object is added. This is the only place this bit is
+ * set. It indicates that the barrier for this epoch is already queued,
+ * and no new epoch has been created yet. */
+ if (test_bit(CREATE_BARRIER, &mdev->flags))
+ return;
+
+ b = mdev->newest_tle;
+ b->w.cb = w_send_barrier;
+ /* inc_ap_pending done here, so we won't
+ * get imbalanced on connection loss.
+ * dec_ap_pending will be done in got_BarrierAck
+ * or (on connection loss) in tl_clear. */
+ inc_ap_pending(mdev);
+ drbd_queue_work(&mdev->data.work, &b->w);
+ set_bit(CREATE_BARRIER, &mdev->flags);
+}
+
+static void _about_to_complete_local_write(struct drbd_conf *mdev,
+ struct drbd_request *req)
+{
+ const unsigned long s = req->rq_state;
+ struct drbd_request *i;
+ struct drbd_epoch_entry *e;
+ struct hlist_node *n;
+ struct hlist_head *slot;
+
+ /* Before we can signal completion to the upper layers,
+ * we may need to close the current epoch.
+ * We can skip this, if this request has not even been sent, because we
+ * did not have a fully established connection yet/anymore, during
+ * bitmap exchange, or while we are C_AHEAD due to congestion policy.
+ */
+ if (mdev->state.conn >= C_CONNECTED &&
+ (s & RQ_NET_SENT) != 0 &&
+ req->epoch == mdev->newest_tle->br_number)
+ queue_barrier(mdev);
+
+ /* we need to do the conflict detection stuff,
+ * if we have the ee_hash (two_primaries) and
+ * this has been on the network */
+ if ((s & RQ_NET_DONE) && mdev->ee_hash != NULL) {
+ const sector_t sector = req->sector;
+ const int size = req->size;
+
+ /* ASSERT:
+ * there must be no conflicting requests, since
+ * they must have been failed on the spot */
+#define OVERLAPS overlaps(sector, size, i->sector, i->size)
+ slot = tl_hash_slot(mdev, sector);
+ hlist_for_each_entry(i, n, slot, collision) {
+ if (OVERLAPS) {
+ dev_alert(DEV, "LOGIC BUG: completed: %p %llus +%u; "
+ "other: %p %llus +%u\n",
+ req, (unsigned long long)sector, size,
+ i, (unsigned long long)i->sector, i->size);
+ }
+ }
+
+ /* maybe "wake" those conflicting epoch entries
+ * that wait for this request to finish.
+ *
+ * currently, there can be only _one_ such ee
+ * (well, or some more, which would be pending
+ * P_DISCARD_ACK not yet sent by the asender...),
+ * since we block the receiver thread upon the
+ * first conflict detection, which will wait on
+ * misc_wait. maybe we want to assert that?
+ *
+ * anyways, if we found one,
+ * we just have to do a wake_up. */
+#undef OVERLAPS
+#define OVERLAPS overlaps(sector, size, e->sector, e->size)
+ slot = ee_hash_slot(mdev, req->sector);
+ hlist_for_each_entry(e, n, slot, collision) {
+ if (OVERLAPS) {
+ wake_up(&mdev->misc_wait);
+ break;
+ }
+ }
+ }
+#undef OVERLAPS
+}
+
+void complete_master_bio(struct drbd_conf *mdev,
+ struct bio_and_error *m)
+{
+ bio_endio(m->bio, m->error);
+ dec_ap_bio(mdev);
+}
+
+/* Helper for __req_mod().
+ * Set m->bio to the master bio, if it is fit to be completed,
+ * or leave it alone (it is initialized to NULL in __req_mod),
+ * if it has already been completed, or cannot be completed yet.
+ * If m->bio is set, the error status to be returned is placed in m->error.
+ */
+void _req_may_be_done(struct drbd_request *req, struct bio_and_error *m)
+{
+ const unsigned long s = req->rq_state;
+ struct drbd_conf *mdev = req->mdev;
+ /* only WRITES may end up here without a master bio (on barrier ack) */
+ int rw = req->master_bio ? bio_data_dir(req->master_bio) : WRITE;
+
+ /* we must not complete the master bio, while it is
+ * still being processed by _drbd_send_zc_bio (drbd_send_dblock)
+ * not yet acknowledged by the peer
+ * not yet completed by the local io subsystem
+ * these flags may get cleared in any order by
+ * the worker,
+ * the receiver,
+ * the bio_endio completion callbacks.
+ */
+ if (s & RQ_NET_QUEUED)
+ return;
+ if (s & RQ_NET_PENDING)
+ return;
+ if (s & RQ_LOCAL_PENDING)
+ return;
+
+ if (req->master_bio) {
+ /* this is data_received (remote read)
+ * or protocol C P_WRITE_ACK
+ * or protocol B P_RECV_ACK
+ * or protocol A "handed_over_to_network" (SendAck)
+ * or canceled or failed,
+ * or killed from the transfer log due to connection loss.
+ */
+
+ /*
+ * figure out whether to report success or failure.
+ *
+ * report success when at least one of the operations succeeded.
+ * or, to put the other way,
+ * only report failure, when both operations failed.
+ *
+ * what to do about the failures is handled elsewhere.
+ * what we need to do here is just: complete the master_bio.
+ *
+ * local completion error, if any, has been stored as ERR_PTR
+ * in private_bio within drbd_endio_pri.
+ */
+ int ok = (s & RQ_LOCAL_OK) || (s & RQ_NET_OK);
+ int error = PTR_ERR(req->private_bio);
+
+ /* remove the request from the conflict detection
+ * respective block_id verification hash */
+ if (!hlist_unhashed(&req->collision))
+ hlist_del(&req->collision);
+ else
+ D_ASSERT((s & (RQ_NET_MASK & ~RQ_NET_DONE)) == 0);
+
+ /* for writes we need to do some extra housekeeping */
+ if (rw == WRITE)
+ _about_to_complete_local_write(mdev, req);
+
+ /* Update disk stats */
+ _drbd_end_io_acct(mdev, req);
+
+ m->error = ok ? 0 : (error ?: -EIO);
+ m->bio = req->master_bio;
+ req->master_bio = NULL;
+ }
+
+ if ((s & RQ_NET_MASK) == 0 || (s & RQ_NET_DONE)) {
+ /* this is disconnected (local only) operation,
+ * or protocol C P_WRITE_ACK,
+ * or protocol A or B P_BARRIER_ACK,
+ * or killed from the transfer log due to connection loss. */
+ _req_is_done(mdev, req, rw);
+ }
+ /* else: network part and not DONE yet. that is
+ * protocol A or B, barrier ack still pending... */
+}
+
+static void _req_may_be_done_not_susp(struct drbd_request *req, struct bio_and_error *m)
+{
+ struct drbd_conf *mdev = req->mdev;
+
+ if (!is_susp(mdev->state))
+ _req_may_be_done(req, m);
+}
+
+/*
+ * checks whether there was an overlapping request
+ * or ee already registered.
+ *
+ * if so, return 1, in which case this request is completed on the spot,
+ * without ever being submitted or sent.
+ *
+ * return 0 if it is ok to submit this request.
+ *
+ * NOTE:
+ * paranoia: assume something above us is broken, and issues different write
+ * requests for the same block simultaneously...
+ *
+ * To ensure these won't be reordered differently on both nodes, resulting in
+ * diverging data sets, we discard the later one(s). Not that this is supposed
+ * to happen, but this is the rationale why we also have to check for
+ * conflicting requests with local origin, and why we have to do so regardless
+ * of whether we allowed multiple primaries.
+ *
+ * BTW, in case we only have one primary, the ee_hash is empty anyways, and the
+ * second hlist_for_each_entry becomes a noop. This is even simpler than to
+ * grab a reference on the net_conf, and check for the two_primaries flag...
+ */
+static int _req_conflicts(struct drbd_request *req)
+{
+ struct drbd_conf *mdev = req->mdev;
+ const sector_t sector = req->sector;
+ const int size = req->size;
+ struct drbd_request *i;
+ struct drbd_epoch_entry *e;
+ struct hlist_node *n;
+ struct hlist_head *slot;
+
+ D_ASSERT(hlist_unhashed(&req->collision));
+
+ if (!get_net_conf(mdev))
+ return 0;
+
+ /* BUG_ON */
+ ERR_IF (mdev->tl_hash_s == 0)
+ goto out_no_conflict;
+ BUG_ON(mdev->tl_hash == NULL);
+
+#define OVERLAPS overlaps(i->sector, i->size, sector, size)
+ slot = tl_hash_slot(mdev, sector);
+ hlist_for_each_entry(i, n, slot, collision) {
+ if (OVERLAPS) {
+ dev_alert(DEV, "%s[%u] Concurrent local write detected! "
+ "[DISCARD L] new: %llus +%u; "
+ "pending: %llus +%u\n",
+ current->comm, current->pid,
+ (unsigned long long)sector, size,
+ (unsigned long long)i->sector, i->size);
+ goto out_conflict;
+ }
+ }
+
+ if (mdev->ee_hash_s) {
+ /* now, check for overlapping requests with remote origin */
+ BUG_ON(mdev->ee_hash == NULL);
+#undef OVERLAPS
+#define OVERLAPS overlaps(e->sector, e->size, sector, size)
+ slot = ee_hash_slot(mdev, sector);
+ hlist_for_each_entry(e, n, slot, collision) {
+ if (OVERLAPS) {
+ dev_alert(DEV, "%s[%u] Concurrent remote write detected!"
+ " [DISCARD L] new: %llus +%u; "
+ "pending: %llus +%u\n",
+ current->comm, current->pid,
+ (unsigned long long)sector, size,
+ (unsigned long long)e->sector, e->size);
+ goto out_conflict;
+ }
+ }
+ }
+#undef OVERLAPS
+
+out_no_conflict:
+ /* this is like it should be, and what we expected.
+ * our users do behave after all... */
+ put_net_conf(mdev);
+ return 0;
+
+out_conflict:
+ put_net_conf(mdev);
+ return 1;
+}
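The OVERLAPS macros used above amount to an interval-intersection test on (start sector, size in bytes) pairs. The driver's overlaps() macro is defined elsewhere (drbd_int.h); the standalone sketch below only illustrates the test:

#include <stdio.h>

typedef unsigned long long sector_t;

/* Two byte ranges, each given as a start sector plus a size in bytes,
 * intersect iff each one starts before the other one ends. */
static int ranges_overlap(sector_t s1, int size1, sector_t s2, int size2)
{
	sector_t e1 = s1 + (size1 >> 9);	/* exclusive end, in sectors */
	sector_t e2 = s2 + (size2 >> 9);

	return s1 < e2 && s2 < e1;
}

int main(void)
{
	printf("%d\n", ranges_overlap(262269, 4096, 262272, 4096));	/* 1: shared sectors */
	printf("%d\n", ranges_overlap(0, 4096, 8, 4096));		/* 0: adjacent only */
	return 0;
}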
+
+/* obviously this could be coded as many single functions
+ * instead of one huge switch,
+ * or by putting the code directly in the respective locations
+ * (as it has been before).
+ *
+ * but having it this way
+ * enforces that it is all in this one place, where it is easier to audit,
+ * it makes it obvious that whatever "event" "happens" to a request should
+ * happen "atomically" within the req_lock,
+ * and it enforces that we have to think in a very structured manner
+ * about the "events" that may happen to a request during its life time ...
+ */
+int __req_mod(struct drbd_request *req, enum drbd_req_event what,
+ struct bio_and_error *m)
+{
+ struct drbd_conf *mdev = req->mdev;
+ int rv = 0;
+ m->bio = NULL;
+
+ switch (what) {
+ default:
+ dev_err(DEV, "LOGIC BUG in %s:%u\n", __FILE__ , __LINE__);
+ break;
+
+ /* does not happen...
+ * initialization done in drbd_req_new
+ case created:
+ break;
+ */
+
+ case to_be_send: /* via network */
+ /* reached via drbd_make_request_common
+ * and from w_read_retry_remote */
+ D_ASSERT(!(req->rq_state & RQ_NET_MASK));
+ req->rq_state |= RQ_NET_PENDING;
+ inc_ap_pending(mdev);
+ break;
+
+ case to_be_submitted: /* locally */
+ /* reached via drbd_make_request_common */
+ D_ASSERT(!(req->rq_state & RQ_LOCAL_MASK));
+ req->rq_state |= RQ_LOCAL_PENDING;
+ break;
+
+ case completed_ok:
+ if (bio_data_dir(req->master_bio) == WRITE)
+ mdev->writ_cnt += req->size>>9;
+ else
+ mdev->read_cnt += req->size>>9;
+
+ req->rq_state |= (RQ_LOCAL_COMPLETED|RQ_LOCAL_OK);
+ req->rq_state &= ~RQ_LOCAL_PENDING;
+
+ _req_may_be_done_not_susp(req, m);
+ put_ldev(mdev);
+ break;
+
+ case write_completed_with_error:
+ req->rq_state |= RQ_LOCAL_COMPLETED;
+ req->rq_state &= ~RQ_LOCAL_PENDING;
+
+ __drbd_chk_io_error(mdev, false);
+ _req_may_be_done_not_susp(req, m);
+ put_ldev(mdev);
+ break;
+
+ case read_ahead_completed_with_error:
+ /* it is legal to fail READA */
+ req->rq_state |= RQ_LOCAL_COMPLETED;
+ req->rq_state &= ~RQ_LOCAL_PENDING;
+ _req_may_be_done_not_susp(req, m);
+ put_ldev(mdev);
+ break;
+
+ case read_completed_with_error:
+ drbd_set_out_of_sync(mdev, req->sector, req->size);
+
+ req->rq_state |= RQ_LOCAL_COMPLETED;
+ req->rq_state &= ~RQ_LOCAL_PENDING;
+
+ D_ASSERT(!(req->rq_state & RQ_NET_MASK));
+
+ __drbd_chk_io_error(mdev, false);
+ put_ldev(mdev);
+
+ /* no point in retrying if there is no good remote data,
+ * or we have no connection. */
+ if (mdev->state.pdsk != D_UP_TO_DATE) {
+ _req_may_be_done_not_susp(req, m);
+ break;
+ }
+
+ /* _req_mod(req,to_be_send); oops, recursion... */
+ req->rq_state |= RQ_NET_PENDING;
+ inc_ap_pending(mdev);
+ /* fall through: _req_mod(req,queue_for_net_read); */
+
+ case queue_for_net_read:
+ /* READ or READA, and
+ * no local disk,
+ * or target area marked as invalid,
+ * or just got an io-error. */
+ /* from drbd_make_request_common
+ * or from bio_endio during read io-error recovery */
+
+ /* so we can verify the handle in the answer packet
+ * corresponding hlist_del is in _req_may_be_done() */
+ hlist_add_head(&req->collision, ar_hash_slot(mdev, req->sector));
+
+ set_bit(UNPLUG_REMOTE, &mdev->flags);
+
+ D_ASSERT(req->rq_state & RQ_NET_PENDING);
+ req->rq_state |= RQ_NET_QUEUED;
+ req->w.cb = (req->rq_state & RQ_LOCAL_MASK)
+ ? w_read_retry_remote
+ : w_send_read_req;
+ drbd_queue_work(&mdev->data.work, &req->w);
+ break;
+
+ case queue_for_net_write:
+ /* assert something? */
+ /* from drbd_make_request_common only */
+
+ hlist_add_head(&req->collision, tl_hash_slot(mdev, req->sector));
+ /* corresponding hlist_del is in _req_may_be_done() */
+
+ /* NOTE
+ * In case the req ended up on the transfer log before being
+ * queued on the worker, it could lead to this request being
+ * missed during cleanup after connection loss.
+ * So we have to do both operations here,
+ * within the same lock that protects the transfer log.
+ *
+ * _req_add_to_epoch(req); this has to be after the
+ * _maybe_start_new_epoch(req); which happened in
+ * drbd_make_request_common, because we now may set the bit
+ * again ourselves to close the current epoch.
+ *
+ * Add req to the (now) current epoch (barrier). */
+
+ /* otherwise we may lose an unplug, which may cause some remote
+ * io-scheduler timeout to expire, increasing maximum latency,
+ * hurting performance. */
+ set_bit(UNPLUG_REMOTE, &mdev->flags);
+
+ /* see drbd_make_request_common,
+ * just after it grabs the req_lock */
+ D_ASSERT(test_bit(CREATE_BARRIER, &mdev->flags) == 0);
+
+ req->epoch = mdev->newest_tle->br_number;
+
+ /* increment size of current epoch */
+ mdev->newest_tle->n_writes++;
+
+ /* queue work item to send data */
+ D_ASSERT(req->rq_state & RQ_NET_PENDING);
+ req->rq_state |= RQ_NET_QUEUED;
+ req->w.cb = w_send_dblock;
+ drbd_queue_work(&mdev->data.work, &req->w);
+
+ /* close the epoch, in case it outgrew the limit */
+ if (mdev->newest_tle->n_writes >= mdev->net_conf->max_epoch_size)
+ queue_barrier(mdev);
+
+ break;
+
+ case queue_for_send_oos:
+ req->rq_state |= RQ_NET_QUEUED;
+ req->w.cb = w_send_oos;
+ drbd_queue_work(&mdev->data.work, &req->w);
+ break;
+
+ case oos_handed_to_network:
+ /* actually the same */
+ case send_canceled:
+ /* treat it the same */
+ case send_failed:
+ /* real cleanup will be done from tl_clear. just update flags
+ * so it is no longer marked as on the worker queue */
+ req->rq_state &= ~RQ_NET_QUEUED;
+ /* if we did it right, tl_clear should be scheduled only after
+ * this, so this should not be necessary! */
+ _req_may_be_done_not_susp(req, m);
+ break;
+
+ case handed_over_to_network:
+ /* assert something? */
+ if (bio_data_dir(req->master_bio) == WRITE)
+ atomic_add(req->size>>9, &mdev->ap_in_flight);
+
+ if (bio_data_dir(req->master_bio) == WRITE &&
+ mdev->net_conf->wire_protocol == DRBD_PROT_A) {
+ /* this is what is dangerous about protocol A:
+ * pretend it was successfully written on the peer. */
+ if (req->rq_state & RQ_NET_PENDING) {
+ dec_ap_pending(mdev);
+ req->rq_state &= ~RQ_NET_PENDING;
+ req->rq_state |= RQ_NET_OK;
+ } /* else: neg-ack was faster... */
+ /* it is still not yet RQ_NET_DONE until the
+ * corresponding epoch barrier got acked as well,
+ * so we know what to dirty on connection loss */
+ }
+ req->rq_state &= ~RQ_NET_QUEUED;
+ req->rq_state |= RQ_NET_SENT;
+ /* because _drbd_send_zc_bio could sleep, and may want to
+ * dereference the bio even after the "write_acked_by_peer" and
+ * "completed_ok" events came in, once we return from
+ * _drbd_send_zc_bio (drbd_send_dblock), we have to check
+ * whether it is done already, and end it. */
+ _req_may_be_done_not_susp(req, m);
+ break;
+
+ case read_retry_remote_canceled:
+ req->rq_state &= ~RQ_NET_QUEUED;
+ /* fall through, in case we raced with drbd_disconnect */
+ case connection_lost_while_pending:
+ /* transfer log cleanup after connection loss */
+ /* assert something? */
+ if (req->rq_state & RQ_NET_PENDING)
+ dec_ap_pending(mdev);
+ req->rq_state &= ~(RQ_NET_OK|RQ_NET_PENDING);
+ req->rq_state |= RQ_NET_DONE;
+ if (req->rq_state & RQ_NET_SENT && req->rq_state & RQ_WRITE)
+ atomic_sub(req->size>>9, &mdev->ap_in_flight);
+
+ /* if it is still queued, we may not complete it here.
+ * it will be canceled soon. */
+ if (!(req->rq_state & RQ_NET_QUEUED))
+ _req_may_be_done(req, m); /* Allowed while state.susp */
+ break;
+
+ case write_acked_by_peer_and_sis:
+ req->rq_state |= RQ_NET_SIS;
+ case conflict_discarded_by_peer:
+ /* for discarded conflicting writes of multiple primaries,
+ * there is no need to keep anything in the tl, potential
+ * node crashes are covered by the activity log. */
+ if (what == conflict_discarded_by_peer)
+ dev_alert(DEV, "Got DiscardAck packet %llus +%u!"
+ " DRBD is not a random data generator!\n",
+ (unsigned long long)req->sector, req->size);
+ req->rq_state |= RQ_NET_DONE;
+ /* fall through */
+ case write_acked_by_peer:
+ /* protocol C; successfully written on peer.
+ * Nothing to do here.
+ * We want to keep the tl in place for all protocols, to cater
+ * for volatile write-back caches on lower level devices.
+ *
+ * A barrier request is expected to have forced all prior
+ * requests onto stable storage, so completion of a barrier
+ * request could set NET_DONE right here, and not wait for the
+ * P_BARRIER_ACK, but that is an unnecessary optimization. */
+
+ /* this makes it effectively the same as for: */
+ case recv_acked_by_peer:
+ /* protocol B; pretends to be successfully written on peer.
+ * see also notes above in handed_over_to_network about
+ * protocol != C */
+ req->rq_state |= RQ_NET_OK;
+ D_ASSERT(req->rq_state & RQ_NET_PENDING);
+ dec_ap_pending(mdev);
+ atomic_sub(req->size>>9, &mdev->ap_in_flight);
+ req->rq_state &= ~RQ_NET_PENDING;
+ _req_may_be_done_not_susp(req, m);
+ break;
+
+ case neg_acked:
+ /* assert something? */
+ if (req->rq_state & RQ_NET_PENDING) {
+ dec_ap_pending(mdev);
+ atomic_sub(req->size>>9, &mdev->ap_in_flight);
+ }
+ req->rq_state &= ~(RQ_NET_OK|RQ_NET_PENDING);
+
+ req->rq_state |= RQ_NET_DONE;
+ _req_may_be_done_not_susp(req, m);
+ /* else: done by handed_over_to_network */
+ break;
+
+ case fail_frozen_disk_io:
+ if (!(req->rq_state & RQ_LOCAL_COMPLETED))
+ break;
+
+ _req_may_be_done(req, m); /* Allowed while state.susp */
+ break;
+
+ case restart_frozen_disk_io:
+ if (!(req->rq_state & RQ_LOCAL_COMPLETED))
+ break;
+
+ req->rq_state &= ~RQ_LOCAL_COMPLETED;
+
+ rv = MR_READ;
+ if (bio_data_dir(req->master_bio) == WRITE)
+ rv = MR_WRITE;
+
+ get_ldev(mdev);
+ req->w.cb = w_restart_disk_io;
+ drbd_queue_work(&mdev->data.work, &req->w);
+ break;
+
+ case resend:
+ /* If RQ_NET_OK is already set, we got a P_WRITE_ACK or P_RECV_ACK
+ before the connection loss (B&C only); only P_BARRIER_ACK was missing.
+ Throwing them out of the TL here by pretending we got a P_BARRIER_ACK,
+ we ensure that the peer was not rebooted. */
+ if (!(req->rq_state & RQ_NET_OK)) {
+ if (req->w.cb) {
+ drbd_queue_work(&mdev->data.work, &req->w);
+ rv = req->rq_state & RQ_WRITE ? MR_WRITE : MR_READ;
+ }
+ break;
+ }
+ /* else, fall through to barrier_acked */
+
+ case barrier_acked:
+ if (!(req->rq_state & RQ_WRITE))
+ break;
+
+ if (req->rq_state & RQ_NET_PENDING) {
+ /* barrier came in before all requests have been acked.
+ * this is bad, because if the connection is lost now,
+ * we won't be able to clean them up... */
+ dev_err(DEV, "FIXME (barrier_acked but pending)\n");
+ list_move(&req->tl_requests, &mdev->out_of_sequence_requests);
+ }
+ if ((req->rq_state & RQ_NET_MASK) != 0) {
+ req->rq_state |= RQ_NET_DONE;
+ if (mdev->net_conf->wire_protocol == DRBD_PROT_A)
+ atomic_sub(req->size>>9, &mdev->ap_in_flight);
+ }
+ _req_may_be_done(req, m); /* Allowed while state.susp */
+ break;
+
+ case data_received:
+ D_ASSERT(req->rq_state & RQ_NET_PENDING);
+ dec_ap_pending(mdev);
+ req->rq_state &= ~RQ_NET_PENDING;
+ req->rq_state |= (RQ_NET_OK|RQ_NET_DONE);
+ _req_may_be_done_not_susp(req, m);
+ break;
+ };
+
+ return rv;
+}
+
+/* we may do a local read if:
+ * - we are consistent (of course),
+ * - or we are generally inconsistent,
+ * BUT we are still/already IN SYNC for this area.
+ * since size may be bigger than BM_BLOCK_SIZE,
+ * we may need to check several bits.
+ */
+static int drbd_may_do_local_read(struct drbd_conf *mdev, sector_t sector, int size)
+{
+ unsigned long sbnr, ebnr;
+ sector_t esector, nr_sectors;
+
+ if (mdev->state.disk == D_UP_TO_DATE)
+ return 1;
+ if (mdev->state.disk >= D_OUTDATED)
+ return 0;
+ if (mdev->state.disk < D_INCONSISTENT)
+ return 0;
+ /* state.disk == D_INCONSISTENT: we will have a look at the bitmap */
+ nr_sectors = drbd_get_capacity(mdev->this_bdev);
+ esector = sector + (size >> 9) - 1;
+
+ D_ASSERT(sector < nr_sectors);
+ D_ASSERT(esector < nr_sectors);
+
+ sbnr = BM_SECT_TO_BIT(sector);
+ ebnr = BM_SECT_TO_BIT(esector);
+
+ return 0 == drbd_bm_count_bits(mdev, sbnr, ebnr);
+}
+
+static int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio, unsigned long start_time)
+{
+ const int rw = bio_rw(bio);
+ const int size = bio->bi_size;
+ const sector_t sector = bio->bi_sector;
+ struct drbd_tl_epoch *b = NULL;
+ struct drbd_request *req;
+ int local, remote, send_oos = 0;
+ int err = -EIO;
+ int ret = 0;
+
+ /* allocate outside of all locks; */
+ req = drbd_req_new(mdev, bio);
+ if (!req) {
+ dec_ap_bio(mdev);
+ /* only pass the error to the upper layers.
+ * if user cannot handle io errors, that's not our business. */
+ dev_err(DEV, "could not kmalloc() req\n");
+ bio_endio(bio, -ENOMEM);
+ return 0;
+ }
+ req->start_time = start_time;
+
+ local = get_ldev(mdev);
+ if (!local) {
+ bio_put(req->private_bio); /* or we get a bio leak */
+ req->private_bio = NULL;
+ }
+ if (rw == WRITE) {
+ remote = 1;
+ } else {
+ /* READ || READA */
+ if (local) {
+ if (!drbd_may_do_local_read(mdev, sector, size)) {
+ /* we could kick the syncer to
+ * sync this extent asap, wait for
+ * it, then continue locally.
+ * Or just issue the request remotely.
+ */
+ local = 0;
+ bio_put(req->private_bio);
+ req->private_bio = NULL;
+ put_ldev(mdev);
+ }
+ }
+ remote = !local && mdev->state.pdsk >= D_UP_TO_DATE;
+ }
+
+ /* If we have a disk, but a READA request is mapped to remote,
+ * we are R_PRIMARY, D_INCONSISTENT, SyncTarget.
+ * Just fail that READA request right here.
+ *
+ * THINK: maybe fail all READA when not local?
+ * or make this configurable...
+ * if network is slow, READA won't do any good.
+ */
+ if (rw == READA && mdev->state.disk >= D_INCONSISTENT && !local) {
+ err = -EWOULDBLOCK;
+ goto fail_and_free_req;
+ }
+
+ /* For WRITES going to the local disk, grab a reference on the target
+ * extent. This waits for any resync activity in the corresponding
+ * resync extent to finish, and, if necessary, pulls in the target
+ * extent into the activity log, which involves further disk io because
+ * of transactional on-disk meta data updates. */
+ if (rw == WRITE && local && !test_bit(AL_SUSPENDED, &mdev->flags)) {
+ req->rq_state |= RQ_IN_ACT_LOG;
+ drbd_al_begin_io(mdev, sector);
+ }
+
+ remote = remote && drbd_should_do_remote(mdev->state);
+ send_oos = rw == WRITE && drbd_should_send_oos(mdev->state);
+ D_ASSERT(!(remote && send_oos));
+
+ if (!(local || remote) && !is_susp(mdev->state)) {
+ if (__ratelimit(&drbd_ratelimit_state))
+ dev_err(DEV, "IO ERROR: neither local nor remote disk\n");
+ goto fail_free_complete;
+ }
+
+ /* For WRITE request, we have to make sure that we have an
+ * unused_spare_tle, in case we need to start a new epoch.
+ * I try to be smart and avoid always pre-allocating "just in case",
+ * but there is a race between testing the bit and pointer outside the
+ * spinlock, and grabbing the spinlock.
+ * if we lost that race, we retry. */
+ if (rw == WRITE && (remote || send_oos) &&
+ mdev->unused_spare_tle == NULL &&
+ test_bit(CREATE_BARRIER, &mdev->flags)) {
+allocate_barrier:
+ b = kmalloc(sizeof(struct drbd_tl_epoch), GFP_NOIO);
+ if (!b) {
+ dev_err(DEV, "Failed to alloc barrier.\n");
+ err = -ENOMEM;
+ goto fail_free_complete;
+ }
+ }
+
+ /* GOOD, everything prepared, grab the spin_lock */
+ spin_lock_irq(&mdev->req_lock);
+
+ if (is_susp(mdev->state)) {
+ /* If we got suspended, use the retry mechanism of
+ generic_make_request() to restart processing of this
+ bio. In the next call to drbd_make_request
+ we sleep in inc_ap_bio() */
+ ret = 1;
+ spin_unlock_irq(&mdev->req_lock);
+ goto fail_free_complete;
+ }
+
+ if (remote || send_oos) {
+ remote = drbd_should_do_remote(mdev->state);
+ send_oos = rw == WRITE && drbd_should_send_oos(mdev->state);
+ D_ASSERT(!(remote && send_oos));
+
+ if (!(remote || send_oos))
+ dev_warn(DEV, "lost connection while grabbing the req_lock!\n");
+ if (!(local || remote)) {
+ dev_err(DEV, "IO ERROR: neither local nor remote disk\n");
+ spin_unlock_irq(&mdev->req_lock);
+ goto fail_free_complete;
+ }
+ }
+
+ if (b && mdev->unused_spare_tle == NULL) {
+ mdev->unused_spare_tle = b;
+ b = NULL;
+ }
+ if (rw == WRITE && (remote || send_oos) &&
+ mdev->unused_spare_tle == NULL &&
+ test_bit(CREATE_BARRIER, &mdev->flags)) {
+ /* someone closed the current epoch
+ * while we were grabbing the spinlock */
+ spin_unlock_irq(&mdev->req_lock);
+ goto allocate_barrier;
+ }
+
+
+ /* Update disk stats */
+ _drbd_start_io_acct(mdev, req, bio);
+
+ /* _maybe_start_new_epoch(mdev);
+ * If we need to generate a write barrier packet, we have to add the
+ * new epoch (barrier) object, and queue the barrier packet for sending,
+ * and queue the req's data after it _within the same lock_, otherwise
+ * we have race conditions where the reorder domains could be mixed up.
+ *
+ * Even read requests may start a new epoch and queue the corresponding
+ * barrier packet. To get the write ordering right, we only have to
+ * make sure that, if this is a write request and it triggered a
+ * barrier packet, this request is queued within the same spinlock. */
+ if ((remote || send_oos) && mdev->unused_spare_tle &&
+ test_and_clear_bit(CREATE_BARRIER, &mdev->flags)) {
+ _tl_add_barrier(mdev, mdev->unused_spare_tle);
+ mdev->unused_spare_tle = NULL;
+ } else {
+ D_ASSERT(!(remote && rw == WRITE &&
+ test_bit(CREATE_BARRIER, &mdev->flags)));
+ }
+
+ /* NOTE
+ * Actually, 'local' may be wrong here already, since we may have failed
+ * to write to the meta data, and may become wrong anytime because of
+ * local io-error for some other request, which would lead to us
+ * "detaching" the local disk.
+ *
+ * 'remote' may become wrong any time because the network could fail.
+ *
+ * This is a harmless race condition, though, since it is handled
+ * correctly at the appropriate places; so it just defers the failure
+ * of the respective operation.
+ */
+
+ /* mark them early for readability.
+ * this just sets some state flags. */
+ if (remote)
+ _req_mod(req, to_be_send);
+ if (local)
+ _req_mod(req, to_be_submitted);
+
+ /* check this request on the collision detection hash tables.
+ * if we have a conflict, just complete it here.
+ * THINK do we want to check reads, too? (I don't think so...) */
+ if (rw == WRITE && _req_conflicts(req))
+ goto fail_conflicting;
+
+ list_add_tail(&req->tl_requests, &mdev->newest_tle->requests);
+
+ /* NOTE remote first: to get the concurrent write detection right,
+ * we must register the request before start of local IO. */
+ if (remote) {
+ /* either WRITE and C_CONNECTED,
+ * or READ, and no local disk,
+ * or READ, but not in sync.
+ */
+ _req_mod(req, (rw == WRITE)
+ ? queue_for_net_write
+ : queue_for_net_read);
+ }
+ if (send_oos && drbd_set_out_of_sync(mdev, sector, size))
+ _req_mod(req, queue_for_send_oos);
+
+ if (remote &&
+ mdev->net_conf->on_congestion != OC_BLOCK && mdev->agreed_pro_version >= 96) {
+ int congested = 0;
+
+ if (mdev->net_conf->cong_fill &&
+ atomic_read(&mdev->ap_in_flight) >= mdev->net_conf->cong_fill) {
+ dev_info(DEV, "Congestion-fill threshold reached\n");
+ congested = 1;
+ }
+
+ if (mdev->act_log->used >= mdev->net_conf->cong_extents) {
+ dev_info(DEV, "Congestion-extents threshold reached\n");
+ congested = 1;
+ }
+
+ if (congested) {
+ queue_barrier(mdev); /* last barrier, after mirrored writes */
+
+ if (mdev->net_conf->on_congestion == OC_PULL_AHEAD)
+ _drbd_set_state(_NS(mdev, conn, C_AHEAD), 0, NULL);
+ else /*mdev->net_conf->on_congestion == OC_DISCONNECT */
+ _drbd_set_state(_NS(mdev, conn, C_DISCONNECTING), 0, NULL);
+ }
+ }
+
+ spin_unlock_irq(&mdev->req_lock);
+ kfree(b); /* if someone else has beaten us to it... */
+
+ if (local) {
+ req->private_bio->bi_bdev = mdev->ldev->backing_bdev;
+
+ /* State may have changed since we grabbed our reference on the
+ * mdev->ldev member. Double check, and short-circuit to endio.
+ * In case the last activity log transaction failed to get on
+ * stable storage, and this is a WRITE, we may not even submit
+ * this bio. */
+ if (get_ldev(mdev)) {
+ if (drbd_insert_fault(mdev, rw == WRITE ? DRBD_FAULT_DT_WR
+ : rw == READ ? DRBD_FAULT_DT_RD
+ : DRBD_FAULT_DT_RA))
+ bio_endio(req->private_bio, -EIO);
+ else
+ generic_make_request(req->private_bio);
+ put_ldev(mdev);
+ } else
+ bio_endio(req->private_bio, -EIO);
+ }
+
+ return 0;
+
+fail_conflicting:
+ /* this is a conflicting request.
+ * even though it may have been only _partially_
+ * overlapping with one of the currently pending requests,
+ * without even submitting or sending it, we will
+ * pretend that it was successfully served right now.
+ */
+ _drbd_end_io_acct(mdev, req);
+ spin_unlock_irq(&mdev->req_lock);
+ if (remote)
+ dec_ap_pending(mdev);
+ /* THINK: do we want to fail it (-EIO), or pretend success?
+ * this pretends success. */
+ err = 0;
+
+fail_free_complete:
+ if (req->rq_state & RQ_IN_ACT_LOG)
+ drbd_al_complete_io(mdev, sector);
+fail_and_free_req:
+ if (local) {
+ bio_put(req->private_bio);
+ req->private_bio = NULL;
+ put_ldev(mdev);
+ }
+ if (!ret)
+ bio_endio(bio, err);
+
+ drbd_req_free(req);
+ dec_ap_bio(mdev);
+ kfree(b);
+
+ return ret;
+}
+
+/* helper function for drbd_make_request
+ * if we can determine just by the mdev (state) that this request will fail,
+ * return 1
+ * otherwise return 0
+ */
+static int drbd_fail_request_early(struct drbd_conf *mdev, int is_write)
+{
+ if (mdev->state.role != R_PRIMARY &&
+ (!allow_oos || is_write)) {
+ if (__ratelimit(&drbd_ratelimit_state)) {
+ dev_err(DEV, "Process %s[%u] tried to %s; "
+ "since we are not in Primary state, "
+ "we cannot allow this\n",
+ current->comm, current->pid,
+ is_write ? "WRITE" : "READ");
+ }
+ return 1;
+ }
+
+ return 0;
+}
+
+int drbd_make_request(struct request_queue *q, struct bio *bio)
+{
+ unsigned int s_enr, e_enr;
+ struct drbd_conf *mdev = (struct drbd_conf *) q->queuedata;
+ unsigned long start_time;
+
+ if (drbd_fail_request_early(mdev, bio_data_dir(bio) & WRITE)) {
+ bio_endio(bio, -EPERM);
+ return 0;
+ }
+
+ start_time = jiffies;
+
+ /*
+ * what we "blindly" assume:
+ */
+ D_ASSERT(bio->bi_size > 0);
+ D_ASSERT((bio->bi_size & 0x1ff) == 0);
+ D_ASSERT(bio->bi_idx == 0);
+
+ /* to make some things easier, force alignment of requests within the
+ * granularity of our hash tables */
+ s_enr = bio->bi_sector >> HT_SHIFT;
+ e_enr = (bio->bi_sector+(bio->bi_size>>9)-1) >> HT_SHIFT;
+
+ if (likely(s_enr == e_enr)) {
+ inc_ap_bio(mdev, 1);
+ return drbd_make_request_common(mdev, bio, start_time);
+ }
+
+ /* can this bio be split generically?
+ * Maybe add our own split-arbitrary-bios function. */
+ if (bio->bi_vcnt != 1 || bio->bi_idx != 0 || bio->bi_size > DRBD_MAX_BIO_SIZE) {
+ /* rather error out here than BUG in bio_split */
+ dev_err(DEV, "bio would need to, but cannot, be split: "
+ "(vcnt=%u,idx=%u,size=%u,sector=%llu)\n",
+ bio->bi_vcnt, bio->bi_idx, bio->bi_size,
+ (unsigned long long)bio->bi_sector);
+ bio_endio(bio, -EINVAL);
+ } else {
+ /* This bio crosses some boundary, so we have to split it. */
+ struct bio_pair *bp;
+ /* works for the "do not cross hash slot boundaries" case
+ * e.g. sector 262269, size 4096
+ * s_enr = 262269 >> 6 = 4097
+ * e_enr = (262269+8-1) >> 6 = 4098
+ * HT_SHIFT = 6
+ * sps = 64, mask = 63
+ * first_sectors = 64 - (262269 & 63) = 3
+ */
+ const sector_t sect = bio->bi_sector;
+ const int sps = 1 << HT_SHIFT; /* sectors per slot */
+ const int mask = sps - 1;
+ const sector_t first_sectors = sps - (sect & mask);
+ bp = bio_split(bio, first_sectors);
+
+ /* we need to get a "reference count" (ap_bio_cnt)
+ * to avoid races with the disconnect/reconnect/suspend code.
+ * In case we need to split the bio here, we need to get three references
+ * atomically, otherwise we might deadlock when trying to submit the
+ * second one! */
+ inc_ap_bio(mdev, 3);
+
+ D_ASSERT(e_enr == s_enr + 1);
+
+ while (drbd_make_request_common(mdev, &bp->bio1, start_time))
+ inc_ap_bio(mdev, 1);
+
+ while (drbd_make_request_common(mdev, &bp->bio2, start_time))
+ inc_ap_bio(mdev, 1);
+
+ dec_ap_bio(mdev);
+
+ bio_pair_release(bp);
+ }
+ return 0;
+}
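The slot arithmetic is easy to check in isolation; the sketch below reproduces the worked example from the comment above (sector 262269, 4096 bytes, HT_SHIFT = 6):

#include <stdio.h>

/* With 1 << HT_SHIFT sectors per hash slot, a request may touch at most two
 * adjacent slots, and first_sectors is where bio_split() has to cut. */
#define HT_SHIFT 6

int main(void)
{
	unsigned long long sector = 262269;
	unsigned int size = 4096;				/* bytes */
	unsigned long long s_enr = sector >> HT_SHIFT;
	unsigned long long e_enr = (sector + (size >> 9) - 1) >> HT_SHIFT;
	unsigned int sps = 1 << HT_SHIFT, mask = sps - 1;
	unsigned long long first_sectors = sps - (sector & mask);

	printf("s_enr=%llu e_enr=%llu first_sectors=%llu\n",
	       s_enr, e_enr, first_sectors);			/* 4097 4098 3 */
	return 0;
}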
+
+/* This is called by bio_add_page(). With this function we reduce
+ * the number of BIOs that span over multiple DRBD_MAX_BIO_SIZEs
+ * units (was AL_EXTENTs).
+ *
+ * we do the calculation within the lower 32bit of the byte offsets,
+ * since we don't care for actual offset, but only check whether it
+ * would cross "activity log extent" boundaries.
+ *
+ * As long as the BIO is empty we have to allow at least one bvec,
+ * regardless of size and offset, so the resulting bio may still
+ * cross extent boundaries. those are dealt with (bio_split) in
+ * drbd_make_request.
+ */
+int drbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bvm, struct bio_vec *bvec)
+{
+ struct drbd_conf *mdev = (struct drbd_conf *) q->queuedata;
+ unsigned int bio_offset =
+ (unsigned int)bvm->bi_sector << 9; /* 32 bit */
+ unsigned int bio_size = bvm->bi_size;
+ int limit, backing_limit;
+
+ limit = DRBD_MAX_BIO_SIZE
+ - ((bio_offset & (DRBD_MAX_BIO_SIZE-1)) + bio_size);
+ if (limit < 0)
+ limit = 0;
+ if (bio_size == 0) {
+ if (limit <= bvec->bv_len)
+ limit = bvec->bv_len;
+ } else if (limit && get_ldev(mdev)) {
+ struct request_queue * const b =
+ mdev->ldev->backing_bdev->bd_disk->queue;
+ if (b->merge_bvec_fn) {
+ backing_limit = b->merge_bvec_fn(b, bvm, bvec);
+ limit = min(limit, backing_limit);
+ }
+ put_ldev(mdev);
+ }
+ return limit;
+}
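The clamp above keeps a growing bio from crossing the next DRBD_MAX_BIO_SIZE boundary. A standalone sketch of that calculation, with the boundary passed in as a parameter instead of hard-coding the driver's constant:

#include <stdio.h>

/* Given a request that starts at byte offset 'offset' and already has
 * 'cur_size' bytes queued, return how many more bytes fit before the next
 * 'max' boundary would be crossed; 'max' must be a power of two. */
static int remaining_in_extent(unsigned int offset, unsigned int cur_size, unsigned int max)
{
	int limit = (int)max - (int)((offset & (max - 1)) + cur_size);

	return limit < 0 ? 0 : limit;
}

int main(void)
{
	/* 128 KiB boundary chosen only for the example */
	printf("%d\n", remaining_in_extent(0x1F000, 0, 0x20000));	/* 4096 bytes left */
	printf("%d\n", remaining_in_extent(0x1F000, 4096, 0x20000));	/* 0: would cross */
	return 0;
}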
+
+void request_timer_fn(unsigned long data)
+{
+ struct drbd_conf *mdev = (struct drbd_conf *) data;
+ struct drbd_request *req; /* oldest request */
+ struct list_head *le;
+ unsigned long et = 0; /* effective timeout = ko_count * timeout */
+
+ if (get_net_conf(mdev)) {
+ et = mdev->net_conf->timeout*HZ/10 * mdev->net_conf->ko_count;
+ put_net_conf(mdev);
+ }
+ if (!et || mdev->state.conn < C_WF_REPORT_PARAMS)
+ return; /* Recurring timer stopped */
+
+ spin_lock_irq(&mdev->req_lock);
+ le = &mdev->oldest_tle->requests;
+ if (list_empty(le)) {
+ spin_unlock_irq(&mdev->req_lock);
+ mod_timer(&mdev->request_timer, jiffies + et);
+ return;
+ }
+
+ le = le->prev;
+ req = list_entry(le, struct drbd_request, tl_requests);
+ if (time_is_before_eq_jiffies(req->start_time + et)) {
+ if (req->rq_state & RQ_NET_PENDING) {
+ dev_warn(DEV, "Remote failed to finish a request within ko-count * timeout\n");
+ _drbd_set_state(_NS(mdev, conn, C_TIMEOUT), CS_VERBOSE, NULL);
+ } else {
+ dev_warn(DEV, "Local backing block device frozen?\n");
+ mod_timer(&mdev->request_timer, jiffies + et);
+ }
+ } else {
+ mod_timer(&mdev->request_timer, req->start_time + et);
+ }
+
+ spin_unlock_irq(&mdev->req_lock);
+}
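+/* Editorial illustration (not part of the original patch): the effective
+ * timeout above is ko_count times the network timeout. net_conf->timeout is
+ * kept in units of 0.1 seconds, hence the *HZ/10 conversion to jiffies.
+ * For example, assuming timeout = 60 (6 seconds) and ko_count = 7:
+ *
+ *	et = 60 * HZ/10 * 7 = 42 * HZ
+ *
+ * i.e. the oldest request may stay unfinished for up to 42 seconds before the
+ * peer is declared dead (C_TIMEOUT) or the local backing device is suspected
+ * to be frozen.
+ */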
diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h
new file mode 100644
index 00000000..68a234a5
--- /dev/null
+++ b/drivers/block/drbd/drbd_req.h
@@ -0,0 +1,383 @@
+/*
+ drbd_req.h
+
+ This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
+
+ Copyright (C) 2006-2008, LINBIT Information Technologies GmbH.
+ Copyright (C) 2006-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
+ Copyright (C) 2006-2008, Philipp Reisner <philipp.reisner@linbit.com>.
+
+ DRBD is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ DRBD is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with drbd; see the file COPYING. If not, write to
+ the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef _DRBD_REQ_H
+#define _DRBD_REQ_H
+
+#include <linux/module.h>
+
+#include <linux/slab.h>
+#include <linux/drbd.h>
+#include "drbd_int.h"
+#include "drbd_wrappers.h"
+
+/* The request callbacks will be called in irq context by the IDE drivers,
+ and in Softirqs/Tasklets/BH context by the SCSI drivers,
+ and by the receiver and worker in kernel-thread context.
+ Try to get the locking right :) */
+
+/*
+ * Objects of type struct drbd_request only exist on an R_PRIMARY node, and are
+ * associated with IO requests originating from the block layer above us.
+ *
+ * There are quite a few things that may happen to a drbd request
+ * during its lifetime.
+ *
+ * It will be created.
+ * It will be marked with the intention to be
+ * submitted to local disk and/or
+ * send via the network.
+ *
+ * It has to be placed on the transfer log and other housekeeping lists,
+ * in case we have a network connection.
+ *
+ * It may be identified as a concurrent (write) request
+ * and be handled accordingly.
+ *
+ * It may be handed over to the local disk subsystem.
+ * It may be completed by the local disk subsystem,
+ * either successfully or with io-error.
+ * In case it is a READ request, and it failed locally,
+ * it may be retried remotely.
+ *
+ * It may be queued for sending.
+ * It may be handed over to the network stack,
+ * which may fail.
+ * It may be acknowledged by the "peer" according to the wire_protocol in use.
+ * this may be a negative ack.
+ * It may receive a faked ack when the network connection is lost and the
+ * transfer log is cleaned up.
+ * Sending may be canceled due to network connection loss.
+ * When it finally has outlived its time,
+ * corresponding dirty bits in the resync-bitmap may be cleared or set,
+ * it will be destroyed,
+ * and completion will be signalled to the originator,
+ * with or without "success".
+ */
+
+enum drbd_req_event {
+ created,
+ to_be_send,
+ to_be_submitted,
+
+ /* XXX yes, now I am inconsistent...
+ * these are not "events" but "actions"
+ * oh, well... */
+ queue_for_net_write,
+ queue_for_net_read,
+ queue_for_send_oos,
+
+ send_canceled,
+ send_failed,
+ handed_over_to_network,
+ oos_handed_to_network,
+ connection_lost_while_pending,
+ read_retry_remote_canceled,
+ recv_acked_by_peer,
+ write_acked_by_peer,
+ write_acked_by_peer_and_sis, /* and set_in_sync */
+ conflict_discarded_by_peer,
+ neg_acked,
+ barrier_acked, /* in protocol A and B */
+ data_received, /* (remote read) */
+
+ read_completed_with_error,
+ read_ahead_completed_with_error,
+ write_completed_with_error,
+ completed_ok,
+ resend,
+ fail_frozen_disk_io,
+ restart_frozen_disk_io,
+ nothing, /* for tracing only */
+};
+
+/* encoding of request states for now. we don't actually need that many bits.
+ * we don't need to do atomic bit operations either, since most of the time we
+ * need to look at the connection state and/or manipulate some lists at the
+ * same time, so we should hold the request lock anyways.
+ */
+enum drbd_req_state_bits {
+ /* 210
+ * 000: no local possible
+ * 001: to be submitted
+ * UNUSED, we could map: 011: submitted, completion still pending
+ * 110: completed ok
+ * 010: completed with error
+ */
+ __RQ_LOCAL_PENDING,
+ __RQ_LOCAL_COMPLETED,
+ __RQ_LOCAL_OK,
+
+ /* 76543
+ * 00000: no network possible
+ * 00001: to be sent
+ * 00011: to be sent, on worker queue
+ * 00101: sent, expecting recv_ack (B) or write_ack (C)
+ * 11101: sent,
+ * recv_ack (B) or implicit "ack" (A),
+ * still waiting for the barrier ack.
+ * master_bio may already be completed and invalidated.
+ * 11100: write_acked (C),
+ * data_received (for remote read, any protocol)
+ * or finally the barrier ack has arrived (B,A)...
+ * request can be freed
+ * 01100: neg-acked (write, protocol C)
+ * or neg-d-acked (read, any protocol)
+ * or killed from the transfer log
+ * during cleanup after connection loss
+ * request can be freed
+ * 01000: canceled or send failed...
+ * request can be freed
+ */
+
+ /* if "SENT" is not set, yet, this can still fail or be canceled.
+ * if "SENT" is set already, we still wait for an Ack packet.
+ * when cleared, the master_bio may be completed.
+ * in (B,A) the request object may still linger on the transaction log
+ * until the corresponding barrier ack comes in */
+ __RQ_NET_PENDING,
+
+ /* If it is QUEUED, and it is a WRITE, it is also registered in the
+ * transfer log. Currently we need this flag to avoid conflicts between
+ * worker canceling the request and tl_clear_barrier killing it from
+ * transfer log. We should restructure the code so this conflict does
+ * no longer occur. */
+ __RQ_NET_QUEUED,
+
+ /* well, actually only "handed over to the network stack".
+ *
+ * TODO can potentially be dropped because of the similar meaning
+ * of RQ_NET_SENT and ~RQ_NET_QUEUED.
+ * however it is not exactly the same. before we drop it
+ * we must ensure that we can tell a request with network part
+ * from a request without, regardless of what happens to it. */
+ __RQ_NET_SENT,
+
+ /* when set, the request may be freed (if RQ_NET_QUEUED is clear).
+ * basically this means the corresponding P_BARRIER_ACK was received */
+ __RQ_NET_DONE,
+
+ /* whether or not we know (C) or pretend (B,A) that the write
+ * was successfully written on the peer.
+ */
+ __RQ_NET_OK,
+
+ /* peer called drbd_set_in_sync() for this write */
+ __RQ_NET_SIS,
+
+ /* keep this last, it's for the RQ_NET_MASK */
+ __RQ_NET_MAX,
+
+ /* Set when this is a write, clear for a read */
+ __RQ_WRITE,
+
+ /* Should call drbd_al_complete_io() for this request... */
+ __RQ_IN_ACT_LOG,
+};
+
+#define RQ_LOCAL_PENDING (1UL << __RQ_LOCAL_PENDING)
+#define RQ_LOCAL_COMPLETED (1UL << __RQ_LOCAL_COMPLETED)
+#define RQ_LOCAL_OK (1UL << __RQ_LOCAL_OK)
+
+#define RQ_LOCAL_MASK ((RQ_LOCAL_OK << 1)-1) /* 0x07 */
+
+#define RQ_NET_PENDING (1UL << __RQ_NET_PENDING)
+#define RQ_NET_QUEUED (1UL << __RQ_NET_QUEUED)
+#define RQ_NET_SENT (1UL << __RQ_NET_SENT)
+#define RQ_NET_DONE (1UL << __RQ_NET_DONE)
+#define RQ_NET_OK (1UL << __RQ_NET_OK)
+#define RQ_NET_SIS (1UL << __RQ_NET_SIS)
+
+/* 0x1f8 */
+#define RQ_NET_MASK (((1UL << __RQ_NET_MAX)-1) & ~RQ_LOCAL_MASK)
+
+#define RQ_WRITE (1UL << __RQ_WRITE)
+#define RQ_IN_ACT_LOG (1UL << __RQ_IN_ACT_LOG)
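+
+/* Editorial example (not part of the original patch): with the bit layout
+ * above, RQ_LOCAL_MASK is 0x07 and RQ_NET_MASK is 0x1f8. A freshly queued
+ * write that is to be submitted locally and sent to the peer would typically
+ * carry
+ *
+ *	rq_state = RQ_WRITE | RQ_LOCAL_PENDING | RQ_NET_PENDING | RQ_NET_QUEUED;
+ *
+ * matching "001" in the local state diagram and "00011" in the network state
+ * diagram documented above.
+ */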
+
+/* For waking up the frozen transfer log, mod_req() has to return whether the
+ request should be counted in the epoch object */
+#define MR_WRITE_SHIFT 0
+#define MR_WRITE (1 << MR_WRITE_SHIFT)
+#define MR_READ_SHIFT 1
+#define MR_READ (1 << MR_READ_SHIFT)
+
+/* epoch entries */
+static inline
+struct hlist_head *ee_hash_slot(struct drbd_conf *mdev, sector_t sector)
+{
+ BUG_ON(mdev->ee_hash_s == 0);
+ return mdev->ee_hash +
+ ((unsigned int)(sector>>HT_SHIFT) % mdev->ee_hash_s);
+}
+
+/* transfer log (drbd_request objects) */
+static inline
+struct hlist_head *tl_hash_slot(struct drbd_conf *mdev, sector_t sector)
+{
+ BUG_ON(mdev->tl_hash_s == 0);
+ return mdev->tl_hash +
+ ((unsigned int)(sector>>HT_SHIFT) % mdev->tl_hash_s);
+}
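+
+/* Editorial example (not part of the original patch): with HT_SHIFT = 6 a
+ * hash slot covers 64 sectors (32 KiB). E.g. sector 262269 maps to
+ * 262269 >> 6 = 4097, modulo the hash table size; sectors 262208..262271 all
+ * share that slot, which is why drbd_make_request() splits bios that would
+ * cross such a 64-sector boundary.
+ */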
+
+/* application reads (drbd_request objects) */
+static struct hlist_head *ar_hash_slot(struct drbd_conf *mdev, sector_t sector)
+{
+ return mdev->app_reads_hash
+ + ((unsigned int)(sector) % APP_R_HSIZE);
+}
+
+/* when we receive the answer for a read request,
+ * verify that we actually know about it */
+static inline struct drbd_request *_ar_id_to_req(struct drbd_conf *mdev,
+ u64 id, sector_t sector)
+{
+ struct hlist_head *slot = ar_hash_slot(mdev, sector);
+ struct hlist_node *n;
+ struct drbd_request *req;
+
+ hlist_for_each_entry(req, n, slot, collision) {
+ if ((unsigned long)req == (unsigned long)id) {
+ D_ASSERT(req->sector == sector);
+ return req;
+ }
+ }
+ return NULL;
+}
+
+static inline void drbd_req_make_private_bio(struct drbd_request *req, struct bio *bio_src)
+{
+ struct bio *bio;
+ bio = bio_clone(bio_src, GFP_NOIO); /* XXX cannot fail?? */
+
+ req->private_bio = bio;
+
+ bio->bi_private = req;
+ bio->bi_end_io = drbd_endio_pri;
+ bio->bi_next = NULL;
+}
+
+static inline struct drbd_request *drbd_req_new(struct drbd_conf *mdev,
+ struct bio *bio_src)
+{
+ struct drbd_request *req =
+ mempool_alloc(drbd_request_mempool, GFP_NOIO);
+ if (likely(req)) {
+ drbd_req_make_private_bio(req, bio_src);
+
+ req->rq_state = bio_data_dir(bio_src) == WRITE ? RQ_WRITE : 0;
+ req->mdev = mdev;
+ req->master_bio = bio_src;
+ req->epoch = 0;
+ req->sector = bio_src->bi_sector;
+ req->size = bio_src->bi_size;
+ INIT_HLIST_NODE(&req->collision);
+ INIT_LIST_HEAD(&req->tl_requests);
+ INIT_LIST_HEAD(&req->w.list);
+ }
+ return req;
+}
+
+static inline void drbd_req_free(struct drbd_request *req)
+{
+ mempool_free(req, drbd_request_mempool);
+}
+
+static inline int overlaps(sector_t s1, int l1, sector_t s2, int l2)
+{
+ return !((s1 + (l1>>9) <= s2) || (s1 >= s2 + (l2>>9)));
+}
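+
+/* Editorial example (not part of the original patch): overlaps() takes
+ * sector/byte-length pairs. overlaps(0, 4096, 4, 4096) compares the sector
+ * ranges [0,8) and [4,12) and returns true, while overlaps(0, 4096, 8, 4096)
+ * compares [0,8) and [8,16) and returns false.
+ */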
+
+/* Short lived temporary struct on the stack.
+ * We could squirrel the error to be returned into
+ * bio->bi_size, or similar. But that would be too ugly. */
+struct bio_and_error {
+ struct bio *bio;
+ int error;
+};
+
+extern void _req_may_be_done(struct drbd_request *req,
+ struct bio_and_error *m);
+extern int __req_mod(struct drbd_request *req, enum drbd_req_event what,
+ struct bio_and_error *m);
+extern void complete_master_bio(struct drbd_conf *mdev,
+ struct bio_and_error *m);
+extern void request_timer_fn(unsigned long data);
+extern void tl_restart(struct drbd_conf *mdev, enum drbd_req_event what);
+
+/* use this if you don't want to deal with calling complete_master_bio()
+ * outside the spinlock, e.g. when walking some list on cleanup. */
+static inline int _req_mod(struct drbd_request *req, enum drbd_req_event what)
+{
+ struct drbd_conf *mdev = req->mdev;
+ struct bio_and_error m;
+ int rv;
+
+ /* __req_mod possibly frees req, do not touch req after that! */
+ rv = __req_mod(req, what, &m);
+ if (m.bio)
+ complete_master_bio(mdev, &m);
+
+ return rv;
+}
+
+/* completion of master bio is outside of our spinlock.
+ * We still may or may not be inside some irqs disabled section
+ * of the lower level driver completion callback, so we need to
+ * spin_lock_irqsave here. */
+static inline int req_mod(struct drbd_request *req,
+ enum drbd_req_event what)
+{
+ unsigned long flags;
+ struct drbd_conf *mdev = req->mdev;
+ struct bio_and_error m;
+ int rv;
+
+ spin_lock_irqsave(&mdev->req_lock, flags);
+ rv = __req_mod(req, what, &m);
+ spin_unlock_irqrestore(&mdev->req_lock, flags);
+
+ if (m.bio)
+ complete_master_bio(mdev, &m);
+
+ return rv;
+}
+
+static inline bool drbd_should_do_remote(union drbd_state s)
+{
+ return s.pdsk == D_UP_TO_DATE ||
+ (s.pdsk >= D_INCONSISTENT &&
+ s.conn >= C_WF_BITMAP_T &&
+ s.conn < C_AHEAD);
+ /* Before proto 96 that was >= CONNECTED instead of >= C_WF_BITMAP_T.
+ That is equivalent since before 96 IO was frozen in the C_WF_BITMAP*
+ states. */
+}
+static inline bool drbd_should_send_oos(union drbd_state s)
+{
+ return s.conn == C_AHEAD || s.conn == C_WF_BITMAP_S;
+ /* pdsk = D_INCONSISTENT as a consequence. Protocol 96 check not necessary
+ since we enter state C_AHEAD only if proto >= 96 */
+}
+
+#endif
diff --git a/drivers/block/drbd/drbd_strings.c b/drivers/block/drbd/drbd_strings.c
new file mode 100644
index 00000000..c44a2a60
--- /dev/null
+++ b/drivers/block/drbd/drbd_strings.c
@@ -0,0 +1,115 @@
+/*
+ drbd_strings.c
+
+ This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
+
+ Copyright (C) 2003-2008, LINBIT Information Technologies GmbH.
+ Copyright (C) 2003-2008, Philipp Reisner <philipp.reisner@linbit.com>.
+ Copyright (C) 2003-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
+
+ drbd is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ drbd is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with drbd; see the file COPYING. If not, write to
+ the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+
+*/
+
+#include <linux/drbd.h>
+
+static const char *drbd_conn_s_names[] = {
+ [C_STANDALONE] = "StandAlone",
+ [C_DISCONNECTING] = "Disconnecting",
+ [C_UNCONNECTED] = "Unconnected",
+ [C_TIMEOUT] = "Timeout",
+ [C_BROKEN_PIPE] = "BrokenPipe",
+ [C_NETWORK_FAILURE] = "NetworkFailure",
+ [C_PROTOCOL_ERROR] = "ProtocolError",
+ [C_WF_CONNECTION] = "WFConnection",
+ [C_WF_REPORT_PARAMS] = "WFReportParams",
+ [C_TEAR_DOWN] = "TearDown",
+ [C_CONNECTED] = "Connected",
+ [C_STARTING_SYNC_S] = "StartingSyncS",
+ [C_STARTING_SYNC_T] = "StartingSyncT",
+ [C_WF_BITMAP_S] = "WFBitMapS",
+ [C_WF_BITMAP_T] = "WFBitMapT",
+ [C_WF_SYNC_UUID] = "WFSyncUUID",
+ [C_SYNC_SOURCE] = "SyncSource",
+ [C_SYNC_TARGET] = "SyncTarget",
+ [C_PAUSED_SYNC_S] = "PausedSyncS",
+ [C_PAUSED_SYNC_T] = "PausedSyncT",
+ [C_VERIFY_S] = "VerifyS",
+ [C_VERIFY_T] = "VerifyT",
+ [C_AHEAD] = "Ahead",
+ [C_BEHIND] = "Behind",
+};
+
+static const char *drbd_role_s_names[] = {
+ [R_PRIMARY] = "Primary",
+ [R_SECONDARY] = "Secondary",
+ [R_UNKNOWN] = "Unknown"
+};
+
+static const char *drbd_disk_s_names[] = {
+ [D_DISKLESS] = "Diskless",
+ [D_ATTACHING] = "Attaching",
+ [D_FAILED] = "Failed",
+ [D_NEGOTIATING] = "Negotiating",
+ [D_INCONSISTENT] = "Inconsistent",
+ [D_OUTDATED] = "Outdated",
+ [D_UNKNOWN] = "DUnknown",
+ [D_CONSISTENT] = "Consistent",
+ [D_UP_TO_DATE] = "UpToDate",
+};
+
+static const char *drbd_state_sw_errors[] = {
+ [-SS_TWO_PRIMARIES] = "Multiple primaries not allowed by config",
+ [-SS_NO_UP_TO_DATE_DISK] = "Need access to UpToDate data",
+ [-SS_NO_LOCAL_DISK] = "Can not resync without local disk",
+ [-SS_NO_REMOTE_DISK] = "Can not resync without remote disk",
+ [-SS_CONNECTED_OUTDATES] = "Refusing to be Outdated while Connected",
+ [-SS_PRIMARY_NOP] = "Refusing to be Primary while peer is not outdated",
+ [-SS_RESYNC_RUNNING] = "Can not start OV/resync since it is already active",
+ [-SS_ALREADY_STANDALONE] = "Can not disconnect a StandAlone device",
+ [-SS_CW_FAILED_BY_PEER] = "State change was refused by peer node",
+ [-SS_IS_DISKLESS] = "Device is diskless, the requested operation requires a disk",
+ [-SS_DEVICE_IN_USE] = "Device is held open by someone",
+ [-SS_NO_NET_CONFIG] = "Have no net/connection configuration",
+ [-SS_NO_VERIFY_ALG] = "Need a verify algorithm to start online verify",
+ [-SS_NEED_CONNECTION] = "Need a connection to start verify or resync",
+ [-SS_NOT_SUPPORTED] = "Peer does not support protocol",
+ [-SS_LOWER_THAN_OUTDATED] = "Disk state is lower than outdated",
+ [-SS_IN_TRANSIENT_STATE] = "In transient state, retry after next state change",
+ [-SS_CONCURRENT_ST_CHG] = "Concurrent state changes detected and aborted",
+};
+
+const char *drbd_conn_str(enum drbd_conns s)
+{
+ /* enums are unsigned... */
+ return s > C_BEHIND ? "TOO_LARGE" : drbd_conn_s_names[s];
+}
+
+const char *drbd_role_str(enum drbd_role s)
+{
+ return s > R_SECONDARY ? "TOO_LARGE" : drbd_role_s_names[s];
+}
+
+const char *drbd_disk_str(enum drbd_disk_state s)
+{
+ return s > D_UP_TO_DATE ? "TOO_LARGE" : drbd_disk_s_names[s];
+}
+
+const char *drbd_set_st_err_str(enum drbd_state_rv err)
+{
+ return err <= SS_AFTER_LAST_ERROR ? "TOO_SMALL" :
+ err > SS_TWO_PRIMARIES ? "TOO_LARGE"
+ : drbd_state_sw_errors[-err];
+}
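+
+/* Editorial usage sketch (not part of the original patch): these helpers map
+ * enum values to the human readable strings used in log messages and status
+ * output such as /proc/drbd, e.g.
+ *
+ *	drbd_conn_str(C_CONNECTED)            => "Connected"
+ *	drbd_disk_str(D_UP_TO_DATE)           => "UpToDate"
+ *	drbd_set_st_err_str(SS_TWO_PRIMARIES) => "Multiple primaries not allowed by config"
+ *
+ * Out-of-range values fall back to "TOO_LARGE"/"TOO_SMALL" instead of
+ * indexing past the tables.
+ */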
diff --git a/drivers/block/drbd/drbd_vli.h b/drivers/block/drbd/drbd_vli.h
new file mode 100644
index 00000000..8cb1532a
--- /dev/null
+++ b/drivers/block/drbd/drbd_vli.h
@@ -0,0 +1,351 @@
+/*
+-*- linux-c -*-
+ drbd_vli.h
+ This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
+
+ Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
+ Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
+ Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
+
+ drbd is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ drbd is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with drbd; see the file COPYING. If not, write to
+ the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef _DRBD_VLI_H
+#define _DRBD_VLI_H
+
+/*
+ * At a granularity of 4KiB storage represented per bit,
+ * and storage sizes of several TiB,
+ * and possibly small-bandwidth replication,
+ * the bitmap transfer time can take much too long,
+ * if transmitted in plain text.
+ *
+ * We try to reduce the transferred bitmap information
+ * by encoding runlengths of bit polarity.
+ *
+ * We never actually need to encode a "zero" (runlengths are positive).
+ * But then we have to store the value of the first bit.
+ * The first bit of information thus shall encode if the first runlength
+ * gives the number of set or unset bits.
+ *
+ * We assume that large areas are either completely set or unset,
+ * which gives good compression with any runlength method,
+ * even when encoding the runlength as fixed size 32bit/64bit integers.
+ *
+ * Still, there may be areas where the polarity flips every few bits,
+ * and encoding the runlength sequence of those areas with fixed-size
+ * integers would be much worse than plaintext.
+ *
+ * We want to encode small runlength values with minimum code length,
+ * while still being able to encode a Huge run of all zeros.
+ *
+ * Thus we need a Variable Length Integer encoding, VLI.
+ *
+ * For some cases, we produce more code bits than plaintext input.
+ * We need to send incompressible chunks as plaintext, skip over them
+ * and then see if the next chunk compresses better.
+ *
+ * We don't care too much about "excellent" compression ratio for large
+ * runlengths (all set/all clear): whether we achieve a factor of 100
+ * or 1000 is not that much of an issue.
+ * We do not want to waste too much on short runlengths in the "noisy"
+ * parts of the bitmap, though.
+ *
+ * There are endless variants of VLI, we experimented with:
+ * * simple byte-based
+ * * various bit based with different code word length.
+ *
+ * To avoid yet another configuration parameter (choice of bitmap compression
+ * algorithm) which was difficult to explain and tune, we just chose the one
+ * variant that turned out best in all test cases.
+ * Based on real world usage patterns, with device sizes ranging from a few GiB
+ * to several TiB, file server/mailserver/webserver/mysql/postgres,
+ * mostly idle to really busy, the all time winner (though sometimes only
+ * marginally better) is:
+ */
+
+/*
+ * encoding is "visualised" as
+ * __little endian__ bitstream, least significant bit first (left most)
+ *
+ * this particular encoding is chosen so that the prefix code
+ * starts as unary encoding the level, then modified so that
+ * 10 levels can be described in 8bit, with minimal overhead
+ * for the smaller levels.
+ *
+ * The number of data bits follows the Fibonacci sequence, with the exception
+ * of the last level (+1 data bit, so it makes 64 bits total). The only worse
+ * code when encoding bit polarity runlengths is 1 plain bit => 2 code bits.
+prefix   data bits                                                        max val  Nº data bits
+0        x                                                                    0x2         1
+10       x                                                                    0x4         1
+110      xx                                                                   0x8         2
+1110     xxx                                                                 0x10         3
+11110    xxx xx                                                              0x30         5
+111110   xx xxxxxx                                                          0x130         8
+11111100 xxxxxxxx xxxxx                                                    0x2130        13
+11111110 xxxxxxxx xxxxxxxx xxxxx                                         0x202130        21
+11111101 xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx xx                       0x400202130        34
+11111111 xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx                  56
+ * maximum encodable value: 0x100000400202130 == 2**56 + some */
+
+/* compression "table":
+ transmitted x 0.29
+ as plaintext x ........................
+ x ........................
+ x ........................
+ x 0.59 0.21........................
+ x ........................................................
+ x .. c ...................................................
+ x 0.44.. o ...................................................
+ x .......... d ...................................................
+ x .......... e ...................................................
+ X............. ...................................................
+ x.............. b ...................................................
+2.0x............... i ...................................................
+ #X................ t ...................................................
+ #................. s ........................... plain bits ..........
+-+-----------------------------------------------------------------------
+ 1 16 32 64
+*/
+
+/* LEVEL: (total bits, prefix bits, prefix value),
+ * sorted ascending by number of total bits.
+ * The rest of the code table is calculated at compiletime from this. */
+
+/* fibonacci data 1, 1, ... */
+#define VLI_L_1_1() do { \
+ LEVEL( 2, 1, 0x00); \
+ LEVEL( 3, 2, 0x01); \
+ LEVEL( 5, 3, 0x03); \
+ LEVEL( 7, 4, 0x07); \
+ LEVEL(10, 5, 0x0f); \
+ LEVEL(14, 6, 0x1f); \
+ LEVEL(21, 8, 0x3f); \
+ LEVEL(29, 8, 0x7f); \
+ LEVEL(42, 8, 0xbf); \
+ LEVEL(64, 8, 0xff); \
+ } while (0)
+
+/* finds a suitable level to decode the least significant part of in.
+ * returns number of bits consumed.
+ *
+ * BUG() for bad input, as that would mean a buggy code table. */
+static inline int vli_decode_bits(u64 *out, const u64 in)
+{
+ u64 adj = 1;
+
+#define LEVEL(t,b,v) \
+ do { \
+ if ((in & ((1 << b) -1)) == v) { \
+ *out = ((in & ((~0ULL) >> (64-t))) >> b) + adj; \
+ return t; \
+ } \
+ adj += 1ULL << (t - b); \
+ } while (0)
+
+ VLI_L_1_1();
+
+ /* NOT REACHED, if VLI_LEVELS code table is defined properly */
+ BUG();
+#undef LEVEL
+}
+
+/* return number of code bits needed,
+ * or negative error number */
+static inline int __vli_encode_bits(u64 *out, const u64 in)
+{
+ u64 max = 0;
+ u64 adj = 1;
+
+ if (in == 0)
+ return -EINVAL;
+
+#define LEVEL(t,b,v) do { \
+ max += 1ULL << (t - b); \
+ if (in <= max) { \
+ if (out) \
+ *out = ((in - adj) << b) | v; \
+ return t; \
+ } \
+ adj = max + 1; \
+ } while (0)
+
+ VLI_L_1_1();
+
+ return -EOVERFLOW;
+#undef LEVEL
+}
+
+#undef VLI_L_1_1
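+
+/* Editorial worked example (not part of the original patch): encoding the
+ * runlength value 7 with the code table above.
+ *
+ * __vli_encode_bits(&code, 7):
+ *	level (2,1,0x00) covers values 1..2   (adj becomes 3)
+ *	level (3,2,0x01) covers values 3..4   (adj becomes 5)
+ *	level (5,3,0x03) covers values 5..8   -> hit:
+ *	    code = ((7 - 5) << 3) | 0x03 = 0x13 (binary 10011), returns 5 bits
+ *
+ * vli_decode_bits(&out, 0x13):
+ *	prefix 011 selects the same level, out = (0x13 >> 3) + 5 = 7
+ */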
+
+/* code from here down is independent of the actually used bit code */
+
+/*
+ * Code length is determined by some unique (e.g. unary) prefix.
+ * This encodes arbitrary bit length, not whole bytes: we have a bit-stream,
+ * not a byte stream.
+ */
+
+/* for the bitstream, we need a cursor */
+struct bitstream_cursor {
+ /* the current byte */
+ u8 *b;
+ /* the current bit within *b, normalized: 0..7 */
+ unsigned int bit;
+};
+
+/* initialize cursor to point to first bit of stream */
+static inline void bitstream_cursor_reset(struct bitstream_cursor *cur, void *s)
+{
+ cur->b = s;
+ cur->bit = 0;
+}
+
+/* advance cursor by that many bits; maximum expected input value: 64,
+ * but depending on VLI implementation, it may be more. */
+static inline void bitstream_cursor_advance(struct bitstream_cursor *cur, unsigned int bits)
+{
+ bits += cur->bit;
+ cur->b = cur->b + (bits >> 3);
+ cur->bit = bits & 7;
+}
+
+/* the bitstream itself knows its length */
+struct bitstream {
+ struct bitstream_cursor cur;
+ unsigned char *buf;
+ size_t buf_len; /* in bytes */
+
+ /* for input stream:
+ * number of trailing 0 bits for padding
+ * total number of valid bits in stream: buf_len * 8 - pad_bits */
+ unsigned int pad_bits;
+};
+
+static inline void bitstream_init(struct bitstream *bs, void *s, size_t len, unsigned int pad_bits)
+{
+ bs->buf = s;
+ bs->buf_len = len;
+ bs->pad_bits = pad_bits;
+ bitstream_cursor_reset(&bs->cur, bs->buf);
+}
+
+static inline void bitstream_rewind(struct bitstream *bs)
+{
+ bitstream_cursor_reset(&bs->cur, bs->buf);
+ memset(bs->buf, 0, bs->buf_len);
+}
+
+/* Put (at most 64) least significant bits of val into bitstream, and advance cursor.
+ * Ignores "pad_bits".
+ * Returns zero if bits == 0 (nothing to do).
+ * Returns number of bits used if successful.
+ *
+ * If there is not enough room left in bitstream,
+ * leaves bitstream unchanged and returns -ENOBUFS.
+ */
+static inline int bitstream_put_bits(struct bitstream *bs, u64 val, const unsigned int bits)
+{
+ unsigned char *b = bs->cur.b;
+ unsigned int tmp;
+
+ if (bits == 0)
+ return 0;
+
+ if ((bs->cur.b + ((bs->cur.bit + bits -1) >> 3)) - bs->buf >= bs->buf_len)
+ return -ENOBUFS;
+
+ /* paranoia: strip off hi bits; they should not be set anyways. */
+ if (bits < 64)
+ val &= ~0ULL >> (64 - bits);
+
+ *b++ |= (val & 0xff) << bs->cur.bit;
+
+ for (tmp = 8 - bs->cur.bit; tmp < bits; tmp += 8)
+ *b++ |= (val >> tmp) & 0xff;
+
+ bitstream_cursor_advance(&bs->cur, bits);
+ return bits;
+}
+
+/* Fetch (at most 64) bits from bitstream into *out, and advance cursor.
+ *
+ * If more than 64 bits are requested, returns -EINVAL and leaves *out unchanged.
+ *
+ * If there are fewer than the requested number of valid bits left in the
+ * bitstream, still fetches all available bits.
+ *
+ * Returns number of actually fetched bits.
+ */
+static inline int bitstream_get_bits(struct bitstream *bs, u64 *out, int bits)
+{
+ u64 val;
+ unsigned int n;
+
+ if (bits > 64)
+ return -EINVAL;
+
+ if (bs->cur.b + ((bs->cur.bit + bs->pad_bits + bits -1) >> 3) - bs->buf >= bs->buf_len)
+ bits = ((bs->buf_len - (bs->cur.b - bs->buf)) << 3)
+ - bs->cur.bit - bs->pad_bits;
+
+ if (bits == 0) {
+ *out = 0;
+ return 0;
+ }
+
+ /* get the high bits */
+ val = 0;
+ n = (bs->cur.bit + bits + 7) >> 3;
+ /* n may be at most 9, if cur.bit + bits > 64 */
+ /* which means this copies at most 8 byte */
+ if (n) {
+ memcpy(&val, bs->cur.b+1, n - 1);
+ val = le64_to_cpu(val) << (8 - bs->cur.bit);
+ }
+
+ /* we still need the low bits */
+ val |= bs->cur.b[0] >> bs->cur.bit;
+
+ /* and mask out bits we don't want */
+ val &= ~0ULL >> (64 - bits);
+
+ bitstream_cursor_advance(&bs->cur, bits);
+ *out = val;
+
+ return bits;
+}
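+
+/* Editorial round-trip sketch (not part of the original patch):
+ *
+ *	u8 buf[8] = { 0 };
+ *	struct bitstream bs;
+ *	u64 v;
+ *
+ *	bitstream_init(&bs, buf, sizeof(buf), 0);
+ *	bitstream_put_bits(&bs, 0x5, 3);      => buf[0] == 0x05
+ *	bitstream_put_bits(&bs, 0x3, 2);      => buf[0] == 0x1d
+ *
+ *	bitstream_init(&bs, buf, sizeof(buf), 0);
+ *	bitstream_get_bits(&bs, &v, 3);       => v == 0x5
+ *	bitstream_get_bits(&bs, &v, 2);       => v == 0x3
+ *
+ * i.e. values are packed least significant bit first, back to back, with no
+ * byte alignment between them.
+ */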
+
+/* encodes @in as vli into @bs;
+
+ * return values
+ * > 0: number of bits successfully stored in bitstream
+ * -ENOBUFS @bs is full
+ * -EINVAL input zero (invalid)
+ * -EOVERFLOW input too large for this vli code (invalid)
+ */
+static inline int vli_encode_bits(struct bitstream *bs, u64 in)
+{
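+	/* the self-assignment below just silences a "may be used
+	 * uninitialized" warning; code is written by __vli_encode_bits()
+	 * whenever it returns > 0 */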
+ u64 code = code;
+ int bits = __vli_encode_bits(&code, in);
+
+ if (bits <= 0)
+ return bits;
+
+ return bitstream_put_bits(bs, code, bits);
+}
+
+#endif
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c
new file mode 100644
index 00000000..4d3e6f62
--- /dev/null
+++ b/drivers/block/drbd/drbd_worker.c
@@ -0,0 +1,1723 @@
+/*
+ drbd_worker.c
+
+ This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
+
+ Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
+ Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
+ Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
+
+ drbd is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ drbd is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with drbd; see the file COPYING. If not, write to
+ the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ */
+
+#include <linux/module.h>
+#include <linux/drbd.h>
+#include <linux/sched.h>
+#include <linux/wait.h>
+#include <linux/mm.h>
+#include <linux/memcontrol.h>
+#include <linux/mm_inline.h>
+#include <linux/slab.h>
+#include <linux/random.h>
+#include <linux/string.h>
+#include <linux/scatterlist.h>
+
+#include "drbd_int.h"
+#include "drbd_req.h"
+
+static int w_make_ov_request(struct drbd_conf *mdev, struct drbd_work *w, int cancel);
+static int w_make_resync_request(struct drbd_conf *mdev,
+ struct drbd_work *w, int cancel);
+
+
+
+/* endio handlers:
+ * drbd_md_io_complete (defined here)
+ * drbd_endio_pri (defined here)
+ * drbd_endio_sec (defined here)
+ * bm_async_io_complete (defined in drbd_bitmap.c)
+ *
+ * For all these callbacks, note the following:
+ * The callbacks will be called in irq context by the IDE drivers,
+ * and in Softirqs/Tasklets/BH context by the SCSI drivers.
+ * Try to get the locking right :)
+ *
+ */
+
+
+/* About the global_state_lock
+ Each state transition on a device holds a read lock. In case we have
+ to evaluate the sync after dependencies, we grab a write lock, because
+ we need stable states on all devices for that. */
+rwlock_t global_state_lock;
+
+/* used for synchronous meta data and bitmap IO
+ * submitted by drbd_md_sync_page_io()
+ */
+void drbd_md_io_complete(struct bio *bio, int error)
+{
+ struct drbd_md_io *md_io;
+
+ md_io = (struct drbd_md_io *)bio->bi_private;
+ md_io->error = error;
+
+ complete(&md_io->event);
+}
+
+/* reads on behalf of the partner,
+ * "submitted" by the receiver
+ */
+void drbd_endio_read_sec_final(struct drbd_epoch_entry *e) __releases(local)
+{
+ unsigned long flags = 0;
+ struct drbd_conf *mdev = e->mdev;
+
+ D_ASSERT(e->block_id != ID_VACANT);
+
+ spin_lock_irqsave(&mdev->req_lock, flags);
+ mdev->read_cnt += e->size >> 9;
+ list_del(&e->w.list);
+ if (list_empty(&mdev->read_ee))
+ wake_up(&mdev->ee_wait);
+ if (test_bit(__EE_WAS_ERROR, &e->flags))
+ __drbd_chk_io_error(mdev, false);
+ spin_unlock_irqrestore(&mdev->req_lock, flags);
+
+ drbd_queue_work(&mdev->data.work, &e->w);
+ put_ldev(mdev);
+}
+
+/* writes on behalf of the partner, or resync writes,
+ * "submitted" by the receiver, final stage. */
+static void drbd_endio_write_sec_final(struct drbd_epoch_entry *e) __releases(local)
+{
+ unsigned long flags = 0;
+ struct drbd_conf *mdev = e->mdev;
+ sector_t e_sector;
+ int do_wake;
+ int is_syncer_req;
+ int do_al_complete_io;
+
+ D_ASSERT(e->block_id != ID_VACANT);
+
+ /* after we moved e to done_ee,
+ * we may no longer access it,
+ * it may be freed/reused already!
+ * (as soon as we release the req_lock) */
+ e_sector = e->sector;
+ do_al_complete_io = e->flags & EE_CALL_AL_COMPLETE_IO;
+ is_syncer_req = is_syncer_block_id(e->block_id);
+
+ spin_lock_irqsave(&mdev->req_lock, flags);
+ mdev->writ_cnt += e->size >> 9;
+ list_del(&e->w.list); /* has been on active_ee or sync_ee */
+ list_add_tail(&e->w.list, &mdev->done_ee);
+
+ /* No hlist_del_init(&e->collision) here, we did not send the Ack yet,
+ * neither did we wake possibly waiting conflicting requests.
+ * done from "drbd_process_done_ee" within the appropriate w.cb
+ * (e_end_block/e_end_resync_block) or from _drbd_clear_done_ee */
+
+ do_wake = is_syncer_req
+ ? list_empty(&mdev->sync_ee)
+ : list_empty(&mdev->active_ee);
+
+ if (test_bit(__EE_WAS_ERROR, &e->flags))
+ __drbd_chk_io_error(mdev, false);
+ spin_unlock_irqrestore(&mdev->req_lock, flags);
+
+ if (is_syncer_req)
+ drbd_rs_complete_io(mdev, e_sector);
+
+ if (do_wake)
+ wake_up(&mdev->ee_wait);
+
+ if (do_al_complete_io)
+ drbd_al_complete_io(mdev, e_sector);
+
+ wake_asender(mdev);
+ put_ldev(mdev);
+}
+
+/* writes on behalf of the partner, or resync writes,
+ * "submitted" by the receiver.
+ */
+void drbd_endio_sec(struct bio *bio, int error)
+{
+ struct drbd_epoch_entry *e = bio->bi_private;
+ struct drbd_conf *mdev = e->mdev;
+ int uptodate = bio_flagged(bio, BIO_UPTODATE);
+ int is_write = bio_data_dir(bio) == WRITE;
+
+ if (error && __ratelimit(&drbd_ratelimit_state))
+ dev_warn(DEV, "%s: error=%d s=%llus\n",
+ is_write ? "write" : "read", error,
+ (unsigned long long)e->sector);
+ if (!error && !uptodate) {
+ if (__ratelimit(&drbd_ratelimit_state))
+ dev_warn(DEV, "%s: setting error to -EIO s=%llus\n",
+ is_write ? "write" : "read",
+ (unsigned long long)e->sector);
+ /* strange behavior of some lower level drivers...
+ * fail the request by clearing the uptodate flag,
+ * but do not return any error?! */
+ error = -EIO;
+ }
+
+ if (error)
+ set_bit(__EE_WAS_ERROR, &e->flags);
+
+ bio_put(bio); /* no need for the bio anymore */
+ if (atomic_dec_and_test(&e->pending_bios)) {
+ if (is_write)
+ drbd_endio_write_sec_final(e);
+ else
+ drbd_endio_read_sec_final(e);
+ }
+}
+
+/* read, readA or write requests on R_PRIMARY coming from drbd_make_request
+ */
+void drbd_endio_pri(struct bio *bio, int error)
+{
+ unsigned long flags;
+ struct drbd_request *req = bio->bi_private;
+ struct drbd_conf *mdev = req->mdev;
+ struct bio_and_error m;
+ enum drbd_req_event what;
+ int uptodate = bio_flagged(bio, BIO_UPTODATE);
+
+ if (!error && !uptodate) {
+ dev_warn(DEV, "p %s: setting error to -EIO\n",
+ bio_data_dir(bio) == WRITE ? "write" : "read");
+ /* strange behavior of some lower level drivers...
+ * fail the request by clearing the uptodate flag,
+ * but do not return any error?! */
+ error = -EIO;
+ }
+
+ /* to avoid recursion in __req_mod */
+ if (unlikely(error)) {
+ what = (bio_data_dir(bio) == WRITE)
+ ? write_completed_with_error
+ : (bio_rw(bio) == READ)
+ ? read_completed_with_error
+ : read_ahead_completed_with_error;
+ } else
+ what = completed_ok;
+
+ bio_put(req->private_bio);
+ req->private_bio = ERR_PTR(error);
+
+ /* not req_mod(), we need irqsave here! */
+ spin_lock_irqsave(&mdev->req_lock, flags);
+ __req_mod(req, what, &m);
+ spin_unlock_irqrestore(&mdev->req_lock, flags);
+
+ if (m.bio)
+ complete_master_bio(mdev, &m);
+}
+
+int w_read_retry_remote(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
+{
+ struct drbd_request *req = container_of(w, struct drbd_request, w);
+
+ /* We should not detach for read io-error,
+ * but try to WRITE the P_DATA_REPLY to the failed location,
+ * to give the disk the chance to relocate that block */
+
+ spin_lock_irq(&mdev->req_lock);
+ if (cancel || mdev->state.pdsk != D_UP_TO_DATE) {
+ _req_mod(req, read_retry_remote_canceled);
+ spin_unlock_irq(&mdev->req_lock);
+ return 1;
+ }
+ spin_unlock_irq(&mdev->req_lock);
+
+ return w_send_read_req(mdev, w, 0);
+}
+
+void drbd_csum_ee(struct drbd_conf *mdev, struct crypto_hash *tfm, struct drbd_epoch_entry *e, void *digest)
+{
+ struct hash_desc desc;
+ struct scatterlist sg;
+ struct page *page = e->pages;
+ struct page *tmp;
+ unsigned len;
+
+ desc.tfm = tfm;
+ desc.flags = 0;
+
+ sg_init_table(&sg, 1);
+ crypto_hash_init(&desc);
+
+ while ((tmp = page_chain_next(page))) {
+ /* all but the last page will be fully used */
+ sg_set_page(&sg, page, PAGE_SIZE, 0);
+ crypto_hash_update(&desc, &sg, sg.length);
+ page = tmp;
+ }
+ /* and now the last, possibly only partially used page */
+ len = e->size & (PAGE_SIZE - 1);
+ sg_set_page(&sg, page, len ?: PAGE_SIZE, 0);
+ crypto_hash_update(&desc, &sg, sg.length);
+ crypto_hash_final(&desc, digest);
+}
+
+void drbd_csum_bio(struct drbd_conf *mdev, struct crypto_hash *tfm, struct bio *bio, void *digest)
+{
+ struct hash_desc desc;
+ struct scatterlist sg;
+ struct bio_vec *bvec;
+ int i;
+
+ desc.tfm = tfm;
+ desc.flags = 0;
+
+ sg_init_table(&sg, 1);
+ crypto_hash_init(&desc);
+
+ __bio_for_each_segment(bvec, bio, i, 0) {
+ sg_set_page(&sg, bvec->bv_page, bvec->bv_len, bvec->bv_offset);
+ crypto_hash_update(&desc, &sg, sg.length);
+ }
+ crypto_hash_final(&desc, digest);
+}
+
+/* TODO merge common code with w_e_end_ov_req */
+int w_e_send_csum(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
+{
+ struct drbd_epoch_entry *e = container_of(w, struct drbd_epoch_entry, w);
+ int digest_size;
+ void *digest;
+ int ok = 1;
+
+ D_ASSERT(e->block_id == DRBD_MAGIC + 0xbeef);
+
+ if (unlikely(cancel))
+ goto out;
+
+ if (unlikely((e->flags & EE_WAS_ERROR) != 0))
+ goto out;
+
+ digest_size = crypto_hash_digestsize(mdev->csums_tfm);
+ digest = kmalloc(digest_size, GFP_NOIO);
+ if (digest) {
+ sector_t sector = e->sector;
+ unsigned int size = e->size;
+ drbd_csum_ee(mdev, mdev->csums_tfm, e, digest);
+ /* Free e and pages before send.
+ * In case we block on congestion, we could otherwise run into
+ * some distributed deadlock, if the other side blocks on
+ * congestion as well, because our receiver blocks in
+ * drbd_pp_alloc due to pp_in_use > max_buffers. */
+ drbd_free_ee(mdev, e);
+ e = NULL;
+ inc_rs_pending(mdev);
+ ok = drbd_send_drequest_csum(mdev, sector, size,
+ digest, digest_size,
+ P_CSUM_RS_REQUEST);
+ kfree(digest);
+ } else {
+ dev_err(DEV, "kmalloc() of digest failed.\n");
+ ok = 0;
+ }
+
+out:
+ if (e)
+ drbd_free_ee(mdev, e);
+
+ if (unlikely(!ok))
+ dev_err(DEV, "drbd_send_drequest(..., csum) failed\n");
+ return ok;
+}
+
+#define GFP_TRY (__GFP_HIGHMEM | __GFP_NOWARN)
+
+static int read_for_csum(struct drbd_conf *mdev, sector_t sector, int size)
+{
+ struct drbd_epoch_entry *e;
+
+ if (!get_ldev(mdev))
+ return -EIO;
+
+ if (drbd_rs_should_slow_down(mdev, sector))
+ goto defer;
+
+ /* GFP_TRY, because if there is no memory available right now, this may
+ * be rescheduled for later. It is "only" background resync, after all. */
+ e = drbd_alloc_ee(mdev, DRBD_MAGIC+0xbeef, sector, size, GFP_TRY);
+ if (!e)
+ goto defer;
+
+ e->w.cb = w_e_send_csum;
+ spin_lock_irq(&mdev->req_lock);
+ list_add(&e->w.list, &mdev->read_ee);
+ spin_unlock_irq(&mdev->req_lock);
+
+ atomic_add(size >> 9, &mdev->rs_sect_ev);
+ if (drbd_submit_ee(mdev, e, READ, DRBD_FAULT_RS_RD) == 0)
+ return 0;
+
+ /* If it failed because of ENOMEM, retry should help. If it failed
+ * because bio_add_page failed (probably broken lower level driver),
+ * retry may or may not help.
+ * If it does not, you may need to force disconnect. */
+ spin_lock_irq(&mdev->req_lock);
+ list_del(&e->w.list);
+ spin_unlock_irq(&mdev->req_lock);
+
+ drbd_free_ee(mdev, e);
+defer:
+ put_ldev(mdev);
+ return -EAGAIN;
+}
+
+int w_resync_timer(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
+{
+ switch (mdev->state.conn) {
+ case C_VERIFY_S:
+ w_make_ov_request(mdev, w, cancel);
+ break;
+ case C_SYNC_TARGET:
+ w_make_resync_request(mdev, w, cancel);
+ break;
+ }
+
+ return 1;
+}
+
+void resync_timer_fn(unsigned long data)
+{
+ struct drbd_conf *mdev = (struct drbd_conf *) data;
+
+ if (list_empty(&mdev->resync_work.list))
+ drbd_queue_work(&mdev->data.work, &mdev->resync_work);
+}
+
+static void fifo_set(struct fifo_buffer *fb, int value)
+{
+ int i;
+
+ for (i = 0; i < fb->size; i++)
+ fb->values[i] = value;
+}
+
+static int fifo_push(struct fifo_buffer *fb, int value)
+{
+ int ov;
+
+ ov = fb->values[fb->head_index];
+ fb->values[fb->head_index++] = value;
+
+ if (fb->head_index >= fb->size)
+ fb->head_index = 0;
+
+ return ov;
+}
+
+static void fifo_add_val(struct fifo_buffer *fb, int value)
+{
+ int i;
+
+ for (i = 0; i < fb->size; i++)
+ fb->values[i] += value;
+}
+
+static int drbd_rs_controller(struct drbd_conf *mdev)
+{
+ unsigned int sect_in; /* Number of sectors that came in since the last turn */
+ unsigned int want; /* The number of sectors we want in the proxy */
+ int req_sect; /* Number of sectors to request in this turn */
+ int correction; /* Number of sectors more we need in the proxy*/
+ int cps; /* correction per invocation of drbd_rs_controller() */
+ int steps; /* Number of time steps to plan ahead */
+ int curr_corr;
+ int max_sect;
+
+ sect_in = atomic_xchg(&mdev->rs_sect_in, 0); /* Number of sectors that came in */
+ mdev->rs_in_flight -= sect_in;
+
+ spin_lock(&mdev->peer_seq_lock); /* get an atomic view on mdev->rs_plan_s */
+
+ steps = mdev->rs_plan_s.size; /* (mdev->sync_conf.c_plan_ahead * 10 * SLEEP_TIME) / HZ; */
+
+ if (mdev->rs_in_flight + sect_in == 0) { /* At start of resync */
+ want = ((mdev->sync_conf.rate * 2 * SLEEP_TIME) / HZ) * steps;
+ } else { /* normal path */
+ want = mdev->sync_conf.c_fill_target ? mdev->sync_conf.c_fill_target :
+ sect_in * mdev->sync_conf.c_delay_target * HZ / (SLEEP_TIME * 10);
+ }
+
+ correction = want - mdev->rs_in_flight - mdev->rs_planed;
+
+ /* Plan ahead */
+ cps = correction / steps;
+ fifo_add_val(&mdev->rs_plan_s, cps);
+ mdev->rs_planed += cps * steps;
+
+ /* What we do in this step */
+ curr_corr = fifo_push(&mdev->rs_plan_s, 0);
+ spin_unlock(&mdev->peer_seq_lock);
+ mdev->rs_planed -= curr_corr;
+
+ req_sect = sect_in + curr_corr;
+ if (req_sect < 0)
+ req_sect = 0;
+
+ max_sect = (mdev->sync_conf.c_max_rate * 2 * SLEEP_TIME) / HZ;
+ if (req_sect > max_sect)
+ req_sect = max_sect;
+
+ /*
+ dev_warn(DEV, "si=%u if=%d wa=%u co=%d st=%d cps=%d pl=%d cc=%d rs=%d\n",
+ sect_in, mdev->rs_in_flight, want, correction,
+ steps, cps, mdev->rs_planed, curr_corr, req_sect);
+ */
+
+ return req_sect;
+}
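+
+/* Editorial illustration (not part of the original patch), using made-up
+ * numbers: with SLEEP_TIME = HZ/10 (one controller tick every 100ms),
+ * c_fill_target = 0 and c_delay_target = 10 (1 second), suppose 800 sectors
+ * (400 KiB) of resync replies arrived during the last tick:
+ *
+ *	want       = 800 * 10 * HZ / (SLEEP_TIME * 10) = 8000 sectors in flight
+ *	correction = want - rs_in_flight - rs_planed
+ *
+ * The correction is spread over "steps" future ticks via the plan fifo, so
+ * the request rate ramps towards keeping roughly one c_delay_target worth of
+ * data in flight rather than jumping to it immediately.
+ */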
+
+static int drbd_rs_number_requests(struct drbd_conf *mdev)
+{
+ int number;
+ if (mdev->rs_plan_s.size) { /* mdev->sync_conf.c_plan_ahead */
+ number = drbd_rs_controller(mdev) >> (BM_BLOCK_SHIFT - 9);
+ mdev->c_sync_rate = number * HZ * (BM_BLOCK_SIZE / 1024) / SLEEP_TIME;
+ } else {
+ mdev->c_sync_rate = mdev->sync_conf.rate;
+ number = SLEEP_TIME * mdev->c_sync_rate / ((BM_BLOCK_SIZE / 1024) * HZ);
+ }
+
+ /* ignore the amount of pending requests, the resync controller should
+ * throttle down to incoming reply rate soon enough anyways. */
+ return number;
+}
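+
+/* Editorial illustration (not part of the original patch): in the fixed-rate
+ * branch above, with BM_BLOCK_SIZE = 4 KiB and SLEEP_TIME = HZ/10, a
+ * configured rate of 10240 KiB/s yields
+ *
+ *	number = (HZ/10) * 10240 / (4 * HZ) = 256
+ *
+ * i.e. 256 resync requests of 4 KiB per 100ms tick, about 10 MiB/s.
+ */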
+
+static int w_make_resync_request(struct drbd_conf *mdev,
+ struct drbd_work *w, int cancel)
+{
+ unsigned long bit;
+ sector_t sector;
+ const sector_t capacity = drbd_get_capacity(mdev->this_bdev);
+ int max_bio_size;
+ int number, rollback_i, size;
+ int align, queued, sndbuf;
+ int i = 0;
+
+ if (unlikely(cancel))
+ return 1;
+
+ if (mdev->rs_total == 0) {
+ /* empty resync? */
+ drbd_resync_finished(mdev);
+ return 1;
+ }
+
+ if (!get_ldev(mdev)) {
+ /* Since we only need to access mdev->rsync, a
+ get_ldev_if_state(mdev, D_FAILED) would be sufficient, but
+ continuing resync with a broken disk makes no sense at
+ all */
+ dev_err(DEV, "Disk broke down during resync!\n");
+ return 1;
+ }
+
+ max_bio_size = queue_max_hw_sectors(mdev->rq_queue) << 9;
+ number = drbd_rs_number_requests(mdev);
+ if (number == 0)
+ goto requeue;
+
+ for (i = 0; i < number; i++) {
+ /* Stop generating RS requests, when half of the send buffer is filled */
+ mutex_lock(&mdev->data.mutex);
+ if (mdev->data.socket) {
+ queued = mdev->data.socket->sk->sk_wmem_queued;
+ sndbuf = mdev->data.socket->sk->sk_sndbuf;
+ } else {
+ queued = 1;
+ sndbuf = 0;
+ }
+ mutex_unlock(&mdev->data.mutex);
+ if (queued > sndbuf / 2)
+ goto requeue;
+
+next_sector:
+ size = BM_BLOCK_SIZE;
+ bit = drbd_bm_find_next(mdev, mdev->bm_resync_fo);
+
+ if (bit == DRBD_END_OF_BITMAP) {
+ mdev->bm_resync_fo = drbd_bm_bits(mdev);
+ put_ldev(mdev);
+ return 1;
+ }
+
+ sector = BM_BIT_TO_SECT(bit);
+
+ if (drbd_rs_should_slow_down(mdev, sector) ||
+ drbd_try_rs_begin_io(mdev, sector)) {
+ mdev->bm_resync_fo = bit;
+ goto requeue;
+ }
+ mdev->bm_resync_fo = bit + 1;
+
+ if (unlikely(drbd_bm_test_bit(mdev, bit) == 0)) {
+ drbd_rs_complete_io(mdev, sector);
+ goto next_sector;
+ }
+
+#if DRBD_MAX_BIO_SIZE > BM_BLOCK_SIZE
+ /* try to find some adjacent bits.
+ * we stop if we have already the maximum req size.
+ *
+ * Additionally always align bigger requests, in order to
+ * be prepared for all stripe sizes of software RAIDs.
+ */
+ align = 1;
+ rollback_i = i;
+ for (;;) {
+ if (size + BM_BLOCK_SIZE > max_bio_size)
+ break;
+
+ /* Be always aligned */
+ if (sector & ((1<<(align+3))-1))
+ break;
+
+ /* do not cross extent boundaries */
+ if (((bit+1) & BM_BLOCKS_PER_BM_EXT_MASK) == 0)
+ break;
+ /* now, is it actually dirty, after all?
+ * caution, drbd_bm_test_bit is tri-state for some
+ * obscure reason; ( b == 0 ) would get the out-of-band
+ * only accidentally right because of the "oddly sized"
+ * adjustment below */
+ if (drbd_bm_test_bit(mdev, bit+1) != 1)
+ break;
+ bit++;
+ size += BM_BLOCK_SIZE;
+ if ((BM_BLOCK_SIZE << align) <= size)
+ align++;
+ i++;
+ }
+ /* if we merged some,
+ * reset the offset to start the next drbd_bm_find_next from */
+ if (size > BM_BLOCK_SIZE)
+ mdev->bm_resync_fo = bit + 1;
+#endif
+
+ /* adjust very last sectors, in case we are oddly sized */
+ if (sector + (size>>9) > capacity)
+ size = (capacity-sector)<<9;
+ if (mdev->agreed_pro_version >= 89 && mdev->csums_tfm) {
+ switch (read_for_csum(mdev, sector, size)) {
+ case -EIO: /* Disk failure */
+ put_ldev(mdev);
+ return 0;
+ case -EAGAIN: /* allocation failed, or ldev busy */
+ drbd_rs_complete_io(mdev, sector);
+ mdev->bm_resync_fo = BM_SECT_TO_BIT(sector);
+ i = rollback_i;
+ goto requeue;
+ case 0:
+ /* everything ok */
+ break;
+ default:
+ BUG();
+ }
+ } else {
+ inc_rs_pending(mdev);
+ if (!drbd_send_drequest(mdev, P_RS_DATA_REQUEST,
+ sector, size, ID_SYNCER)) {
+ dev_err(DEV, "drbd_send_drequest() failed, aborting...\n");
+ dec_rs_pending(mdev);
+ put_ldev(mdev);
+ return 0;
+ }
+ }
+ }
+
+ if (mdev->bm_resync_fo >= drbd_bm_bits(mdev)) {
+ /* last syncer _request_ was sent,
+ * but the P_RS_DATA_REPLY not yet received. sync will end (and
+ * next sync group will resume), as soon as we receive the last
+ * resync data block, and the last bit is cleared.
+ * until then resync "work" is "inactive" ...
+ */
+ put_ldev(mdev);
+ return 1;
+ }
+
+ requeue:
+ mdev->rs_in_flight += (i << (BM_BLOCK_SHIFT - 9));
+ mod_timer(&mdev->resync_timer, jiffies + SLEEP_TIME);
+ put_ldev(mdev);
+ return 1;
+}
+
+static int w_make_ov_request(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
+{
+ int number, i, size;
+ sector_t sector;
+ const sector_t capacity = drbd_get_capacity(mdev->this_bdev);
+
+ if (unlikely(cancel))
+ return 1;
+
+ number = drbd_rs_number_requests(mdev);
+
+ sector = mdev->ov_position;
+ for (i = 0; i < number; i++) {
+ if (sector >= capacity) {
+ return 1;
+ }
+
+ size = BM_BLOCK_SIZE;
+
+ if (drbd_rs_should_slow_down(mdev, sector) ||
+ drbd_try_rs_begin_io(mdev, sector)) {
+ mdev->ov_position = sector;
+ goto requeue;
+ }
+
+ if (sector + (size>>9) > capacity)
+ size = (capacity-sector)<<9;
+
+ inc_rs_pending(mdev);
+ if (!drbd_send_ov_request(mdev, sector, size)) {
+ dec_rs_pending(mdev);
+ return 0;
+ }
+ sector += BM_SECT_PER_BIT;
+ }
+ mdev->ov_position = sector;
+
+ requeue:
+ mdev->rs_in_flight += (i << (BM_BLOCK_SHIFT - 9));
+ mod_timer(&mdev->resync_timer, jiffies + SLEEP_TIME);
+ return 1;
+}
+
+
+void start_resync_timer_fn(unsigned long data)
+{
+ struct drbd_conf *mdev = (struct drbd_conf *) data;
+
+ drbd_queue_work(&mdev->data.work, &mdev->start_resync_work);
+}
+
+int w_start_resync(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
+{
+ if (atomic_read(&mdev->unacked_cnt) || atomic_read(&mdev->rs_pending_cnt)) {
+ dev_warn(DEV, "w_start_resync later...\n");
+ mdev->start_resync_timer.expires = jiffies + HZ/10;
+ add_timer(&mdev->start_resync_timer);
+ return 1;
+ }
+
+ drbd_start_resync(mdev, C_SYNC_SOURCE);
+ clear_bit(AHEAD_TO_SYNC_SOURCE, &mdev->current_epoch->flags);
+ return 1;
+}
+
+int w_ov_finished(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
+{
+ kfree(w);
+ ov_oos_print(mdev);
+ drbd_resync_finished(mdev);
+
+ return 1;
+}
+
+static int w_resync_finished(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
+{
+ kfree(w);
+
+ drbd_resync_finished(mdev);
+
+ return 1;
+}
+
+static void ping_peer(struct drbd_conf *mdev)
+{
+ clear_bit(GOT_PING_ACK, &mdev->flags);
+ request_ping(mdev);
+ wait_event(mdev->misc_wait,
+ test_bit(GOT_PING_ACK, &mdev->flags) || mdev->state.conn < C_CONNECTED);
+}
+
+int drbd_resync_finished(struct drbd_conf *mdev)
+{
+ unsigned long db, dt, dbdt;
+ unsigned long n_oos;
+ union drbd_state os, ns;
+ struct drbd_work *w;
+ char *khelper_cmd = NULL;
+ int verify_done = 0;
+
+ /* Remove all elements from the resync LRU. Since future actions
+ * might set bits in the (main) bitmap, then the entries in the
+ * resync LRU would be wrong. */
+ if (drbd_rs_del_all(mdev)) {
+ /* In case this is not possible now, most probably because
+ * there are P_RS_DATA_REPLY packets lingering on the worker's
+ * queue (or even the read operations for those packets
+ * are not finished by now). Retry in 100ms.
+
+ schedule_timeout_interruptible(HZ / 10);
+ w = kmalloc(sizeof(struct drbd_work), GFP_ATOMIC);
+ if (w) {
+ w->cb = w_resync_finished;
+ drbd_queue_work(&mdev->data.work, w);
+ return 1;
+ }
+ dev_err(DEV, "Warn failed to drbd_rs_del_all() and to kmalloc(w).\n");
+ }
+
+ dt = (jiffies - mdev->rs_start - mdev->rs_paused) / HZ;
+ if (dt <= 0)
+ dt = 1;
+ db = mdev->rs_total;
+ dbdt = Bit2KB(db/dt);
+ mdev->rs_paused /= HZ;
+
+ if (!get_ldev(mdev))
+ goto out;
+
+ ping_peer(mdev);
+
+ spin_lock_irq(&mdev->req_lock);
+ os = mdev->state;
+
+ verify_done = (os.conn == C_VERIFY_S || os.conn == C_VERIFY_T);
+
+ /* This protects us against multiple calls (that can happen in the presence
+ of application IO), and against connectivity loss just before we arrive here. */
+ if (os.conn <= C_CONNECTED)
+ goto out_unlock;
+
+ ns = os;
+ ns.conn = C_CONNECTED;
+
+ dev_info(DEV, "%s done (total %lu sec; paused %lu sec; %lu K/sec)\n",
+ verify_done ? "Online verify " : "Resync",
+ dt + mdev->rs_paused, mdev->rs_paused, dbdt);
+
+ n_oos = drbd_bm_total_weight(mdev);
+
+ if (os.conn == C_VERIFY_S || os.conn == C_VERIFY_T) {
+ if (n_oos) {
+ dev_alert(DEV, "Online verify found %lu %dk block out of sync!\n",
+ n_oos, Bit2KB(1));
+ khelper_cmd = "out-of-sync";
+ }
+ } else {
+ D_ASSERT((n_oos - mdev->rs_failed) == 0);
+
+ if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T)
+ khelper_cmd = "after-resync-target";
+
+ if (mdev->csums_tfm && mdev->rs_total) {
+ const unsigned long s = mdev->rs_same_csum;
+ const unsigned long t = mdev->rs_total;
+ const int ratio =
+ (t == 0) ? 0 :
+ (t < 100000) ? ((s*100)/t) : (s/(t/100));
+ dev_info(DEV, "%u %% had equal checksums, eliminated: %luK; "
+ "transferred %luK total %luK\n",
+ ratio,
+ Bit2KB(mdev->rs_same_csum),
+ Bit2KB(mdev->rs_total - mdev->rs_same_csum),
+ Bit2KB(mdev->rs_total));
+ }
+ }
+
+ if (mdev->rs_failed) {
+ dev_info(DEV, " %lu failed blocks\n", mdev->rs_failed);
+
+ if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T) {
+ ns.disk = D_INCONSISTENT;
+ ns.pdsk = D_UP_TO_DATE;
+ } else {
+ ns.disk = D_UP_TO_DATE;
+ ns.pdsk = D_INCONSISTENT;
+ }
+ } else {
+ ns.disk = D_UP_TO_DATE;
+ ns.pdsk = D_UP_TO_DATE;
+
+ if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T) {
+ if (mdev->p_uuid) {
+ int i;
+ for (i = UI_BITMAP ; i <= UI_HISTORY_END ; i++)
+ _drbd_uuid_set(mdev, i, mdev->p_uuid[i]);
+ drbd_uuid_set(mdev, UI_BITMAP, mdev->ldev->md.uuid[UI_CURRENT]);
+ _drbd_uuid_set(mdev, UI_CURRENT, mdev->p_uuid[UI_CURRENT]);
+ } else {
+ dev_err(DEV, "mdev->p_uuid is NULL! BUG\n");
+ }
+ }
+
+ if (!(os.conn == C_VERIFY_S || os.conn == C_VERIFY_T)) {
+ /* for verify runs, we don't update uuids here,
+ * so there would be nothing to report. */
+ drbd_uuid_set_bm(mdev, 0UL);
+ drbd_print_uuids(mdev, "updated UUIDs");
+ if (mdev->p_uuid) {
+ /* Now the two UUID sets are equal, update what we
+ * know of the peer. */
+ int i;
+ for (i = UI_CURRENT ; i <= UI_HISTORY_END ; i++)
+ mdev->p_uuid[i] = mdev->ldev->md.uuid[i];
+ }
+ }
+ }
+
+ _drbd_set_state(mdev, ns, CS_VERBOSE, NULL);
+out_unlock:
+ spin_unlock_irq(&mdev->req_lock);
+ put_ldev(mdev);
+out:
+ mdev->rs_total = 0;
+ mdev->rs_failed = 0;
+ mdev->rs_paused = 0;
+ if (verify_done)
+ mdev->ov_start_sector = 0;
+
+ drbd_md_sync(mdev);
+
+ if (khelper_cmd)
+ drbd_khelper(mdev, khelper_cmd);
+
+ return 1;
+}
+
+/* helper */
+static void move_to_net_ee_or_free(struct drbd_conf *mdev, struct drbd_epoch_entry *e)
+{
+ if (drbd_ee_has_active_page(e)) {
+ /* This might happen if sendpage() has not finished */
+ int i = (e->size + PAGE_SIZE -1) >> PAGE_SHIFT;
+ atomic_add(i, &mdev->pp_in_use_by_net);
+ atomic_sub(i, &mdev->pp_in_use);
+ spin_lock_irq(&mdev->req_lock);
+ list_add_tail(&e->w.list, &mdev->net_ee);
+ spin_unlock_irq(&mdev->req_lock);
+ wake_up(&drbd_pp_wait);
+ } else
+ drbd_free_ee(mdev, e);
+}
+
+/**
+ * w_e_end_data_req() - Worker callback, to send a P_DATA_REPLY packet in response to a P_DATA_REQUEST
+ * @mdev: DRBD device.
+ * @w: work object.
+ * @cancel: The connection will be closed anyways
+ */
+int w_e_end_data_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
+{
+ struct drbd_epoch_entry *e = container_of(w, struct drbd_epoch_entry, w);
+ int ok;
+
+ if (unlikely(cancel)) {
+ drbd_free_ee(mdev, e);
+ dec_unacked(mdev);
+ return 1;
+ }
+
+ if (likely((e->flags & EE_WAS_ERROR) == 0)) {
+ ok = drbd_send_block(mdev, P_DATA_REPLY, e);
+ } else {
+ if (__ratelimit(&drbd_ratelimit_state))
+ dev_err(DEV, "Sending NegDReply. sector=%llus.\n",
+ (unsigned long long)e->sector);
+
+ ok = drbd_send_ack(mdev, P_NEG_DREPLY, e);
+ }
+
+ dec_unacked(mdev);
+
+ move_to_net_ee_or_free(mdev, e);
+
+ if (unlikely(!ok))
+ dev_err(DEV, "drbd_send_block() failed\n");
+ return ok;
+}
+
+/**
+ * w_e_end_rsdata_req() - Worker callback to send a P_RS_DATA_REPLY packet in response to a P_RS_DATA_REQUEST
+ * @mdev: DRBD device.
+ * @w: work object.
+ * @cancel: The connection will be closed anyways
+ */
+int w_e_end_rsdata_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
+{
+ struct drbd_epoch_entry *e = container_of(w, struct drbd_epoch_entry, w);
+ int ok;
+
+ if (unlikely(cancel)) {
+ drbd_free_ee(mdev, e);
+ dec_unacked(mdev);
+ return 1;
+ }
+
+ if (get_ldev_if_state(mdev, D_FAILED)) {
+ drbd_rs_complete_io(mdev, e->sector);
+ put_ldev(mdev);
+ }
+
+ if (mdev->state.conn == C_AHEAD) {
+ ok = drbd_send_ack(mdev, P_RS_CANCEL, e);
+ } else if (likely((e->flags & EE_WAS_ERROR) == 0)) {
+ if (likely(mdev->state.pdsk >= D_INCONSISTENT)) {
+ inc_rs_pending(mdev);
+ ok = drbd_send_block(mdev, P_RS_DATA_REPLY, e);
+ } else {
+ if (__ratelimit(&drbd_ratelimit_state))
+ dev_err(DEV, "Not sending RSDataReply, "
+ "partner DISKLESS!\n");
+ ok = 1;
+ }
+ } else {
+ if (__ratelimit(&drbd_ratelimit_state))
+ dev_err(DEV, "Sending NegRSDReply. sector %llus.\n",
+ (unsigned long long)e->sector);
+
+ ok = drbd_send_ack(mdev, P_NEG_RS_DREPLY, e);
+
+ /* update resync data with failure */
+ drbd_rs_failed_io(mdev, e->sector, e->size);
+ }
+
+ dec_unacked(mdev);
+
+ move_to_net_ee_or_free(mdev, e);
+
+ if (unlikely(!ok))
+ dev_err(DEV, "drbd_send_block() failed\n");
+ return ok;
+}
+
+int w_e_end_csum_rs_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
+{
+ struct drbd_epoch_entry *e = container_of(w, struct drbd_epoch_entry, w);
+ struct digest_info *di;
+ int digest_size;
+ void *digest = NULL;
+ int ok, eq = 0;
+
+ if (unlikely(cancel)) {
+ drbd_free_ee(mdev, e);
+ dec_unacked(mdev);
+ return 1;
+ }
+
+ if (get_ldev(mdev)) {
+ drbd_rs_complete_io(mdev, e->sector);
+ put_ldev(mdev);
+ }
+
+ di = e->digest;
+
+ if (likely((e->flags & EE_WAS_ERROR) == 0)) {
+ /* quick hack to try to avoid a race against reconfiguration.
+ * a real fix would be much more involved,
+ * introducing more locking mechanisms */
+ if (mdev->csums_tfm) {
+ digest_size = crypto_hash_digestsize(mdev->csums_tfm);
+ D_ASSERT(digest_size == di->digest_size);
+ digest = kmalloc(digest_size, GFP_NOIO);
+ }
+ if (digest) {
+ drbd_csum_ee(mdev, mdev->csums_tfm, e, digest);
+ eq = !memcmp(digest, di->digest, digest_size);
+ kfree(digest);
+ }
+
+ if (eq) {
+ drbd_set_in_sync(mdev, e->sector, e->size);
+ /* rs_same_csums unit is BM_BLOCK_SIZE */
+ mdev->rs_same_csum += e->size >> BM_BLOCK_SHIFT;
+ ok = drbd_send_ack(mdev, P_RS_IS_IN_SYNC, e);
+ } else {
+ inc_rs_pending(mdev);
+ e->block_id = ID_SYNCER; /* By setting block_id, digest pointer becomes invalid! */
+ e->flags &= ~EE_HAS_DIGEST; /* This e no longer has a digest pointer */
+ kfree(di);
+ ok = drbd_send_block(mdev, P_RS_DATA_REPLY, e);
+ }
+ } else {
+ ok = drbd_send_ack(mdev, P_NEG_RS_DREPLY, e);
+ if (__ratelimit(&drbd_ratelimit_state))
+ dev_err(DEV, "Sending NegDReply. I guess it gets messy.\n");
+ }
+
+ dec_unacked(mdev);
+ move_to_net_ee_or_free(mdev, e);
+
+ if (unlikely(!ok))
+ dev_err(DEV, "drbd_send_block/ack() failed\n");
+ return ok;
+}
+
+/* TODO merge common code with w_e_send_csum */
+int w_e_end_ov_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
+{
+ struct drbd_epoch_entry *e = container_of(w, struct drbd_epoch_entry, w);
+ sector_t sector = e->sector;
+ unsigned int size = e->size;
+ int digest_size;
+ void *digest;
+ int ok = 1;
+
+ if (unlikely(cancel))
+ goto out;
+
+ digest_size = crypto_hash_digestsize(mdev->verify_tfm);
+ digest = kmalloc(digest_size, GFP_NOIO);
+ if (!digest) {
+ ok = 0; /* terminate the connection in case the allocation failed */
+ goto out;
+ }
+
+ if (likely(!(e->flags & EE_WAS_ERROR)))
+ drbd_csum_ee(mdev, mdev->verify_tfm, e, digest);
+ else
+ memset(digest, 0, digest_size);
+
+ /* Free e and pages before send.
+ * In case we block on congestion, we could otherwise run into
+ * some distributed deadlock, if the other side blocks on
+ * congestion as well, because our receiver blocks in
+ * drbd_pp_alloc due to pp_in_use > max_buffers. */
+ drbd_free_ee(mdev, e);
+ e = NULL;
+ inc_rs_pending(mdev);
+ ok = drbd_send_drequest_csum(mdev, sector, size,
+ digest, digest_size,
+ P_OV_REPLY);
+ if (!ok)
+ dec_rs_pending(mdev);
+ kfree(digest);
+
+out:
+ if (e)
+ drbd_free_ee(mdev, e);
+ dec_unacked(mdev);
+ return ok;
+}
+
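+/* Record a block that online verify found to differ: extend the current
+ * out-of-sync range if the new sector is contiguous with it, otherwise
+ * start a new range, and mark the block out of sync in the bitmap. */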
+void drbd_ov_oos_found(struct drbd_conf *mdev, sector_t sector, int size)
+{
+ if (mdev->ov_last_oos_start + mdev->ov_last_oos_size == sector) {
+ mdev->ov_last_oos_size += size>>9;
+ } else {
+ mdev->ov_last_oos_start = sector;
+ mdev->ov_last_oos_size = size>>9;
+ }
+ drbd_set_out_of_sync(mdev, sector, size);
+}
+
+int w_e_end_ov_reply(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
+{
+ struct drbd_epoch_entry *e = container_of(w, struct drbd_epoch_entry, w);
+ struct digest_info *di;
+ void *digest;
+ sector_t sector = e->sector;
+ unsigned int size = e->size;
+ int digest_size;
+ int ok, eq = 0;
+
+ if (unlikely(cancel)) {
+ drbd_free_ee(mdev, e);
+ dec_unacked(mdev);
+ return 1;
+ }
+
+ /* after "cancel", because after drbd_disconnect/drbd_rs_cancel_all
+ * the resync lru has been cleaned up already */
+ if (get_ldev(mdev)) {
+ drbd_rs_complete_io(mdev, e->sector);
+ put_ldev(mdev);
+ }
+
+ di = e->digest;
+
+ if (likely((e->flags & EE_WAS_ERROR) == 0)) {
+ digest_size = crypto_hash_digestsize(mdev->verify_tfm);
+ digest = kmalloc(digest_size, GFP_NOIO);
+ if (digest) {
+ drbd_csum_ee(mdev, mdev->verify_tfm, e, digest);
+
+ D_ASSERT(digest_size == di->digest_size);
+ eq = !memcmp(digest, di->digest, digest_size);
+ kfree(digest);
+ }
+ }
+
+ /* Free e and pages before send.
+ * In case we block on congestion, we could otherwise run into
+ * some distributed deadlock, if the other side blocks on
+ * congestion as well, because our receiver blocks in
+ * drbd_pp_alloc due to pp_in_use > max_buffers. */
+ drbd_free_ee(mdev, e);
+ if (!eq)
+ drbd_ov_oos_found(mdev, sector, size);
+ else
+ ov_oos_print(mdev);
+
+ ok = drbd_send_ack_ex(mdev, P_OV_RESULT, sector, size,
+ eq ? ID_IN_SYNC : ID_OUT_OF_SYNC);
+
+ dec_unacked(mdev);
+
+ --mdev->ov_left;
+
+ /* let's advance progress step marks only for every other megabyte */
+ if ((mdev->ov_left & 0x200) == 0x200)
+ drbd_advance_rs_marks(mdev, mdev->ov_left);
+
+ if (mdev->ov_left == 0) {
+ ov_oos_print(mdev);
+ drbd_resync_finished(mdev);
+ }
+
+ return ok;
+}
+
+int w_prev_work_done(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
+{
+ struct drbd_wq_barrier *b = container_of(w, struct drbd_wq_barrier, w);
+ complete(&b->done);
+ return 1;
+}
+
+int w_send_barrier(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
+{
+ struct drbd_tl_epoch *b = container_of(w, struct drbd_tl_epoch, w);
+ struct p_barrier *p = &mdev->data.sbuf.barrier;
+ int ok = 1;
+
+ /* really avoid racing with tl_clear. w.cb may have been referenced
+ * just before it was reassigned and re-queued, so double check that.
+ * actually, this race was harmless, since we only try to send the
+ * barrier packet here, and otherwise do nothing with the object.
+ * but compare with the head of w_clear_epoch */
+ spin_lock_irq(&mdev->req_lock);
+ if (w->cb != w_send_barrier || mdev->state.conn < C_CONNECTED)
+ cancel = 1;
+ spin_unlock_irq(&mdev->req_lock);
+ if (cancel)
+ return 1;
+
+ if (!drbd_get_data_sock(mdev))
+ return 0;
+ p->barrier = b->br_number;
+ /* inc_ap_pending was done where this was queued.
+ * dec_ap_pending will be done in got_BarrierAck
+ * or (on connection loss) in w_clear_epoch. */
+ ok = _drbd_send_cmd(mdev, mdev->data.socket, P_BARRIER,
+ (struct p_header80 *)p, sizeof(*p), 0);
+ drbd_put_data_sock(mdev);
+
+ return ok;
+}
+
+int w_send_write_hint(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
+{
+ if (cancel)
+ return 1;
+ return drbd_send_short_cmd(mdev, P_UNPLUG_REMOTE);
+}
+
+int w_send_oos(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
+{
+ struct drbd_request *req = container_of(w, struct drbd_request, w);
+ int ok;
+
+ if (unlikely(cancel)) {
+ req_mod(req, send_canceled);
+ return 1;
+ }
+
+ ok = drbd_send_oos(mdev, req);
+ req_mod(req, oos_handed_to_network);
+
+ return ok;
+}
+
+/**
+ * w_send_dblock() - Worker callback to send a P_DATA packet in order to mirror a write request
+ * @mdev: DRBD device.
+ * @w: work object.
+ * @cancel: The connection will be closed anyway
+ */
+int w_send_dblock(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
+{
+ struct drbd_request *req = container_of(w, struct drbd_request, w);
+ int ok;
+
+ if (unlikely(cancel)) {
+ req_mod(req, send_canceled);
+ return 1;
+ }
+
+ ok = drbd_send_dblock(mdev, req);
+ req_mod(req, ok ? handed_over_to_network : send_failed);
+
+ return ok;
+}
+
+/**
+ * w_send_read_req() - Worker callback to send a read request (P_DATA_REQUEST) packet
+ * @mdev: DRBD device.
+ * @w: work object.
+ * @cancel: The connection will be closed anyway
+ */
+int w_send_read_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
+{
+ struct drbd_request *req = container_of(w, struct drbd_request, w);
+ int ok;
+
+ if (unlikely(cancel)) {
+ req_mod(req, send_canceled);
+ return 1;
+ }
+
+ ok = drbd_send_drequest(mdev, P_DATA_REQUEST, req->sector, req->size,
+ (unsigned long)req);
+
+ if (!ok) {
+ /* ?? we set C_TIMEOUT or C_BROKEN_PIPE in drbd_send();
+ * so this is probably redundant */
+ if (mdev->state.conn >= C_CONNECTED)
+ drbd_force_state(mdev, NS(conn, C_NETWORK_FAILURE));
+ }
+ req_mod(req, ok ? handed_over_to_network : send_failed);
+
+ return ok;
+}
+
+int w_restart_disk_io(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
+{
+ struct drbd_request *req = container_of(w, struct drbd_request, w);
+
+ if (bio_data_dir(req->master_bio) == WRITE && req->rq_state & RQ_IN_ACT_LOG)
+ drbd_al_begin_io(mdev, req->sector);
+ /* Calling drbd_al_begin_io() out of the worker might deadlock
+ theoretically. Practically it cannot deadlock, since this is
+ only used when unfreezing IOs. All the extents of the requests
+ that made it into the TL are already active */
+
+ drbd_req_make_private_bio(req, req->master_bio);
+ req->private_bio->bi_bdev = mdev->ldev->backing_bdev;
+ generic_make_request(req->private_bio);
+
+ return 1;
+}
+
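+/* Follow the "resync after" dependency chain of @mdev. Returns 1 if this
+ * device may resync now, 0 if a device it depends on is itself resyncing
+ * or has its resync paused. */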
+static int _drbd_may_sync_now(struct drbd_conf *mdev)
+{
+ struct drbd_conf *odev = mdev;
+
+ while (1) {
+ if (odev->sync_conf.after == -1)
+ return 1;
+ odev = minor_to_mdev(odev->sync_conf.after);
+ ERR_IF(!odev) return 1;
+ if ((odev->state.conn >= C_SYNC_SOURCE &&
+ odev->state.conn <= C_PAUSED_SYNC_T) ||
+ odev->state.aftr_isp || odev->state.peer_isp ||
+ odev->state.user_isp)
+ return 0;
+ }
+}
+
+/**
+ * _drbd_pause_after() - Pause resync on all devices that may not resync now
+ * @mdev: DRBD device.
+ *
+ * Called from process context only (admin command and after_state_ch).
+ */
+static int _drbd_pause_after(struct drbd_conf *mdev)
+{
+ struct drbd_conf *odev;
+ int i, rv = 0;
+
+ for (i = 0; i < minor_count; i++) {
+ odev = minor_to_mdev(i);
+ if (!odev)
+ continue;
+ if (odev->state.conn == C_STANDALONE && odev->state.disk == D_DISKLESS)
+ continue;
+ if (!_drbd_may_sync_now(odev))
+ rv |= (__drbd_set_state(_NS(odev, aftr_isp, 1), CS_HARD, NULL)
+ != SS_NOTHING_TO_DO);
+ }
+
+ return rv;
+}
+
+/**
+ * _drbd_resume_next() - Resume resync on all devices that may resync now
+ * @mdev: DRBD device.
+ *
+ * Called from process context only (admin command and worker).
+ */
+static int _drbd_resume_next(struct drbd_conf *mdev)
+{
+ struct drbd_conf *odev;
+ int i, rv = 0;
+
+ for (i = 0; i < minor_count; i++) {
+ odev = minor_to_mdev(i);
+ if (!odev)
+ continue;
+ if (odev->state.conn == C_STANDALONE && odev->state.disk == D_DISKLESS)
+ continue;
+ if (odev->state.aftr_isp) {
+ if (_drbd_may_sync_now(odev))
+ rv |= (__drbd_set_state(_NS(odev, aftr_isp, 0),
+ CS_HARD, NULL)
+ != SS_NOTHING_TO_DO) ;
+ }
+ }
+ return rv;
+}
+
+void resume_next_sg(struct drbd_conf *mdev)
+{
+ write_lock_irq(&global_state_lock);
+ _drbd_resume_next(mdev);
+ write_unlock_irq(&global_state_lock);
+}
+
+void suspend_other_sg(struct drbd_conf *mdev)
+{
+ write_lock_irq(&global_state_lock);
+ _drbd_pause_after(mdev);
+ write_unlock_irq(&global_state_lock);
+}
+
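+/* Validate a new "resync after" dependency: the referenced minor must
+ * exist (or be -1 for "none") and must not create a dependency cycle. */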
+static int sync_after_error(struct drbd_conf *mdev, int o_minor)
+{
+ struct drbd_conf *odev;
+
+ if (o_minor == -1)
+ return NO_ERROR;
+ if (o_minor < -1 || minor_to_mdev(o_minor) == NULL)
+ return ERR_SYNC_AFTER;
+
+ /* check for loops */
+ odev = minor_to_mdev(o_minor);
+ while (1) {
+ if (odev == mdev)
+ return ERR_SYNC_AFTER_CYCLE;
+
+ /* dependency chain ends here, no cycles. */
+ if (odev->sync_conf.after == -1)
+ return NO_ERROR;
+
+ /* follow the dependency chain */
+ odev = minor_to_mdev(odev->sync_conf.after);
+ }
+}
+
+int drbd_alter_sa(struct drbd_conf *mdev, int na)
+{
+ int changes;
+ int retcode;
+
+ write_lock_irq(&global_state_lock);
+ retcode = sync_after_error(mdev, na);
+ if (retcode == NO_ERROR) {
+ mdev->sync_conf.after = na;
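+ /* re-evaluate pause/resume on all devices until the state settles */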
+ do {
+ changes = _drbd_pause_after(mdev);
+ changes |= _drbd_resume_next(mdev);
+ } while (changes);
+ }
+ write_unlock_irq(&global_state_lock);
+ return retcode;
+}
+
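+/* Reset the dynamic resync rate controller: clear the sector counters
+ * and the request plan FIFO before a new resync run starts. */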
+void drbd_rs_controller_reset(struct drbd_conf *mdev)
+{
+ atomic_set(&mdev->rs_sect_in, 0);
+ atomic_set(&mdev->rs_sect_ev, 0);
+ mdev->rs_in_flight = 0;
+ mdev->rs_planed = 0;
+ spin_lock(&mdev->peer_seq_lock);
+ fifo_set(&mdev->rs_plan_s, 0);
+ spin_unlock(&mdev->peer_seq_lock);
+}
+
+/**
+ * drbd_start_resync() - Start the resync process
+ * @mdev: DRBD device.
+ * @side: Either C_SYNC_SOURCE or C_SYNC_TARGET
+ *
+ * This function might bring you directly into one of the
+ * C_PAUSED_SYNC_* states.
+ */
+void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side)
+{
+ union drbd_state ns;
+ int r;
+
+ if (mdev->state.conn >= C_SYNC_SOURCE && mdev->state.conn < C_AHEAD) {
+ dev_err(DEV, "Resync already running!\n");
+ return;
+ }
+
+ if (mdev->state.conn < C_AHEAD) {
+ /* In case a previous resync run was aborted by an IO error/detach on the peer. */
+ drbd_rs_cancel_all(mdev);
+ /* This should be done when we abort the resync. We definitely do not
+ want to have this for connections going back and forth between
+ Ahead/Behind and SyncSource/SyncTarget */
+ }
+
+ if (side == C_SYNC_TARGET) {
+ /* Since application IO was locked out during C_WF_BITMAP_T and
+ C_WF_SYNC_UUID, our data is still unmodified. Before going to
+ C_SYNC_TARGET, let the before-resync-target handler veto, since
+ we are about to make the local data inconsistent. */
+ r = drbd_khelper(mdev, "before-resync-target");
+ r = (r >> 8) & 0xff;
+ if (r > 0) {
+ dev_info(DEV, "before-resync-target handler returned %d, "
+ "dropping connection.\n", r);
+ drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
+ return;
+ }
+ } else /* C_SYNC_SOURCE */ {
+ r = drbd_khelper(mdev, "before-resync-source");
+ r = (r >> 8) & 0xff;
+ if (r > 0) {
+ if (r == 3) {
+ dev_info(DEV, "before-resync-source handler returned %d, "
+ "ignoring. Old userland tools?", r);
+ } else {
+ dev_info(DEV, "before-resync-source handler returned %d, "
+ "dropping connection.\n", r);
+ drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
+ return;
+ }
+ }
+ }
+
+ drbd_state_lock(mdev);
+
+ if (!get_ldev_if_state(mdev, D_NEGOTIATING)) {
+ drbd_state_unlock(mdev);
+ return;
+ }
+
+ write_lock_irq(&global_state_lock);
+ ns = mdev->state;
+
+ ns.aftr_isp = !_drbd_may_sync_now(mdev);
+
+ ns.conn = side;
+
+ if (side == C_SYNC_TARGET)
+ ns.disk = D_INCONSISTENT;
+ else /* side == C_SYNC_SOURCE */
+ ns.pdsk = D_INCONSISTENT;
+
+ r = __drbd_set_state(mdev, ns, CS_VERBOSE, NULL);
+ ns = mdev->state;
+
+ if (ns.conn < C_CONNECTED)
+ r = SS_UNKNOWN_ERROR;
+
+ if (r == SS_SUCCESS) {
+ unsigned long tw = drbd_bm_total_weight(mdev);
+ unsigned long now = jiffies;
+ int i;
+
+ mdev->rs_failed = 0;
+ mdev->rs_paused = 0;
+ mdev->rs_same_csum = 0;
+ mdev->rs_last_events = 0;
+ mdev->rs_last_sect_ev = 0;
+ mdev->rs_total = tw;
+ mdev->rs_start = now;
+ for (i = 0; i < DRBD_SYNC_MARKS; i++) {
+ mdev->rs_mark_left[i] = tw;
+ mdev->rs_mark_time[i] = now;
+ }
+ _drbd_pause_after(mdev);
+ }
+ write_unlock_irq(&global_state_lock);
+
+ if (r == SS_SUCCESS) {
+ dev_info(DEV, "Began resync as %s (will sync %lu KB [%lu bits set]).\n",
+ drbd_conn_str(ns.conn),
+ (unsigned long) mdev->rs_total << (BM_BLOCK_SHIFT-10),
+ (unsigned long) mdev->rs_total);
+ if (side == C_SYNC_TARGET)
+ mdev->bm_resync_fo = 0;
+
+ /* Since protocol 96, we must serialize drbd_gen_and_send_sync_uuid
+ * with w_send_oos, or the sync target will get confused as to
+ * how many bits to resync. We cannot always do that, because for an
+ * empty resync and protocol < 95, we need to do it here, as we call
+ * drbd_resync_finished from here in that case.
+ * We drbd_gen_and_send_sync_uuid here for protocol < 96,
+ * and from after_state_ch otherwise. */
+ if (side == C_SYNC_SOURCE && mdev->agreed_pro_version < 96)
+ drbd_gen_and_send_sync_uuid(mdev);
+
+ if (mdev->agreed_pro_version < 95 && mdev->rs_total == 0) {
+ /* This still has a race (about when exactly the peers
+ * detect connection loss) that can lead to a full sync
+ * on next handshake. In 8.3.9 we fixed this with explicit
+ * resync-finished notifications, but the fix
+ * introduces a protocol change. Sleeping for some
+ * time longer than the ping interval + timeout on the
+ * SyncSource, to give the SyncTarget the chance to
+ * detect connection loss, then waiting for a ping
+ * response (implicit in drbd_resync_finished) reduces
+ * the race considerably, but does not solve it. */
+ if (side == C_SYNC_SOURCE)
+ schedule_timeout_interruptible(
+ mdev->net_conf->ping_int * HZ +
+ mdev->net_conf->ping_timeo*HZ/9);
+ drbd_resync_finished(mdev);
+ }
+
+ drbd_rs_controller_reset(mdev);
+ /* ns.conn may already be != mdev->state.conn,
+ * we may have been paused in between, or become paused until
+ * the timer triggers.
+ * No matter, that is handled in resync_timer_fn() */
+ if (ns.conn == C_SYNC_TARGET)
+ mod_timer(&mdev->resync_timer, jiffies);
+
+ drbd_md_sync(mdev);
+ }
+ put_ldev(mdev);
+ drbd_state_unlock(mdev);
+}
+
+int drbd_worker(struct drbd_thread *thi)
+{
+ struct drbd_conf *mdev = thi->mdev;
+ struct drbd_work *w = NULL;
+ LIST_HEAD(work_list);
+ int intr = 0, i;
+
+ sprintf(current->comm, "drbd%d_worker", mdev_to_minor(mdev));
+
+ while (get_t_state(thi) == Running) {
+ drbd_thread_current_set_cpu(mdev);
+
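+ /* No work pending: uncork the data socket so already queued TCP
+ * data gets flushed while we sleep, and cork it again once new
+ * work arrives, so small packets can be batched. */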
+ if (down_trylock(&mdev->data.work.s)) {
+ mutex_lock(&mdev->data.mutex);
+ if (mdev->data.socket && !mdev->net_conf->no_cork)
+ drbd_tcp_uncork(mdev->data.socket);
+ mutex_unlock(&mdev->data.mutex);
+
+ intr = down_interruptible(&mdev->data.work.s);
+
+ mutex_lock(&mdev->data.mutex);
+ if (mdev->data.socket && !mdev->net_conf->no_cork)
+ drbd_tcp_cork(mdev->data.socket);
+ mutex_unlock(&mdev->data.mutex);
+ }
+
+ if (intr) {
+ D_ASSERT(intr == -EINTR);
+ flush_signals(current);
+ ERR_IF (get_t_state(thi) == Running)
+ continue;
+ break;
+ }
+
+ if (get_t_state(thi) != Running)
+ break;
+ /* With this break, we have done a down() but not consumed
+ the entry from the list. The cleanup code takes care of
+ this... */
+
+ w = NULL;
+ spin_lock_irq(&mdev->data.work.q_lock);
+ ERR_IF(list_empty(&mdev->data.work.q)) {
+ /* something terribly wrong in our logic.
+ * we were able to down() the semaphore,
+ * but the list is empty... doh.
+ *
+ * what is the best thing to do now?
+ * try again from scratch, restarting the receiver,
+ * asender, whatnot? could break even more ugly,
+ * e.g. when we are primary, but no good local data.
+ *
+ * I'll try to get away just starting over this loop.
+ */
+ spin_unlock_irq(&mdev->data.work.q_lock);
+ continue;
+ }
+ w = list_entry(mdev->data.work.q.next, struct drbd_work, list);
+ list_del_init(&w->list);
+ spin_unlock_irq(&mdev->data.work.q_lock);
+
+ if (!w->cb(mdev, w, mdev->state.conn < C_CONNECTED)) {
+ /* dev_warn(DEV, "worker: a callback failed! \n"); */
+ if (mdev->state.conn >= C_CONNECTED)
+ drbd_force_state(mdev,
+ NS(conn, C_NETWORK_FAILURE));
+ }
+ }
+ D_ASSERT(test_bit(DEVICE_DYING, &mdev->flags));
+ D_ASSERT(test_bit(CONFIG_PENDING, &mdev->flags));
+
+ spin_lock_irq(&mdev->data.work.q_lock);
+ i = 0;
+ while (!list_empty(&mdev->data.work.q)) {
+ list_splice_init(&mdev->data.work.q, &work_list);
+ spin_unlock_irq(&mdev->data.work.q_lock);
+
+ while (!list_empty(&work_list)) {
+ w = list_entry(work_list.next, struct drbd_work, list);
+ list_del_init(&w->list);
+ w->cb(mdev, w, 1);
+ i++; /* dead debugging code */
+ }
+
+ spin_lock_irq(&mdev->data.work.q_lock);
+ }
+ sema_init(&mdev->data.work.s, 0);
+ /* DANGEROUS race: if someone queued work while holding the spinlock
+ * but called up() outside of it, we could get an up() on the
+ * semaphore without a corresponding list entry.
+ * So don't do that.
+ */
+ spin_unlock_irq(&mdev->data.work.q_lock);
+
+ D_ASSERT(mdev->state.disk == D_DISKLESS && mdev->state.conn == C_STANDALONE);
+ /* _drbd_set_state only uses stop_nowait.
+ * wait here for the Exiting receiver. */
+ drbd_thread_stop(&mdev->receiver);
+ drbd_mdev_cleanup(mdev);
+
+ dev_info(DEV, "worker terminated\n");
+
+ clear_bit(DEVICE_DYING, &mdev->flags);
+ clear_bit(CONFIG_PENDING, &mdev->flags);
+ wake_up(&mdev->state_wait);
+
+ return 0;
+}
diff --git a/drivers/block/drbd/drbd_wrappers.h b/drivers/block/drbd/drbd_wrappers.h
new file mode 100644
index 00000000..151f1a37
--- /dev/null
+++ b/drivers/block/drbd/drbd_wrappers.h
@@ -0,0 +1,59 @@
+#ifndef _DRBD_WRAPPERS_H
+#define _DRBD_WRAPPERS_H
+
+#include <linux/ctype.h>
+#include <linux/mm.h>
+
+/* see get_sb_bdev and bd_claim */
+extern char *drbd_sec_holder;
+
+/* sets the number of 512 byte sectors of our virtual device */
+static inline void drbd_set_my_capacity(struct drbd_conf *mdev,
+ sector_t size)
+{
+ /* set_capacity(mdev->this_bdev->bd_disk, size); */
+ set_capacity(mdev->vdisk, size);
+ mdev->this_bdev->bd_inode->i_size = (loff_t)size << 9;
+}
+
+#define drbd_bio_uptodate(bio) bio_flagged(bio, BIO_UPTODATE)
+
+/* bi_end_io handlers */
+extern void drbd_md_io_complete(struct bio *bio, int error);
+extern void drbd_endio_sec(struct bio *bio, int error);
+extern void drbd_endio_pri(struct bio *bio, int error);
+
+/*
+ * used to submit our private bio
+ */
+static inline void drbd_generic_make_request(struct drbd_conf *mdev,
+ int fault_type, struct bio *bio)
+{
+ __release(local);
+ if (!bio->bi_bdev) {
+ printk(KERN_ERR "drbd%d: drbd_generic_make_request: "
+ "bio->bi_bdev == NULL\n",
+ mdev_to_minor(mdev));
+ dump_stack();
+ bio_endio(bio, -ENODEV);
+ return;
+ }
+
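+ /* fault injection hook: complete the bio with -EIO instead of
+ * submitting it to the lower device */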
+ if (drbd_insert_fault(mdev, fault_type))
+ bio_endio(bio, -EIO);
+ else
+ generic_make_request(bio);
+}
+
+static inline int drbd_crypto_is_hash(struct crypto_tfm *tfm)
+{
+ return (crypto_tfm_alg_type(tfm) & CRYPTO_ALG_TYPE_HASH_MASK)
+ == CRYPTO_ALG_TYPE_HASH;
+}
+
+#ifndef __CHECKER__
+# undef __cond_lock
+# define __cond_lock(x,c) (c)
+#endif
+
+#endif
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
new file mode 100644
index 00000000..9955a537
--- /dev/null
+++ b/drivers/block/floppy.c
@@ -0,0 +1,4619 @@
+/*
+ * linux/drivers/block/floppy.c
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ * Copyright (C) 1993, 1994 Alain Knaff
+ * Copyright (C) 1998 Alan Cox
+ */
+
+/*
+ * 02.12.91 - Changed to static variables to indicate need for reset
+ * and recalibrate. This makes some things easier (output_byte reset
+ * checking etc), and means less interrupt jumping in case of errors,
+ * so the code is hopefully easier to understand.
+ */
+
+/*
+ * This file is certainly a mess. I've tried my best to get it working,
+ * but I don't like programming floppies, and I have only one anyway.
+ * Urgel. I should check for more errors, and do more graceful error
+ * recovery. Seems there are problems with several drives. I've tried to
+ * correct them. No promises.
+ */
+
+/*
+ * As with hd.c, all routines within this file can (and will) be called
+ * by interrupts, so extreme caution is needed. A hardware interrupt
+ * handler may not sleep, or a kernel panic will happen. Thus I cannot
+ * call "floppy-on" directly, but have to set a special timer interrupt
+ * etc.
+ */
+
+/*
+ * 28.02.92 - made track-buffering routines, based on the routines written
+ * by entropy@wintermute.wpi.edu (Lawrence Foard). Linus.
+ */
+
+/*
+ * Automatic floppy-detection and formatting written by Werner Almesberger
+ * (almesber@nessie.cs.id.ethz.ch), who also corrected some problems with
+ * the floppy-change signal detection.
+ */
+
+/*
+ * 1992/7/22 -- Hennus Bergman: Added better error reporting, fixed
+ * FDC data overrun bug, added some preliminary stuff for vertical
+ * recording support.
+ *
+ * 1992/9/17: Added DMA allocation & DMA functions. -- hhb.
+ *
+ * TODO: Errors are still not counted properly.
+ */
+
+/* 1992/9/20
+ * Modifications for ``Sector Shifting'' by Rob Hooft (hooft@chem.ruu.nl)
+ * modeled after the freeware MS-DOS program fdformat/88 V1.8 by
+ * Christoph H. Hochst\"atter.
+ * I have fixed the shift values to the ones I always use. Maybe a new
+ * ioctl() should be created to be able to modify them.
+ * There is a bug in the driver that makes it impossible to format a
+ * floppy as the first thing after bootup.
+ */
+
+/*
+ * 1993/4/29 -- Linus -- cleaned up the timer handling in the kernel, and
+ * this helped the floppy driver as well. Much cleaner, and still seems to
+ * work.
+ */
+
+/* 1994/6/24 --bbroad-- added the floppy table entries and made
+ * minor modifications to allow 2.88 floppies to be run.
+ */
+
+/* 1994/7/13 -- Paul Vojta -- modified the probing code to allow three or more
+ * disk types.
+ */
+
+/*
+ * 1994/8/8 -- Alain Knaff -- Switched to fdpatch driver: Support for bigger
+ * format bug fixes, but unfortunately some new bugs too...
+ */
+
+/* 1994/9/17 -- Koen Holtman -- added logging of physical floppy write
+ * errors to allow safe writing by specialized programs.
+ */
+
+/* 1995/4/24 -- Dan Fandrich -- added support for Commodore 1581 3.5" disks
+ * by defining bit 1 of the "stretch" parameter to mean put sectors on the
+ * opposite side of the disk, leaving the sector IDs alone (i.e. Commodore's
+ * drives are "upside-down").
+ */
+
+/*
+ * 1995/8/26 -- Andreas Busse -- added Mips support.
+ */
+
+/*
+ * 1995/10/18 -- Ralf Baechle -- Portability cleanup; move machine dependent
+ * features to asm/floppy.h.
+ */
+
+/*
+ * 1998/1/21 -- Richard Gooch <rgooch@atnf.csiro.au> -- devfs support
+ */
+
+/*
+ * 1998/05/07 -- Russell King -- More portability cleanups; moved definition of
+ * interrupt and dma channel to asm/floppy.h. Cleaned up some formatting &
+ * use of '0' for NULL.
+ */
+
+/*
+ * 1998/06/07 -- Alan Cox -- Merged the 2.0.34 fixes for resource allocation
+ * failures.
+ */
+
+/*
+ * 1998/09/20 -- David Weinehall -- Added slow-down code for buggy PS/2-drives.
+ */
+
+/*
+ * 1999/08/13 -- Paul Slootman -- floppy stopped working on Alpha after 24
+ * days, 6 hours, 32 minutes and 32 seconds (i.e. MAXINT jiffies; ints were
+ * being used to store jiffies, which are unsigned longs).
+ */
+
+/*
+ * 2000/08/28 -- Arnaldo Carvalho de Melo <acme@conectiva.com.br>
+ * - get rid of check_region
+ * - s/suser/capable/
+ */
+
+/*
+ * 2001/08/26 -- Paul Gortmaker - fix insmod oops on machines with no
+ * floppy controller (lingering task on list after module is gone... boom.)
+ */
+
+/*
+ * 2002/02/07 -- Anton Altaparmakov - Fix io ports reservation to correct range
+ * (0x3f2-0x3f5, 0x3f7). This fix is a bit of a hack but the proper fix
+ * requires many non-obvious changes in arch dependent code.
+ */
+
+/* 2003/07/28 -- Daniele Bellucci <bellucda@tiscali.it>.
+ * Better audit of register_blkdev.
+ */
+
+#undef FLOPPY_SILENT_DCL_CLEAR
+
+#define REALLY_SLOW_IO
+
+#define DEBUGT 2
+
+#define DPRINT(format, args...) \
+ pr_info("floppy%d: " format, current_drive, ##args)
+
+#define DCL_DEBUG /* debug disk change line */
+#ifdef DCL_DEBUG
+#define debug_dcl(test, fmt, args...) \
+ do { if ((test) & FD_DEBUG) DPRINT(fmt, ##args); } while (0)
+#else
+#define debug_dcl(test, fmt, args...) \
+ do { if (0) DPRINT(fmt, ##args); } while (0)
+#endif
+
+/* do print messages for unexpected interrupts */
+static int print_unex = 1;
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/fs.h>
+#include <linux/kernel.h>
+#include <linux/timer.h>
+#include <linux/workqueue.h>
+#define FDPATCHES
+#include <linux/fdreg.h>
+#include <linux/fd.h>
+#include <linux/hdreg.h>
+#include <linux/errno.h>
+#include <linux/slab.h>
+#include <linux/mm.h>
+#include <linux/bio.h>
+#include <linux/string.h>
+#include <linux/jiffies.h>
+#include <linux/fcntl.h>
+#include <linux/delay.h>
+#include <linux/mc146818rtc.h> /* CMOS defines */
+#include <linux/ioport.h>
+#include <linux/interrupt.h>
+#include <linux/init.h>
+#include <linux/platform_device.h>
+#include <linux/mod_devicetable.h>
+#include <linux/buffer_head.h> /* for invalidate_buffers() */
+#include <linux/mutex.h>
+#include <linux/io.h>
+#include <linux/uaccess.h>
+
+/*
+ * PS/2 floppies have much slower step rates than regular floppies.
+ * It's been recommended to use about 1/4 of the default speed
+ * in some more extreme cases.
+ */
+static DEFINE_MUTEX(floppy_mutex);
+static int slow_floppy;
+
+#include <asm/dma.h>
+#include <asm/irq.h>
+#include <asm/system.h>
+
+static int FLOPPY_IRQ = 6;
+static int FLOPPY_DMA = 2;
+static int can_use_virtual_dma = 2;
+/* =======
+ * can use virtual DMA:
+ * 0 = use of virtual DMA disallowed by config
+ * 1 = use of virtual DMA prescribed by config
+ * 2 = no virtual DMA preference configured. By default try hard DMA,
+ * but fall back on virtual DMA when not enough memory available
+ */
+
+static int use_virtual_dma;
+/* =======
+ * use virtual DMA
+ * 0 using hard DMA
+ * 1 using virtual DMA
+ * This variable is set to virtual when a DMA mem problem arises, and
+ * reset back in floppy_grab_irq_and_dma.
+ * It is not safe to reset it in other circumstances, because the floppy
+ * driver may have several buffers in use at once, and we currently do not
+ * record each buffer's capabilities
+ */
+
+static DEFINE_SPINLOCK(floppy_lock);
+
+static unsigned short virtual_dma_port = 0x3f0;
+irqreturn_t floppy_interrupt(int irq, void *dev_id);
+static int set_dor(int fdc, char mask, char data);
+
+#define K_64 0x10000 /* 64KB */
+
+/* the following is the mask of allowed drives. By default units 2 and
+ * 3 of both floppy controllers are disabled, because switching on the
+ * motor of these drives causes system hangs on some PCI computers. drive
+ * 0 is the low bit (0x1), and drive 7 is the high bit (0x80). Bits are on if
+ * a drive is allowed.
+ *
+ * NOTE: This must come before we include the arch floppy header because
+ * some ports reference this variable from there. -DaveM
+ */
+
+static int allowed_drive_mask = 0x33;
+
+#include <asm/floppy.h>
+
+static int irqdma_allocated;
+
+#include <linux/blkdev.h>
+#include <linux/blkpg.h>
+#include <linux/cdrom.h> /* for the compatibility eject ioctl */
+#include <linux/completion.h>
+
+static struct request *current_req;
+static void do_fd_request(struct request_queue *q);
+static int set_next_request(void);
+
+#ifndef fd_get_dma_residue
+#define fd_get_dma_residue() get_dma_residue(FLOPPY_DMA)
+#endif
+
+/* Dma Memory related stuff */
+
+#ifndef fd_dma_mem_free
+#define fd_dma_mem_free(addr, size) free_pages(addr, get_order(size))
+#endif
+
+#ifndef fd_dma_mem_alloc
+#define fd_dma_mem_alloc(size) __get_dma_pages(GFP_KERNEL, get_order(size))
+#endif
+
+static inline void fallback_on_nodma_alloc(char **addr, size_t l)
+{
+#ifdef FLOPPY_CAN_FALLBACK_ON_NODMA
+ if (*addr)
+ return; /* we have the memory */
+ if (can_use_virtual_dma != 2)
+ return; /* no fallback allowed */
+ pr_info("DMA memory shortage. Temporarily falling back on virtual DMA\n");
+ *addr = (char *)nodma_mem_alloc(l);
+#else
+ return;
+#endif
+}
+
+/* End dma memory related stuff */
+
+static unsigned long fake_change;
+static bool initialized;
+
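+/* TOMINOR(drive) builds the base minor number: unit in bits 0-1 and
+ * controller in bit 7; ITYPE() extracts the format type from bits 2-6 of
+ * a minor (or fd_device value); UNIT() and FDC() decompose a plain drive
+ * number 0-7. */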
+#define ITYPE(x) (((x) >> 2) & 0x1f)
+#define TOMINOR(x) ((x & 3) | ((x & 4) << 5))
+#define UNIT(x) ((x) & 0x03) /* drive on fdc */
+#define FDC(x) (((x) & 0x04) >> 2) /* fdc of drive */
+ /* reverse mapping from unit and fdc to drive */
+#define REVDRIVE(fdc, unit) ((unit) + ((fdc) << 2))
+
+#define DP (&drive_params[current_drive])
+#define DRS (&drive_state[current_drive])
+#define DRWE (&write_errors[current_drive])
+#define FDCS (&fdc_state[fdc])
+
+#define UDP (&drive_params[drive])
+#define UDRS (&drive_state[drive])
+#define UDRWE (&write_errors[drive])
+#define UFDCS (&fdc_state[FDC(drive)])
+
+#define PH_HEAD(floppy, head) (((((floppy)->stretch & 2) >> 1) ^ head) << 2)
+#define STRETCH(floppy) ((floppy)->stretch & FD_STRETCH)
+
+/* read/write */
+#define COMMAND (raw_cmd->cmd[0])
+#define DR_SELECT (raw_cmd->cmd[1])
+#define TRACK (raw_cmd->cmd[2])
+#define HEAD (raw_cmd->cmd[3])
+#define SECTOR (raw_cmd->cmd[4])
+#define SIZECODE (raw_cmd->cmd[5])
+#define SECT_PER_TRACK (raw_cmd->cmd[6])
+#define GAP (raw_cmd->cmd[7])
+#define SIZECODE2 (raw_cmd->cmd[8])
+#define NR_RW 9
+
+/* format */
+#define F_SIZECODE (raw_cmd->cmd[2])
+#define F_SECT_PER_TRACK (raw_cmd->cmd[3])
+#define F_GAP (raw_cmd->cmd[4])
+#define F_FILL (raw_cmd->cmd[5])
+#define NR_F 6
+
+/*
+ * Maximum disk size (in kilobytes).
+ * This default is used whenever the current disk size is unknown.
+ * [Now it is rather a minimum]
+ */
+#define MAX_DISK_SIZE 4 /* 3984 */
+
+/*
+ * globals used by 'result()'
+ */
+#define MAX_REPLIES 16
+static unsigned char reply_buffer[MAX_REPLIES];
+static int inr; /* size of reply buffer, when called from interrupt */
+#define ST0 (reply_buffer[0])
+#define ST1 (reply_buffer[1])
+#define ST2 (reply_buffer[2])
+#define ST3 (reply_buffer[0]) /* result of GETSTATUS */
+#define R_TRACK (reply_buffer[3])
+#define R_HEAD (reply_buffer[4])
+#define R_SECTOR (reply_buffer[5])
+#define R_SIZECODE (reply_buffer[6])
+
+#define SEL_DLY (2 * HZ / 100)
+
+/*
+ * this struct defines the different floppy drive types.
+ */
+static struct {
+ struct floppy_drive_params params;
+ const char *name; /* name printed while booting */
+} default_drive_params[] = {
+/* NOTE: the time values in jiffies should be in msec!
+ CMOS drive type
+ | Maximum data rate supported by drive type
+ | | Head load time, msec
+ | | | Head unload time, msec (not used)
+ | | | | Step rate interval, usec
+ | | | | | Time needed for spinup time (jiffies)
+ | | | | | | Timeout for spinning down (jiffies)
+ | | | | | | | Spindown offset (where disk stops)
+ | | | | | | | | Select delay
+ | | | | | | | | | RPS
+ | | | | | | | | | | Max number of tracks
+ | | | | | | | | | | | Interrupt timeout
+ | | | | | | | | | | | | Max nonintlv. sectors
+ | | | | | | | | | | | | | -Max Errors- flags */
+{{0, 500, 16, 16, 8000, 1*HZ, 3*HZ, 0, SEL_DLY, 5, 80, 3*HZ, 20, {3,1,2,0,2}, 0,
+ 0, { 7, 4, 8, 2, 1, 5, 3,10}, 3*HZ/2, 0 }, "unknown" },
+
+{{1, 300, 16, 16, 8000, 1*HZ, 3*HZ, 0, SEL_DLY, 5, 40, 3*HZ, 17, {3,1,2,0,2}, 0,
+ 0, { 1, 0, 0, 0, 0, 0, 0, 0}, 3*HZ/2, 1 }, "360K PC" }, /*5 1/4 360 KB PC*/
+
+{{2, 500, 16, 16, 6000, 4*HZ/10, 3*HZ, 14, SEL_DLY, 6, 83, 3*HZ, 17, {3,1,2,0,2}, 0,
+ 0, { 2, 5, 6,23,10,20,12, 0}, 3*HZ/2, 2 }, "1.2M" }, /*5 1/4 HD AT*/
+
+{{3, 250, 16, 16, 3000, 1*HZ, 3*HZ, 0, SEL_DLY, 5, 83, 3*HZ, 20, {3,1,2,0,2}, 0,
+ 0, { 4,22,21,30, 3, 0, 0, 0}, 3*HZ/2, 4 }, "720k" }, /*3 1/2 DD*/
+
+{{4, 500, 16, 16, 4000, 4*HZ/10, 3*HZ, 10, SEL_DLY, 5, 83, 3*HZ, 20, {3,1,2,0,2}, 0,
+ 0, { 7, 4,25,22,31,21,29,11}, 3*HZ/2, 7 }, "1.44M" }, /*3 1/2 HD*/
+
+{{5, 1000, 15, 8, 3000, 4*HZ/10, 3*HZ, 10, SEL_DLY, 5, 83, 3*HZ, 40, {3,1,2,0,2}, 0,
+ 0, { 7, 8, 4,25,28,22,31,21}, 3*HZ/2, 8 }, "2.88M AMI BIOS" }, /*3 1/2 ED*/
+
+{{6, 1000, 15, 8, 3000, 4*HZ/10, 3*HZ, 10, SEL_DLY, 5, 83, 3*HZ, 40, {3,1,2,0,2}, 0,
+ 0, { 7, 8, 4,25,28,22,31,21}, 3*HZ/2, 8 }, "2.88M" } /*3 1/2 ED*/
+/* | --autodetected formats--- | | |
+ * read_track | | Name printed when booting
+ * | Native format
+ * Frequency of disk change checks */
+};
+
+static struct floppy_drive_params drive_params[N_DRIVE];
+static struct floppy_drive_struct drive_state[N_DRIVE];
+static struct floppy_write_errors write_errors[N_DRIVE];
+static struct timer_list motor_off_timer[N_DRIVE];
+static struct gendisk *disks[N_DRIVE];
+static struct block_device *opened_bdev[N_DRIVE];
+static DEFINE_MUTEX(open_lock);
+static struct floppy_raw_cmd *raw_cmd, default_raw_cmd;
+static int fdc_queue;
+
+/*
+ * This struct defines the different floppy types.
+ *
+ * Bit 0 of 'stretch' tells if the tracks need to be doubled for some
+ * types (e.g. 360kB diskette in 1.2MB drive, etc.). Bit 1 of 'stretch'
+ * tells if the disk is in Commodore 1581 format, which means side 0 sectors
+ * are located on side 1 of the disk but with a side 0 ID, and vice-versa.
+ * This is the same as the Sharp MZ-80 5.25" CP/M disk format, except that the
+ * 1581's logical side 0 is on physical side 1, whereas the Sharp's logical
+ * side 0 is on physical side 0 (but with the misnamed sector IDs).
+ * 'stretch' should probably be renamed to something more general, like
+ * 'options'.
+ *
+ * Bits 2 through 9 of 'stretch' tell the number of the first sector.
+ * The LSB (bit 2) is flipped. For most disks, the first sector
+ * is 1 (represented by 0x00<<2). For some CP/M and music sampler
+ * disks (such as Ensoniq EPS 16plus) it is 0 (represented as 0x01<<2).
+ * For Amstrad CPC disks it is 0xC1 (represented as 0xC0<<2).
+ *
+ * Other parameters should be self-explanatory (see also setfdprm(8)).
+ */
+/*
+ Size
+ | Sectors per track
+ | | Head
+ | | | Tracks
+ | | | | Stretch
+ | | | | | Gap 1 size
+ | | | | | | Data rate, | 0x40 for perp
+ | | | | | | | Spec1 (stepping rate, head unload
+ | | | | | | | | /fmt gap (gap2) */
+static struct floppy_struct floppy_type[32] = {
+ { 0, 0,0, 0,0,0x00,0x00,0x00,0x00,NULL }, /* 0 no testing */
+ { 720, 9,2,40,0,0x2A,0x02,0xDF,0x50,"d360" }, /* 1 360KB PC */
+ { 2400,15,2,80,0,0x1B,0x00,0xDF,0x54,"h1200" }, /* 2 1.2MB AT */
+ { 720, 9,1,80,0,0x2A,0x02,0xDF,0x50,"D360" }, /* 3 360KB SS 3.5" */
+ { 1440, 9,2,80,0,0x2A,0x02,0xDF,0x50,"D720" }, /* 4 720KB 3.5" */
+ { 720, 9,2,40,1,0x23,0x01,0xDF,0x50,"h360" }, /* 5 360KB AT */
+ { 1440, 9,2,80,0,0x23,0x01,0xDF,0x50,"h720" }, /* 6 720KB AT */
+ { 2880,18,2,80,0,0x1B,0x00,0xCF,0x6C,"H1440" }, /* 7 1.44MB 3.5" */
+ { 5760,36,2,80,0,0x1B,0x43,0xAF,0x54,"E2880" }, /* 8 2.88MB 3.5" */
+ { 6240,39,2,80,0,0x1B,0x43,0xAF,0x28,"E3120" }, /* 9 3.12MB 3.5" */
+
+ { 2880,18,2,80,0,0x25,0x00,0xDF,0x02,"h1440" }, /* 10 1.44MB 5.25" */
+ { 3360,21,2,80,0,0x1C,0x00,0xCF,0x0C,"H1680" }, /* 11 1.68MB 3.5" */
+ { 820,10,2,41,1,0x25,0x01,0xDF,0x2E,"h410" }, /* 12 410KB 5.25" */
+ { 1640,10,2,82,0,0x25,0x02,0xDF,0x2E,"H820" }, /* 13 820KB 3.5" */
+ { 2952,18,2,82,0,0x25,0x00,0xDF,0x02,"h1476" }, /* 14 1.48MB 5.25" */
+ { 3444,21,2,82,0,0x25,0x00,0xDF,0x0C,"H1722" }, /* 15 1.72MB 3.5" */
+ { 840,10,2,42,1,0x25,0x01,0xDF,0x2E,"h420" }, /* 16 420KB 5.25" */
+ { 1660,10,2,83,0,0x25,0x02,0xDF,0x2E,"H830" }, /* 17 830KB 3.5" */
+ { 2988,18,2,83,0,0x25,0x00,0xDF,0x02,"h1494" }, /* 18 1.49MB 5.25" */
+ { 3486,21,2,83,0,0x25,0x00,0xDF,0x0C,"H1743" }, /* 19 1.74 MB 3.5" */
+
+ { 1760,11,2,80,0,0x1C,0x09,0xCF,0x00,"h880" }, /* 20 880KB 5.25" */
+ { 2080,13,2,80,0,0x1C,0x01,0xCF,0x00,"D1040" }, /* 21 1.04MB 3.5" */
+ { 2240,14,2,80,0,0x1C,0x19,0xCF,0x00,"D1120" }, /* 22 1.12MB 3.5" */
+ { 3200,20,2,80,0,0x1C,0x20,0xCF,0x2C,"h1600" }, /* 23 1.6MB 5.25" */
+ { 3520,22,2,80,0,0x1C,0x08,0xCF,0x2e,"H1760" }, /* 24 1.76MB 3.5" */
+ { 3840,24,2,80,0,0x1C,0x20,0xCF,0x00,"H1920" }, /* 25 1.92MB 3.5" */
+ { 6400,40,2,80,0,0x25,0x5B,0xCF,0x00,"E3200" }, /* 26 3.20MB 3.5" */
+ { 7040,44,2,80,0,0x25,0x5B,0xCF,0x00,"E3520" }, /* 27 3.52MB 3.5" */
+ { 7680,48,2,80,0,0x25,0x63,0xCF,0x00,"E3840" }, /* 28 3.84MB 3.5" */
+ { 3680,23,2,80,0,0x1C,0x10,0xCF,0x00,"H1840" }, /* 29 1.84MB 3.5" */
+
+ { 1600,10,2,80,0,0x25,0x02,0xDF,0x2E,"D800" }, /* 30 800KB 3.5" */
+ { 3200,20,2,80,0,0x1C,0x00,0xCF,0x2C,"H1600" }, /* 31 1.6MB 3.5" */
+};
+
+#define SECTSIZE (_FD_SECTSIZE(*floppy))
+
+/* Auto-detection: Disk type used until the next media change occurs. */
+static struct floppy_struct *current_type[N_DRIVE];
+
+/*
+ * User-provided type information. current_type points to
+ * the respective entry of this array.
+ */
+static struct floppy_struct user_params[N_DRIVE];
+
+static sector_t floppy_sizes[256];
+
+static char floppy_device_name[] = "floppy";
+
+/*
+ * The driver is trying to determine the correct media format
+ * while probing is set. rw_interrupt() clears it after a
+ * successful access.
+ */
+static int probing;
+
+/* Synchronization of FDC access. */
+#define FD_COMMAND_NONE -1
+#define FD_COMMAND_ERROR 2
+#define FD_COMMAND_OKAY 3
+
+static volatile int command_status = FD_COMMAND_NONE;
+static unsigned long fdc_busy;
+static DECLARE_WAIT_QUEUE_HEAD(fdc_wait);
+static DECLARE_WAIT_QUEUE_HEAD(command_done);
+
+/* Errors during formatting are counted here. */
+static int format_errors;
+
+/* Format request descriptor. */
+static struct format_descr format_req;
+
+/*
+ * Rate is 0 for 500 kbps, 1 for 300 kbps, 2 for 250 kbps
+ * Spec1 is 0xSH, where S is stepping rate (F=1ms, E=2ms, D=3ms etc),
+ * H is head unload time (1=16ms, 2=32ms, etc)
+ */
+
+/*
+ * Track buffer
+ * Because these are written to by the DMA controller, they must
+ * not contain a 64k byte boundary crossing, or data will be
+ * corrupted/lost.
+ */
+static char *floppy_track_buffer;
+static int max_buffer_sectors;
+
+static int *errors;
+typedef void (*done_f)(int);
+static const struct cont_t {
+ void (*interrupt)(void);
+ /* this is called after the interrupt of the
+ * main command */
+ void (*redo)(void); /* this is called to retry the operation */
+ void (*error)(void); /* this is called to tally an error */
+ done_f done; /* this is called to say if the operation has
+ * succeeded/failed */
+} *cont;
+
+static void floppy_ready(void);
+static void floppy_start(void);
+static void process_fd_request(void);
+static void recalibrate_floppy(void);
+static void floppy_shutdown(unsigned long);
+
+static int floppy_request_regions(int);
+static void floppy_release_regions(int);
+static int floppy_grab_irq_and_dma(void);
+static void floppy_release_irq_and_dma(void);
+
+/*
+ * The "reset" variable should be tested whenever an interrupt is scheduled,
+ * after the commands have been sent. This is to ensure that the driver doesn't
+ * get wedged when the interrupt doesn't come because of a failed command.
+ * reset doesn't need to be tested before sending commands, because
+ * output_byte is automatically disabled when reset is set.
+ */
+static void reset_fdc(void);
+
+/*
+ * These are global variables, as that's the easiest way to give
+ * information to interrupts. They are the data used for the current
+ * request.
+ */
+#define NO_TRACK -1
+#define NEED_1_RECAL -2
+#define NEED_2_RECAL -3
+
+static atomic_t usage_count = ATOMIC_INIT(0);
+
+/* buffer related variables */
+static int buffer_track = -1;
+static int buffer_drive = -1;
+static int buffer_min = -1;
+static int buffer_max = -1;
+
+/* fdc related variables, should end up in a struct */
+static struct floppy_fdc_state fdc_state[N_FDC];
+static int fdc; /* current fdc */
+
+static struct floppy_struct *_floppy = floppy_type;
+static unsigned char current_drive;
+static long current_count_sectors;
+static unsigned char fsector_t; /* sector in track */
+static unsigned char in_sector_offset; /* offset within physical sector,
+ * expressed in units of 512 bytes */
+
+static inline bool drive_no_geom(int drive)
+{
+ return !current_type[drive] && !ITYPE(UDRS->fd_device);
+}
+
+#ifndef fd_eject
+static inline int fd_eject(int drive)
+{
+ return -EINVAL;
+}
+#endif
+
+/*
+ * Debugging
+ * =========
+ */
+#ifdef DEBUGT
+static long unsigned debugtimer;
+
+static inline void set_debugt(void)
+{
+ debugtimer = jiffies;
+}
+
+static inline void debugt(const char *func, const char *msg)
+{
+ if (DP->flags & DEBUGT)
+ pr_info("%s:%s dtime=%lu\n", func, msg, jiffies - debugtimer);
+}
+#else
+static inline void set_debugt(void) { }
+static inline void debugt(const char *func, const char *msg) { }
+#endif /* DEBUGT */
+
+typedef void (*timeout_fn)(unsigned long);
+static DEFINE_TIMER(fd_timeout, floppy_shutdown, 0, 0);
+
+static const char *timeout_message;
+
+static void is_alive(const char *func, const char *message)
+{
+ /* this routine checks whether the floppy driver is "alive" */
+ if (test_bit(0, &fdc_busy) && command_status < 2 &&
+ !timer_pending(&fd_timeout)) {
+ DPRINT("%s: timeout handler died. %s\n", func, message);
+ }
+}
+
+static void (*do_floppy)(void) = NULL;
+
+#define OLOGSIZE 20
+
+static void (*lasthandler)(void);
+static unsigned long interruptjiffies;
+static unsigned long resultjiffies;
+static int resultsize;
+static unsigned long lastredo;
+
+static struct output_log {
+ unsigned char data;
+ unsigned char status;
+ unsigned long jiffies;
+} output_log[OLOGSIZE];
+
+static int output_log_pos;
+
+#define current_reqD -1
+#define MAXTIMEOUT -2
+
+static void __reschedule_timeout(int drive, const char *message)
+{
+ if (drive == current_reqD)
+ drive = current_drive;
+ del_timer(&fd_timeout);
+ if (drive < 0 || drive >= N_DRIVE) {
+ fd_timeout.expires = jiffies + 20UL * HZ;
+ drive = 0;
+ } else
+ fd_timeout.expires = jiffies + UDP->timeout;
+ add_timer(&fd_timeout);
+ if (UDP->flags & FD_DEBUG)
+ DPRINT("reschedule timeout %s\n", message);
+ timeout_message = message;
+}
+
+static void reschedule_timeout(int drive, const char *message)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&floppy_lock, flags);
+ __reschedule_timeout(drive, message);
+ spin_unlock_irqrestore(&floppy_lock, flags);
+}
+
+#define INFBOUND(a, b) (a) = max_t(int, a, b)
+#define SUPBOUND(a, b) (a) = min_t(int, a, b)
+
+/*
+ * Bottom half floppy driver.
+ * ==========================
+ *
+ * This part of the file contains the code talking directly to the hardware,
+ * and also the main service loop (seek-configure-spinup-command)
+ */
+
+/*
+ * disk change.
+ * This routine is responsible for maintaining the FD_DISK_CHANGE flag,
+ * and the last_checked date.
+ *
+ * last_checked is the date of the last check which showed 'no disk change'
+ * FD_DISK_CHANGE is set under two conditions:
+ * 1. The floppy has been changed after some i/o to that floppy already
+ * took place.
+ * 2. No floppy disk is in the drive. This is done in order to ensure that
+ * requests are quickly flushed in case there is no disk in the drive. It
+ * follows that FD_DISK_CHANGE can only be cleared if there is a disk in
+ * the drive.
+ *
+ * For 1., maxblock is observed. Maxblock is 0 if no i/o has taken place yet.
+ * For 2., FD_DISK_NEWCHANGE is watched. FD_DISK_NEWCHANGE is cleared on
+ * each seek. If a disk is present, the disk change line should also be
+ * cleared on each seek. Thus, if FD_DISK_NEWCHANGE is clear, but the disk
+ * change line is set, this means either that no disk is in the drive, or
+ * that it has been removed since the last seek.
+ *
+ * This means that we really have a third possibility too:
+ * The floppy has been changed after the last seek.
+ */
+
+static int disk_change(int drive)
+{
+ int fdc = FDC(drive);
+
+ if (time_before(jiffies, UDRS->select_date + UDP->select_delay))
+ DPRINT("WARNING disk change called early\n");
+ if (!(FDCS->dor & (0x10 << UNIT(drive))) ||
+ (FDCS->dor & 3) != UNIT(drive) || fdc != FDC(drive)) {
+ DPRINT("probing disk change on unselected drive\n");
+ DPRINT("drive=%d fdc=%d dor=%x\n", drive, FDC(drive),
+ (unsigned int)FDCS->dor);
+ }
+
+ debug_dcl(UDP->flags,
+ "checking disk change line for drive %d\n", drive);
+ debug_dcl(UDP->flags, "jiffies=%lu\n", jiffies);
+ debug_dcl(UDP->flags, "disk change line=%x\n", fd_inb(FD_DIR) & 0x80);
+ debug_dcl(UDP->flags, "flags=%lx\n", UDRS->flags);
+
+ if (UDP->flags & FD_BROKEN_DCL)
+ return test_bit(FD_DISK_CHANGED_BIT, &UDRS->flags);
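+ /* Bit 7 of the Digital Input Register is the disk change line; the
+ * XOR with the per-drive flags inverts its sense for hardware with
+ * an inverted DCL. */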
+ if ((fd_inb(FD_DIR) ^ UDP->flags) & 0x80) {
+ set_bit(FD_VERIFY_BIT, &UDRS->flags);
+ /* verify write protection */
+
+ if (UDRS->maxblock) /* mark it changed */
+ set_bit(FD_DISK_CHANGED_BIT, &UDRS->flags);
+
+ /* invalidate its geometry */
+ if (UDRS->keep_data >= 0) {
+ if ((UDP->flags & FTD_MSG) &&
+ current_type[drive] != NULL)
+ DPRINT("Disk type is undefined after disk change\n");
+ current_type[drive] = NULL;
+ floppy_sizes[TOMINOR(drive)] = MAX_DISK_SIZE << 1;
+ }
+
+ return 1;
+ } else {
+ UDRS->last_checked = jiffies;
+ clear_bit(FD_DISK_NEWCHANGE_BIT, &UDRS->flags);
+ }
+ return 0;
+}
+
+static inline int is_selected(int dor, int unit)
+{
+ return ((dor & (0x10 << unit)) && (dor & 3) == unit);
+}
+
+static bool is_ready_state(int status)
+{
+ int state = status & (STATUS_READY | STATUS_DIR | STATUS_DMA);
+ return state == STATUS_READY;
+}
+
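+/* Update the Digital Output Register of the given fdc: bits 0-1 select
+ * the drive, bit 2 is the (active low) controller reset, bit 3 enables
+ * DMA and interrupts, and bits 4-7 are the per-unit motor enables.
+ * Returns the previous DOR value, or -1 if the controller is absent. */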
+static int set_dor(int fdc, char mask, char data)
+{
+ unsigned char unit;
+ unsigned char drive;
+ unsigned char newdor;
+ unsigned char olddor;
+
+ if (FDCS->address == -1)
+ return -1;
+
+ olddor = FDCS->dor;
+ newdor = (olddor & mask) | data;
+ if (newdor != olddor) {
+ unit = olddor & 0x3;
+ if (is_selected(olddor, unit) && !is_selected(newdor, unit)) {
+ drive = REVDRIVE(fdc, unit);
+ debug_dcl(UDP->flags,
+ "calling disk change from set_dor\n");
+ disk_change(drive);
+ }
+ FDCS->dor = newdor;
+ fd_outb(newdor, FD_DOR);
+
+ unit = newdor & 0x3;
+ if (!is_selected(olddor, unit) && is_selected(newdor, unit)) {
+ drive = REVDRIVE(fdc, unit);
+ UDRS->select_date = jiffies;
+ }
+ }
+ return olddor;
+}
+
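+/* Drop and immediately re-assert the motor-on bit of the current drive,
+ * producing a short pulse on its control line; only done for drives
+ * configured without a select delay. */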
+static void twaddle(void)
+{
+ if (DP->select_delay)
+ return;
+ fd_outb(FDCS->dor & ~(0x10 << UNIT(current_drive)), FD_DOR);
+ fd_outb(FDCS->dor, FD_DOR);
+ DRS->select_date = jiffies;
+}
+
+/*
+ * Reset all driver information about the current fdc.
+ * This is needed after a reset, and after a raw command.
+ */
+static void reset_fdc_info(int mode)
+{
+ int drive;
+
+ FDCS->spec1 = FDCS->spec2 = -1;
+ FDCS->need_configure = 1;
+ FDCS->perp_mode = 1;
+ FDCS->rawcmd = 0;
+ for (drive = 0; drive < N_DRIVE; drive++)
+ if (FDC(drive) == fdc && (mode || UDRS->track != NEED_1_RECAL))
+ UDRS->track = NEED_2_RECAL;
+}
+
+/* selects the fdc and drive, and enables the fdc's input/dma. */
+static void set_fdc(int drive)
+{
+ if (drive >= 0 && drive < N_DRIVE) {
+ fdc = FDC(drive);
+ current_drive = drive;
+ }
+ if (fdc != 1 && fdc != 0) {
+ pr_info("bad fdc value\n");
+ return;
+ }
+ set_dor(fdc, ~0, 8);
+#if N_FDC > 1
+ set_dor(1 - fdc, ~8, 0);
+#endif
+ if (FDCS->rawcmd == 2)
+ reset_fdc_info(1);
+ if (fd_inb(FD_STATUS) != STATUS_READY)
+ FDCS->reset = 1;
+}
+
+/* locks the driver */
+static int lock_fdc(int drive, bool interruptible)
+{
+ if (WARN(atomic_read(&usage_count) == 0,
+ "Trying to lock fdc while usage count=0\n"))
+ return -1;
+
+ if (wait_event_interruptible(fdc_wait, !test_and_set_bit(0, &fdc_busy)))
+ return -EINTR;
+
+ command_status = FD_COMMAND_NONE;
+
+ __reschedule_timeout(drive, "lock fdc");
+ set_fdc(drive);
+ return 0;
+}
+
+/* unlocks the driver */
+static void unlock_fdc(void)
+{
+ unsigned long flags;
+
+ raw_cmd = NULL;
+ if (!test_bit(0, &fdc_busy))
+ DPRINT("FDC access conflict!\n");
+
+ if (do_floppy)
+ DPRINT("device interrupt still active at FDC release: %pf!\n",
+ do_floppy);
+ command_status = FD_COMMAND_NONE;
+ spin_lock_irqsave(&floppy_lock, flags);
+ del_timer(&fd_timeout);
+ cont = NULL;
+ clear_bit(0, &fdc_busy);
+ if (current_req || set_next_request())
+ do_fd_request(current_req->q);
+ spin_unlock_irqrestore(&floppy_lock, flags);
+ wake_up(&fdc_wait);
+}
+
+/* switches the motor off after a given timeout */
+static void motor_off_callback(unsigned long nr)
+{
+ unsigned char mask = ~(0x10 << UNIT(nr));
+
+ set_dor(FDC(nr), mask, 0);
+}
+
+/* schedules motor off */
+static void floppy_off(unsigned int drive)
+{
+ unsigned long volatile delta;
+ int fdc = FDC(drive);
+
+ if (!(FDCS->dor & (0x10 << UNIT(drive))))
+ return;
+
+ del_timer(motor_off_timer + drive);
+
+ /* make spindle stop in a position which minimizes spinup time
+ * next time */
+ if (UDP->rps) {
+ delta = jiffies - UDRS->first_read_date + HZ -
+ UDP->spindown_offset;
+ delta = ((delta * UDP->rps) % HZ) / UDP->rps;
+ motor_off_timer[drive].expires =
+ jiffies + UDP->spindown - delta;
+ }
+ add_timer(motor_off_timer + drive);
+}
+
+/*
+ * Cycle through all N_DRIVE floppy drives for disk change testing,
+ * stopping at the current drive. This is done before any long operation,
+ * to be sure we have up-to-date disk change information.
+ */
+static void scandrives(void)
+{
+ int i;
+ int drive;
+ int saved_drive;
+
+ if (DP->select_delay)
+ return;
+
+ saved_drive = current_drive;
+ for (i = 0; i < N_DRIVE; i++) {
+ drive = (saved_drive + i + 1) % N_DRIVE;
+ if (UDRS->fd_ref == 0 || UDP->select_delay != 0)
+ continue; /* skip closed drives */
+ set_fdc(drive);
+ if (!(set_dor(fdc, ~3, UNIT(drive) | (0x10 << UNIT(drive))) &
+ (0x10 << UNIT(drive))))
+ /* switch the motor off again, if it was off to
+ * begin with */
+ set_dor(fdc, ~(0x10 << UNIT(drive)), 0);
+ }
+ set_fdc(saved_drive);
+}
+
+static void empty(void)
+{
+}
+
+static DECLARE_WORK(floppy_work, NULL);
+
+static void schedule_bh(void (*handler)(void))
+{
+ PREPARE_WORK(&floppy_work, (work_func_t)handler);
+ schedule_work(&floppy_work);
+}
+
+static DEFINE_TIMER(fd_timer, NULL, 0, 0);
+
+static void cancel_activity(void)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&floppy_lock, flags);
+ do_floppy = NULL;
+ PREPARE_WORK(&floppy_work, (work_func_t)empty);
+ del_timer(&fd_timer);
+ spin_unlock_irqrestore(&floppy_lock, flags);
+}
+
+/* this function makes sure that the disk stays in the drive during the
+ * transfer */
+static void fd_watchdog(void)
+{
+ debug_dcl(DP->flags, "calling disk change from watchdog\n");
+
+ if (disk_change(current_drive)) {
+ DPRINT("disk removed during i/o\n");
+ cancel_activity();
+ cont->done(0);
+ reset_fdc();
+ } else {
+ del_timer(&fd_timer);
+ fd_timer.function = (timeout_fn)fd_watchdog;
+ fd_timer.expires = jiffies + HZ / 10;
+ add_timer(&fd_timer);
+ }
+}
+
+static void main_command_interrupt(void)
+{
+ del_timer(&fd_timer);
+ cont->interrupt();
+}
+
+/* waits for a delay (spinup or select) to pass */
+static int fd_wait_for_completion(unsigned long delay, timeout_fn function)
+{
+ if (FDCS->reset) {
+ reset_fdc(); /* do the reset during sleep to win time;
+ * if we don't need to sleep, it's a good
+ * occasion anyway */
+ return 1;
+ }
+
+ if (time_before(jiffies, delay)) {
+ del_timer(&fd_timer);
+ fd_timer.function = function;
+ fd_timer.expires = delay;
+ add_timer(&fd_timer);
+ return 1;
+ }
+ return 0;
+}
+
+static DEFINE_SPINLOCK(floppy_hlt_lock);
+static int hlt_disabled;
+static void floppy_disable_hlt(void)
+{
+ unsigned long flags;
+
+ WARN_ONCE(1, "floppy_disable_hlt() scheduled for removal in 2012");
+ spin_lock_irqsave(&floppy_hlt_lock, flags);
+ if (!hlt_disabled) {
+ hlt_disabled = 1;
+#ifdef HAVE_DISABLE_HLT
+ disable_hlt();
+#endif
+ }
+ spin_unlock_irqrestore(&floppy_hlt_lock, flags);
+}
+
+static void floppy_enable_hlt(void)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&floppy_hlt_lock, flags);
+ if (hlt_disabled) {
+ hlt_disabled = 0;
+#ifdef HAVE_DISABLE_HLT
+ enable_hlt();
+#endif
+ }
+ spin_unlock_irqrestore(&floppy_hlt_lock, flags);
+}
+
+static void setup_DMA(void)
+{
+ unsigned long f;
+
+ if (raw_cmd->length == 0) {
+ int i;
+
+ pr_info("zero dma transfer size:");
+ for (i = 0; i < raw_cmd->cmd_count; i++)
+ pr_cont("%x,", raw_cmd->cmd[i]);
+ pr_cont("\n");
+ cont->done(0);
+ FDCS->reset = 1;
+ return;
+ }
+ if (((unsigned long)raw_cmd->kernel_data) % 512) {
+ pr_info("non aligned address: %p\n", raw_cmd->kernel_data);
+ cont->done(0);
+ FDCS->reset = 1;
+ return;
+ }
+ f = claim_dma_lock();
+ fd_disable_dma();
+#ifdef fd_dma_setup
+ if (fd_dma_setup(raw_cmd->kernel_data, raw_cmd->length,
+ (raw_cmd->flags & FD_RAW_READ) ?
+ DMA_MODE_READ : DMA_MODE_WRITE, FDCS->address) < 0) {
+ release_dma_lock(f);
+ cont->done(0);
+ FDCS->reset = 1;
+ return;
+ }
+ release_dma_lock(f);
+#else
+ fd_clear_dma_ff();
+ fd_cacheflush(raw_cmd->kernel_data, raw_cmd->length);
+ fd_set_dma_mode((raw_cmd->flags & FD_RAW_READ) ?
+ DMA_MODE_READ : DMA_MODE_WRITE);
+ fd_set_dma_addr(raw_cmd->kernel_data);
+ fd_set_dma_count(raw_cmd->length);
+ virtual_dma_port = FDCS->address;
+ fd_enable_dma();
+ release_dma_lock(f);
+#endif
+ floppy_disable_hlt();
+}
+
+static void show_floppy(void);
+
+/* waits until the fdc becomes ready */
+static int wait_til_ready(void)
+{
+ int status;
+ int counter;
+
+ if (FDCS->reset)
+ return -1;
+ for (counter = 0; counter < 10000; counter++) {
+ status = fd_inb(FD_STATUS);
+ if (status & STATUS_READY)
+ return status;
+ }
+ if (initialized) {
+ DPRINT("Getstatus times out (%x) on fdc %d\n", status, fdc);
+ show_floppy();
+ }
+ FDCS->reset = 1;
+ return -1;
+}
+
+/* sends a command byte to the fdc */
+static int output_byte(char byte)
+{
+ int status = wait_til_ready();
+
+ if (status < 0)
+ return -1;
+
+ if (is_ready_state(status)) {
+ fd_outb(byte, FD_DATA);
+ output_log[output_log_pos].data = byte;
+ output_log[output_log_pos].status = status;
+ output_log[output_log_pos].jiffies = jiffies;
+ output_log_pos = (output_log_pos + 1) % OLOGSIZE;
+ return 0;
+ }
+ FDCS->reset = 1;
+ if (initialized) {
+ DPRINT("Unable to send byte %x to FDC. Fdc=%x Status=%x\n",
+ byte, fdc, status);
+ show_floppy();
+ }
+ return -1;
+}
+
+/* gets the response from the fdc */
+static int result(void)
+{
+ int i;
+ int status = 0;
+
+ for (i = 0; i < MAX_REPLIES; i++) {
+ status = wait_til_ready();
+ if (status < 0)
+ break;
+ status &= STATUS_DIR | STATUS_READY | STATUS_BUSY | STATUS_DMA;
+ if ((status & ~STATUS_BUSY) == STATUS_READY) {
+ resultjiffies = jiffies;
+ resultsize = i;
+ return i;
+ }
+ if (status == (STATUS_DIR | STATUS_READY | STATUS_BUSY))
+ reply_buffer[i] = fd_inb(FD_DATA);
+ else
+ break;
+ }
+ if (initialized) {
+ DPRINT("get result error. Fdc=%d Last status=%x Read bytes=%d\n",
+ fdc, status, i);
+ show_floppy();
+ }
+ FDCS->reset = 1;
+ return -1;
+}
+
+#define MORE_OUTPUT -2
+/* does the fdc need more output? */
+static int need_more_output(void)
+{
+ int status = wait_til_ready();
+
+ if (status < 0)
+ return -1;
+
+ if (is_ready_state(status))
+ return MORE_OUTPUT;
+
+ return result();
+}
+
+/* Set perpendicular mode as required, based on data rate, if supported.
+ * 82077 now tested. 1Mbps data rate only possible with 82077-1.
+ */
+static void perpendicular_mode(void)
+{
+ unsigned char perp_mode;
+
+ if (raw_cmd->rate & 0x40) {
+ switch (raw_cmd->rate & 3) {
+ case 0:
+ perp_mode = 2;
+ break;
+ case 3:
+ perp_mode = 3;
+ break;
+ default:
+ DPRINT("Invalid data rate for perpendicular mode!\n");
+ cont->done(0);
+ FDCS->reset = 1;
+ /*
+ * convenient way to return to
+ * redo without too much hassle
+ * (deep stack et al.)
+ */
+ return;
+ }
+ } else
+ perp_mode = 0;
+
+ if (FDCS->perp_mode == perp_mode)
+ return;
+ if (FDCS->version >= FDC_82077_ORIG) {
+ output_byte(FD_PERPENDICULAR);
+ output_byte(perp_mode);
+ FDCS->perp_mode = perp_mode;
+ } else if (perp_mode) {
+ DPRINT("perpendicular mode not supported by this FDC.\n");
+ }
+} /* perpendicular_mode */
+
+static int fifo_depth = 0xa;
+static int no_fifo;
+
+static int fdc_configure(void)
+{
+ /* Turn on FIFO */
+ output_byte(FD_CONFIGURE);
+ if (need_more_output() != MORE_OUTPUT)
+ return 0;
+ output_byte(0);
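+	/* configuration byte: 0x10 disables drive polling, no_fifo (0x20 when
+	 * set) disables the FIFO, and the low nibble is the FIFO threshold */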
+ output_byte(0x10 | (no_fifo & 0x20) | (fifo_depth & 0xf));
+ output_byte(0); /* pre-compensation from track
+ 0 upwards */
+ return 1;
+}
+
+#define NOMINAL_DTR 500
+
+/* Issue a "SPECIFY" command to set the step rate time, head unload time,
+ * head load time, and DMA disable flag to values needed by floppy.
+ *
+ * The value "dtr" is the data transfer rate in Kbps. It is needed
+ * to account for the data rate-based scaling done by the 82072 and 82077
+ * FDC types. This parameter is ignored for other types of FDCs (e.g. the
+ * 8272A).
+ *
+ * Note that changing the data transfer rate has a (probably deleterious)
+ * effect on the parameters subject to scaling for 82072/82077 FDCs, so
+ * fdc_specify is called again after each data transfer rate
+ * change.
+ *
+ * srt: 1000 to 16000 in microseconds
+ * hut: 16 to 240 milliseconds
+ * hlt: 2 to 254 milliseconds
+ *
+ * These values are rounded up to the next highest available delay time.
+ */
+static void fdc_specify(void)
+{
+ unsigned char spec1;
+ unsigned char spec2;
+ unsigned long srt;
+ unsigned long hlt;
+ unsigned long hut;
+ unsigned long dtr = NOMINAL_DTR;
+ unsigned long scale_dtr = NOMINAL_DTR;
+ int hlt_max_code = 0x7f;
+ int hut_max_code = 0xf;
+
+ if (FDCS->need_configure && FDCS->version >= FDC_82072A) {
+ fdc_configure();
+ FDCS->need_configure = 0;
+ }
+
+ switch (raw_cmd->rate & 0x03) {
+ case 3:
+ dtr = 1000;
+ break;
+ case 1:
+ dtr = 300;
+ if (FDCS->version >= FDC_82078) {
+			/* choose the default rate table, not the one
+			 * where 1 = 2 Mbps */
+ output_byte(FD_DRIVESPEC);
+ if (need_more_output() == MORE_OUTPUT) {
+ output_byte(UNIT(current_drive));
+ output_byte(0xc0);
+ }
+ }
+ break;
+ case 2:
+ dtr = 250;
+ break;
+ }
+
+ if (FDCS->version >= FDC_82072) {
+ scale_dtr = dtr;
+ hlt_max_code = 0x00; /* 0==256msec*dtr0/dtr (not linear!) */
+ hut_max_code = 0x0; /* 0==256msec*dtr0/dtr (not linear!) */
+ }
+
+ /* Convert step rate from microseconds to milliseconds and 4 bits */
+ srt = 16 - DIV_ROUND_UP(DP->srt * scale_dtr / 1000, NOMINAL_DTR);
+ if (slow_floppy)
+ srt = srt / 4;
+
+ SUPBOUND(srt, 0xf);
+ INFBOUND(srt, 0);
+
+ hlt = DIV_ROUND_UP(DP->hlt * scale_dtr / 2, NOMINAL_DTR);
+ if (hlt < 0x01)
+ hlt = 0x01;
+ else if (hlt > 0x7f)
+ hlt = hlt_max_code;
+
+ hut = DIV_ROUND_UP(DP->hut * scale_dtr / 16, NOMINAL_DTR);
+ if (hut < 0x1)
+ hut = 0x1;
+ else if (hut > 0xf)
+ hut = hut_max_code;
+
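+	/* SPECIFY parameter bytes: spec1 = SRT in the high nibble, HUT in the
+	 * low nibble; spec2 = HLT in bits 7..1, no-DMA flag in bit 0 */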
+ spec1 = (srt << 4) | hut;
+ spec2 = (hlt << 1) | (use_virtual_dma & 1);
+
+ /* If these parameters did not change, just return with success */
+ if (FDCS->spec1 != spec1 || FDCS->spec2 != spec2) {
+ /* Go ahead and set spec1 and spec2 */
+ output_byte(FD_SPECIFY);
+ output_byte(FDCS->spec1 = spec1);
+ output_byte(FDCS->spec2 = spec2);
+ }
+} /* fdc_specify */
+
+/* Set the FDC's data transfer rate on behalf of the specified drive.
+ * NOTE: with 82072/82077 FDCs, changing the data rate requires a reissue
+ * of the specify command (i.e. using the fdc_specify function).
+ */
+static int fdc_dtr(void)
+{
+ /* If data rate not already set to desired value, set it. */
+ if ((raw_cmd->rate & 3) == FDCS->dtr)
+ return 0;
+
+ /* Set dtr */
+ fd_outb(raw_cmd->rate & 3, FD_DCR);
+
+ /* TODO: some FDC/drive combinations (C&T 82C711 with TEAC 1.2MB)
+ * need a stabilization period of several milliseconds to be
+ * enforced after data rate changes before R/W operations.
+ * Pause 5 msec to avoid trouble. (Needs to be 2 jiffies)
+ */
+ FDCS->dtr = raw_cmd->rate & 3;
+ return fd_wait_for_completion(jiffies + 2UL * HZ / 100,
+ (timeout_fn)floppy_ready);
+} /* fdc_dtr */
+
+static void tell_sector(void)
+{
+ pr_cont(": track %d, head %d, sector %d, size %d",
+ R_TRACK, R_HEAD, R_SECTOR, R_SIZECODE);
+} /* tell_sector */
+
+static void print_errors(void)
+{
+ DPRINT("");
+ if (ST0 & ST0_ECE) {
+ pr_cont("Recalibrate failed!");
+ } else if (ST2 & ST2_CRC) {
+ pr_cont("data CRC error");
+ tell_sector();
+ } else if (ST1 & ST1_CRC) {
+ pr_cont("CRC error");
+ tell_sector();
+ } else if ((ST1 & (ST1_MAM | ST1_ND)) ||
+ (ST2 & ST2_MAM)) {
+ if (!probing) {
+ pr_cont("sector not found");
+ tell_sector();
+ } else
+ pr_cont("probe failed...");
+ } else if (ST2 & ST2_WC) { /* seek error */
+ pr_cont("wrong cylinder");
+ } else if (ST2 & ST2_BC) { /* cylinder marked as bad */
+ pr_cont("bad cylinder");
+ } else {
+ pr_cont("unknown error. ST[0..2] are: 0x%x 0x%x 0x%x",
+ ST0, ST1, ST2);
+ tell_sector();
+ }
+ pr_cont("\n");
+}
+
+/*
+ * OK, this error interpreting routine is called after a
+ * DMA read/write has succeeded
+ * or failed, so we check the results, and copy any buffers.
+ * hhb: Added better error reporting.
+ * ak: Made this into a separate routine.
+ */
+static int interpret_errors(void)
+{
+ char bad;
+
+ if (inr != 7) {
+ DPRINT("-- FDC reply error\n");
+ FDCS->reset = 1;
+ return 1;
+ }
+
+ /* check IC to find cause of interrupt */
+ switch (ST0 & ST0_INTR) {
+ case 0x40: /* error occurred during command execution */
+ if (ST1 & ST1_EOC)
+ return 0; /* occurs with pseudo-DMA */
+ bad = 1;
+ if (ST1 & ST1_WP) {
+ DPRINT("Drive is write protected\n");
+ clear_bit(FD_DISK_WRITABLE_BIT, &DRS->flags);
+ cont->done(0);
+ bad = 2;
+ } else if (ST1 & ST1_ND) {
+ set_bit(FD_NEED_TWADDLE_BIT, &DRS->flags);
+ } else if (ST1 & ST1_OR) {
+ if (DP->flags & FTD_MSG)
+ DPRINT("Over/Underrun - retrying\n");
+ bad = 0;
+ } else if (*errors >= DP->max_errors.reporting) {
+ print_errors();
+ }
+ if (ST2 & ST2_WC || ST2 & ST2_BC)
+ /* wrong cylinder => recal */
+ DRS->track = NEED_2_RECAL;
+ return bad;
+ case 0x80: /* invalid command given */
+ DPRINT("Invalid FDC command given!\n");
+ cont->done(0);
+ return 2;
+ case 0xc0:
+ DPRINT("Abnormal termination caused by polling\n");
+ cont->error();
+ return 2;
+ default: /* (0) Normal command termination */
+ return 0;
+ }
+}
+
+/*
+ * This routine is called when everything should be correctly set up
+ * for the transfer (i.e. floppy motor is on, the correct floppy is
+ * selected, and the head is sitting on the right track).
+ */
+static void setup_rw_floppy(void)
+{
+ int i;
+ int r;
+ int flags;
+ int dflags;
+ unsigned long ready_date;
+ timeout_fn function;
+
+ flags = raw_cmd->flags;
+ if (flags & (FD_RAW_READ | FD_RAW_WRITE))
+ flags |= FD_RAW_INTR;
+
+ if ((flags & FD_RAW_SPIN) && !(flags & FD_RAW_NO_MOTOR)) {
+ ready_date = DRS->spinup_date + DP->spinup;
+ /* If spinup will take a long time, rerun scandrives
+ * again just before spinup completion. Beware that
+ * after scandrives, we must again wait for selection.
+ */
+ if (time_after(ready_date, jiffies + DP->select_delay)) {
+ ready_date -= DP->select_delay;
+ function = (timeout_fn)floppy_start;
+ } else
+ function = (timeout_fn)setup_rw_floppy;
+
+ /* wait until the floppy is spinning fast enough */
+ if (fd_wait_for_completion(ready_date, function))
+ return;
+ }
+ dflags = DRS->flags;
+
+ if ((flags & FD_RAW_READ) || (flags & FD_RAW_WRITE))
+ setup_DMA();
+
+ if (flags & FD_RAW_INTR)
+ do_floppy = main_command_interrupt;
+
+ r = 0;
+ for (i = 0; i < raw_cmd->cmd_count; i++)
+ r |= output_byte(raw_cmd->cmd[i]);
+
+ debugt(__func__, "rw_command");
+
+ if (r) {
+ cont->error();
+ reset_fdc();
+ return;
+ }
+
+ if (!(flags & FD_RAW_INTR)) {
+ inr = result();
+ cont->interrupt();
+ } else if (flags & FD_RAW_NEED_DISK)
+ fd_watchdog();
+}
+
+static int blind_seek;
+
+/*
+ * This is the routine called after every seek (or recalibrate) interrupt
+ * from the floppy controller.
+ */
+static void seek_interrupt(void)
+{
+ debugt(__func__, "");
+ if (inr != 2 || (ST0 & 0xF8) != 0x20) {
+ DPRINT("seek failed\n");
+ DRS->track = NEED_2_RECAL;
+ cont->error();
+ cont->redo();
+ return;
+ }
+ if (DRS->track >= 0 && DRS->track != ST1 && !blind_seek) {
+ debug_dcl(DP->flags,
+ "clearing NEWCHANGE flag because of effective seek\n");
+ debug_dcl(DP->flags, "jiffies=%lu\n", jiffies);
+ clear_bit(FD_DISK_NEWCHANGE_BIT, &DRS->flags);
+ /* effective seek */
+ DRS->select_date = jiffies;
+ }
+ DRS->track = ST1;
+ floppy_ready();
+}
+
+static void check_wp(void)
+{
+ if (test_bit(FD_VERIFY_BIT, &DRS->flags)) {
+ /* check write protection */
+ output_byte(FD_GETSTATUS);
+ output_byte(UNIT(current_drive));
+ if (result() != 1) {
+ FDCS->reset = 1;
+ return;
+ }
+ clear_bit(FD_VERIFY_BIT, &DRS->flags);
+ clear_bit(FD_NEED_TWADDLE_BIT, &DRS->flags);
+ debug_dcl(DP->flags,
+ "checking whether disk is write protected\n");
+ debug_dcl(DP->flags, "wp=%x\n", ST3 & 0x40);
+ if (!(ST3 & 0x40))
+ set_bit(FD_DISK_WRITABLE_BIT, &DRS->flags);
+ else
+ clear_bit(FD_DISK_WRITABLE_BIT, &DRS->flags);
+ }
+}
+
+static void seek_floppy(void)
+{
+ int track;
+
+ blind_seek = 0;
+
+ debug_dcl(DP->flags, "calling disk change from %s\n", __func__);
+
+ if (!test_bit(FD_DISK_NEWCHANGE_BIT, &DRS->flags) &&
+ disk_change(current_drive) && (raw_cmd->flags & FD_RAW_NEED_DISK)) {
+ /* the media changed flag should be cleared after the seek.
+ * If it isn't, this means that there is really no disk in
+ * the drive.
+ */
+ set_bit(FD_DISK_CHANGED_BIT, &DRS->flags);
+ cont->done(0);
+ cont->redo();
+ return;
+ }
+ if (DRS->track <= NEED_1_RECAL) {
+ recalibrate_floppy();
+ return;
+ } else if (test_bit(FD_DISK_NEWCHANGE_BIT, &DRS->flags) &&
+ (raw_cmd->flags & FD_RAW_NEED_DISK) &&
+ (DRS->track <= NO_TRACK || DRS->track == raw_cmd->track)) {
+ /* we seek to clear the media-changed condition. Does anybody
+ * know a more elegant way, which works on all drives? */
+ if (raw_cmd->track)
+ track = raw_cmd->track - 1;
+ else {
+ if (DP->flags & FD_SILENT_DCL_CLEAR) {
+ set_dor(fdc, ~(0x10 << UNIT(current_drive)), 0);
+ blind_seek = 1;
+ raw_cmd->flags |= FD_RAW_NEED_SEEK;
+ }
+ track = 1;
+ }
+ } else {
+ check_wp();
+ if (raw_cmd->track != DRS->track &&
+ (raw_cmd->flags & FD_RAW_NEED_SEEK))
+ track = raw_cmd->track;
+ else {
+ setup_rw_floppy();
+ return;
+ }
+ }
+
+ do_floppy = seek_interrupt;
+ output_byte(FD_SEEK);
+ output_byte(UNIT(current_drive));
+ if (output_byte(track) < 0) {
+ reset_fdc();
+ return;
+ }
+ debugt(__func__, "");
+}
+
+static void recal_interrupt(void)
+{
+ debugt(__func__, "");
+ if (inr != 2)
+ FDCS->reset = 1;
+ else if (ST0 & ST0_ECE) {
+ switch (DRS->track) {
+ case NEED_1_RECAL:
+ debugt(__func__, "need 1 recal");
+ /* after a second recalibrate, we still haven't
+ * reached track 0. Probably no drive. Raise an
+ * error, as failing immediately might upset
+ * computers possessed by the Devil :-) */
+ cont->error();
+ cont->redo();
+ return;
+ case NEED_2_RECAL:
+ debugt(__func__, "need 2 recal");
+ /* If we already did a recalibrate,
+ * and we are not at track 0, this
+ * means we have moved. (The only way
+ * not to move at recalibration is to
+ * be already at track 0.) Clear the
+ * new change flag */
+ debug_dcl(DP->flags,
+ "clearing NEWCHANGE flag because of second recalibrate\n");
+
+ clear_bit(FD_DISK_NEWCHANGE_BIT, &DRS->flags);
+ DRS->select_date = jiffies;
+ /* fall through */
+ default:
+ debugt(__func__, "default");
+			/* Recalibrate moves the head by at
+			 * most 80 steps. If after one
+			 * recalibrate we haven't reached
+			 * track 0, this might mean that we
+			 * started beyond track 80. Try
+			 * again. */
+ DRS->track = NEED_1_RECAL;
+ break;
+ }
+ } else
+ DRS->track = ST1;
+ floppy_ready();
+}
+
+static void print_result(char *message, int inr)
+{
+ int i;
+
+ DPRINT("%s ", message);
+ if (inr >= 0)
+ for (i = 0; i < inr; i++)
+ pr_cont("repl[%d]=%x ", i, reply_buffer[i]);
+ pr_cont("\n");
+}
+
+/* interrupt handler. Note that this can be called externally on the Sparc */
+irqreturn_t floppy_interrupt(int irq, void *dev_id)
+{
+ int do_print;
+ unsigned long f;
+ void (*handler)(void) = do_floppy;
+
+ lasthandler = handler;
+ interruptjiffies = jiffies;
+
+ f = claim_dma_lock();
+ fd_disable_dma();
+ release_dma_lock(f);
+
+ floppy_enable_hlt();
+ do_floppy = NULL;
+ if (fdc >= N_FDC || FDCS->address == -1) {
+ /* we don't even know which FDC is the culprit */
+ pr_info("DOR0=%x\n", fdc_state[0].dor);
+ pr_info("floppy interrupt on bizarre fdc %d\n", fdc);
+ pr_info("handler=%pf\n", handler);
+ is_alive(__func__, "bizarre fdc");
+ return IRQ_NONE;
+ }
+
+ FDCS->reset = 0;
+ /* We have to clear the reset flag here, because apparently on boxes
+ * with level triggered interrupts (PS/2, Sparc, ...), it is needed to
+ * emit SENSEI's to clear the interrupt line. And FDCS->reset blocks the
+ * emission of the SENSEI's.
+ * It is OK to emit floppy commands because we are in an interrupt
+ * handler here, and thus we have to fear no interference of other
+ * activity.
+ */
+
+ do_print = !handler && print_unex && initialized;
+
+ inr = result();
+ if (do_print)
+ print_result("unexpected interrupt", inr);
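+	/* no result phase: the interrupt came from drive polling or a seek/
+	 * recalibrate; issue SENSE INTERRUPT STATUS until the current drive
+	 * acknowledges (at most four times) */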
+ if (inr == 0) {
+ int max_sensei = 4;
+ do {
+ output_byte(FD_SENSEI);
+ inr = result();
+ if (do_print)
+ print_result("sensei", inr);
+ max_sensei--;
+ } while ((ST0 & 0x83) != UNIT(current_drive) &&
+ inr == 2 && max_sensei);
+ }
+ if (!handler) {
+ FDCS->reset = 1;
+ return IRQ_NONE;
+ }
+ schedule_bh(handler);
+ is_alive(__func__, "normal interrupt end");
+
+ /* FIXME! Was it really for us? */
+ return IRQ_HANDLED;
+}
+
+static void recalibrate_floppy(void)
+{
+ debugt(__func__, "");
+ do_floppy = recal_interrupt;
+ output_byte(FD_RECALIBRATE);
+ if (output_byte(UNIT(current_drive)) < 0)
+ reset_fdc();
+}
+
+/*
+ * Must do 4 FD_SENSEIs after reset because of ``drive polling''.
+ */
+static void reset_interrupt(void)
+{
+ debugt(__func__, "");
+ result(); /* get the status ready for set_fdc */
+ if (FDCS->reset) {
+ pr_info("reset set in interrupt, calling %pf\n", cont->error);
+ cont->error(); /* a reset just after a reset. BAD! */
+ }
+ cont->redo();
+}
+
+/*
+ * reset is done by pulling bit 2 of DOR low for a while (old FDCs),
+ * or by setting the self clearing bit 7 of STATUS (newer FDCs)
+ */
+static void reset_fdc(void)
+{
+ unsigned long flags;
+
+ do_floppy = reset_interrupt;
+ FDCS->reset = 0;
+ reset_fdc_info(0);
+
+ /* Pseudo-DMA may intercept 'reset finished' interrupt. */
+ /* Irrelevant for systems with true DMA (i386). */
+
+ flags = claim_dma_lock();
+ fd_disable_dma();
+ release_dma_lock(flags);
+
+ if (FDCS->version >= FDC_82072A)
+ fd_outb(0x80 | (FDCS->dtr & 3), FD_STATUS);
+ else {
+ fd_outb(FDCS->dor & ~0x04, FD_DOR);
+ udelay(FD_RESET_DELAY);
+ fd_outb(FDCS->dor, FD_DOR);
+ }
+}
+
+static void show_floppy(void)
+{
+ int i;
+
+ pr_info("\n");
+ pr_info("floppy driver state\n");
+ pr_info("-------------------\n");
+ pr_info("now=%lu last interrupt=%lu diff=%lu last called handler=%pf\n",
+ jiffies, interruptjiffies, jiffies - interruptjiffies,
+ lasthandler);
+
+ pr_info("timeout_message=%s\n", timeout_message);
+ pr_info("last output bytes:\n");
+ for (i = 0; i < OLOGSIZE; i++)
+ pr_info("%2x %2x %lu\n",
+ output_log[(i + output_log_pos) % OLOGSIZE].data,
+ output_log[(i + output_log_pos) % OLOGSIZE].status,
+ output_log[(i + output_log_pos) % OLOGSIZE].jiffies);
+ pr_info("last result at %lu\n", resultjiffies);
+ pr_info("last redo_fd_request at %lu\n", lastredo);
+ print_hex_dump(KERN_INFO, "", DUMP_PREFIX_NONE, 16, 1,
+ reply_buffer, resultsize, true);
+
+ pr_info("status=%x\n", fd_inb(FD_STATUS));
+ pr_info("fdc_busy=%lu\n", fdc_busy);
+ if (do_floppy)
+ pr_info("do_floppy=%pf\n", do_floppy);
+ if (work_pending(&floppy_work))
+ pr_info("floppy_work.func=%pf\n", floppy_work.func);
+ if (timer_pending(&fd_timer))
+ pr_info("fd_timer.function=%pf\n", fd_timer.function);
+ if (timer_pending(&fd_timeout)) {
+ pr_info("timer_function=%pf\n", fd_timeout.function);
+ pr_info("expires=%lu\n", fd_timeout.expires - jiffies);
+ pr_info("now=%lu\n", jiffies);
+ }
+ pr_info("cont=%p\n", cont);
+ pr_info("current_req=%p\n", current_req);
+ pr_info("command_status=%d\n", command_status);
+ pr_info("\n");
+}
+
+static void floppy_shutdown(unsigned long data)
+{
+ unsigned long flags;
+
+ if (initialized)
+ show_floppy();
+ cancel_activity();
+
+ floppy_enable_hlt();
+
+ flags = claim_dma_lock();
+ fd_disable_dma();
+ release_dma_lock(flags);
+
+ /* avoid dma going to a random drive after shutdown */
+
+ if (initialized)
+ DPRINT("floppy timeout called\n");
+ FDCS->reset = 1;
+ if (cont) {
+ cont->done(0);
+ cont->redo(); /* this will recall reset when needed */
+ } else {
+ pr_info("no cont in shutdown!\n");
+ process_fd_request();
+ }
+ is_alive(__func__, "");
+}
+
+/* start motor, check media-changed condition and write protection */
+static int start_motor(void (*function)(void))
+{
+ int mask;
+ int data;
+
+ mask = 0xfc;
+ data = UNIT(current_drive);
+ if (!(raw_cmd->flags & FD_RAW_NO_MOTOR)) {
+ if (!(FDCS->dor & (0x10 << UNIT(current_drive)))) {
+ set_debugt();
+ /* no read since this drive is running */
+ DRS->first_read_date = 0;
+ /* note motor start time if motor is not yet running */
+ DRS->spinup_date = jiffies;
+ data |= (0x10 << UNIT(current_drive));
+ }
+ } else if (FDCS->dor & (0x10 << UNIT(current_drive)))
+ mask &= ~(0x10 << UNIT(current_drive));
+
+ /* starts motor and selects floppy */
+ del_timer(motor_off_timer + current_drive);
+ set_dor(fdc, mask, data);
+
+ /* wait_for_completion also schedules reset if needed. */
+ return fd_wait_for_completion(DRS->select_date + DP->select_delay,
+ (timeout_fn)function);
+}
+
+static void floppy_ready(void)
+{
+ if (FDCS->reset) {
+ reset_fdc();
+ return;
+ }
+ if (start_motor(floppy_ready))
+ return;
+ if (fdc_dtr())
+ return;
+
+ debug_dcl(DP->flags, "calling disk change from floppy_ready\n");
+ if (!(raw_cmd->flags & FD_RAW_NO_MOTOR) &&
+ disk_change(current_drive) && !DP->select_delay)
+ twaddle(); /* this clears the dcl on certain
+ * drive/controller combinations */
+
+#ifdef fd_chose_dma_mode
+ if ((raw_cmd->flags & FD_RAW_READ) || (raw_cmd->flags & FD_RAW_WRITE)) {
+ unsigned long flags = claim_dma_lock();
+ fd_chose_dma_mode(raw_cmd->kernel_data, raw_cmd->length);
+ release_dma_lock(flags);
+ }
+#endif
+
+ if (raw_cmd->flags & (FD_RAW_NEED_SEEK | FD_RAW_NEED_DISK)) {
+ perpendicular_mode();
+ fdc_specify(); /* must be done here because of hut, hlt ... */
+ seek_floppy();
+ } else {
+ if ((raw_cmd->flags & FD_RAW_READ) ||
+ (raw_cmd->flags & FD_RAW_WRITE))
+ fdc_specify();
+ setup_rw_floppy();
+ }
+}
+
+static void floppy_start(void)
+{
+ reschedule_timeout(current_reqD, "floppy start");
+
+ scandrives();
+ debug_dcl(DP->flags, "setting NEWCHANGE in floppy_start\n");
+ set_bit(FD_DISK_NEWCHANGE_BIT, &DRS->flags);
+ floppy_ready();
+}
+
+/*
+ * ========================================================================
+ * here ends the bottom half. Exported routines are:
+ * floppy_start, floppy_off, floppy_ready, lock_fdc, unlock_fdc, set_fdc,
+ * start_motor, reset_fdc, reset_fdc_info, interpret_errors.
+ * Initialization also uses output_byte, result, set_dor and
+ * floppy_interrupt.
+ * ========================================================================
+ */
+/*
+ * General purpose continuations.
+ * ==============================
+ */
+
+static void do_wakeup(void)
+{
+ reschedule_timeout(MAXTIMEOUT, "do wakeup");
+ cont = NULL;
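+	/* command_status >= 2 is what wait_til_done() is waiting for */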
+ command_status += 2;
+ wake_up(&command_done);
+}
+
+static const struct cont_t wakeup_cont = {
+ .interrupt = empty,
+ .redo = do_wakeup,
+ .error = empty,
+ .done = (done_f)empty
+};
+
+static const struct cont_t intr_cont = {
+ .interrupt = empty,
+ .redo = process_fd_request,
+ .error = empty,
+ .done = (done_f)empty
+};
+
+static int wait_til_done(void (*handler)(void), bool interruptible)
+{
+ int ret;
+
+ schedule_bh(handler);
+
+ if (interruptible)
+ wait_event_interruptible(command_done, command_status >= 2);
+ else
+ wait_event(command_done, command_status >= 2);
+
+ if (command_status < 2) {
+ cancel_activity();
+ cont = &intr_cont;
+ reset_fdc();
+ return -EINTR;
+ }
+
+ if (FDCS->reset)
+ command_status = FD_COMMAND_ERROR;
+ if (command_status == FD_COMMAND_OKAY)
+ ret = 0;
+ else
+ ret = -EIO;
+ command_status = FD_COMMAND_NONE;
+ return ret;
+}
+
+static void generic_done(int result)
+{
+ command_status = result;
+ cont = &wakeup_cont;
+}
+
+static void generic_success(void)
+{
+ cont->done(1);
+}
+
+static void generic_failure(void)
+{
+ cont->done(0);
+}
+
+static void success_and_wakeup(void)
+{
+ generic_success();
+ cont->redo();
+}
+
+/*
+ * formatting and rw support.
+ * ==========================
+ */
+
+static int next_valid_format(void)
+{
+ int probed_format;
+
+ probed_format = DRS->probed_format;
+ while (1) {
+ if (probed_format >= 8 || !DP->autodetect[probed_format]) {
+ DRS->probed_format = 0;
+ return 1;
+ }
+ if (floppy_type[DP->autodetect[probed_format]].sect) {
+ DRS->probed_format = probed_format;
+ return 0;
+ }
+ probed_format++;
+ }
+}
+
+static void bad_flp_intr(void)
+{
+ int err_count;
+
+ if (probing) {
+ DRS->probed_format++;
+ if (!next_valid_format())
+ return;
+ }
+ err_count = ++(*errors);
+ INFBOUND(DRWE->badness, err_count);
+ if (err_count > DP->max_errors.abort)
+ cont->done(0);
+ if (err_count > DP->max_errors.reset)
+ FDCS->reset = 1;
+ else if (err_count > DP->max_errors.recal)
+ DRS->track = NEED_2_RECAL;
+}
+
+static void set_floppy(int drive)
+{
+ int type = ITYPE(UDRS->fd_device);
+
+ if (type)
+ _floppy = floppy_type + type;
+ else
+ _floppy = current_type[drive];
+}
+
+/*
+ * formatting support.
+ * ===================
+ */
+static void format_interrupt(void)
+{
+ switch (interpret_errors()) {
+ case 1:
+ cont->error();
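+		/* fall through */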
+ case 2:
+ break;
+ case 0:
+ cont->done(1);
+ }
+ cont->redo();
+}
+
+#define FM_MODE(x, y) ((y) & ~(((x)->rate & 0x80) >> 1))
+#define CT(x) ((x) | 0xc0)
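+/* FM_MODE clears the command's MFM bit (0x40) when bit 0x80 of the format's
+ * rate field is set (FM/single-density media); CT forces the MT and MFM bits
+ * on so that command opcodes can be compared independently of them. */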
+
+static void setup_format_params(int track)
+{
+ int n;
+ int il;
+ int count;
+ int head_shift;
+ int track_shift;
+ struct fparm {
+ unsigned char track, head, sect, size;
+ } *here = (struct fparm *)floppy_track_buffer;
+
+ raw_cmd = &default_raw_cmd;
+ raw_cmd->track = track;
+
+ raw_cmd->flags = (FD_RAW_WRITE | FD_RAW_INTR | FD_RAW_SPIN |
+ FD_RAW_NEED_DISK | FD_RAW_NEED_SEEK);
+ raw_cmd->rate = _floppy->rate & 0x43;
+ raw_cmd->cmd_count = NR_F;
+ COMMAND = FM_MODE(_floppy, FD_FORMAT);
+ DR_SELECT = UNIT(current_drive) + PH_HEAD(_floppy, format_req.head);
+ F_SIZECODE = FD_SIZECODE(_floppy);
+ F_SECT_PER_TRACK = _floppy->sect << 2 >> F_SIZECODE;
+ F_GAP = _floppy->fmt_gap;
+ F_FILL = FD_FILL_BYTE;
+
+ raw_cmd->kernel_data = floppy_track_buffer;
+ raw_cmd->length = 4 * F_SECT_PER_TRACK;
+
+ /* allow for about 30ms for data transport per track */
+ head_shift = (F_SECT_PER_TRACK + 5) / 6;
+
+ /* a ``cylinder'' is two tracks plus a little stepping time */
+ track_shift = 2 * head_shift + 3;
+
+ /* position of logical sector 1 on this track */
+ n = (track_shift * format_req.track + head_shift * format_req.head)
+ % F_SECT_PER_TRACK;
+
+ /* determine interleave */
+ il = 1;
+ if (_floppy->fmt_gap < 0x22)
+ il++;
+
+ /* initialize field */
+ for (count = 0; count < F_SECT_PER_TRACK; ++count) {
+ here[count].track = format_req.track;
+ here[count].head = format_req.head;
+ here[count].sect = 0;
+ here[count].size = F_SIZECODE;
+ }
+ /* place logical sectors */
+ for (count = 1; count <= F_SECT_PER_TRACK; ++count) {
+ here[n].sect = count;
+ n = (n + il) % F_SECT_PER_TRACK;
+ if (here[n].sect) { /* sector busy, find next free sector */
+ ++n;
+ if (n >= F_SECT_PER_TRACK) {
+ n -= F_SECT_PER_TRACK;
+ while (here[n].sect)
+ ++n;
+ }
+ }
+ }
+ if (_floppy->stretch & FD_SECTBASEMASK) {
+ for (count = 0; count < F_SECT_PER_TRACK; count++)
+ here[count].sect += FD_SECTBASE(_floppy) - 1;
+ }
+}
+
+static void redo_format(void)
+{
+ buffer_track = -1;
+ setup_format_params(format_req.track << STRETCH(_floppy));
+ floppy_start();
+ debugt(__func__, "queue format request");
+}
+
+static const struct cont_t format_cont = {
+ .interrupt = format_interrupt,
+ .redo = redo_format,
+ .error = bad_flp_intr,
+ .done = generic_done
+};
+
+static int do_format(int drive, struct format_descr *tmp_format_req)
+{
+ int ret;
+
+ if (lock_fdc(drive, true))
+ return -EINTR;
+
+ set_floppy(drive);
+ if (!_floppy ||
+ _floppy->track > DP->tracks ||
+ tmp_format_req->track >= _floppy->track ||
+ tmp_format_req->head >= _floppy->head ||
+ (_floppy->sect << 2) % (1 << FD_SIZECODE(_floppy)) ||
+ !_floppy->fmt_gap) {
+ process_fd_request();
+ return -EINVAL;
+ }
+ format_req = *tmp_format_req;
+ format_errors = 0;
+ cont = &format_cont;
+ errors = &format_errors;
+ ret = wait_til_done(redo_format, true);
+ if (ret == -EINTR)
+ return -EINTR;
+ process_fd_request();
+ return ret;
+}
+
+/*
+ * Buffer read/write and support
+ * =============================
+ */
+
+static void floppy_end_request(struct request *req, int error)
+{
+ unsigned int nr_sectors = current_count_sectors;
+ unsigned int drive = (unsigned long)req->rq_disk->private_data;
+
+ /* current_count_sectors can be zero if transfer failed */
+ if (error)
+ nr_sectors = blk_rq_cur_sectors(req);
+ if (__blk_end_request(req, error, nr_sectors << 9))
+ return;
+
+ /* We're done with the request */
+ floppy_off(drive);
+ current_req = NULL;
+}
+
+/* new request_done. Can handle physical sectors which are smaller than a
+ * logical buffer */
+static void request_done(int uptodate)
+{
+ struct request *req = current_req;
+ struct request_queue *q;
+ unsigned long flags;
+ int block;
+ char msg[sizeof("request done ") + sizeof(int) * 3];
+
+ probing = 0;
+ snprintf(msg, sizeof(msg), "request done %d", uptodate);
+ reschedule_timeout(MAXTIMEOUT, msg);
+
+ if (!req) {
+ pr_info("floppy.c: no request in request_done\n");
+ return;
+ }
+
+ q = req->q;
+
+ if (uptodate) {
+ /* maintain values for invalidation on geometry
+ * change */
+ block = current_count_sectors + blk_rq_pos(req);
+ INFBOUND(DRS->maxblock, block);
+ if (block > _floppy->sect)
+ DRS->maxtrack = 1;
+
+ /* unlock chained buffers */
+ spin_lock_irqsave(q->queue_lock, flags);
+ floppy_end_request(req, 0);
+ spin_unlock_irqrestore(q->queue_lock, flags);
+ } else {
+ if (rq_data_dir(req) == WRITE) {
+ /* record write error information */
+ DRWE->write_errors++;
+ if (DRWE->write_errors == 1) {
+ DRWE->first_error_sector = blk_rq_pos(req);
+ DRWE->first_error_generation = DRS->generation;
+ }
+ DRWE->last_error_sector = blk_rq_pos(req);
+ DRWE->last_error_generation = DRS->generation;
+ }
+ spin_lock_irqsave(q->queue_lock, flags);
+ floppy_end_request(req, -EIO);
+ spin_unlock_irqrestore(q->queue_lock, flags);
+ }
+}
+
+/* Interrupt handler evaluating the result of the r/w operation */
+static void rw_interrupt(void)
+{
+ int eoc;
+ int ssize;
+ int heads;
+ int nr_sectors;
+
+ if (R_HEAD >= 2) {
+		/* some Toshiba floppy controllers occasionally seem to
+ * return bogus interrupts after read/write operations, which
+ * can be recognized by a bad head number (>= 2) */
+ return;
+ }
+
+ if (!DRS->first_read_date)
+ DRS->first_read_date = jiffies;
+
+ nr_sectors = 0;
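+	/* physical sector size in 512-byte units: (128 << SIZECODE) / 512,
+	 * rounded up so that 128- and 256-byte sectors count as one unit */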
+ ssize = DIV_ROUND_UP(1 << SIZECODE, 4);
+
+ if (ST1 & ST1_EOC)
+ eoc = 1;
+ else
+ eoc = 0;
+
+ if (COMMAND & 0x80)
+ heads = 2;
+ else
+ heads = 1;
+
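+	/* number of 512-byte sectors actually transferred, derived from the
+	 * end position (track/head/sector) reported by the controller */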
+ nr_sectors = (((R_TRACK - TRACK) * heads +
+ R_HEAD - HEAD) * SECT_PER_TRACK +
+ R_SECTOR - SECTOR + eoc) << SIZECODE >> 2;
+
+ if (nr_sectors / ssize >
+ DIV_ROUND_UP(in_sector_offset + current_count_sectors, ssize)) {
+ DPRINT("long rw: %x instead of %lx\n",
+ nr_sectors, current_count_sectors);
+ pr_info("rs=%d s=%d\n", R_SECTOR, SECTOR);
+ pr_info("rh=%d h=%d\n", R_HEAD, HEAD);
+ pr_info("rt=%d t=%d\n", R_TRACK, TRACK);
+ pr_info("heads=%d eoc=%d\n", heads, eoc);
+ pr_info("spt=%d st=%d ss=%d\n",
+ SECT_PER_TRACK, fsector_t, ssize);
+ pr_info("in_sector_offset=%d\n", in_sector_offset);
+ }
+
+ nr_sectors -= in_sector_offset;
+ INFBOUND(nr_sectors, 0);
+ SUPBOUND(current_count_sectors, nr_sectors);
+
+ switch (interpret_errors()) {
+ case 2:
+ cont->redo();
+ return;
+ case 1:
+ if (!current_count_sectors) {
+ cont->error();
+ cont->redo();
+ return;
+ }
+ break;
+ case 0:
+ if (!current_count_sectors) {
+ cont->redo();
+ return;
+ }
+ current_type[current_drive] = _floppy;
+ floppy_sizes[TOMINOR(current_drive)] = _floppy->size;
+ break;
+ }
+
+ if (probing) {
+ if (DP->flags & FTD_MSG)
+ DPRINT("Auto-detected floppy type %s in fd%d\n",
+ _floppy->name, current_drive);
+ current_type[current_drive] = _floppy;
+ floppy_sizes[TOMINOR(current_drive)] = _floppy->size;
+ probing = 0;
+ }
+
+ if (CT(COMMAND) != FD_READ ||
+ raw_cmd->kernel_data == current_req->buffer) {
+ /* transfer directly from buffer */
+ cont->done(1);
+ } else if (CT(COMMAND) == FD_READ) {
+ buffer_track = raw_cmd->track;
+ buffer_drive = current_drive;
+ INFBOUND(buffer_max, nr_sectors + fsector_t);
+ }
+ cont->redo();
+}
+
+/* Compute maximal contiguous buffer size. */
+static int buffer_chain_size(void)
+{
+ struct bio_vec *bv;
+ int size;
+ struct req_iterator iter;
+ char *base;
+
+ base = bio_data(current_req->bio);
+ size = 0;
+
+ rq_for_each_segment(bv, current_req, iter) {
+ if (page_address(bv->bv_page) + bv->bv_offset != base + size)
+ break;
+
+ size += bv->bv_len;
+ }
+
+ return size >> 9;
+}
+
+/* Compute the maximal transfer size */
+static int transfer_size(int ssize, int max_sector, int max_size)
+{
+ SUPBOUND(max_sector, fsector_t + max_size);
+
+ /* alignment */
+ max_sector -= (max_sector % _floppy->sect) % ssize;
+
+ /* transfer size, beginning not aligned */
+ current_count_sectors = max_sector - fsector_t;
+
+ return max_sector;
+}
+
+/*
+ * Move data from/to the track buffer to/from the buffer cache.
+ */
+static void copy_buffer(int ssize, int max_sector, int max_sector_2)
+{
+ int remaining; /* number of transferred 512-byte sectors */
+ struct bio_vec *bv;
+ char *buffer;
+ char *dma_buffer;
+ int size;
+ struct req_iterator iter;
+
+ max_sector = transfer_size(ssize,
+ min(max_sector, max_sector_2),
+ blk_rq_sectors(current_req));
+
+ if (current_count_sectors <= 0 && CT(COMMAND) == FD_WRITE &&
+ buffer_max > fsector_t + blk_rq_sectors(current_req))
+ current_count_sectors = min_t(int, buffer_max - fsector_t,
+ blk_rq_sectors(current_req));
+
+ remaining = current_count_sectors << 9;
+ if (remaining > blk_rq_bytes(current_req) && CT(COMMAND) == FD_WRITE) {
+ DPRINT("in copy buffer\n");
+ pr_info("current_count_sectors=%ld\n", current_count_sectors);
+ pr_info("remaining=%d\n", remaining >> 9);
+ pr_info("current_req->nr_sectors=%u\n",
+ blk_rq_sectors(current_req));
+ pr_info("current_req->current_nr_sectors=%u\n",
+ blk_rq_cur_sectors(current_req));
+ pr_info("max_sector=%d\n", max_sector);
+ pr_info("ssize=%d\n", ssize);
+ }
+
+ buffer_max = max(max_sector, buffer_max);
+
+ dma_buffer = floppy_track_buffer + ((fsector_t - buffer_min) << 9);
+
+ size = blk_rq_cur_bytes(current_req);
+
+ rq_for_each_segment(bv, current_req, iter) {
+ if (!remaining)
+ break;
+
+ size = bv->bv_len;
+ SUPBOUND(size, remaining);
+
+ buffer = page_address(bv->bv_page) + bv->bv_offset;
+ if (dma_buffer + size >
+ floppy_track_buffer + (max_buffer_sectors << 10) ||
+ dma_buffer < floppy_track_buffer) {
+ DPRINT("buffer overrun in copy buffer %d\n",
+ (int)((floppy_track_buffer - dma_buffer) >> 9));
+ pr_info("fsector_t=%d buffer_min=%d\n",
+ fsector_t, buffer_min);
+ pr_info("current_count_sectors=%ld\n",
+ current_count_sectors);
+ if (CT(COMMAND) == FD_READ)
+ pr_info("read\n");
+ if (CT(COMMAND) == FD_WRITE)
+ pr_info("write\n");
+ break;
+ }
+ if (((unsigned long)buffer) % 512)
+ DPRINT("%p buffer not aligned\n", buffer);
+
+ if (CT(COMMAND) == FD_READ)
+ memcpy(buffer, dma_buffer, size);
+ else
+ memcpy(dma_buffer, buffer, size);
+
+ remaining -= size;
+ dma_buffer += size;
+ }
+ if (remaining) {
+ if (remaining > 0)
+ max_sector -= remaining >> 9;
+ DPRINT("weirdness: remaining %d\n", remaining >> 9);
+ }
+}
+
+/* work around a bug in pseudo DMA
+ * (on some FDCs) pseudo DMA does not stop when the CPU stops
+ * sending data. Hence we need a different way to signal the
+ * transfer length: We use SECT_PER_TRACK. Unfortunately, this
+ * does not work with MT, hence we can only transfer one head at
+ * a time
+ */
+static void virtualdmabug_workaround(void)
+{
+ int hard_sectors;
+ int end_sector;
+
+ if (CT(COMMAND) == FD_WRITE) {
+ COMMAND &= ~0x80; /* switch off multiple track mode */
+
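+		/* number of hardware sectors in this transfer:
+		 * raw_cmd->length / (128 << SIZECODE) */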
+ hard_sectors = raw_cmd->length >> (7 + SIZECODE);
+ end_sector = SECTOR + hard_sectors - 1;
+ if (end_sector > SECT_PER_TRACK) {
+ pr_info("too many sectors %d > %d\n",
+ end_sector, SECT_PER_TRACK);
+ return;
+ }
+ SECT_PER_TRACK = end_sector;
+ /* make sure SECT_PER_TRACK
+ * points to end of transfer */
+ }
+}
+
+/*
+ * Formulate a read/write request.
+ * this routine decides where to load the data (directly to buffer, or to
+ * tmp floppy area), how much data to load (the size of the buffer, the whole
+ * track, or a single sector)
+ * All floppy_track_buffer handling goes in here. If we ever add track buffer
+ * allocation on the fly, it should be done here. No other part should need
+ * modification.
+ */
+
+static int make_raw_rw_request(void)
+{
+ int aligned_sector_t;
+ int max_sector;
+ int max_size;
+ int tracksize;
+ int ssize;
+
+ if (WARN(max_buffer_sectors == 0, "VFS: Block I/O scheduled on unopened device\n"))
+ return 0;
+
+ set_fdc((long)current_req->rq_disk->private_data);
+
+ raw_cmd = &default_raw_cmd;
+	raw_cmd->flags = FD_RAW_SPIN | FD_RAW_NEED_DISK | FD_RAW_NEED_SEEK;
+ raw_cmd->cmd_count = NR_RW;
+ if (rq_data_dir(current_req) == READ) {
+ raw_cmd->flags |= FD_RAW_READ;
+ COMMAND = FM_MODE(_floppy, FD_READ);
+ } else if (rq_data_dir(current_req) == WRITE) {
+ raw_cmd->flags |= FD_RAW_WRITE;
+ COMMAND = FM_MODE(_floppy, FD_WRITE);
+ } else {
+ DPRINT("%s: unknown command\n", __func__);
+ return 0;
+ }
+
+ max_sector = _floppy->sect * _floppy->head;
+
+ TRACK = (int)blk_rq_pos(current_req) / max_sector;
+ fsector_t = (int)blk_rq_pos(current_req) % max_sector;
+ if (_floppy->track && TRACK >= _floppy->track) {
+ if (blk_rq_cur_sectors(current_req) & 1) {
+ current_count_sectors = 1;
+ return 1;
+ } else
+ return 0;
+ }
+ HEAD = fsector_t / _floppy->sect;
+
+ if (((_floppy->stretch & (FD_SWAPSIDES | FD_SECTBASEMASK)) ||
+ test_bit(FD_NEED_TWADDLE_BIT, &DRS->flags)) &&
+ fsector_t < _floppy->sect)
+ max_sector = _floppy->sect;
+
+ /* 2M disks have phantom sectors on the first track */
+ if ((_floppy->rate & FD_2M) && (!TRACK) && (!HEAD)) {
+ max_sector = 2 * _floppy->sect / 3;
+ if (fsector_t >= max_sector) {
+ current_count_sectors =
+ min_t(int, _floppy->sect - fsector_t,
+ blk_rq_sectors(current_req));
+ return 1;
+ }
+ SIZECODE = 2;
+ } else
+ SIZECODE = FD_SIZECODE(_floppy);
+ raw_cmd->rate = _floppy->rate & 0x43;
+ if ((_floppy->rate & FD_2M) && (TRACK || HEAD) && raw_cmd->rate == 2)
+ raw_cmd->rate = 1;
+
+ if (SIZECODE)
+ SIZECODE2 = 0xff;
+ else
+ SIZECODE2 = 0x80;
+ raw_cmd->track = TRACK << STRETCH(_floppy);
+ DR_SELECT = UNIT(current_drive) + PH_HEAD(_floppy, HEAD);
+ GAP = _floppy->gap;
+ ssize = DIV_ROUND_UP(1 << SIZECODE, 4);
+ SECT_PER_TRACK = _floppy->sect << 2 >> SIZECODE;
+ SECTOR = ((fsector_t % _floppy->sect) << 2 >> SIZECODE) +
+ FD_SECTBASE(_floppy);
+
+	/* tracksize is the part of the track that can be filled with whole
+	 * sectors of size ssize.
+	 */
+ tracksize = _floppy->sect - _floppy->sect % ssize;
+ if (tracksize < _floppy->sect) {
+ SECT_PER_TRACK++;
+ if (tracksize <= fsector_t % _floppy->sect)
+ SECTOR--;
+
+ /* if we are beyond tracksize, fill up using smaller sectors */
+ while (tracksize <= fsector_t % _floppy->sect) {
+ while (tracksize + ssize > _floppy->sect) {
+ SIZECODE--;
+ ssize >>= 1;
+ }
+ SECTOR++;
+ SECT_PER_TRACK++;
+ tracksize += ssize;
+ }
+ max_sector = HEAD * _floppy->sect + tracksize;
+ } else if (!TRACK && !HEAD && !(_floppy->rate & FD_2M) && probing) {
+ max_sector = _floppy->sect;
+ } else if (!HEAD && CT(COMMAND) == FD_WRITE) {
+ /* for virtual DMA bug workaround */
+ max_sector = _floppy->sect;
+ }
+
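+	/* in_sector_offset: offset (in 512-byte units) of the request inside
+	 * its physical sector; aligned_sector_t: request start rounded down
+	 * to a physical sector boundary */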
+ in_sector_offset = (fsector_t % _floppy->sect) % ssize;
+ aligned_sector_t = fsector_t - in_sector_offset;
+ max_size = blk_rq_sectors(current_req);
+ if ((raw_cmd->track == buffer_track) &&
+ (current_drive == buffer_drive) &&
+ (fsector_t >= buffer_min) && (fsector_t < buffer_max)) {
+ /* data already in track buffer */
+ if (CT(COMMAND) == FD_READ) {
+ copy_buffer(1, max_sector, buffer_max);
+ return 1;
+ }
+ } else if (in_sector_offset || blk_rq_sectors(current_req) < ssize) {
+ if (CT(COMMAND) == FD_WRITE) {
+ unsigned int sectors;
+
+ sectors = fsector_t + blk_rq_sectors(current_req);
+ if (sectors > ssize && sectors < ssize + ssize)
+ max_size = ssize + ssize;
+ else
+ max_size = ssize;
+ }
+ raw_cmd->flags &= ~FD_RAW_WRITE;
+ raw_cmd->flags |= FD_RAW_READ;
+ COMMAND = FM_MODE(_floppy, FD_READ);
+ } else if ((unsigned long)current_req->buffer < MAX_DMA_ADDRESS) {
+ unsigned long dma_limit;
+ int direct, indirect;
+
+ indirect =
+ transfer_size(ssize, max_sector,
+ max_buffer_sectors * 2) - fsector_t;
+
+ /*
+ * Do NOT use minimum() here---MAX_DMA_ADDRESS is 64 bits wide
+ * on a 64 bit machine!
+ */
+ max_size = buffer_chain_size();
+ dma_limit = (MAX_DMA_ADDRESS -
+ ((unsigned long)current_req->buffer)) >> 9;
+ if ((unsigned long)max_size > dma_limit)
+ max_size = dma_limit;
+ /* 64 kb boundaries */
+ if (CROSS_64KB(current_req->buffer, max_size << 9))
+ max_size = (K_64 -
+ ((unsigned long)current_req->buffer) %
+ K_64) >> 9;
+ direct = transfer_size(ssize, max_sector, max_size) - fsector_t;
+ /*
+ * We try to read tracks, but if we get too many errors, we
+ * go back to reading just one sector at a time.
+ *
+ * This means we should be able to read a sector even if there
+ * are other bad sectors on this track.
+ */
+ if (!direct ||
+ (indirect * 2 > direct * 3 &&
+ *errors < DP->max_errors.read_track &&
+ ((!probing ||
+ (DP->read_track & (1 << DRS->probed_format)))))) {
+ max_size = blk_rq_sectors(current_req);
+ } else {
+ raw_cmd->kernel_data = current_req->buffer;
+ raw_cmd->length = current_count_sectors << 9;
+ if (raw_cmd->length == 0) {
+ DPRINT("%s: zero dma transfer attempted\n", __func__);
+ DPRINT("indirect=%d direct=%d fsector_t=%d\n",
+ indirect, direct, fsector_t);
+ return 0;
+ }
+ virtualdmabug_workaround();
+ return 2;
+ }
+ }
+
+ if (CT(COMMAND) == FD_READ)
+ max_size = max_sector; /* unbounded */
+
+ /* claim buffer track if needed */
+ if (buffer_track != raw_cmd->track || /* bad track */
+ buffer_drive != current_drive || /* bad drive */
+ fsector_t > buffer_max ||
+ fsector_t < buffer_min ||
+ ((CT(COMMAND) == FD_READ ||
+ (!in_sector_offset && blk_rq_sectors(current_req) >= ssize)) &&
+ max_sector > 2 * max_buffer_sectors + buffer_min &&
+ max_size + fsector_t > 2 * max_buffer_sectors + buffer_min)) {
+ /* not enough space */
+ buffer_track = -1;
+ buffer_drive = current_drive;
+ buffer_max = buffer_min = aligned_sector_t;
+ }
+ raw_cmd->kernel_data = floppy_track_buffer +
+ ((aligned_sector_t - buffer_min) << 9);
+
+ if (CT(COMMAND) == FD_WRITE) {
+		/* copy write buffer to track buffer.
+		 * if we get here, we know that the write
+		 * is either aligned or the data is already in the
+		 * buffer (buffer will be overwritten) */
+ if (in_sector_offset && buffer_track == -1)
+ DPRINT("internal error offset !=0 on write\n");
+ buffer_track = raw_cmd->track;
+ buffer_drive = current_drive;
+ copy_buffer(ssize, max_sector,
+ 2 * max_buffer_sectors + buffer_min);
+ } else
+ transfer_size(ssize, max_sector,
+ 2 * max_buffer_sectors + buffer_min -
+ aligned_sector_t);
+
+ /* round up current_count_sectors to get dma xfer size */
+ raw_cmd->length = in_sector_offset + current_count_sectors;
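+	/* round up to a multiple of ssize (ssize is a power of two) */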
+ raw_cmd->length = ((raw_cmd->length - 1) | (ssize - 1)) + 1;
+ raw_cmd->length <<= 9;
+ if ((raw_cmd->length < current_count_sectors << 9) ||
+ (raw_cmd->kernel_data != current_req->buffer &&
+ CT(COMMAND) == FD_WRITE &&
+ (aligned_sector_t + (raw_cmd->length >> 9) > buffer_max ||
+ aligned_sector_t < buffer_min)) ||
+ raw_cmd->length % (128 << SIZECODE) ||
+ raw_cmd->length <= 0 || current_count_sectors <= 0) {
+		DPRINT("fractional current count b=%lx s=%lx\n",
+ raw_cmd->length, current_count_sectors);
+ if (raw_cmd->kernel_data != current_req->buffer)
+ pr_info("addr=%d, length=%ld\n",
+ (int)((raw_cmd->kernel_data -
+ floppy_track_buffer) >> 9),
+ current_count_sectors);
+ pr_info("st=%d ast=%d mse=%d msi=%d\n",
+ fsector_t, aligned_sector_t, max_sector, max_size);
+ pr_info("ssize=%x SIZECODE=%d\n", ssize, SIZECODE);
+ pr_info("command=%x SECTOR=%d HEAD=%d, TRACK=%d\n",
+ COMMAND, SECTOR, HEAD, TRACK);
+ pr_info("buffer drive=%d\n", buffer_drive);
+ pr_info("buffer track=%d\n", buffer_track);
+ pr_info("buffer_min=%d\n", buffer_min);
+ pr_info("buffer_max=%d\n", buffer_max);
+ return 0;
+ }
+
+ if (raw_cmd->kernel_data != current_req->buffer) {
+ if (raw_cmd->kernel_data < floppy_track_buffer ||
+ current_count_sectors < 0 ||
+ raw_cmd->length < 0 ||
+ raw_cmd->kernel_data + raw_cmd->length >
+ floppy_track_buffer + (max_buffer_sectors << 10)) {
+ DPRINT("buffer overrun in schedule dma\n");
+ pr_info("fsector_t=%d buffer_min=%d current_count=%ld\n",
+ fsector_t, buffer_min, raw_cmd->length >> 9);
+ pr_info("current_count_sectors=%ld\n",
+ current_count_sectors);
+ if (CT(COMMAND) == FD_READ)
+ pr_info("read\n");
+ if (CT(COMMAND) == FD_WRITE)
+ pr_info("write\n");
+ return 0;
+ }
+ } else if (raw_cmd->length > blk_rq_bytes(current_req) ||
+ current_count_sectors > blk_rq_sectors(current_req)) {
+ DPRINT("buffer overrun in direct transfer\n");
+ return 0;
+ } else if (raw_cmd->length < current_count_sectors << 9) {
+ DPRINT("more sectors than bytes\n");
+ pr_info("bytes=%ld\n", raw_cmd->length >> 9);
+ pr_info("sectors=%ld\n", current_count_sectors);
+ }
+ if (raw_cmd->length == 0) {
+ DPRINT("zero dma transfer attempted from make_raw_request\n");
+ return 0;
+ }
+
+ virtualdmabug_workaround();
+ return 2;
+}
+
+/*
+ * Round-robin between our available drives, doing one request from each
+ */
+static int set_next_request(void)
+{
+ struct request_queue *q;
+ int old_pos = fdc_queue;
+
+ do {
+ q = disks[fdc_queue]->queue;
+ if (++fdc_queue == N_DRIVE)
+ fdc_queue = 0;
+ if (q) {
+ current_req = blk_fetch_request(q);
+ if (current_req)
+ break;
+ }
+ } while (fdc_queue != old_pos);
+
+ return current_req != NULL;
+}
+
+static void redo_fd_request(void)
+{
+ int drive;
+ int tmp;
+
+ lastredo = jiffies;
+ if (current_drive < N_DRIVE)
+ floppy_off(current_drive);
+
+do_request:
+ if (!current_req) {
+ int pending;
+
+ spin_lock_irq(&floppy_lock);
+ pending = set_next_request();
+ spin_unlock_irq(&floppy_lock);
+
+ if (!pending) {
+ do_floppy = NULL;
+ unlock_fdc();
+ return;
+ }
+ }
+ drive = (long)current_req->rq_disk->private_data;
+ set_fdc(drive);
+ reschedule_timeout(current_reqD, "redo fd request");
+
+ set_floppy(drive);
+ raw_cmd = &default_raw_cmd;
+ raw_cmd->flags = 0;
+ if (start_motor(redo_fd_request))
+ return;
+
+ disk_change(current_drive);
+ if (test_bit(current_drive, &fake_change) ||
+ test_bit(FD_DISK_CHANGED_BIT, &DRS->flags)) {
+ DPRINT("disk absent or changed during operation\n");
+ request_done(0);
+ goto do_request;
+ }
+ if (!_floppy) { /* Autodetection */
+ if (!probing) {
+ DRS->probed_format = 0;
+ if (next_valid_format()) {
+ DPRINT("no autodetectable formats\n");
+ _floppy = NULL;
+ request_done(0);
+ goto do_request;
+ }
+ }
+ probing = 1;
+ _floppy = floppy_type + DP->autodetect[DRS->probed_format];
+ } else
+ probing = 0;
+ errors = &(current_req->errors);
+ tmp = make_raw_rw_request();
+ if (tmp < 2) {
+ request_done(tmp);
+ goto do_request;
+ }
+
+ if (test_bit(FD_NEED_TWADDLE_BIT, &DRS->flags))
+ twaddle();
+ schedule_bh(floppy_start);
+ debugt(__func__, "queue fd request");
+ return;
+}
+
+static const struct cont_t rw_cont = {
+ .interrupt = rw_interrupt,
+ .redo = redo_fd_request,
+ .error = bad_flp_intr,
+ .done = request_done
+};
+
+static void process_fd_request(void)
+{
+ cont = &rw_cont;
+ schedule_bh(redo_fd_request);
+}
+
+static void do_fd_request(struct request_queue *q)
+{
+ if (WARN(max_buffer_sectors == 0,
+ "VFS: %s called on non-open device\n", __func__))
+ return;
+
+ if (WARN(atomic_read(&usage_count) == 0,
+ "warning: usage count=0, current_req=%p sect=%ld type=%x flags=%x\n",
+ current_req, (long)blk_rq_pos(current_req), current_req->cmd_type,
+ current_req->cmd_flags))
+ return;
+
+ if (test_bit(0, &fdc_busy)) {
+ /* fdc busy, this new request will be treated when the
+ current one is done */
+ is_alive(__func__, "old request running");
+ return;
+ }
+ lock_fdc(MAXTIMEOUT, false);
+ process_fd_request();
+ is_alive(__func__, "");
+}
+
+static const struct cont_t poll_cont = {
+ .interrupt = success_and_wakeup,
+ .redo = floppy_ready,
+ .error = generic_failure,
+ .done = generic_done
+};
+
+static int poll_drive(bool interruptible, int flag)
+{
+ /* no auto-sense, just clear dcl */
+ raw_cmd = &default_raw_cmd;
+ raw_cmd->flags = flag;
+ raw_cmd->track = 0;
+ raw_cmd->cmd_count = 0;
+ cont = &poll_cont;
+ debug_dcl(DP->flags, "setting NEWCHANGE in poll_drive\n");
+ set_bit(FD_DISK_NEWCHANGE_BIT, &DRS->flags);
+
+ return wait_til_done(floppy_ready, interruptible);
+}
+
+/*
+ * User triggered reset
+ * ====================
+ */
+
+static void reset_intr(void)
+{
+ pr_info("weird, reset interrupt called\n");
+}
+
+static const struct cont_t reset_cont = {
+ .interrupt = reset_intr,
+ .redo = success_and_wakeup,
+ .error = generic_failure,
+ .done = generic_done
+};
+
+static int user_reset_fdc(int drive, int arg, bool interruptible)
+{
+ int ret;
+
+ if (lock_fdc(drive, interruptible))
+ return -EINTR;
+
+ if (arg == FD_RESET_ALWAYS)
+ FDCS->reset = 1;
+ if (FDCS->reset) {
+ cont = &reset_cont;
+ ret = wait_til_done(reset_fdc, interruptible);
+ if (ret == -EINTR)
+ return -EINTR;
+ }
+ process_fd_request();
+ return 0;
+}
+
+/*
+ * Misc Ioctl's and support
+ * ========================
+ */
+static inline int fd_copyout(void __user *param, const void *address,
+ unsigned long size)
+{
+ return copy_to_user(param, address, size) ? -EFAULT : 0;
+}
+
+static inline int fd_copyin(void __user *param, void *address,
+ unsigned long size)
+{
+ return copy_from_user(address, param, size) ? -EFAULT : 0;
+}
+
+static const char *drive_name(int type, int drive)
+{
+ struct floppy_struct *floppy;
+
+ if (type)
+ floppy = floppy_type + type;
+ else {
+ if (UDP->native_format)
+ floppy = floppy_type + UDP->native_format;
+ else
+ return "(null)";
+ }
+ if (floppy->name)
+ return floppy->name;
+ else
+ return "(null)";
+}
+
+/* raw commands */
+static void raw_cmd_done(int flag)
+{
+ int i;
+
+ if (!flag) {
+ raw_cmd->flags |= FD_RAW_FAILURE;
+ raw_cmd->flags |= FD_RAW_HARDFAILURE;
+ } else {
+ raw_cmd->reply_count = inr;
+ if (raw_cmd->reply_count > MAX_REPLIES)
+ raw_cmd->reply_count = 0;
+ for (i = 0; i < raw_cmd->reply_count; i++)
+ raw_cmd->reply[i] = reply_buffer[i];
+
+ if (raw_cmd->flags & (FD_RAW_READ | FD_RAW_WRITE)) {
+ unsigned long flags;
+ flags = claim_dma_lock();
+ raw_cmd->length = fd_get_dma_residue();
+ release_dma_lock(flags);
+ }
+
+ if ((raw_cmd->flags & FD_RAW_SOFTFAILURE) &&
+ (!raw_cmd->reply_count || (raw_cmd->reply[0] & 0xc0)))
+ raw_cmd->flags |= FD_RAW_FAILURE;
+
+ if (disk_change(current_drive))
+ raw_cmd->flags |= FD_RAW_DISK_CHANGE;
+ else
+ raw_cmd->flags &= ~FD_RAW_DISK_CHANGE;
+ if (raw_cmd->flags & FD_RAW_NO_MOTOR_AFTER)
+ motor_off_callback(current_drive);
+
+ if (raw_cmd->next &&
+ (!(raw_cmd->flags & FD_RAW_FAILURE) ||
+ !(raw_cmd->flags & FD_RAW_STOP_IF_FAILURE)) &&
+ ((raw_cmd->flags & FD_RAW_FAILURE) ||
+ !(raw_cmd->flags & FD_RAW_STOP_IF_SUCCESS))) {
+ raw_cmd = raw_cmd->next;
+ return;
+ }
+ }
+ generic_done(flag);
+}
+
+static const struct cont_t raw_cmd_cont = {
+ .interrupt = success_and_wakeup,
+ .redo = floppy_start,
+ .error = generic_failure,
+ .done = raw_cmd_done
+};
+
+static int raw_cmd_copyout(int cmd, void __user *param,
+ struct floppy_raw_cmd *ptr)
+{
+ int ret;
+
+ while (ptr) {
+ ret = copy_to_user(param, ptr, sizeof(*ptr));
+ if (ret)
+ return -EFAULT;
+ param += sizeof(struct floppy_raw_cmd);
+ if ((ptr->flags & FD_RAW_READ) && ptr->buffer_length) {
+ if (ptr->length >= 0 &&
+ ptr->length <= ptr->buffer_length) {
+ long length = ptr->buffer_length - ptr->length;
+ ret = fd_copyout(ptr->data, ptr->kernel_data,
+ length);
+ if (ret)
+ return ret;
+ }
+ }
+ ptr = ptr->next;
+ }
+
+ return 0;
+}
+
+static void raw_cmd_free(struct floppy_raw_cmd **ptr)
+{
+ struct floppy_raw_cmd *next;
+ struct floppy_raw_cmd *this;
+
+ this = *ptr;
+ *ptr = NULL;
+ while (this) {
+ if (this->buffer_length) {
+ fd_dma_mem_free((unsigned long)this->kernel_data,
+ this->buffer_length);
+ this->buffer_length = 0;
+ }
+ next = this->next;
+ kfree(this);
+ this = next;
+ }
+}
+
+static int raw_cmd_copyin(int cmd, void __user *param,
+ struct floppy_raw_cmd **rcmd)
+{
+ struct floppy_raw_cmd *ptr;
+ int ret;
+ int i;
+
+ *rcmd = NULL;
+
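+	/* copy in a whole chain of raw commands; FD_RAW_MORE in the flags
+	 * links each element to the next */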
+loop:
+ ptr = kmalloc(sizeof(struct floppy_raw_cmd), GFP_USER);
+ if (!ptr)
+ return -ENOMEM;
+ *rcmd = ptr;
+ ret = copy_from_user(ptr, param, sizeof(*ptr));
+ if (ret)
+ return -EFAULT;
+ ptr->next = NULL;
+ ptr->buffer_length = 0;
+ param += sizeof(struct floppy_raw_cmd);
+ if (ptr->cmd_count > 33)
+ /* the command may now also take up the space
+ * initially intended for the reply & the
+ * reply count. Needed for long 82078 commands
+ * such as RESTORE, which takes ... 17 command
+ * bytes. Murphy's law #137: When you reserve
+ * 16 bytes for a structure, you'll one day
+ * discover that you really need 17...
+ */
+ return -EINVAL;
+
+ for (i = 0; i < 16; i++)
+ ptr->reply[i] = 0;
+ ptr->resultcode = 0;
+ ptr->kernel_data = NULL;
+
+ if (ptr->flags & (FD_RAW_READ | FD_RAW_WRITE)) {
+ if (ptr->length <= 0)
+ return -EINVAL;
+ ptr->kernel_data = (char *)fd_dma_mem_alloc(ptr->length);
+ fallback_on_nodma_alloc(&ptr->kernel_data, ptr->length);
+ if (!ptr->kernel_data)
+ return -ENOMEM;
+ ptr->buffer_length = ptr->length;
+ }
+ if (ptr->flags & FD_RAW_WRITE) {
+ ret = fd_copyin(ptr->data, ptr->kernel_data, ptr->length);
+ if (ret)
+ return ret;
+ }
+
+ if (ptr->flags & FD_RAW_MORE) {
+ rcmd = &(ptr->next);
+ ptr->rate &= 0x43;
+ goto loop;
+ }
+
+ return 0;
+}
+
+static int raw_cmd_ioctl(int cmd, void __user *param)
+{
+ struct floppy_raw_cmd *my_raw_cmd;
+ int drive;
+ int ret2;
+ int ret;
+
+ if (FDCS->rawcmd <= 1)
+ FDCS->rawcmd = 1;
+ for (drive = 0; drive < N_DRIVE; drive++) {
+ if (FDC(drive) != fdc)
+ continue;
+ if (drive == current_drive) {
+ if (UDRS->fd_ref > 1) {
+ FDCS->rawcmd = 2;
+ break;
+ }
+ } else if (UDRS->fd_ref) {
+ FDCS->rawcmd = 2;
+ break;
+ }
+ }
+
+ if (FDCS->reset)
+ return -EIO;
+
+ ret = raw_cmd_copyin(cmd, param, &my_raw_cmd);
+ if (ret) {
+ raw_cmd_free(&my_raw_cmd);
+ return ret;
+ }
+
+ raw_cmd = my_raw_cmd;
+ cont = &raw_cmd_cont;
+ ret = wait_til_done(floppy_start, true);
+ debug_dcl(DP->flags, "calling disk change from raw_cmd ioctl\n");
+
+ if (ret != -EINTR && FDCS->reset)
+ ret = -EIO;
+
+ DRS->track = NO_TRACK;
+
+ ret2 = raw_cmd_copyout(cmd, param, my_raw_cmd);
+ if (!ret)
+ ret = ret2;
+ raw_cmd_free(&my_raw_cmd);
+ return ret;
+}
+
+static int invalidate_drive(struct block_device *bdev)
+{
+ /* invalidate the buffer track to force a reread */
+ set_bit((long)bdev->bd_disk->private_data, &fake_change);
+ process_fd_request();
+ check_disk_change(bdev);
+ return 0;
+}
+
+static int set_geometry(unsigned int cmd, struct floppy_struct *g,
+ int drive, int type, struct block_device *bdev)
+{
+ int cnt;
+
+ /* sanity checking for parameters. */
+ if (g->sect <= 0 ||
+ g->head <= 0 ||
+ g->track <= 0 || g->track > UDP->tracks >> STRETCH(g) ||
+ /* check if reserved bits are set */
+ (g->stretch & ~(FD_STRETCH | FD_SWAPSIDES | FD_SECTBASEMASK)) != 0)
+ return -EINVAL;
+ if (type) {
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+ mutex_lock(&open_lock);
+ if (lock_fdc(drive, true)) {
+ mutex_unlock(&open_lock);
+ return -EINTR;
+ }
+ floppy_type[type] = *g;
+ floppy_type[type].name = "user format";
+ for (cnt = type << 2; cnt < (type << 2) + 4; cnt++)
+ floppy_sizes[cnt] = floppy_sizes[cnt + 0x80] =
+ floppy_type[type].size + 1;
+ process_fd_request();
+ for (cnt = 0; cnt < N_DRIVE; cnt++) {
+ struct block_device *bdev = opened_bdev[cnt];
+ if (!bdev || ITYPE(drive_state[cnt].fd_device) != type)
+ continue;
+ __invalidate_device(bdev, true);
+ }
+ mutex_unlock(&open_lock);
+ } else {
+ int oldStretch;
+
+ if (lock_fdc(drive, true))
+ return -EINTR;
+ if (cmd != FDDEFPRM) {
+			/* notice a disk change immediately, else
+			 * we lose our settings */
+ if (poll_drive(true, FD_RAW_NEED_DISK) == -EINTR)
+ return -EINTR;
+ }
+ oldStretch = g->stretch;
+ user_params[drive] = *g;
+ if (buffer_drive == drive)
+ SUPBOUND(buffer_max, user_params[drive].sect);
+ current_type[drive] = &user_params[drive];
+ floppy_sizes[drive] = user_params[drive].size;
+ if (cmd == FDDEFPRM)
+ DRS->keep_data = -1;
+ else
+ DRS->keep_data = 1;
+ /* invalidation. Invalidate only when needed, i.e.
+ * when there are already sectors in the buffer cache
+ * whose number will change. This is useful, because
+ * mtools often changes the geometry of the disk after
+ * looking at the boot block */
+ if (DRS->maxblock > user_params[drive].sect ||
+ DRS->maxtrack ||
+ ((user_params[drive].sect ^ oldStretch) &
+ (FD_SWAPSIDES | FD_SECTBASEMASK)))
+ invalidate_drive(bdev);
+ else
+ process_fd_request();
+ }
+ return 0;
+}
+
+/* handle obsolete ioctl's */
+static unsigned int ioctl_table[] = {
+ FDCLRPRM,
+ FDSETPRM,
+ FDDEFPRM,
+ FDGETPRM,
+ FDMSGON,
+ FDMSGOFF,
+ FDFMTBEG,
+ FDFMTTRK,
+ FDFMTEND,
+ FDSETEMSGTRESH,
+ FDFLUSH,
+ FDSETMAXERRS,
+ FDGETMAXERRS,
+ FDGETDRVTYP,
+ FDSETDRVPRM,
+ FDGETDRVPRM,
+ FDGETDRVSTAT,
+ FDPOLLDRVSTAT,
+ FDRESET,
+ FDGETFDCSTAT,
+ FDWERRORCLR,
+ FDWERRORGET,
+ FDRAWCMD,
+ FDEJECT,
+ FDTWADDLE
+};
+
+static int normalize_ioctl(unsigned int *cmd, int *size)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(ioctl_table); i++) {
+ if ((*cmd & 0xffff) == (ioctl_table[i] & 0xffff)) {
+ *size = _IOC_SIZE(*cmd);
+ *cmd = ioctl_table[i];
+ if (*size > _IOC_SIZE(*cmd)) {
+ pr_info("ioctl not yet supported\n");
+ return -EFAULT;
+ }
+ return 0;
+ }
+ }
+ return -EINVAL;
+}
+
+static int get_floppy_geometry(int drive, int type, struct floppy_struct **g)
+{
+ if (type)
+ *g = &floppy_type[type];
+ else {
+ if (lock_fdc(drive, false))
+ return -EINTR;
+ if (poll_drive(false, 0) == -EINTR)
+ return -EINTR;
+ process_fd_request();
+ *g = current_type[drive];
+ }
+ if (!*g)
+ return -ENODEV;
+ return 0;
+}
+
+static int fd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
+{
+ int drive = (long)bdev->bd_disk->private_data;
+ int type = ITYPE(drive_state[drive].fd_device);
+ struct floppy_struct *g;
+ int ret;
+
+ ret = get_floppy_geometry(drive, type, &g);
+ if (ret)
+ return ret;
+
+ geo->heads = g->head;
+ geo->sectors = g->sect;
+ geo->cylinders = g->track;
+ return 0;
+}
+
+static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd,
+ unsigned long param)
+{
+ int drive = (long)bdev->bd_disk->private_data;
+ int type = ITYPE(UDRS->fd_device);
+ int i;
+ int ret;
+ int size;
+ union inparam {
+ struct floppy_struct g; /* geometry */
+ struct format_descr f;
+ struct floppy_max_errors max_errors;
+ struct floppy_drive_params dp;
+ } inparam; /* parameters coming from user space */
+ const void *outparam; /* parameters passed back to user space */
+
+ /* convert compatibility eject ioctls into floppy eject ioctl.
+ * We do this in order to provide a means to eject floppy disks before
+ * installing the new fdutils package */
+ if (cmd == CDROMEJECT || /* CD-ROM eject */
+ cmd == 0x6470) { /* SunOS floppy eject */
+ DPRINT("obsolete eject ioctl\n");
+ DPRINT("please use floppycontrol --eject\n");
+ cmd = FDEJECT;
+ }
+
+ if (!((cmd & 0xff00) == 0x0200))
+ return -EINVAL;
+
+ /* convert the old style command into a new style command */
+ ret = normalize_ioctl(&cmd, &size);
+ if (ret)
+ return ret;
+
+ /* permission checks */
+ if (((cmd & 0x40) && !(mode & (FMODE_WRITE | FMODE_WRITE_IOCTL))) ||
+ ((cmd & 0x80) && !capable(CAP_SYS_ADMIN)))
+ return -EPERM;
+
+ if (WARN_ON(size < 0 || size > sizeof(inparam)))
+ return -EINVAL;
+
+ /* copyin */
+ memset(&inparam, 0, sizeof(inparam));
+ if (_IOC_DIR(cmd) & _IOC_WRITE) {
+ ret = fd_copyin((void __user *)param, &inparam, size);
+ if (ret)
+ return ret;
+ }
+
+ switch (cmd) {
+ case FDEJECT:
+ if (UDRS->fd_ref != 1)
+ /* somebody else has this drive open */
+ return -EBUSY;
+ if (lock_fdc(drive, true))
+ return -EINTR;
+
+ /* do the actual eject. Fails on
+ * non-Sparc architectures */
+ ret = fd_eject(UNIT(drive));
+
+ set_bit(FD_DISK_CHANGED_BIT, &UDRS->flags);
+ set_bit(FD_VERIFY_BIT, &UDRS->flags);
+ process_fd_request();
+ return ret;
+ case FDCLRPRM:
+ if (lock_fdc(drive, true))
+ return -EINTR;
+ current_type[drive] = NULL;
+ floppy_sizes[drive] = MAX_DISK_SIZE << 1;
+ UDRS->keep_data = 0;
+ return invalidate_drive(bdev);
+ case FDSETPRM:
+ case FDDEFPRM:
+ return set_geometry(cmd, &inparam.g, drive, type, bdev);
+ case FDGETPRM:
+ ret = get_floppy_geometry(drive, type,
+ (struct floppy_struct **)&outparam);
+ if (ret)
+ return ret;
+ break;
+ case FDMSGON:
+ UDP->flags |= FTD_MSG;
+ return 0;
+ case FDMSGOFF:
+ UDP->flags &= ~FTD_MSG;
+ return 0;
+ case FDFMTBEG:
+ if (lock_fdc(drive, true))
+ return -EINTR;
+ if (poll_drive(true, FD_RAW_NEED_DISK) == -EINTR)
+ return -EINTR;
+ ret = UDRS->flags;
+ process_fd_request();
+ if (ret & FD_VERIFY)
+ return -ENODEV;
+ if (!(ret & FD_DISK_WRITABLE))
+ return -EROFS;
+ return 0;
+ case FDFMTTRK:
+ if (UDRS->fd_ref != 1)
+ return -EBUSY;
+ return do_format(drive, &inparam.f);
+ case FDFMTEND:
+ case FDFLUSH:
+ if (lock_fdc(drive, true))
+ return -EINTR;
+ return invalidate_drive(bdev);
+ case FDSETEMSGTRESH:
+ UDP->max_errors.reporting = (unsigned short)(param & 0x0f);
+ return 0;
+ case FDGETMAXERRS:
+ outparam = &UDP->max_errors;
+ break;
+ case FDSETMAXERRS:
+ UDP->max_errors = inparam.max_errors;
+ break;
+ case FDGETDRVTYP:
+ outparam = drive_name(type, drive);
+ SUPBOUND(size, strlen((const char *)outparam) + 1);
+ break;
+ case FDSETDRVPRM:
+ *UDP = inparam.dp;
+ break;
+ case FDGETDRVPRM:
+ outparam = UDP;
+ break;
+ case FDPOLLDRVSTAT:
+ if (lock_fdc(drive, true))
+ return -EINTR;
+ if (poll_drive(true, FD_RAW_NEED_DISK) == -EINTR)
+ return -EINTR;
+ process_fd_request();
+ /* fall through */
+ case FDGETDRVSTAT:
+ outparam = UDRS;
+ break;
+ case FDRESET:
+ return user_reset_fdc(drive, (int)param, true);
+ case FDGETFDCSTAT:
+ outparam = UFDCS;
+ break;
+ case FDWERRORCLR:
+ memset(UDRWE, 0, sizeof(*UDRWE));
+ return 0;
+ case FDWERRORGET:
+ outparam = UDRWE;
+ break;
+ case FDRAWCMD:
+ if (type)
+ return -EINVAL;
+ if (lock_fdc(drive, true))
+ return -EINTR;
+ set_floppy(drive);
+ i = raw_cmd_ioctl(cmd, (void __user *)param);
+ if (i == -EINTR)
+ return -EINTR;
+ process_fd_request();
+ return i;
+ case FDTWADDLE:
+ if (lock_fdc(drive, true))
+ return -EINTR;
+ twaddle();
+ process_fd_request();
+ return 0;
+ default:
+ return -EINVAL;
+ }
+
+ if (_IOC_DIR(cmd) & _IOC_READ)
+ return fd_copyout((void __user *)param, outparam, size);
+
+ return 0;
+}
+
+static int fd_ioctl(struct block_device *bdev, fmode_t mode,
+ unsigned int cmd, unsigned long param)
+{
+ int ret;
+
+ mutex_lock(&floppy_mutex);
+ ret = fd_locked_ioctl(bdev, mode, cmd, param);
+ mutex_unlock(&floppy_mutex);
+
+ return ret;
+}
+
+static void __init config_types(void)
+{
+ bool has_drive = false;
+ int drive;
+
+ /* read drive info out of physical CMOS */
+ drive = 0;
+ if (!UDP->cmos)
+ UDP->cmos = FLOPPY0_TYPE;
+ drive = 1;
+ if (!UDP->cmos && FLOPPY1_TYPE)
+ UDP->cmos = FLOPPY1_TYPE;
+
+ /* FIXME: additional physical CMOS drive detection should go here */
+
+ for (drive = 0; drive < N_DRIVE; drive++) {
+ unsigned int type = UDP->cmos;
+ struct floppy_drive_params *params;
+ const char *name = NULL;
+ static char temparea[32];
+
+ if (type < ARRAY_SIZE(default_drive_params)) {
+ params = &default_drive_params[type].params;
+ if (type) {
+ name = default_drive_params[type].name;
+ allowed_drive_mask |= 1 << drive;
+ } else
+ allowed_drive_mask &= ~(1 << drive);
+ } else {
+ params = &default_drive_params[0].params;
+ sprintf(temparea, "unknown type %d (usb?)", type);
+ name = temparea;
+ }
+ if (name) {
+ const char *prepend;
+ if (!has_drive) {
+ prepend = "";
+ has_drive = true;
+ pr_info("Floppy drive(s):");
+ } else {
+ prepend = ",";
+ }
+
+ pr_cont("%s fd%d is %s", prepend, drive, name);
+ }
+ *UDP = *params;
+ }
+
+ if (has_drive)
+ pr_cont("\n");
+}
+
+static int floppy_release(struct gendisk *disk, fmode_t mode)
+{
+ int drive = (long)disk->private_data;
+
+ mutex_lock(&floppy_mutex);
+ mutex_lock(&open_lock);
+ if (UDRS->fd_ref < 0)
+ UDRS->fd_ref = 0;
+ else if (!UDRS->fd_ref--) {
+ DPRINT("floppy_release with fd_ref == 0");
+ UDRS->fd_ref = 0;
+ }
+ if (!UDRS->fd_ref)
+ opened_bdev[drive] = NULL;
+ mutex_unlock(&open_lock);
+ mutex_unlock(&floppy_mutex);
+
+ return 0;
+}
+
+/*
+ * floppy_open checks for aliasing (/dev/fd0 can be the same as
+ * /dev/PS0 etc), and disallows simultaneous access to the same
+ * drive with different device numbers.
+ */
+static int floppy_open(struct block_device *bdev, fmode_t mode)
+{
+ int drive = (long)bdev->bd_disk->private_data;
+ int old_dev, new_dev;
+ int try;
+ int res = -EBUSY;
+ char *tmp;
+
+ mutex_lock(&floppy_mutex);
+ mutex_lock(&open_lock);
+ old_dev = UDRS->fd_device;
+ if (opened_bdev[drive] && opened_bdev[drive] != bdev)
+ goto out2;
+
+ if (!UDRS->fd_ref && (UDP->flags & FD_BROKEN_DCL)) {
+ set_bit(FD_DISK_CHANGED_BIT, &UDRS->flags);
+ set_bit(FD_VERIFY_BIT, &UDRS->flags);
+ }
+
+ if (UDRS->fd_ref == -1 || (UDRS->fd_ref && (mode & FMODE_EXCL)))
+ goto out2;
+
+ if (mode & FMODE_EXCL)
+ UDRS->fd_ref = -1;
+ else
+ UDRS->fd_ref++;
+
+ opened_bdev[drive] = bdev;
+
+ res = -ENXIO;
+
+ if (!floppy_track_buffer) {
+ /* if opening an ED drive, reserve a big buffer,
+ * else reserve a small one */
+ if ((UDP->cmos == 6) || (UDP->cmos == 5))
+ try = 64; /* Only 48 actually useful */
+ else
+ try = 32; /* Only 24 actually useful */
+
+ tmp = (char *)fd_dma_mem_alloc(1024 * try);
+ if (!tmp && !floppy_track_buffer) {
+ try >>= 1; /* buffer only one side */
+ INFBOUND(try, 16);
+ tmp = (char *)fd_dma_mem_alloc(1024 * try);
+ }
+ if (!tmp && !floppy_track_buffer)
+ fallback_on_nodma_alloc(&tmp, 2048 * try);
+ if (!tmp && !floppy_track_buffer) {
+ DPRINT("Unable to allocate DMA memory\n");
+ goto out;
+ }
+ if (floppy_track_buffer) {
+ if (tmp)
+ fd_dma_mem_free((unsigned long)tmp, try * 1024);
+ } else {
+ buffer_min = buffer_max = -1;
+ floppy_track_buffer = tmp;
+ max_buffer_sectors = try;
+ }
+ }
+
+ new_dev = MINOR(bdev->bd_dev);
+ UDRS->fd_device = new_dev;
+ set_capacity(disks[drive], floppy_sizes[new_dev]);
+ if (old_dev != -1 && old_dev != new_dev) {
+ if (buffer_drive == drive)
+ buffer_track = -1;
+ }
+
+ if (UFDCS->rawcmd == 1)
+ UFDCS->rawcmd = 2;
+
+ if (!(mode & FMODE_NDELAY)) {
+ if (mode & (FMODE_READ|FMODE_WRITE)) {
+ UDRS->last_checked = 0;
+ check_disk_change(bdev);
+ if (test_bit(FD_DISK_CHANGED_BIT, &UDRS->flags))
+ goto out;
+ }
+ res = -EROFS;
+ if ((mode & FMODE_WRITE) &&
+ !test_bit(FD_DISK_WRITABLE_BIT, &UDRS->flags))
+ goto out;
+ }
+ mutex_unlock(&open_lock);
+ mutex_unlock(&floppy_mutex);
+ return 0;
+out:
+ if (UDRS->fd_ref < 0)
+ UDRS->fd_ref = 0;
+ else
+ UDRS->fd_ref--;
+ if (!UDRS->fd_ref)
+ opened_bdev[drive] = NULL;
+out2:
+ mutex_unlock(&open_lock);
+ mutex_unlock(&floppy_mutex);
+ return res;
+}
+
+/*
+ * Check if the disk has been changed or if a change has been faked.
+ */
+static unsigned int floppy_check_events(struct gendisk *disk,
+ unsigned int clearing)
+{
+ int drive = (long)disk->private_data;
+
+ if (test_bit(FD_DISK_CHANGED_BIT, &UDRS->flags) ||
+ test_bit(FD_VERIFY_BIT, &UDRS->flags))
+ return DISK_EVENT_MEDIA_CHANGE;
+
+ if (time_after(jiffies, UDRS->last_checked + UDP->checkfreq)) {
+ lock_fdc(drive, false);
+ poll_drive(false, 0);
+ process_fd_request();
+ }
+
+ if (test_bit(FD_DISK_CHANGED_BIT, &UDRS->flags) ||
+ test_bit(FD_VERIFY_BIT, &UDRS->flags) ||
+ test_bit(drive, &fake_change) ||
+ drive_no_geom(drive))
+ return DISK_EVENT_MEDIA_CHANGE;
+ return 0;
+}
+
+/*
+ * This implements "read block 0" for floppy_revalidate().
+ * Needed for format autodetection, checking whether there is
+ * a disk in the drive, and whether that disk is writable.
+ */
+
+static void floppy_rb0_complete(struct bio *bio, int err)
+{
+ complete((struct completion *)bio->bi_private);
+}
+
+static int __floppy_read_block_0(struct block_device *bdev)
+{
+ struct bio bio;
+ struct bio_vec bio_vec;
+ struct completion complete;
+ struct page *page;
+ size_t size;
+
+ page = alloc_page(GFP_NOIO);
+ if (!page) {
+ process_fd_request();
+ return -ENOMEM;
+ }
+
+ size = bdev->bd_block_size;
+ if (!size)
+ size = 1024;
+
+ bio_init(&bio);
+ bio.bi_io_vec = &bio_vec;
+ bio_vec.bv_page = page;
+ bio_vec.bv_len = size;
+ bio_vec.bv_offset = 0;
+ bio.bi_vcnt = 1;
+ bio.bi_idx = 0;
+ bio.bi_size = size;
+ bio.bi_bdev = bdev;
+ bio.bi_sector = 0;
+ bio.bi_flags = BIO_QUIET;
+ init_completion(&complete);
+ bio.bi_private = &complete;
+ bio.bi_end_io = floppy_rb0_complete;
+
+ submit_bio(READ, &bio);
+ process_fd_request();
+ wait_for_completion(&complete);
+
+ __free_page(page);
+
+ return 0;
+}
+
+/* revalidate the floppy disk, i.e. trigger format autodetection by reading
+ * the bootblock (block 0). "Autodetection" is also needed to check whether
+ * there is a disk in the drive at all... Thus we also do it for fixed
+ * geometry formats */
+static int floppy_revalidate(struct gendisk *disk)
+{
+ int drive = (long)disk->private_data;
+ int cf;
+ int res = 0;
+
+ if (test_bit(FD_DISK_CHANGED_BIT, &UDRS->flags) ||
+ test_bit(FD_VERIFY_BIT, &UDRS->flags) ||
+ test_bit(drive, &fake_change) ||
+ drive_no_geom(drive)) {
+ if (WARN(atomic_read(&usage_count) == 0,
+ "VFS: revalidate called on non-open device.\n"))
+ return -EFAULT;
+
+ lock_fdc(drive, false);
+ cf = (test_bit(FD_DISK_CHANGED_BIT, &UDRS->flags) ||
+ test_bit(FD_VERIFY_BIT, &UDRS->flags));
+ if (!(cf || test_bit(drive, &fake_change) || drive_no_geom(drive))) {
+ process_fd_request(); /*already done by another thread */
+ return 0;
+ }
+ UDRS->maxblock = 0;
+ UDRS->maxtrack = 0;
+ if (buffer_drive == drive)
+ buffer_track = -1;
+ clear_bit(drive, &fake_change);
+ clear_bit(FD_DISK_CHANGED_BIT, &UDRS->flags);
+ if (cf)
+ UDRS->generation++;
+ if (drive_no_geom(drive)) {
+ /* auto-sensing */
+ res = __floppy_read_block_0(opened_bdev[drive]);
+ } else {
+ if (cf)
+ poll_drive(false, FD_RAW_NEED_DISK);
+ process_fd_request();
+ }
+ }
+ set_capacity(disk, floppy_sizes[UDRS->fd_device]);
+ return res;
+}
+
+static const struct block_device_operations floppy_fops = {
+ .owner = THIS_MODULE,
+ .open = floppy_open,
+ .release = floppy_release,
+ .ioctl = fd_ioctl,
+ .getgeo = fd_getgeo,
+ .check_events = floppy_check_events,
+ .revalidate_disk = floppy_revalidate,
+};
+
+/*
+ * Floppy Driver initialization
+ * =============================
+ */
+
+/* Determine the floppy disk controller type */
+/* This routine was written by David C. Niemi */
+static char __init get_fdc_version(void)
+{
+ int r;
+
+ output_byte(FD_DUMPREGS); /* 82072 and better know DUMPREGS */
+ if (FDCS->reset)
+ return FDC_NONE;
+ r = result();
+ if (r <= 0x00)
+ return FDC_NONE; /* No FDC present ??? */
+ if ((r == 1) && (reply_buffer[0] == 0x80)) {
+ pr_info("FDC %d is an 8272A\n", fdc);
+ return FDC_8272A; /* 8272a/765 don't know DUMPREGS */
+ }
+ if (r != 10) {
+ pr_info("FDC %d init: DUMPREGS: unexpected return of %d bytes.\n",
+ fdc, r);
+ return FDC_UNKNOWN;
+ }
+
+ if (!fdc_configure()) {
+ pr_info("FDC %d is an 82072\n", fdc);
+ return FDC_82072; /* 82072 doesn't know CONFIGURE */
+ }
+
+ output_byte(FD_PERPENDICULAR);
+ if (need_more_output() == MORE_OUTPUT) {
+ output_byte(0);
+ } else {
+ pr_info("FDC %d is an 82072A\n", fdc);
+ return FDC_82072A; /* 82072A as found on Sparcs. */
+ }
+
+ output_byte(FD_UNLOCK);
+ r = result();
+ if ((r == 1) && (reply_buffer[0] == 0x80)) {
+ pr_info("FDC %d is a pre-1991 82077\n", fdc);
+ return FDC_82077_ORIG; /* Pre-1991 82077, doesn't know
+ * LOCK/UNLOCK */
+ }
+ if ((r != 1) || (reply_buffer[0] != 0x00)) {
+ pr_info("FDC %d init: UNLOCK: unexpected return of %d bytes.\n",
+ fdc, r);
+ return FDC_UNKNOWN;
+ }
+ output_byte(FD_PARTID);
+ r = result();
+ if (r != 1) {
+ pr_info("FDC %d init: PARTID: unexpected return of %d bytes.\n",
+ fdc, r);
+ return FDC_UNKNOWN;
+ }
+ if (reply_buffer[0] == 0x80) {
+ pr_info("FDC %d is a post-1991 82077\n", fdc);
+ return FDC_82077; /* Revised 82077AA passes all the tests */
+ }
+ switch (reply_buffer[0] >> 5) {
+ case 0x0:
+ /* Either a 82078-1 or a 82078SL running at 5Volt */
+ pr_info("FDC %d is an 82078.\n", fdc);
+ return FDC_82078;
+ case 0x1:
+ pr_info("FDC %d is a 44pin 82078\n", fdc);
+ return FDC_82078;
+ case 0x2:
+ pr_info("FDC %d is a S82078B\n", fdc);
+ return FDC_S82078B;
+ case 0x3:
+ pr_info("FDC %d is a National Semiconductor PC87306\n", fdc);
+ return FDC_87306;
+ default:
+ pr_info("FDC %d init: 82078 variant with unknown PARTID=%d.\n",
+ fdc, reply_buffer[0] >> 5);
+ return FDC_82078_UNKN;
+ }
+} /* get_fdc_version */
+
+/* lilo configuration */
+
+static void __init floppy_set_flags(int *ints, int param, int param2)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(default_drive_params); i++) {
+ if (param)
+ default_drive_params[i].params.flags |= param2;
+ else
+ default_drive_params[i].params.flags &= ~param2;
+ }
+ DPRINT("%s flag 0x%x\n", param2 ? "Setting" : "Clearing", param);
+}
+
+static void __init daring(int *ints, int param, int param2)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(default_drive_params); i++) {
+ if (param) {
+ default_drive_params[i].params.select_delay = 0;
+ default_drive_params[i].params.flags |=
+ FD_SILENT_DCL_CLEAR;
+ } else {
+ default_drive_params[i].params.select_delay =
+ 2 * HZ / 100;
+ default_drive_params[i].params.flags &=
+ ~FD_SILENT_DCL_CLEAR;
+ }
+ }
+ DPRINT("Assuming %s floppy hardware\n", param ? "standard" : "broken");
+}
+
+static void __init set_cmos(int *ints, int dummy, int dummy2)
+{
+ int current_drive = 0;
+
+ if (ints[0] != 2) {
+ DPRINT("wrong number of parameters for CMOS\n");
+ return;
+ }
+ current_drive = ints[1];
+ if (current_drive < 0 || current_drive >= 8) {
+ DPRINT("bad drive for set_cmos\n");
+ return;
+ }
+#if N_FDC > 1
+ if (current_drive >= 4 && !FDC2)
+ FDC2 = 0x370;
+#endif
+ DP->cmos = ints[2];
+ DPRINT("setting CMOS code to %d\n", ints[2]);
+}
+
+static struct param_table {
+ const char *name;
+ void (*fn) (int *ints, int param, int param2);
+ int *var;
+ int def_param;
+ int param2;
+} config_params[] __initdata = {
+ {"allowed_drive_mask", NULL, &allowed_drive_mask, 0xff, 0}, /* obsolete */
+ {"all_drives", NULL, &allowed_drive_mask, 0xff, 0}, /* obsolete */
+ {"asus_pci", NULL, &allowed_drive_mask, 0x33, 0},
+ {"irq", NULL, &FLOPPY_IRQ, 6, 0},
+ {"dma", NULL, &FLOPPY_DMA, 2, 0},
+ {"daring", daring, NULL, 1, 0},
+#if N_FDC > 1
+ {"two_fdc", NULL, &FDC2, 0x370, 0},
+ {"one_fdc", NULL, &FDC2, 0, 0},
+#endif
+ {"thinkpad", floppy_set_flags, NULL, 1, FD_INVERTED_DCL},
+ {"broken_dcl", floppy_set_flags, NULL, 1, FD_BROKEN_DCL},
+ {"messages", floppy_set_flags, NULL, 1, FTD_MSG},
+ {"silent_dcl_clear", floppy_set_flags, NULL, 1, FD_SILENT_DCL_CLEAR},
+ {"debug", floppy_set_flags, NULL, 1, FD_DEBUG},
+ {"nodma", NULL, &can_use_virtual_dma, 1, 0},
+ {"omnibook", NULL, &can_use_virtual_dma, 1, 0},
+ {"yesdma", NULL, &can_use_virtual_dma, 0, 0},
+ {"fifo_depth", NULL, &fifo_depth, 0xa, 0},
+ {"nofifo", NULL, &no_fifo, 0x20, 0},
+ {"usefifo", NULL, &no_fifo, 0, 0},
+ {"cmos", set_cmos, NULL, 0, 0},
+ {"slow", NULL, &slow_floppy, 1, 0},
+ {"unexpected_interrupts", NULL, &print_unex, 1, 0},
+ {"no_unexpected_interrupts", NULL, &print_unex, 0, 0},
+ {"L40SX", NULL, &print_unex, 0, 0}
+
+ EXTRA_FLOPPY_PARAMS
+};
+
+static int __init floppy_setup(char *str)
+{
+ int i;
+ int param;
+ int ints[11];
+
+ str = get_options(str, ARRAY_SIZE(ints), ints);
+ if (str) {
+ for (i = 0; i < ARRAY_SIZE(config_params); i++) {
+ if (strcmp(str, config_params[i].name) == 0) {
+ if (ints[0])
+ param = ints[1];
+ else
+ param = config_params[i].def_param;
+ if (config_params[i].fn)
+ config_params[i].fn(ints, param,
+ config_params[i].
+ param2);
+ if (config_params[i].var) {
+ DPRINT("%s=%d\n", str, param);
+ *config_params[i].var = param;
+ }
+ return 1;
+ }
+ }
+ }
+ if (str) {
+ DPRINT("unknown floppy option [%s]\n", str);
+
+ DPRINT("allowed options are:");
+ for (i = 0; i < ARRAY_SIZE(config_params); i++)
+ pr_cont(" %s", config_params[i].name);
+ pr_cont("\n");
+ } else
+ DPRINT("botched floppy option\n");
+ DPRINT("Read Documentation/blockdev/floppy.txt\n");
+ return 0;
+}
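
A hedged illustration of how the parser above consumes a "floppy=" option (the option names come straight from the config_params table; the numeric values are arbitrary examples, not recommendations):

    floppy=thinkpad        no leading integer, so param = def_param = 1 and
                           floppy_set_flags() sets FD_INVERTED_DCL for all drives
    floppy=16,fifo_depth   get_options() yields ints[0] = 1, ints[1] = 16, so
                           *config_params[i].var stores 16 in fifo_depth

Integers, if any, come before the option name, because get_options() strips them off before the strcmp() against config_params[i].name.
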
+
+static int have_no_fdc = -ENODEV;
+
+static ssize_t floppy_cmos_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct platform_device *p = to_platform_device(dev);
+ int drive;
+
+ drive = p->id;
+ return sprintf(buf, "%X\n", UDP->cmos);
+}
+
+static DEVICE_ATTR(cmos, S_IRUGO, floppy_cmos_show, NULL);
+
+static void floppy_device_release(struct device *dev)
+{
+}
+
+static int floppy_resume(struct device *dev)
+{
+ int fdc;
+
+ for (fdc = 0; fdc < N_FDC; fdc++)
+ if (FDCS->address != -1)
+ user_reset_fdc(-1, FD_RESET_ALWAYS, false);
+
+ return 0;
+}
+
+static const struct dev_pm_ops floppy_pm_ops = {
+ .resume = floppy_resume,
+ .restore = floppy_resume,
+};
+
+static struct platform_driver floppy_driver = {
+ .driver = {
+ .name = "floppy",
+ .pm = &floppy_pm_ops,
+ },
+};
+
+static struct platform_device floppy_device[N_DRIVE];
+
+static struct kobject *floppy_find(dev_t dev, int *part, void *data)
+{
+ int drive = (*part & 3) | ((*part & 0x80) >> 5);
+ if (drive >= N_DRIVE ||
+ !(allowed_drive_mask & (1 << drive)) ||
+ fdc_state[FDC(drive)].version == FDC_NONE)
+ return NULL;
+ if (((*part >> 2) & 0x1f) >= ARRAY_SIZE(floppy_type))
+ return NULL;
+ *part = 0;
+ return get_disk(disks[drive]);
+}
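
To make the minor-number decoding above concrete (the minor value is chosen purely for illustration): for *part = 0x84, drive = (0x84 & 3) | ((0x84 & 0x80) >> 5) = 0 | 4 = 4, i.e. the first unit handled by the second controller, and the format index checked against floppy_type is (0x84 >> 2) & 0x1f = 1.
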
+
+static int __init floppy_init(void)
+{
+ int i, unit, drive;
+ int err, dr;
+
+ set_debugt();
+ interruptjiffies = resultjiffies = jiffies;
+
+#if defined(CONFIG_PPC)
+ if (check_legacy_ioport(FDC1))
+ return -ENODEV;
+#endif
+
+ raw_cmd = NULL;
+
+ for (dr = 0; dr < N_DRIVE; dr++) {
+ disks[dr] = alloc_disk(1);
+ if (!disks[dr]) {
+ err = -ENOMEM;
+ goto out_put_disk;
+ }
+
+ disks[dr]->queue = blk_init_queue(do_fd_request, &floppy_lock);
+ if (!disks[dr]->queue) {
+ err = -ENOMEM;
+ goto out_put_disk;
+ }
+
+ blk_queue_max_hw_sectors(disks[dr]->queue, 64);
+ disks[dr]->major = FLOPPY_MAJOR;
+ disks[dr]->first_minor = TOMINOR(dr);
+ disks[dr]->fops = &floppy_fops;
+ sprintf(disks[dr]->disk_name, "fd%d", dr);
+
+ init_timer(&motor_off_timer[dr]);
+ motor_off_timer[dr].data = dr;
+ motor_off_timer[dr].function = motor_off_callback;
+ }
+
+ err = register_blkdev(FLOPPY_MAJOR, "fd");
+ if (err)
+ goto out_put_disk;
+
+ err = platform_driver_register(&floppy_driver);
+ if (err)
+ goto out_unreg_blkdev;
+
+ blk_register_region(MKDEV(FLOPPY_MAJOR, 0), 256, THIS_MODULE,
+ floppy_find, NULL, NULL);
+
+ for (i = 0; i < 256; i++)
+ if (ITYPE(i))
+ floppy_sizes[i] = floppy_type[ITYPE(i)].size;
+ else
+ floppy_sizes[i] = MAX_DISK_SIZE << 1;
+
+ reschedule_timeout(MAXTIMEOUT, "floppy init");
+ config_types();
+
+ for (i = 0; i < N_FDC; i++) {
+ fdc = i;
+ memset(FDCS, 0, sizeof(*FDCS));
+ FDCS->dtr = -1;
+ FDCS->dor = 0x4;
+#if defined(__sparc__) || defined(__mc68000__)
+ /* sparcs/sun3x don't have a DOR reset which we can fall back on */
+#ifdef __mc68000__
+ if (MACH_IS_SUN3X)
+#endif
+ FDCS->version = FDC_82072A;
+#endif
+ }
+
+ use_virtual_dma = can_use_virtual_dma & 1;
+ fdc_state[0].address = FDC1;
+ if (fdc_state[0].address == -1) {
+ del_timer_sync(&fd_timeout);
+ err = -ENODEV;
+ goto out_unreg_region;
+ }
+#if N_FDC > 1
+ fdc_state[1].address = FDC2;
+#endif
+
+ fdc = 0; /* reset fdc in case of unexpected interrupt */
+ err = floppy_grab_irq_and_dma();
+ if (err) {
+ del_timer_sync(&fd_timeout);
+ err = -EBUSY;
+ goto out_unreg_region;
+ }
+
+ /* initialise drive state */
+ for (drive = 0; drive < N_DRIVE; drive++) {
+ memset(UDRS, 0, sizeof(*UDRS));
+ memset(UDRWE, 0, sizeof(*UDRWE));
+ set_bit(FD_DISK_NEWCHANGE_BIT, &UDRS->flags);
+ set_bit(FD_DISK_CHANGED_BIT, &UDRS->flags);
+ set_bit(FD_VERIFY_BIT, &UDRS->flags);
+ UDRS->fd_device = -1;
+ floppy_track_buffer = NULL;
+ max_buffer_sectors = 0;
+ }
+ /*
+ * Small 10 msec delay to let through any interrupt that
+ * initialization might have triggered, so as not to
+ * confuse detection:
+ */
+ msleep(10);
+
+ for (i = 0; i < N_FDC; i++) {
+ fdc = i;
+ FDCS->driver_version = FD_DRIVER_VERSION;
+ for (unit = 0; unit < 4; unit++)
+ FDCS->track[unit] = 0;
+ if (FDCS->address == -1)
+ continue;
+ FDCS->rawcmd = 2;
+ if (user_reset_fdc(-1, FD_RESET_ALWAYS, false)) {
+ /* free ioports reserved by floppy_grab_irq_and_dma() */
+ floppy_release_regions(fdc);
+ FDCS->address = -1;
+ FDCS->version = FDC_NONE;
+ continue;
+ }
+ /* Try to determine the floppy controller type */
+ FDCS->version = get_fdc_version();
+ if (FDCS->version == FDC_NONE) {
+ /* free ioports reserved by floppy_grab_irq_and_dma() */
+ floppy_release_regions(fdc);
+ FDCS->address = -1;
+ continue;
+ }
+ if (can_use_virtual_dma == 2 && FDCS->version < FDC_82072A)
+ can_use_virtual_dma = 0;
+
+ have_no_fdc = 0;
+ /* Not all FDCs seem to be able to handle the version command
+ * properly, so force a reset for the standard FDC clones,
+ * to avoid interrupt garbage.
+ */
+ user_reset_fdc(-1, FD_RESET_ALWAYS, false);
+ }
+ fdc = 0;
+ del_timer_sync(&fd_timeout);
+ current_drive = 0;
+ initialized = true;
+ if (have_no_fdc) {
+ DPRINT("no floppy controllers found\n");
+ err = have_no_fdc;
+ goto out_flush_work;
+ }
+
+ for (drive = 0; drive < N_DRIVE; drive++) {
+ if (!(allowed_drive_mask & (1 << drive)))
+ continue;
+ if (fdc_state[FDC(drive)].version == FDC_NONE)
+ continue;
+
+ floppy_device[drive].name = floppy_device_name;
+ floppy_device[drive].id = drive;
+ floppy_device[drive].dev.release = floppy_device_release;
+
+ err = platform_device_register(&floppy_device[drive]);
+ if (err)
+ goto out_flush_work;
+
+ err = device_create_file(&floppy_device[drive].dev,
+ &dev_attr_cmos);
+ if (err)
+ goto out_unreg_platform_dev;
+
+ /* to be cleaned up... */
+ disks[drive]->private_data = (void *)(long)drive;
+ disks[drive]->flags |= GENHD_FL_REMOVABLE;
+ disks[drive]->driverfs_dev = &floppy_device[drive].dev;
+ add_disk(disks[drive]);
+ }
+
+ return 0;
+
+out_unreg_platform_dev:
+ platform_device_unregister(&floppy_device[drive]);
+out_flush_work:
+ flush_work_sync(&floppy_work);
+ if (atomic_read(&usage_count))
+ floppy_release_irq_and_dma();
+out_unreg_region:
+ blk_unregister_region(MKDEV(FLOPPY_MAJOR, 0), 256);
+ platform_driver_unregister(&floppy_driver);
+out_unreg_blkdev:
+ unregister_blkdev(FLOPPY_MAJOR, "fd");
+out_put_disk:
+ while (dr--) {
+ del_timer_sync(&motor_off_timer[dr]);
+ if (disks[dr]->queue)
+ blk_cleanup_queue(disks[dr]->queue);
+ put_disk(disks[dr]);
+ }
+ return err;
+}
+
+static const struct io_region {
+ int offset;
+ int size;
+} io_regions[] = {
+ { 2, 1 },
+ /* address + 3 is sometimes reserved by pnp bios for motherboard */
+ { 4, 2 },
+ /* address + 6 is reserved, and may be taken by IDE.
+ * Unfortunately, Adaptec doesn't know this :-( */
+ { 7, 1 },
+};
+
+static void floppy_release_allocated_regions(int fdc, const struct io_region *p)
+{
+ while (p != io_regions) {
+ p--;
+ release_region(FDCS->address + p->offset, p->size);
+ }
+}
+
+#define ARRAY_END(X) (&((X)[ARRAY_SIZE(X)]))
+
+static int floppy_request_regions(int fdc)
+{
+ const struct io_region *p;
+
+ for (p = io_regions; p < ARRAY_END(io_regions); p++) {
+ if (!request_region(FDCS->address + p->offset,
+ p->size, "floppy")) {
+ DPRINT("Floppy io-port 0x%04lx in use\n",
+ FDCS->address + p->offset);
+ floppy_release_allocated_regions(fdc, p);
+ return -EBUSY;
+ }
+ }
+ return 0;
+}
+
+static void floppy_release_regions(int fdc)
+{
+ floppy_release_allocated_regions(fdc, ARRAY_END(io_regions));
+}
+
+static int floppy_grab_irq_and_dma(void)
+{
+ if (atomic_inc_return(&usage_count) > 1)
+ return 0;
+
+ /*
+ * We might have scheduled a free_irq(); wait for it to
+ * drain first:
+ */
+ flush_work_sync(&floppy_work);
+
+ if (fd_request_irq()) {
+ DPRINT("Unable to grab IRQ%d for the floppy driver\n",
+ FLOPPY_IRQ);
+ atomic_dec(&usage_count);
+ return -1;
+ }
+ if (fd_request_dma()) {
+ DPRINT("Unable to grab DMA%d for the floppy driver\n",
+ FLOPPY_DMA);
+ if (can_use_virtual_dma & 2)
+ use_virtual_dma = can_use_virtual_dma = 1;
+ if (!(can_use_virtual_dma & 1)) {
+ fd_free_irq();
+ atomic_dec(&usage_count);
+ return -1;
+ }
+ }
+
+ for (fdc = 0; fdc < N_FDC; fdc++) {
+ if (FDCS->address != -1) {
+ if (floppy_request_regions(fdc))
+ goto cleanup;
+ }
+ }
+ for (fdc = 0; fdc < N_FDC; fdc++) {
+ if (FDCS->address != -1) {
+ reset_fdc_info(1);
+ fd_outb(FDCS->dor, FD_DOR);
+ }
+ }
+ fdc = 0;
+ set_dor(0, ~0, 8); /* avoid immediate interrupt */
+
+ for (fdc = 0; fdc < N_FDC; fdc++)
+ if (FDCS->address != -1)
+ fd_outb(FDCS->dor, FD_DOR);
+ /*
+ * The driver will try to free resources and relies on us
+ * to know whether they were allocated.
+ */
+ fdc = 0;
+ irqdma_allocated = 1;
+ return 0;
+cleanup:
+ fd_free_irq();
+ fd_free_dma();
+ while (--fdc >= 0)
+ floppy_release_regions(fdc);
+ atomic_dec(&usage_count);
+ return -1;
+}
+
+static void floppy_release_irq_and_dma(void)
+{
+ int old_fdc;
+#ifndef __sparc__
+ int drive;
+#endif
+ long tmpsize;
+ unsigned long tmpaddr;
+
+ if (!atomic_dec_and_test(&usage_count))
+ return;
+
+ if (irqdma_allocated) {
+ fd_disable_dma();
+ fd_free_dma();
+ fd_free_irq();
+ irqdma_allocated = 0;
+ }
+ set_dor(0, ~0, 8);
+#if N_FDC > 1
+ set_dor(1, ~8, 0);
+#endif
+ floppy_enable_hlt();
+
+ if (floppy_track_buffer && max_buffer_sectors) {
+ tmpsize = max_buffer_sectors * 1024;
+ tmpaddr = (unsigned long)floppy_track_buffer;
+ floppy_track_buffer = NULL;
+ max_buffer_sectors = 0;
+ buffer_min = buffer_max = -1;
+ fd_dma_mem_free(tmpaddr, tmpsize);
+ }
+#ifndef __sparc__
+ for (drive = 0; drive < N_FDC * 4; drive++)
+ if (timer_pending(motor_off_timer + drive))
+ pr_info("motor off timer %d still active\n", drive);
+#endif
+
+ if (timer_pending(&fd_timeout))
+ pr_info("floppy timer still active:%s\n", timeout_message);
+ if (timer_pending(&fd_timer))
+ pr_info("auxiliary floppy timer still active\n");
+ if (work_pending(&floppy_work))
+ pr_info("work still pending\n");
+ old_fdc = fdc;
+ for (fdc = 0; fdc < N_FDC; fdc++)
+ if (FDCS->address != -1)
+ floppy_release_regions(fdc);
+ fdc = old_fdc;
+}
+
+#ifdef MODULE
+
+static char *floppy;
+
+static void __init parse_floppy_cfg_string(char *cfg)
+{
+ char *ptr;
+
+ while (*cfg) {
+ ptr = cfg;
+ while (*cfg && *cfg != ' ' && *cfg != '\t')
+ cfg++;
+ if (*cfg) {
+ *cfg = '\0';
+ cfg++;
+ }
+ if (*ptr)
+ floppy_setup(ptr);
+ }
+}
+
+static int __init floppy_module_init(void)
+{
+ if (floppy)
+ parse_floppy_cfg_string(floppy);
+ return floppy_init();
+}
+module_init(floppy_module_init);
+
+static void __exit floppy_module_exit(void)
+{
+ int drive;
+
+ blk_unregister_region(MKDEV(FLOPPY_MAJOR, 0), 256);
+ unregister_blkdev(FLOPPY_MAJOR, "fd");
+ platform_driver_unregister(&floppy_driver);
+
+ for (drive = 0; drive < N_DRIVE; drive++) {
+ del_timer_sync(&motor_off_timer[drive]);
+
+ if ((allowed_drive_mask & (1 << drive)) &&
+ fdc_state[FDC(drive)].version != FDC_NONE) {
+ del_gendisk(disks[drive]);
+ device_remove_file(&floppy_device[drive].dev, &dev_attr_cmos);
+ platform_device_unregister(&floppy_device[drive]);
+ }
+ blk_cleanup_queue(disks[drive]->queue);
+ put_disk(disks[drive]);
+ }
+
+ del_timer_sync(&fd_timeout);
+ del_timer_sync(&fd_timer);
+
+ if (atomic_read(&usage_count))
+ floppy_release_irq_and_dma();
+
+ /* eject disk, if any */
+ fd_eject(0);
+}
+
+module_exit(floppy_module_exit);
+
+module_param(floppy, charp, 0);
+module_param(FLOPPY_IRQ, int, 0);
+module_param(FLOPPY_DMA, int, 0);
+MODULE_AUTHOR("Alain L. Knaff");
+MODULE_SUPPORTED_DEVICE("fd");
+MODULE_LICENSE("GPL");
+
+/* This doesn't actually get used other than for module information */
+static const struct pnp_device_id floppy_pnpids[] = {
+ {"PNP0700", 0},
+ {}
+};
+
+MODULE_DEVICE_TABLE(pnp, floppy_pnpids);
+
+#else
+
+__setup("floppy=", floppy_setup);
+module_init(floppy_init)
+#endif
+
+MODULE_ALIAS_BLOCKDEV_MAJOR(FLOPPY_MAJOR);
diff --git a/drivers/block/hd.c b/drivers/block/hd.c
new file mode 100644
index 00000000..007c6309
--- /dev/null
+++ b/drivers/block/hd.c
@@ -0,0 +1,815 @@
+/*
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ *
+ * This is the low-level hd interrupt support. It traverses the
+ * request-list, using interrupts to jump between functions. As
+ * all the functions are called within interrupts, we may not
+ * sleep. Special care is recommended.
+ *
+ * modified by Drew Eckhardt to check nr of hd's from the CMOS.
+ *
+ * Thanks to Branko Lankester, lankeste@fwi.uva.nl, who found a bug
+ * in the early extended-partition checks and added DM partitions
+ *
+ * IRQ-unmask, drive-id, multiple-mode, support for ">16 heads",
+ * and general streamlining by Mark Lord.
+ *
+ * Removed 99% of above. Use Mark's ide driver for those options.
+ * This is now a lightweight ST-506 driver. (Paul Gortmaker)
+ *
+ * Modified 1995 Russell King for ARM processor.
+ *
+ * Bugfix: max_sectors must be <= 255 or the wheels tend to come
+ * off in a hurry once you queue things up - Paul G. 02/2001
+ */
+
+/* Uncomment the following if you want verbose error reports. */
+/* #define VERBOSE_ERRORS */
+
+#include <linux/blkdev.h>
+#include <linux/errno.h>
+#include <linux/signal.h>
+#include <linux/interrupt.h>
+#include <linux/timer.h>
+#include <linux/fs.h>
+#include <linux/kernel.h>
+#include <linux/genhd.h>
+#include <linux/string.h>
+#include <linux/ioport.h>
+#include <linux/init.h>
+#include <linux/blkpg.h>
+#include <linux/ata.h>
+#include <linux/hdreg.h>
+
+#define HD_IRQ 14
+
+#define REALLY_SLOW_IO
+#include <asm/system.h>
+#include <asm/io.h>
+#include <asm/uaccess.h>
+
+#ifdef __arm__
+#undef HD_IRQ
+#endif
+#include <asm/irq.h>
+#ifdef __arm__
+#define HD_IRQ IRQ_HARDDISK
+#endif
+
+/* Hd controller register ports */
+
+#define HD_DATA 0x1f0 /* _CTL when writing */
+#define HD_ERROR 0x1f1 /* see err-bits */
+#define HD_NSECTOR 0x1f2 /* nr of sectors to read/write */
+#define HD_SECTOR 0x1f3 /* starting sector */
+#define HD_LCYL 0x1f4 /* starting cylinder */
+#define HD_HCYL 0x1f5 /* high byte of starting cyl */
+#define HD_CURRENT 0x1f6 /* 101dhhhh , d=drive, hhhh=head */
+#define HD_STATUS 0x1f7 /* see status-bits */
+#define HD_FEATURE HD_ERROR /* same io address, read=error, write=feature */
+#define HD_PRECOMP HD_FEATURE /* obsolete use of this port - predates IDE */
+#define HD_COMMAND HD_STATUS /* same io address, read=status, write=cmd */
+
+#define HD_CMD 0x3f6 /* used for resets */
+#define HD_ALTSTATUS 0x3f6 /* same as HD_STATUS but doesn't clear irq */
+
+/* Bits of HD_STATUS */
+#define ERR_STAT 0x01
+#define INDEX_STAT 0x02
+#define ECC_STAT 0x04 /* Corrected error */
+#define DRQ_STAT 0x08
+#define SEEK_STAT 0x10
+#define SERVICE_STAT SEEK_STAT
+#define WRERR_STAT 0x20
+#define READY_STAT 0x40
+#define BUSY_STAT 0x80
+
+/* Bits for HD_ERROR */
+#define MARK_ERR 0x01 /* Bad address mark */
+#define TRK0_ERR 0x02 /* couldn't find track 0 */
+#define ABRT_ERR 0x04 /* Command aborted */
+#define MCR_ERR 0x08 /* media change request */
+#define ID_ERR 0x10 /* ID field not found */
+#define MC_ERR 0x20 /* media changed */
+#define ECC_ERR 0x40 /* Uncorrectable ECC error */
+#define BBD_ERR 0x80 /* pre-EIDE meaning: block marked bad */
+#define ICRC_ERR 0x80 /* new meaning: CRC error during transfer */
+
+static DEFINE_SPINLOCK(hd_lock);
+static struct request_queue *hd_queue;
+static struct request *hd_req;
+
+#define TIMEOUT_VALUE (6*HZ)
+#define HD_DELAY 0
+
+#define MAX_ERRORS 16 /* Max read/write errors/sector */
+#define RESET_FREQ 8 /* Reset controller every 8th retry */
+#define RECAL_FREQ 4 /* Recalibrate every 4th retry */
+#define MAX_HD 2
+
+#define STAT_OK (READY_STAT|SEEK_STAT)
+#define OK_STATUS(s) (((s)&(STAT_OK|(BUSY_STAT|WRERR_STAT|ERR_STAT)))==STAT_OK)
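
A worked reading of OK_STATUS() above, with example status bytes that are mine, not taken from the source: STAT_OK is READY_STAT | SEEK_STAT = 0x50, and the macro masks the status with 0x50 | 0x80 | 0x20 | 0x01 = 0xf1 before comparing against 0x50. So a status of 0x58 (READY, SEEK, DRQ) passes, since 0x58 & 0xf1 == 0x50, while 0xd0 (BUSY, READY, SEEK) fails, since 0xd0 & 0xf1 == 0xd0.
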
+
+static void recal_intr(void);
+static void bad_rw_intr(void);
+
+static int reset;
+static int hd_error;
+
+/*
+ * This struct defines the HD's and their types.
+ */
+struct hd_i_struct {
+ unsigned int head, sect, cyl, wpcom, lzone, ctl;
+ int unit;
+ int recalibrate;
+ int special_op;
+};
+
+#ifdef HD_TYPE
+static struct hd_i_struct hd_info[] = { HD_TYPE };
+static int NR_HD = ARRAY_SIZE(hd_info);
+#else
+static struct hd_i_struct hd_info[MAX_HD];
+static int NR_HD;
+#endif
+
+static struct gendisk *hd_gendisk[MAX_HD];
+
+static struct timer_list device_timer;
+
+#define TIMEOUT_VALUE (6*HZ)
+
+#define SET_TIMER \
+ do { \
+ mod_timer(&device_timer, jiffies + TIMEOUT_VALUE); \
+ } while (0)
+
+static void (*do_hd)(void) = NULL;
+#define SET_HANDLER(x) \
+if ((do_hd = (x)) != NULL) \
+ SET_TIMER; \
+else \
+ del_timer(&device_timer);
+
+
+#if (HD_DELAY > 0)
+
+#include <asm/i8253.h>
+
+unsigned long last_req;
+
+unsigned long read_timer(void)
+{
+ unsigned long t, flags;
+ int i;
+
+ raw_spin_lock_irqsave(&i8253_lock, flags);
+ t = jiffies * 11932;
+ outb_p(0, 0x43);
+ i = inb_p(0x40);
+ i |= inb(0x40) << 8;
+ raw_spin_unlock_irqrestore(&i8253_lock, flags);
+ return(t - i);
+}
+#endif
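
The constant 11932 above is presumably the number of i8253 PIT ticks per jiffy: the PIT runs at 1193182 Hz, and assuming HZ = 100 (my assumption, not stated in the code) 1193182 / 100 ≈ 11932, so jiffies * 11932 minus the latched countdown value gives a monotonically increasing timestamp with PIT-tick resolution.
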
+
+static void __init hd_setup(char *str, int *ints)
+{
+ int hdind = 0;
+
+ if (ints[0] != 3)
+ return;
+ if (hd_info[0].head != 0)
+ hdind = 1;
+ hd_info[hdind].head = ints[2];
+ hd_info[hdind].sect = ints[3];
+ hd_info[hdind].cyl = ints[1];
+ hd_info[hdind].wpcom = 0;
+ hd_info[hdind].lzone = ints[1];
+ hd_info[hdind].ctl = (ints[2] > 8 ? 8 : 0);
+ NR_HD = hdind+1;
+}
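
For example (the geometry is invented for illustration only), booting with hd=1024,16,63 gives ints[0] = 3, ints[1] = 1024, ints[2] = 16 and ints[3] = 63, so hd_info[0] ends up with cyl = 1024, head = 16, sect = 63, lzone = 1024, wpcom = 0 and ctl = 8 (ctl is 8 whenever more than 8 heads are given); a second hd= option on the command line fills hd_info[1], because hd_info[0].head is then non-zero.
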
+
+static bool hd_end_request(int err, unsigned int bytes)
+{
+ if (__blk_end_request(hd_req, err, bytes))
+ return true;
+ hd_req = NULL;
+ return false;
+}
+
+static bool hd_end_request_cur(int err)
+{
+ return hd_end_request(err, blk_rq_cur_bytes(hd_req));
+}
+
+static void dump_status(const char *msg, unsigned int stat)
+{
+ char *name = "hd?";
+ if (hd_req)
+ name = hd_req->rq_disk->disk_name;
+
+#ifdef VERBOSE_ERRORS
+ printk("%s: %s: status=0x%02x { ", name, msg, stat & 0xff);
+ if (stat & BUSY_STAT) printk("Busy ");
+ if (stat & READY_STAT) printk("DriveReady ");
+ if (stat & WRERR_STAT) printk("WriteFault ");
+ if (stat & SEEK_STAT) printk("SeekComplete ");
+ if (stat & DRQ_STAT) printk("DataRequest ");
+ if (stat & ECC_STAT) printk("CorrectedError ");
+ if (stat & INDEX_STAT) printk("Index ");
+ if (stat & ERR_STAT) printk("Error ");
+ printk("}\n");
+ if ((stat & ERR_STAT) == 0) {
+ hd_error = 0;
+ } else {
+ hd_error = inb(HD_ERROR);
+ printk("%s: %s: error=0x%02x { ", name, msg, hd_error & 0xff);
+ if (hd_error & BBD_ERR) printk("BadSector ");
+ if (hd_error & ECC_ERR) printk("UncorrectableError ");
+ if (hd_error & ID_ERR) printk("SectorIdNotFound ");
+ if (hd_error & ABRT_ERR) printk("DriveStatusError ");
+ if (hd_error & TRK0_ERR) printk("TrackZeroNotFound ");
+ if (hd_error & MARK_ERR) printk("AddrMarkNotFound ");
+ printk("}");
+ if (hd_error & (BBD_ERR|ECC_ERR|ID_ERR|MARK_ERR)) {
+ printk(", CHS=%d/%d/%d", (inb(HD_HCYL)<<8) + inb(HD_LCYL),
+ inb(HD_CURRENT) & 0xf, inb(HD_SECTOR));
+ if (hd_req)
+ printk(", sector=%ld", blk_rq_pos(hd_req));
+ }
+ printk("\n");
+ }
+#else
+ printk("%s: %s: status=0x%02x.\n", name, msg, stat & 0xff);
+ if ((stat & ERR_STAT) == 0) {
+ hd_error = 0;
+ } else {
+ hd_error = inb(HD_ERROR);
+ printk("%s: %s: error=0x%02x.\n", name, msg, hd_error & 0xff);
+ }
+#endif
+}
+
+static void check_status(void)
+{
+ int i = inb_p(HD_STATUS);
+
+ if (!OK_STATUS(i)) {
+ dump_status("check_status", i);
+ bad_rw_intr();
+ }
+}
+
+static int controller_busy(void)
+{
+ int retries = 100000;
+ unsigned char status;
+
+ do {
+ status = inb_p(HD_STATUS);
+ } while ((status & BUSY_STAT) && --retries);
+ return status;
+}
+
+static int status_ok(void)
+{
+ unsigned char status = inb_p(HD_STATUS);
+
+ if (status & BUSY_STAT)
+ return 1; /* Ancient, but does it make sense??? */
+ if (status & WRERR_STAT)
+ return 0;
+ if (!(status & READY_STAT))
+ return 0;
+ if (!(status & SEEK_STAT))
+ return 0;
+ return 1;
+}
+
+static int controller_ready(unsigned int drive, unsigned int head)
+{
+ int retry = 100;
+
+ do {
+ if (controller_busy() & BUSY_STAT)
+ return 0;
+ outb_p(0xA0 | (drive<<4) | head, HD_CURRENT);
+ if (status_ok())
+ return 1;
+ } while (--retry);
+ return 0;
+}
+
+static void hd_out(struct hd_i_struct *disk,
+ unsigned int nsect,
+ unsigned int sect,
+ unsigned int head,
+ unsigned int cyl,
+ unsigned int cmd,
+ void (*intr_addr)(void))
+{
+ unsigned short port;
+
+#if (HD_DELAY > 0)
+ while (read_timer() - last_req < HD_DELAY)
+ /* nothing */;
+#endif
+ if (reset)
+ return;
+ if (!controller_ready(disk->unit, head)) {
+ reset = 1;
+ return;
+ }
+ SET_HANDLER(intr_addr);
+ outb_p(disk->ctl, HD_CMD);
+ port = HD_DATA;
+ outb_p(disk->wpcom >> 2, ++port);
+ outb_p(nsect, ++port);
+ outb_p(sect, ++port);
+ outb_p(cyl, ++port);
+ outb_p(cyl >> 8, ++port);
+ outb_p(0xA0 | (disk->unit << 4) | head, ++port);
+ outb_p(cmd, ++port);
+}
+
+static void hd_request (void);
+
+static int drive_busy(void)
+{
+ unsigned int i;
+ unsigned char c;
+
+ for (i = 0; i < 500000 ; i++) {
+ c = inb_p(HD_STATUS);
+ if ((c & (BUSY_STAT | READY_STAT | SEEK_STAT)) == STAT_OK)
+ return 0;
+ }
+ dump_status("reset timed out", c);
+ return 1;
+}
+
+static void reset_controller(void)
+{
+ int i;
+
+ outb_p(4, HD_CMD);
+ for (i = 0; i < 1000; i++) barrier();
+ outb_p(hd_info[0].ctl & 0x0f, HD_CMD);
+ for (i = 0; i < 1000; i++) barrier();
+ if (drive_busy())
+ printk("hd: controller still busy\n");
+ else if ((hd_error = inb(HD_ERROR)) != 1)
+ printk("hd: controller reset failed: %02x\n", hd_error);
+}
+
+static void reset_hd(void)
+{
+ static int i;
+
+repeat:
+ if (reset) {
+ reset = 0;
+ i = -1;
+ reset_controller();
+ } else {
+ check_status();
+ if (reset)
+ goto repeat;
+ }
+ if (++i < NR_HD) {
+ struct hd_i_struct *disk = &hd_info[i];
+ disk->special_op = disk->recalibrate = 1;
+ hd_out(disk, disk->sect, disk->sect, disk->head-1,
+ disk->cyl, ATA_CMD_INIT_DEV_PARAMS, &reset_hd);
+ if (reset)
+ goto repeat;
+ } else
+ hd_request();
+}
+
+/*
+ * Ok, don't know what to do with the unexpected interrupts: on some machines
+ * doing a reset and a retry seems to result in an eternal loop. Right now I
+ * ignore it, and just set the timeout.
+ *
+ * On laptops (and "green" PCs), an unexpected interrupt occurs whenever the
+ * drive enters "idle", "standby", or "sleep" mode, so if the status looks
+ * "good", we just ignore the interrupt completely.
+ */
+static void unexpected_hd_interrupt(void)
+{
+ unsigned int stat = inb_p(HD_STATUS);
+
+ if (stat & (BUSY_STAT|DRQ_STAT|ECC_STAT|ERR_STAT)) {
+ dump_status("unexpected interrupt", stat);
+ SET_TIMER;
+ }
+}
+
+/*
+ * bad_rw_intr() now tries to be a bit smarter and does things
+ * according to the error returned by the controller.
+ * -Mika Liljeberg (liljeber@cs.Helsinki.FI)
+ */
+static void bad_rw_intr(void)
+{
+ struct request *req = hd_req;
+
+ if (req != NULL) {
+ struct hd_i_struct *disk = req->rq_disk->private_data;
+ if (++req->errors >= MAX_ERRORS || (hd_error & BBD_ERR)) {
+ hd_end_request_cur(-EIO);
+ disk->special_op = disk->recalibrate = 1;
+ } else if (req->errors % RESET_FREQ == 0)
+ reset = 1;
+ else if ((hd_error & TRK0_ERR) || req->errors % RECAL_FREQ == 0)
+ disk->special_op = disk->recalibrate = 1;
+ /* Otherwise just retry */
+ }
+}
+
+static inline int wait_DRQ(void)
+{
+ int retries;
+ int stat;
+
+ for (retries = 0; retries < 100000; retries++) {
+ stat = inb_p(HD_STATUS);
+ if (stat & DRQ_STAT)
+ return 0;
+ }
+ dump_status("wait_DRQ", stat);
+ return -1;
+}
+
+static void read_intr(void)
+{
+ struct request *req;
+ int i, retries = 100000;
+
+ do {
+ i = (unsigned) inb_p(HD_STATUS);
+ if (i & BUSY_STAT)
+ continue;
+ if (!OK_STATUS(i))
+ break;
+ if (i & DRQ_STAT)
+ goto ok_to_read;
+ } while (--retries > 0);
+ dump_status("read_intr", i);
+ bad_rw_intr();
+ hd_request();
+ return;
+
+ok_to_read:
+ req = hd_req;
+ insw(HD_DATA, req->buffer, 256);
+#ifdef DEBUG
+ printk("%s: read: sector %ld, remaining = %u, buffer=%p\n",
+ req->rq_disk->disk_name, blk_rq_pos(req) + 1,
+ blk_rq_sectors(req) - 1, req->buffer+512);
+#endif
+ if (hd_end_request(0, 512)) {
+ SET_HANDLER(&read_intr);
+ return;
+ }
+
+ (void) inb_p(HD_STATUS);
+#if (HD_DELAY > 0)
+ last_req = read_timer();
+#endif
+ hd_request();
+}
+
+static void write_intr(void)
+{
+ struct request *req = hd_req;
+ int i;
+ int retries = 100000;
+
+ do {
+ i = (unsigned) inb_p(HD_STATUS);
+ if (i & BUSY_STAT)
+ continue;
+ if (!OK_STATUS(i))
+ break;
+ if ((blk_rq_sectors(req) <= 1) || (i & DRQ_STAT))
+ goto ok_to_write;
+ } while (--retries > 0);
+ dump_status("write_intr", i);
+ bad_rw_intr();
+ hd_request();
+ return;
+
+ok_to_write:
+ if (hd_end_request(0, 512)) {
+ SET_HANDLER(&write_intr);
+ outsw(HD_DATA, req->buffer, 256);
+ return;
+ }
+
+#if (HD_DELAY > 0)
+ last_req = read_timer();
+#endif
+ hd_request();
+}
+
+static void recal_intr(void)
+{
+ check_status();
+#if (HD_DELAY > 0)
+ last_req = read_timer();
+#endif
+ hd_request();
+}
+
+/*
+ * This is another of the error-routines I don't know what to do with. The
+ * best idea seems to just set reset, and start all over again.
+ */
+static void hd_times_out(unsigned long dummy)
+{
+ char *name;
+
+ do_hd = NULL;
+
+ if (!hd_req)
+ return;
+
+ spin_lock_irq(hd_queue->queue_lock);
+ reset = 1;
+ name = hd_req->rq_disk->disk_name;
+ printk("%s: timeout\n", name);
+ if (++hd_req->errors >= MAX_ERRORS) {
+#ifdef DEBUG
+ printk("%s: too many errors\n", name);
+#endif
+ hd_end_request_cur(-EIO);
+ }
+ hd_request();
+ spin_unlock_irq(hd_queue->queue_lock);
+}
+
+static int do_special_op(struct hd_i_struct *disk, struct request *req)
+{
+ if (disk->recalibrate) {
+ disk->recalibrate = 0;
+ hd_out(disk, disk->sect, 0, 0, 0, ATA_CMD_RESTORE, &recal_intr);
+ return reset;
+ }
+ if (disk->head > 16) {
+ printk("%s: cannot handle device with more than 16 heads - giving up\n", req->rq_disk->disk_name);
+ hd_end_request_cur(-EIO);
+ }
+ disk->special_op = 0;
+ return 1;
+}
+
+/*
+ * The driver enables interrupts as much as possible. In order to do this,
+ * (a) the device-interrupt is disabled before entering hd_request(),
+ * and (b) the timeout-interrupt is disabled before the sti().
+ *
+ * Interrupts are still masked (by default) whenever we are exchanging
+ * data/cmds with a drive, because some drives seem to have very poor
+ * tolerance for latency during I/O. The IDE driver has support to unmask
+ * interrupts for non-broken hardware, so use that driver if required.
+ */
+static void hd_request(void)
+{
+ unsigned int block, nsect, sec, track, head, cyl;
+ struct hd_i_struct *disk;
+ struct request *req;
+
+ if (do_hd)
+ return;
+repeat:
+ del_timer(&device_timer);
+
+ if (!hd_req) {
+ hd_req = blk_fetch_request(hd_queue);
+ if (!hd_req) {
+ do_hd = NULL;
+ return;
+ }
+ }
+ req = hd_req;
+
+ if (reset) {
+ reset_hd();
+ return;
+ }
+ disk = req->rq_disk->private_data;
+ block = blk_rq_pos(req);
+ nsect = blk_rq_sectors(req);
+ if (block >= get_capacity(req->rq_disk) ||
+ ((block+nsect) > get_capacity(req->rq_disk))) {
+ printk("%s: bad access: block=%d, count=%d\n",
+ req->rq_disk->disk_name, block, nsect);
+ hd_end_request_cur(-EIO);
+ goto repeat;
+ }
+
+ if (disk->special_op) {
+ if (do_special_op(disk, req))
+ goto repeat;
+ return;
+ }
+ sec = block % disk->sect + 1;
+ track = block / disk->sect;
+ head = track % disk->head;
+ cyl = track / disk->head;
+#ifdef DEBUG
+ printk("%s: %sing: CHS=%d/%d/%d, sectors=%d, buffer=%p\n",
+ req->rq_disk->disk_name,
+ rq_data_dir(req) == READ ? "read" : "writ",
+ cyl, head, sec, nsect, req->buffer);
+#endif
+ if (req->cmd_type == REQ_TYPE_FS) {
+ switch (rq_data_dir(req)) {
+ case READ:
+ hd_out(disk, nsect, sec, head, cyl, ATA_CMD_PIO_READ,
+ &read_intr);
+ if (reset)
+ goto repeat;
+ break;
+ case WRITE:
+ hd_out(disk, nsect, sec, head, cyl, ATA_CMD_PIO_WRITE,
+ &write_intr);
+ if (reset)
+ goto repeat;
+ if (wait_DRQ()) {
+ bad_rw_intr();
+ goto repeat;
+ }
+ outsw(HD_DATA, req->buffer, 256);
+ break;
+ default:
+ printk("unknown hd-command\n");
+ hd_end_request_cur(-EIO);
+ break;
+ }
+ }
+}
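
As a worked instance of the CHS arithmetic in hd_request() above (the drive geometry is invented for illustration): with sect = 63 and head = 16, block 10000 maps to sec = 10000 % 63 + 1 = 47, track = 10000 / 63 = 158, head = 158 % 16 = 14 and cyl = 158 / 16 = 9, i.e. LBA 10000 is issued to the controller as CHS 9/14/47.
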
+
+static void do_hd_request(struct request_queue *q)
+{
+ hd_request();
+}
+
+static int hd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
+{
+ struct hd_i_struct *disk = bdev->bd_disk->private_data;
+
+ geo->heads = disk->head;
+ geo->sectors = disk->sect;
+ geo->cylinders = disk->cyl;
+ return 0;
+}
+
+/*
+ * Releasing a block device means we sync() it, so that it can safely
+ * be forgotten about...
+ */
+
+static irqreturn_t hd_interrupt(int irq, void *dev_id)
+{
+ void (*handler)(void) = do_hd;
+
+ spin_lock(hd_queue->queue_lock);
+
+ do_hd = NULL;
+ del_timer(&device_timer);
+ if (!handler)
+ handler = unexpected_hd_interrupt;
+ handler();
+
+ spin_unlock(hd_queue->queue_lock);
+
+ return IRQ_HANDLED;
+}
+
+static const struct block_device_operations hd_fops = {
+ .getgeo = hd_getgeo,
+};
+
+/*
+ * This is the hard disk IRQ description. The IRQF_DISABLED in sa_flags
+ * means we run the IRQ-handler with interrupts disabled: this is bad for
+ * interrupt latency, but anything else has led to problems on some
+ * machines.
+ *
+ * We enable interrupts in some of the routines after making sure it's
+ * safe.
+ */
+
+static int __init hd_init(void)
+{
+ int drive;
+
+ if (register_blkdev(HD_MAJOR, "hd"))
+ return -1;
+
+ hd_queue = blk_init_queue(do_hd_request, &hd_lock);
+ if (!hd_queue) {
+ unregister_blkdev(HD_MAJOR, "hd");
+ return -ENOMEM;
+ }
+
+ blk_queue_max_hw_sectors(hd_queue, 255);
+ init_timer(&device_timer);
+ device_timer.function = hd_times_out;
+ blk_queue_logical_block_size(hd_queue, 512);
+
+ if (!NR_HD) {
+ /*
+ * We don't know anything about the drive. This means
+ * that you *MUST* specify the drive parameters to the
+ * kernel yourself.
+ *
+ * If we were on an i386, we used to read this info from
+ * the BIOS or CMOS. This doesn't work all that well,
+ * since this assumes that this is a primary or secondary
+ * drive, and if we're using this legacy driver, it's
+ * probably an auxiliary controller added to recover
+ * legacy data off an ST-506 drive. Either way, it's
+ * definitely safest to have the user explicitly specify
+ * the information.
+ */
+ printk("hd: no drives specified - use hd=cyl,head,sectors"
+ " on kernel command line\n");
+ goto out;
+ }
+
+ for (drive = 0 ; drive < NR_HD ; drive++) {
+ struct gendisk *disk = alloc_disk(64);
+ struct hd_i_struct *p = &hd_info[drive];
+ if (!disk)
+ goto Enomem;
+ disk->major = HD_MAJOR;
+ disk->first_minor = drive << 6;
+ disk->fops = &hd_fops;
+ sprintf(disk->disk_name, "hd%c", 'a'+drive);
+ disk->private_data = p;
+ set_capacity(disk, p->head * p->sect * p->cyl);
+ disk->queue = hd_queue;
+ p->unit = drive;
+ hd_gendisk[drive] = disk;
+ printk("%s: %luMB, CHS=%d/%d/%d\n",
+ disk->disk_name, (unsigned long)get_capacity(disk)/2048,
+ p->cyl, p->head, p->sect);
+ }
+
+ if (request_irq(HD_IRQ, hd_interrupt, IRQF_DISABLED, "hd", NULL)) {
+ printk("hd: unable to get IRQ%d for the hard disk driver\n",
+ HD_IRQ);
+ goto out1;
+ }
+ if (!request_region(HD_DATA, 8, "hd")) {
+ printk(KERN_WARNING "hd: port 0x%x busy\n", HD_DATA);
+ goto out2;
+ }
+ if (!request_region(HD_CMD, 1, "hd(cmd)")) {
+ printk(KERN_WARNING "hd: port 0x%x busy\n", HD_CMD);
+ goto out3;
+ }
+
+ /* Let them fly */
+ for (drive = 0; drive < NR_HD; drive++)
+ add_disk(hd_gendisk[drive]);
+
+ return 0;
+
+out3:
+ release_region(HD_DATA, 8);
+out2:
+ free_irq(HD_IRQ, NULL);
+out1:
+ for (drive = 0; drive < NR_HD; drive++)
+ put_disk(hd_gendisk[drive]);
+ NR_HD = 0;
+out:
+ del_timer(&device_timer);
+ unregister_blkdev(HD_MAJOR, "hd");
+ blk_cleanup_queue(hd_queue);
+ return -1;
+Enomem:
+ while (drive--)
+ put_disk(hd_gendisk[drive]);
+ goto out;
+}
+
+static int __init parse_hd_setup(char *line)
+{
+ int ints[6];
+
+ (void) get_options(line, ARRAY_SIZE(ints), ints);
+ hd_setup(NULL, ints);
+
+ return 1;
+}
+__setup("hd=", parse_hd_setup);
+
+late_initcall(hd_init);
diff --git a/drivers/block/ida_cmd.h b/drivers/block/ida_cmd.h
new file mode 100644
index 00000000..98b5746b
--- /dev/null
+++ b/drivers/block/ida_cmd.h
@@ -0,0 +1,349 @@
+/*
+ * Disk Array driver for Compaq SMART2 Controllers
+ * Copyright 1998 Compaq Computer Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Questions/Comments/Bugfixes to iss_storagedev@hp.com
+ *
+ */
+#ifndef ARRAYCMD_H
+#define ARRAYCMD_H
+
+#include <asm/types.h>
+#if 0
+#include <linux/blkdev.h>
+#endif
+
+/* for the Smart Array 42XX cards */
+#define S42XX_REQUEST_PORT_OFFSET 0x40
+#define S42XX_REPLY_INTR_MASK_OFFSET 0x34
+#define S42XX_REPLY_PORT_OFFSET 0x44
+#define S42XX_INTR_STATUS 0x30
+
+#define S42XX_INTR_OFF 0x08
+#define S42XX_INTR_PENDING 0x08
+
+#define COMMAND_FIFO 0x04
+#define COMMAND_COMPLETE_FIFO 0x08
+#define INTR_MASK 0x0C
+#define INTR_STATUS 0x10
+#define INTR_PENDING 0x14
+
+#define FIFO_NOT_EMPTY 0x01
+#define FIFO_NOT_FULL 0x02
+
+#define BIG_PROBLEM 0x40
+#define LOG_NOT_CONF 2
+
+#pragma pack(1)
+typedef struct {
+ __u32 size;
+ __u32 addr;
+} sg_t;
+
+#define RCODE_NONFATAL 0x02
+#define RCODE_FATAL 0x04
+#define RCODE_INVREQ 0x10
+typedef struct {
+ __u16 next;
+ __u8 cmd;
+ __u8 rcode;
+ __u32 blk;
+ __u16 blk_cnt;
+ __u8 sg_cnt;
+ __u8 reserved;
+} rhdr_t;
+
+#define SG_MAX 32
+typedef struct {
+ rhdr_t hdr;
+ sg_t sg[SG_MAX];
+ __u32 bp;
+} rblk_t;
+
+typedef struct {
+ __u8 unit;
+ __u8 prio;
+ __u16 size;
+} chdr_t;
+
+#define CMD_RWREQ 0x00
+#define CMD_IOCTL_PEND 0x01
+#define CMD_IOCTL_DONE 0x02
+
+typedef struct cmdlist {
+ chdr_t hdr;
+ rblk_t req;
+ __u32 size;
+ int retry_cnt;
+ __u32 busaddr;
+ int ctlr;
+ struct cmdlist *prev;
+ struct cmdlist *next;
+ struct request *rq;
+ int type;
+} cmdlist_t;
+
+#define ID_CTLR 0x11
+typedef struct {
+ __u8 nr_drvs;
+ __u32 cfg_sig;
+ __u8 firm_rev[4];
+ __u8 rom_rev[4];
+ __u8 hw_rev;
+ __u32 bb_rev;
+ __u32 drv_present_map;
+ __u32 ext_drv_map;
+ __u32 board_id;
+ __u8 cfg_error;
+ __u32 non_disk_bits;
+ __u8 bad_ram_addr;
+ __u8 cpu_rev;
+ __u8 pdpi_rev;
+ __u8 epic_rev;
+ __u8 wcxc_rev;
+ __u8 marketing_rev;
+ __u8 ctlr_flags;
+ __u8 host_flags;
+ __u8 expand_dis;
+ __u8 scsi_chips;
+ __u32 max_req_blocks;
+ __u32 ctlr_clock;
+ __u8 drvs_per_bus;
+ __u16 big_drv_present_map[8];
+ __u16 big_ext_drv_map[8];
+ __u16 big_non_disk_map[8];
+ __u16 task_flags;
+ __u8 icl_bus;
+ __u8 red_modes;
+ __u8 cur_red_mode;
+ __u8 red_ctlr_stat;
+ __u8 red_fail_reason;
+ __u8 reserved[403];
+} id_ctlr_t;
+
+typedef struct {
+ __u16 cyl;
+ __u8 heads;
+ __u8 xsig;
+ __u8 psectors;
+ __u16 wpre;
+ __u8 maxecc;
+ __u8 drv_ctrl;
+ __u16 pcyls;
+ __u8 pheads;
+ __u16 landz;
+ __u8 sect_per_track;
+ __u8 cksum;
+} drv_param_t;
+
+#define ID_LOG_DRV 0x10
+typedef struct {
+ __u16 blk_size;
+ __u32 nr_blks;
+ drv_param_t drv;
+ __u8 fault_tol;
+ __u8 reserved;
+ __u8 bios_disable;
+} id_log_drv_t;
+
+#define ID_LOG_DRV_EXT 0x18
+typedef struct {
+ __u32 log_drv_id;
+ __u8 log_drv_label[64];
+ __u8 reserved[418];
+} id_log_drv_ext_t;
+
+#define SENSE_LOG_DRV_STAT 0x12
+typedef struct {
+ __u8 status;
+ __u32 fail_map;
+ __u16 read_err[32];
+ __u16 write_err[32];
+ __u8 drv_err_data[256];
+ __u8 drq_timeout[32];
+ __u32 blks_to_recover;
+ __u8 drv_recovering;
+ __u16 remap_cnt[32];
+ __u32 replace_drv_map;
+ __u32 act_spare_map;
+ __u8 spare_stat;
+ __u8 spare_repl_map[32];
+ __u32 repl_ok_map;
+ __u8 media_exch;
+ __u8 cache_fail;
+ __u8 expn_fail;
+ __u8 unit_flags;
+ __u16 big_fail_map[8];
+ __u16 big_remap_map[128];
+ __u16 big_repl_map[8];
+ __u16 big_act_spare_map[8];
+ __u8 big_spar_repl_map[128];
+ __u16 big_repl_ok_map[8];
+ __u8 big_drv_rebuild;
+ __u8 reserved[36];
+} sense_log_drv_stat_t;
+
+#define START_RECOVER 0x13
+
+#define ID_PHYS_DRV 0x15
+typedef struct {
+ __u8 scsi_bus;
+ __u8 scsi_id;
+ __u16 blk_size;
+ __u32 nr_blks;
+ __u32 rsvd_blks;
+ __u8 drv_model[40];
+ __u8 drv_sn[40];
+ __u8 drv_fw[8];
+ __u8 scsi_iq_bits;
+ __u8 compaq_drv_stmp;
+ __u8 last_fail;
+ __u8 phys_drv_flags;
+ __u8 phys_drv_flags1;
+ __u8 scsi_lun;
+ __u8 phys_drv_flags2;
+ __u8 reserved;
+ __u32 spi_speed_rules;
+ __u8 phys_connector[2];
+ __u8 phys_box_on_bus;
+ __u8 phys_bay_in_box;
+} id_phys_drv_t;
+
+#define BLINK_DRV_LEDS 0x16
+typedef struct {
+ __u32 blink_duration;
+ __u32 reserved;
+ __u8 blink[256];
+ __u8 reserved1[248];
+} blink_drv_leds_t;
+
+#define SENSE_BLINK_LEDS 0x17
+typedef struct {
+ __u32 blink_duration;
+ __u32 btime_elap;
+ __u8 blink[256];
+ __u8 reserved1[248];
+} sense_blink_leds_t;
+
+#define IDA_READ 0x20
+#define IDA_WRITE 0x30
+#define IDA_WRITE_MEDIA 0x31
+#define RESET_TO_DIAG 0x40
+#define DIAG_PASS_THRU 0x41
+
+#define SENSE_CONFIG 0x50
+#define SET_CONFIG 0x51
+typedef struct {
+ __u32 cfg_sig;
+ __u16 compat_port;
+ __u8 data_dist_mode;
+ __u8 surf_an_ctrl;
+ __u16 ctlr_phys_drv;
+ __u16 log_unit_phys_drv;
+ __u16 fault_tol_mode;
+ __u8 phys_drv_param[16];
+ drv_param_t drv;
+ __u32 drv_asgn_map;
+ __u16 dist_factor;
+ __u32 spare_asgn_map;
+ __u8 reserved[6];
+ __u16 os;
+ __u8 ctlr_order;
+ __u8 extra_info;
+ __u32 data_offs;
+ __u8 parity_backedout_write_drvs;
+ __u8 parity_dist_mode;
+ __u8 parity_shift_fact;
+ __u8 bios_disable_flag;
+ __u32 blks_on_vol;
+ __u32 blks_per_drv;
+ __u8 scratch[16];
+ __u16 big_drv_map[8];
+ __u16 big_spare_map[8];
+ __u8 ss_source_vol;
+ __u8 mix_drv_cap_range;
+ struct {
+ __u16 big_drv_map[8];
+ __u32 blks_per_drv;
+ __u16 fault_tol_mode;
+ __u16 dist_factor;
+ } MDC_range[4];
+ __u8 reserved1[248];
+} config_t;
+
+#define BYPASS_VOL_STATE 0x52
+#define SS_CREATE_VOL 0x53
+#define CHANGE_CONFIG 0x54
+#define SENSE_ORIG_CONF 0x55
+#define REORDER_LOG_DRV 0x56
+typedef struct {
+ __u8 old_units[32];
+} reorder_log_drv_t;
+
+#define LABEL_LOG_DRV 0x57
+typedef struct {
+ __u8 log_drv_label[64];
+} label_log_drv_t;
+
+#define SS_TO_VOL 0x58
+
+#define SET_SURF_DELAY 0x60
+typedef struct {
+ __u16 delay;
+ __u8 reserved[510];
+} surf_delay_t;
+
+#define SET_OVERHEAT_DELAY 0x61
+typedef struct {
+ __u16 delay;
+} overhead_delay_t;
+
+#define SET_MP_DELAY
+typedef struct {
+ __u16 delay;
+ __u8 reserved[510];
+} mp_delay_t;
+
+#define PASSTHRU_A 0x91
+typedef struct {
+ __u8 target;
+ __u8 bus;
+ __u8 lun;
+ __u32 timeout;
+ __u32 flags;
+ __u8 status;
+ __u8 error;
+ __u8 cdb_len;
+ __u8 sense_error;
+ __u8 sense_key;
+ __u32 sense_info;
+ __u8 sense_code;
+ __u8 sense_qual;
+ __u32 residual;
+ __u8 reserved[4];
+ __u8 cdb[12];
+} scsi_param_t;
+
+#define RESUME_BACKGROUND_ACTIVITY 0x99
+#define SENSE_CONTROLLER_PERFORMANCE 0xa8
+#define FLUSH_CACHE 0xc2
+#define COLLECT_BUFFER 0xd2
+#define READ_FLASH_ROM 0xf6
+#define WRITE_FLASH_ROM 0xf7
+#pragma pack()
+
+#endif /* ARRAYCMD_H */
diff --git a/drivers/block/ida_ioctl.h b/drivers/block/ida_ioctl.h
new file mode 100644
index 00000000..888fff9c
--- /dev/null
+++ b/drivers/block/ida_ioctl.h
@@ -0,0 +1,87 @@
+/*
+ * Disk Array driver for Compaq SMART2 Controllers
+ * Copyright 1998 Compaq Computer Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Questions/Comments/Bugfixes to iss_storagedev@hp.com
+ *
+ */
+#ifndef IDA_IOCTL_H
+#define IDA_IOCTL_H
+
+#include "ida_cmd.h"
+#include "cpqarray.h"
+
+#define IDAGETDRVINFO 0x27272828
+#define IDAPASSTHRU 0x28282929
+#define IDAGETCTLRSIG 0x29293030
+#define IDAREVALIDATEVOLS 0x30303131
+#define IDADRIVERVERSION 0x31313232
+#define IDAGETPCIINFO 0x32323333
+
+typedef struct _ida_pci_info_struct
+{
+ unsigned char bus;
+ unsigned char dev_fn;
+ __u32 board_id;
+} ida_pci_info_struct;
+/*
+ * Normally, the ioctl determines the logical unit for this command from
+ * the major/minor number of the fd passed to ioctl. If you need to send
+ * a command to a different/nonexistent unit (such as during config), you
+ * can override the normal behavior by setting the unit valid bit (normally
+ * it should be zero). The controller the command is sent to is still
+ * determined by the major number of the open device.
+ */
+
+#define UNITVALID 0x80
+typedef struct {
+ __u8 cmd;
+ __u8 rcode;
+ __u8 unit;
+ __u32 blk;
+ __u16 blk_cnt;
+
+/* currently, sg_cnt is assumed to be 1: only the 0th element of sg is used */
+ struct {
+ void __user *addr;
+ size_t size;
+ } sg[SG_MAX];
+ int sg_cnt;
+
+ union ctlr_cmds {
+ drv_info_t drv;
+ unsigned char buf[1024];
+
+ id_ctlr_t id_ctlr;
+ drv_param_t drv_param;
+ id_log_drv_t id_log_drv;
+ id_log_drv_ext_t id_log_drv_ext;
+ sense_log_drv_stat_t sense_log_drv_stat;
+ id_phys_drv_t id_phys_drv;
+ blink_drv_leds_t blink_drv_leds;
+ sense_blink_leds_t sense_blink_leds;
+ config_t config;
+ reorder_log_drv_t reorder_log_drv;
+ label_log_drv_t label_log_drv;
+ surf_delay_t surf_delay;
+ overhead_delay_t overhead_delay;
+ mp_delay_t mp_delay;
+ scsi_param_t scsi_param;
+ } c;
+} ida_ioctl_t;
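+
+/*
+ * Illustrative sketch only (not part of this header): a user-space caller
+ * would typically fill in an ida_ioctl_t and hand it to the driver through
+ * one of the ioctls above, roughly like this (fd is an open array block
+ * device node; the command value and buffer size are just examples):
+ *
+ *	ida_ioctl_t io = { 0 };
+ *	io.cmd        = ID_CTLR;           // e.g. a controller identify command
+ *	io.unit       = 0 | UNITVALID;     // address unit 0 explicitly
+ *	io.sg[0].addr = buf;
+ *	io.sg[0].size = sizeof(id_ctlr_t);
+ *	io.sg_cnt     = 1;
+ *	ioctl(fd, IDAPASSTHRU, &io);
+ */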
+
+#endif /* IDA_IOCTL_H */
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
new file mode 100644
index 00000000..2ebacf0e
--- /dev/null
+++ b/drivers/block/loop.c
@@ -0,0 +1,1777 @@
+/*
+ * linux/drivers/block/loop.c
+ *
+ * Written by Theodore Ts'o, 3/29/93
+ *
+ * Copyright 1993 by Theodore Ts'o. Redistribution of this file is
+ * permitted under the GNU General Public License.
+ *
+ * DES encryption plus some minor changes by Werner Almesberger, 30-MAY-1993
+ * more DES encryption plus IDEA encryption by Nicholas J. Leon, June 20, 1996
+ *
+ * Modularized and updated for 1.1.16 kernel - Mitch Dsouza 28th May 1994
+ * Adapted for 1.3.59 kernel - Andries Brouwer, 1 Feb 1996
+ *
+ * Fixed do_loop_request() re-entrancy - Vincent.Renardias@waw.com Mar 20, 1997
+ *
+ * Added devfs support - Richard Gooch <rgooch@atnf.csiro.au> 16-Jan-1998
+ *
+ * Handle sparse backing files correctly - Kenn Humborg, Jun 28, 1998
+ *
+ * Loadable modules and other fixes by AK, 1998
+ *
+ * Make real block number available to downstream transfer functions, enables
+ * CBC (and relatives) mode encryption requiring unique IVs per data block.
+ * Reed H. Petty, rhp@draper.net
+ *
+ * Maximum number of loop devices now dynamic via max_loop module parameter.
+ * Russell Kroll <rkroll@exploits.org> 19990701
+ *
+ * Maximum number of loop devices when compiled-in now selectable by passing
+ * max_loop=<1-255> to the kernel on boot.
+ * Erik I. Bolsø, <eriki@himolde.no>, Oct 31, 1999
+ *
+ * Completely rewrote request handling to be make_request_fn style and
+ * non-blocking, pushing work to a helper thread. Lots of fixes from
+ * Al Viro too.
+ * Jens Axboe <axboe@suse.de>, Nov 2000
+ *
+ * Support up to 256 loop devices
+ * Heinz Mauelshagen <mge@sistina.com>, Feb 2002
+ *
+ * Support for falling back on the write file operation when the address space
+ * operation write_begin is not available on the backing filesystem.
+ * Anton Altaparmakov, 16 Feb 2005
+ *
+ * Still To Fix:
+ * - Advisory locking is ignored here.
+ *	- Should use its own CAP_* category instead of CAP_SYS_ADMIN
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/sched.h>
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/stat.h>
+#include <linux/errno.h>
+#include <linux/major.h>
+#include <linux/wait.h>
+#include <linux/blkdev.h>
+#include <linux/blkpg.h>
+#include <linux/init.h>
+#include <linux/swap.h>
+#include <linux/slab.h>
+#include <linux/loop.h>
+#include <linux/compat.h>
+#include <linux/suspend.h>
+#include <linux/freezer.h>
+#include <linux/mutex.h>
+#include <linux/writeback.h>
+#include <linux/buffer_head.h> /* for invalidate_bdev() */
+#include <linux/completion.h>
+#include <linux/highmem.h>
+#include <linux/kthread.h>
+#include <linux/splice.h>
+#include <linux/sysfs.h>
+
+#include <asm/uaccess.h>
+
+static LIST_HEAD(loop_devices);
+static DEFINE_MUTEX(loop_devices_mutex);
+
+static int max_part;
+static int part_shift;
+
+/*
+ * Transfer functions
+ */
+static int transfer_none(struct loop_device *lo, int cmd,
+ struct page *raw_page, unsigned raw_off,
+ struct page *loop_page, unsigned loop_off,
+ int size, sector_t real_block)
+{
+ char *raw_buf = kmap_atomic(raw_page, KM_USER0) + raw_off;
+ char *loop_buf = kmap_atomic(loop_page, KM_USER1) + loop_off;
+
+ if (cmd == READ)
+ memcpy(loop_buf, raw_buf, size);
+ else
+ memcpy(raw_buf, loop_buf, size);
+
+ kunmap_atomic(loop_buf, KM_USER1);
+ kunmap_atomic(raw_buf, KM_USER0);
+ cond_resched();
+ return 0;
+}
+
+static int transfer_xor(struct loop_device *lo, int cmd,
+ struct page *raw_page, unsigned raw_off,
+ struct page *loop_page, unsigned loop_off,
+ int size, sector_t real_block)
+{
+ char *raw_buf = kmap_atomic(raw_page, KM_USER0) + raw_off;
+ char *loop_buf = kmap_atomic(loop_page, KM_USER1) + loop_off;
+ char *in, *out, *key;
+ int i, keysize;
+
+ if (cmd == READ) {
+ in = raw_buf;
+ out = loop_buf;
+ } else {
+ in = loop_buf;
+ out = raw_buf;
+ }
+
+ key = lo->lo_encrypt_key;
+ keysize = lo->lo_encrypt_key_size;
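+	/*
+	 * XOR each byte with the key; the key index restarts at every
+	 * 512-byte boundary and wraps modulo the key length within it.
+	 */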
+ for (i = 0; i < size; i++)
+ *out++ = *in++ ^ key[(i & 511) % keysize];
+
+ kunmap_atomic(loop_buf, KM_USER1);
+ kunmap_atomic(raw_buf, KM_USER0);
+ cond_resched();
+ return 0;
+}
+
+static int xor_init(struct loop_device *lo, const struct loop_info64 *info)
+{
+ if (unlikely(info->lo_encrypt_key_size <= 0))
+ return -EINVAL;
+ return 0;
+}
+
+static struct loop_func_table none_funcs = {
+ .number = LO_CRYPT_NONE,
+ .transfer = transfer_none,
+};
+
+static struct loop_func_table xor_funcs = {
+ .number = LO_CRYPT_XOR,
+ .transfer = transfer_xor,
+ .init = xor_init
+};
+
+/* xfer_funcs[0] is special - its release function is never called */
+static struct loop_func_table *xfer_funcs[MAX_LO_CRYPT] = {
+ &none_funcs,
+ &xor_funcs
+};
+
+static loff_t get_loop_size(struct loop_device *lo, struct file *file)
+{
+ loff_t size, offset, loopsize;
+
+ /* Compute loopsize in bytes */
+ size = i_size_read(file->f_mapping->host);
+ offset = lo->lo_offset;
+ loopsize = size - offset;
+ if (lo->lo_sizelimit > 0 && lo->lo_sizelimit < loopsize)
+ loopsize = lo->lo_sizelimit;
+
+ /*
+ * Unfortunately, if we want to do I/O on the device,
+ * the number of 512-byte sectors has to fit into a sector_t.
+ */
+ return loopsize >> 9;
+}
+
+static int
+figure_loop_size(struct loop_device *lo)
+{
+ loff_t size = get_loop_size(lo, lo->lo_backing_file);
+ sector_t x = (sector_t)size;
+
+ if (unlikely((loff_t)x != size))
+ return -EFBIG;
+
+ set_capacity(lo->lo_disk, x);
+ return 0;
+}
+
+static inline int
+lo_do_transfer(struct loop_device *lo, int cmd,
+ struct page *rpage, unsigned roffs,
+ struct page *lpage, unsigned loffs,
+ int size, sector_t rblock)
+{
+ if (unlikely(!lo->transfer))
+ return 0;
+
+ return lo->transfer(lo, cmd, rpage, roffs, lpage, loffs, size, rblock);
+}
+
+/**
+ * do_lo_send_aops - helper for writing data to a loop device
+ *
+ * This is the fast version for backing filesystems which implement the address
+ * space operations write_begin and write_end.
+ */
+static int do_lo_send_aops(struct loop_device *lo, struct bio_vec *bvec,
+ loff_t pos, struct page *unused)
+{
+ struct file *file = lo->lo_backing_file; /* kudos to NFsckingS */
+ struct address_space *mapping = file->f_mapping;
+ pgoff_t index;
+ unsigned offset, bv_offs;
+ int len, ret;
+
+ mutex_lock(&mapping->host->i_mutex);
+ index = pos >> PAGE_CACHE_SHIFT;
+ offset = pos & ((pgoff_t)PAGE_CACHE_SIZE - 1);
+ bv_offs = bvec->bv_offset;
+ len = bvec->bv_len;
+ while (len > 0) {
+ sector_t IV;
+ unsigned size, copied;
+ int transfer_result;
+ struct page *page;
+ void *fsdata;
+
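+		/* IV = 512-byte sector offset of this position in the backing
+		 * file; transfer functions needing per-block IVs use it. */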
+ IV = ((sector_t)index << (PAGE_CACHE_SHIFT - 9))+(offset >> 9);
+ size = PAGE_CACHE_SIZE - offset;
+ if (size > len)
+ size = len;
+
+ ret = pagecache_write_begin(file, mapping, pos, size, 0,
+ &page, &fsdata);
+ if (ret)
+ goto fail;
+
+ file_update_time(file);
+
+ transfer_result = lo_do_transfer(lo, WRITE, page, offset,
+ bvec->bv_page, bv_offs, size, IV);
+ copied = size;
+ if (unlikely(transfer_result))
+ copied = 0;
+
+ ret = pagecache_write_end(file, mapping, pos, size, copied,
+ page, fsdata);
+ if (ret < 0 || ret != copied)
+ goto fail;
+
+ if (unlikely(transfer_result))
+ goto fail;
+
+ bv_offs += copied;
+ len -= copied;
+ offset = 0;
+ index++;
+ pos += copied;
+ }
+ ret = 0;
+out:
+ mutex_unlock(&mapping->host->i_mutex);
+ return ret;
+fail:
+ ret = -1;
+ goto out;
+}
+
+/**
+ * __do_lo_send_write - helper for writing data to a loop device
+ *
+ * This helper just factors out common code between do_lo_send_direct_write()
+ * and do_lo_send_write().
+ */
+static int __do_lo_send_write(struct file *file,
+ u8 *buf, const int len, loff_t pos)
+{
+ ssize_t bw;
+ mm_segment_t old_fs = get_fs();
+
+ set_fs(get_ds());
+ bw = file->f_op->write(file, buf, len, &pos);
+ set_fs(old_fs);
+ if (likely(bw == len))
+ return 0;
+ printk(KERN_ERR "loop: Write error at byte offset %llu, length %i.\n",
+ (unsigned long long)pos, len);
+ if (bw >= 0)
+ bw = -EIO;
+ return bw;
+}
+
+/**
+ * do_lo_send_direct_write - helper for writing data to a loop device
+ *
+ * This is the fast, non-transforming version for backing filesystems which do
+ * not implement the address space operations write_begin and write_end.
+ * It uses the write file operation which should be present on all writeable
+ * filesystems.
+ */
+static int do_lo_send_direct_write(struct loop_device *lo,
+ struct bio_vec *bvec, loff_t pos, struct page *page)
+{
+ ssize_t bw = __do_lo_send_write(lo->lo_backing_file,
+ kmap(bvec->bv_page) + bvec->bv_offset,
+ bvec->bv_len, pos);
+ kunmap(bvec->bv_page);
+ cond_resched();
+ return bw;
+}
+
+/**
+ * do_lo_send_write - helper for writing data to a loop device
+ *
+ * This is the slow, transforming version for filesystems which do not
+ * implement the address space operations write_begin and write_end. It
+ * uses the write file operation which should be present on all writeable
+ * filesystems.
+ *
+ * Using fops->write is slower than using aops->{prepare,commit}_write in the
+ * transforming case because we need to double buffer the data as we cannot do
+ * the transformations in place as we do not have direct access to the
+ * destination pages of the backing file.
+ */
+static int do_lo_send_write(struct loop_device *lo, struct bio_vec *bvec,
+ loff_t pos, struct page *page)
+{
+ int ret = lo_do_transfer(lo, WRITE, page, 0, bvec->bv_page,
+ bvec->bv_offset, bvec->bv_len, pos >> 9);
+ if (likely(!ret))
+ return __do_lo_send_write(lo->lo_backing_file,
+ page_address(page), bvec->bv_len,
+ pos);
+ printk(KERN_ERR "loop: Transfer error at byte offset %llu, "
+ "length %i.\n", (unsigned long long)pos, bvec->bv_len);
+ if (ret > 0)
+ ret = -EIO;
+ return ret;
+}
+
+static int lo_send(struct loop_device *lo, struct bio *bio, loff_t pos)
+{
+ int (*do_lo_send)(struct loop_device *, struct bio_vec *, loff_t,
+ struct page *page);
+ struct bio_vec *bvec;
+ struct page *page = NULL;
+ int i, ret = 0;
+
+ do_lo_send = do_lo_send_aops;
+ if (!(lo->lo_flags & LO_FLAGS_USE_AOPS)) {
+ do_lo_send = do_lo_send_direct_write;
+ if (lo->transfer != transfer_none) {
+ page = alloc_page(GFP_NOIO | __GFP_HIGHMEM);
+ if (unlikely(!page))
+ goto fail;
+ kmap(page);
+ do_lo_send = do_lo_send_write;
+ }
+ }
+ bio_for_each_segment(bvec, bio, i) {
+ ret = do_lo_send(lo, bvec, pos, page);
+ if (ret < 0)
+ break;
+ pos += bvec->bv_len;
+ }
+ if (page) {
+ kunmap(page);
+ __free_page(page);
+ }
+out:
+ return ret;
+fail:
+ printk(KERN_ERR "loop: Failed to allocate temporary page for write.\n");
+ ret = -ENOMEM;
+ goto out;
+}
+
+struct lo_read_data {
+ struct loop_device *lo;
+ struct page *page;
+ unsigned offset;
+ int bsize;
+};
+
+static int
+lo_splice_actor(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
+ struct splice_desc *sd)
+{
+ struct lo_read_data *p = sd->u.data;
+ struct loop_device *lo = p->lo;
+ struct page *page = buf->page;
+ sector_t IV;
+ int size;
+
+ IV = ((sector_t) page->index << (PAGE_CACHE_SHIFT - 9)) +
+ (buf->offset >> 9);
+ size = sd->len;
+ if (size > p->bsize)
+ size = p->bsize;
+
+ if (lo_do_transfer(lo, READ, page, buf->offset, p->page, p->offset, size, IV)) {
+ printk(KERN_ERR "loop: transfer error block %ld\n",
+ page->index);
+ size = -EINVAL;
+ }
+
+ flush_dcache_page(p->page);
+
+ if (size > 0)
+ p->offset += size;
+
+ return size;
+}
+
+static int
+lo_direct_splice_actor(struct pipe_inode_info *pipe, struct splice_desc *sd)
+{
+ return __splice_from_pipe(pipe, sd, lo_splice_actor);
+}
+
+static int
+do_lo_receive(struct loop_device *lo,
+ struct bio_vec *bvec, int bsize, loff_t pos)
+{
+ struct lo_read_data cookie;
+ struct splice_desc sd;
+ struct file *file;
+ long retval;
+
+ cookie.lo = lo;
+ cookie.page = bvec->bv_page;
+ cookie.offset = bvec->bv_offset;
+ cookie.bsize = bsize;
+
+ sd.len = 0;
+ sd.total_len = bvec->bv_len;
+ sd.flags = 0;
+ sd.pos = pos;
+ sd.u.data = &cookie;
+
+ file = lo->lo_backing_file;
+ retval = splice_direct_to_actor(file, &sd, lo_direct_splice_actor);
+
+ if (retval < 0)
+ return retval;
+
+ return 0;
+}
+
+static int
+lo_receive(struct loop_device *lo, struct bio *bio, int bsize, loff_t pos)
+{
+ struct bio_vec *bvec;
+ int i, ret = 0;
+
+ bio_for_each_segment(bvec, bio, i) {
+ ret = do_lo_receive(lo, bvec, bsize, pos);
+ if (ret < 0)
+ break;
+ pos += bvec->bv_len;
+ }
+ return ret;
+}
+
+static int do_bio_filebacked(struct loop_device *lo, struct bio *bio)
+{
+ loff_t pos;
+ int ret;
+
+ pos = ((loff_t) bio->bi_sector << 9) + lo->lo_offset;
+
+ if (bio_rw(bio) == WRITE) {
+ struct file *file = lo->lo_backing_file;
+
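+		/* Honour REQ_FLUSH by syncing the backing file before the
+		 * write, and REQ_FUA by syncing again once it completes. */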
+ if (bio->bi_rw & REQ_FLUSH) {
+ ret = vfs_fsync(file, 0);
+ if (unlikely(ret && ret != -EINVAL)) {
+ ret = -EIO;
+ goto out;
+ }
+ }
+
+ ret = lo_send(lo, bio, pos);
+
+ if ((bio->bi_rw & REQ_FUA) && !ret) {
+ ret = vfs_fsync(file, 0);
+ if (unlikely(ret && ret != -EINVAL))
+ ret = -EIO;
+ }
+ } else
+ ret = lo_receive(lo, bio, lo->lo_blocksize, pos);
+
+out:
+ return ret;
+}
+
+/*
+ * Add bio to back of pending list
+ */
+static void loop_add_bio(struct loop_device *lo, struct bio *bio)
+{
+ bio_list_add(&lo->lo_bio_list, bio);
+}
+
+/*
+ * Grab first pending buffer
+ */
+static struct bio *loop_get_bio(struct loop_device *lo)
+{
+ return bio_list_pop(&lo->lo_bio_list);
+}
+
+static int loop_make_request(struct request_queue *q, struct bio *old_bio)
+{
+ struct loop_device *lo = q->queuedata;
+ int rw = bio_rw(old_bio);
+
+ if (rw == READA)
+ rw = READ;
+
+ BUG_ON(!lo || (rw != READ && rw != WRITE));
+
+ spin_lock_irq(&lo->lo_lock);
+ if (lo->lo_state != Lo_bound)
+ goto out;
+ if (unlikely(rw == WRITE && (lo->lo_flags & LO_FLAGS_READ_ONLY)))
+ goto out;
+ loop_add_bio(lo, old_bio);
+ wake_up(&lo->lo_event);
+ spin_unlock_irq(&lo->lo_lock);
+ return 0;
+
+out:
+ spin_unlock_irq(&lo->lo_lock);
+ bio_io_error(old_bio);
+ return 0;
+}
+
+struct switch_request {
+ struct file *file;
+ struct completion wait;
+};
+
+static void do_loop_switch(struct loop_device *, struct switch_request *);
+
+static inline void loop_handle_bio(struct loop_device *lo, struct bio *bio)
+{
+ if (unlikely(!bio->bi_bdev)) {
+ do_loop_switch(lo, bio->bi_private);
+ bio_put(bio);
+ } else {
+ int ret = do_bio_filebacked(lo, bio);
+ bio_endio(bio, ret);
+ }
+}
+
+/*
+ * Worker thread that handles reads/writes to file-backed loop devices,
+ * to avoid blocking in our make_request_fn. It also does loop decrypting
+ * on reads for block-backed loop, as that is too heavy to do from
+ * b_end_io context where irqs may be disabled.
+ *
+ * Loop explanation: loop_clr_fd() sets lo_state to Lo_rundown before
+ * calling kthread_stop(). Once kthread_should_stop() is true,
+ * make_request will not queue any more requests; once it is true and
+ * lo_bio_list is empty, we are done with the loop.
+ */
+static int loop_thread(void *data)
+{
+ struct loop_device *lo = data;
+ struct bio *bio;
+
+ set_user_nice(current, -20);
+
+ while (!kthread_should_stop() || !bio_list_empty(&lo->lo_bio_list)) {
+
+ wait_event_interruptible(lo->lo_event,
+ !bio_list_empty(&lo->lo_bio_list) ||
+ kthread_should_stop());
+
+ if (bio_list_empty(&lo->lo_bio_list))
+ continue;
+ spin_lock_irq(&lo->lo_lock);
+ bio = loop_get_bio(lo);
+ spin_unlock_irq(&lo->lo_lock);
+
+ BUG_ON(!bio);
+ loop_handle_bio(lo, bio);
+ }
+
+ return 0;
+}
+
+/*
+ * loop_switch performs the hard work of switching a backing store.
+ * First it needs to flush existing IO; it does this by sending a magic
+ * BIO down the pipe. The completion of this BIO does the actual switch.
+ */
+static int loop_switch(struct loop_device *lo, struct file *file)
+{
+ struct switch_request w;
+ struct bio *bio = bio_alloc(GFP_KERNEL, 0);
+ if (!bio)
+ return -ENOMEM;
+ init_completion(&w.wait);
+ w.file = file;
+ bio->bi_private = &w;
+ bio->bi_bdev = NULL;
+ loop_make_request(lo->lo_queue, bio);
+ wait_for_completion(&w.wait);
+ return 0;
+}
+
+/*
+ * Helper to flush the IOs in loop, but keeping loop thread running
+ */
+static int loop_flush(struct loop_device *lo)
+{
+ /* loop not yet configured, no running thread, nothing to flush */
+ if (!lo->lo_thread)
+ return 0;
+
+ return loop_switch(lo, NULL);
+}
+
+/*
+ * Do the actual switch; called from the BIO completion routine
+ */
+static void do_loop_switch(struct loop_device *lo, struct switch_request *p)
+{
+ struct file *file = p->file;
+ struct file *old_file = lo->lo_backing_file;
+ struct address_space *mapping;
+
+ /* if no new file, only flush of queued bios requested */
+ if (!file)
+ goto out;
+
+ mapping = file->f_mapping;
+ mapping_set_gfp_mask(old_file->f_mapping, lo->old_gfp_mask);
+ lo->lo_backing_file = file;
+ lo->lo_blocksize = S_ISBLK(mapping->host->i_mode) ?
+ mapping->host->i_bdev->bd_block_size : PAGE_SIZE;
+ lo->old_gfp_mask = mapping_gfp_mask(mapping);
+ mapping_set_gfp_mask(mapping, lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS));
+out:
+ complete(&p->wait);
+}
+
+
+/*
+ * loop_change_fd switches the backing store of a loopback device to
+ * a new file. This is useful for operating system installers to free up
+ * the original file and in High Availability environments to switch to
+ * an alternative location for the content in case of server meltdown.
+ * This can only work if the loop device is used read-only, and if the
+ * new backing store is the same size and type as the old backing store.
+ */
+static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
+ unsigned int arg)
+{
+ struct file *file, *old_file;
+ struct inode *inode;
+ int error;
+
+ error = -ENXIO;
+ if (lo->lo_state != Lo_bound)
+ goto out;
+
+ /* the loop device has to be read-only */
+ error = -EINVAL;
+ if (!(lo->lo_flags & LO_FLAGS_READ_ONLY))
+ goto out;
+
+ error = -EBADF;
+ file = fget(arg);
+ if (!file)
+ goto out;
+
+ inode = file->f_mapping->host;
+ old_file = lo->lo_backing_file;
+
+ error = -EINVAL;
+
+ if (!S_ISREG(inode->i_mode) && !S_ISBLK(inode->i_mode))
+ goto out_putf;
+
+ /* size of the new backing store needs to be the same */
+ if (get_loop_size(lo, file) != get_loop_size(lo, old_file))
+ goto out_putf;
+
+ /* and ... switch */
+ error = loop_switch(lo, file);
+ if (error)
+ goto out_putf;
+
+ fput(old_file);
+ if (max_part > 0)
+ ioctl_by_bdev(bdev, BLKRRPART, 0);
+ return 0;
+
+ out_putf:
+ fput(file);
+ out:
+ return error;
+}
+
+static inline int is_loop_device(struct file *file)
+{
+ struct inode *i = file->f_mapping->host;
+
+ return i && S_ISBLK(i->i_mode) && MAJOR(i->i_rdev) == LOOP_MAJOR;
+}
+
+/* loop sysfs attributes */
+
+static ssize_t loop_attr_show(struct device *dev, char *page,
+ ssize_t (*callback)(struct loop_device *, char *))
+{
+ struct loop_device *l, *lo = NULL;
+
+ mutex_lock(&loop_devices_mutex);
+ list_for_each_entry(l, &loop_devices, lo_list)
+ if (disk_to_dev(l->lo_disk) == dev) {
+ lo = l;
+ break;
+ }
+ mutex_unlock(&loop_devices_mutex);
+
+ return lo ? callback(lo, page) : -EIO;
+}
+
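+/*
+ * LOOP_ATTR_RO(name) declares loop_attr_<name>_show(), generates a device
+ * model wrapper around it, and defines a read-only device attribute.
+ */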
+#define LOOP_ATTR_RO(_name) \
+static ssize_t loop_attr_##_name##_show(struct loop_device *, char *); \
+static ssize_t loop_attr_do_show_##_name(struct device *d, \
+ struct device_attribute *attr, char *b) \
+{ \
+ return loop_attr_show(d, b, loop_attr_##_name##_show); \
+} \
+static struct device_attribute loop_attr_##_name = \
+ __ATTR(_name, S_IRUGO, loop_attr_do_show_##_name, NULL);
+
+static ssize_t loop_attr_backing_file_show(struct loop_device *lo, char *buf)
+{
+ ssize_t ret;
+ char *p = NULL;
+
+ spin_lock_irq(&lo->lo_lock);
+ if (lo->lo_backing_file)
+ p = d_path(&lo->lo_backing_file->f_path, buf, PAGE_SIZE - 1);
+ spin_unlock_irq(&lo->lo_lock);
+
+ if (IS_ERR_OR_NULL(p))
+ ret = PTR_ERR(p);
+ else {
+ ret = strlen(p);
+ memmove(buf, p, ret);
+ buf[ret++] = '\n';
+ buf[ret] = 0;
+ }
+
+ return ret;
+}
+
+static ssize_t loop_attr_offset_show(struct loop_device *lo, char *buf)
+{
+ return sprintf(buf, "%llu\n", (unsigned long long)lo->lo_offset);
+}
+
+static ssize_t loop_attr_sizelimit_show(struct loop_device *lo, char *buf)
+{
+ return sprintf(buf, "%llu\n", (unsigned long long)lo->lo_sizelimit);
+}
+
+static ssize_t loop_attr_autoclear_show(struct loop_device *lo, char *buf)
+{
+ int autoclear = (lo->lo_flags & LO_FLAGS_AUTOCLEAR);
+
+ return sprintf(buf, "%s\n", autoclear ? "1" : "0");
+}
+
+LOOP_ATTR_RO(backing_file);
+LOOP_ATTR_RO(offset);
+LOOP_ATTR_RO(sizelimit);
+LOOP_ATTR_RO(autoclear);
+
+static struct attribute *loop_attrs[] = {
+ &loop_attr_backing_file.attr,
+ &loop_attr_offset.attr,
+ &loop_attr_sizelimit.attr,
+ &loop_attr_autoclear.attr,
+ NULL,
+};
+
+static struct attribute_group loop_attribute_group = {
+ .name = "loop",
+ .attrs= loop_attrs,
+};
+
+static int loop_sysfs_init(struct loop_device *lo)
+{
+ return sysfs_create_group(&disk_to_dev(lo->lo_disk)->kobj,
+ &loop_attribute_group);
+}
+
+static void loop_sysfs_exit(struct loop_device *lo)
+{
+ sysfs_remove_group(&disk_to_dev(lo->lo_disk)->kobj,
+ &loop_attribute_group);
+}
+
+static int loop_set_fd(struct loop_device *lo, fmode_t mode,
+ struct block_device *bdev, unsigned int arg)
+{
+ struct file *file, *f;
+ struct inode *inode;
+ struct address_space *mapping;
+ unsigned lo_blocksize;
+ int lo_flags = 0;
+ int error;
+ loff_t size;
+
+ /* This is safe, since we have a reference from open(). */
+ __module_get(THIS_MODULE);
+
+ error = -EBADF;
+ file = fget(arg);
+ if (!file)
+ goto out;
+
+ error = -EBUSY;
+ if (lo->lo_state != Lo_unbound)
+ goto out_putf;
+
+ /* Avoid recursion */
+ f = file;
+ while (is_loop_device(f)) {
+ struct loop_device *l;
+
+ if (f->f_mapping->host->i_bdev == bdev)
+ goto out_putf;
+
+ l = f->f_mapping->host->i_bdev->bd_disk->private_data;
+ if (l->lo_state == Lo_unbound) {
+ error = -EINVAL;
+ goto out_putf;
+ }
+ f = l->lo_backing_file;
+ }
+
+ mapping = file->f_mapping;
+ inode = mapping->host;
+
+ if (!(file->f_mode & FMODE_WRITE))
+ lo_flags |= LO_FLAGS_READ_ONLY;
+
+ error = -EINVAL;
+ if (S_ISREG(inode->i_mode) || S_ISBLK(inode->i_mode)) {
+ const struct address_space_operations *aops = mapping->a_ops;
+
+ if (aops->write_begin)
+ lo_flags |= LO_FLAGS_USE_AOPS;
+ if (!(lo_flags & LO_FLAGS_USE_AOPS) && !file->f_op->write)
+ lo_flags |= LO_FLAGS_READ_ONLY;
+
+ lo_blocksize = S_ISBLK(inode->i_mode) ?
+ inode->i_bdev->bd_block_size : PAGE_SIZE;
+
+ error = 0;
+ } else {
+ goto out_putf;
+ }
+
+ size = get_loop_size(lo, file);
+
+ if ((loff_t)(sector_t)size != size) {
+ error = -EFBIG;
+ goto out_putf;
+ }
+
+ if (!(mode & FMODE_WRITE))
+ lo_flags |= LO_FLAGS_READ_ONLY;
+
+ set_device_ro(bdev, (lo_flags & LO_FLAGS_READ_ONLY) != 0);
+
+ lo->lo_blocksize = lo_blocksize;
+ lo->lo_device = bdev;
+ lo->lo_flags = lo_flags;
+ lo->lo_backing_file = file;
+ lo->transfer = transfer_none;
+ lo->ioctl = NULL;
+ lo->lo_sizelimit = 0;
+ lo->old_gfp_mask = mapping_gfp_mask(mapping);
+ mapping_set_gfp_mask(mapping, lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS));
+
+ bio_list_init(&lo->lo_bio_list);
+
+ /*
+ * set queue make_request_fn, and add limits based on lower level
+ * device
+ */
+ blk_queue_make_request(lo->lo_queue, loop_make_request);
+ lo->lo_queue->queuedata = lo;
+
+ if (!(lo_flags & LO_FLAGS_READ_ONLY) && file->f_op->fsync)
+ blk_queue_flush(lo->lo_queue, REQ_FLUSH);
+
+ set_capacity(lo->lo_disk, size);
+ bd_set_size(bdev, size << 9);
+ loop_sysfs_init(lo);
+ /* let user-space know about the new size */
+ kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE);
+
+ set_blocksize(bdev, lo_blocksize);
+
+ lo->lo_thread = kthread_create(loop_thread, lo, "loop%d",
+ lo->lo_number);
+ if (IS_ERR(lo->lo_thread)) {
+ error = PTR_ERR(lo->lo_thread);
+ goto out_clr;
+ }
+ lo->lo_state = Lo_bound;
+ wake_up_process(lo->lo_thread);
+ if (max_part > 0)
+ ioctl_by_bdev(bdev, BLKRRPART, 0);
+ return 0;
+
+out_clr:
+ loop_sysfs_exit(lo);
+ lo->lo_thread = NULL;
+ lo->lo_device = NULL;
+ lo->lo_backing_file = NULL;
+ lo->lo_flags = 0;
+ set_capacity(lo->lo_disk, 0);
+ invalidate_bdev(bdev);
+ bd_set_size(bdev, 0);
+ kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE);
+ mapping_set_gfp_mask(mapping, lo->old_gfp_mask);
+ lo->lo_state = Lo_unbound;
+ out_putf:
+ fput(file);
+ out:
+ /* This is safe: open() is still holding a reference. */
+ module_put(THIS_MODULE);
+ return error;
+}
+
+static int
+loop_release_xfer(struct loop_device *lo)
+{
+ int err = 0;
+ struct loop_func_table *xfer = lo->lo_encryption;
+
+ if (xfer) {
+ if (xfer->release)
+ err = xfer->release(lo);
+ lo->transfer = NULL;
+ lo->lo_encryption = NULL;
+ module_put(xfer->owner);
+ }
+ return err;
+}
+
+static int
+loop_init_xfer(struct loop_device *lo, struct loop_func_table *xfer,
+ const struct loop_info64 *i)
+{
+ int err = 0;
+
+ if (xfer) {
+ struct module *owner = xfer->owner;
+
+ if (!try_module_get(owner))
+ return -EINVAL;
+ if (xfer->init)
+ err = xfer->init(lo, i);
+ if (err)
+ module_put(owner);
+ else
+ lo->lo_encryption = xfer;
+ }
+ return err;
+}
+
+static int loop_clr_fd(struct loop_device *lo, struct block_device *bdev)
+{
+ struct file *filp = lo->lo_backing_file;
+ gfp_t gfp = lo->old_gfp_mask;
+
+ if (lo->lo_state != Lo_bound)
+ return -ENXIO;
+
+ if (lo->lo_refcnt > 1) /* we needed one fd for the ioctl */
+ return -EBUSY;
+
+ if (filp == NULL)
+ return -EINVAL;
+
+ spin_lock_irq(&lo->lo_lock);
+ lo->lo_state = Lo_rundown;
+ spin_unlock_irq(&lo->lo_lock);
+
+ kthread_stop(lo->lo_thread);
+
+ spin_lock_irq(&lo->lo_lock);
+ lo->lo_backing_file = NULL;
+ spin_unlock_irq(&lo->lo_lock);
+
+ loop_release_xfer(lo);
+ lo->transfer = NULL;
+ lo->ioctl = NULL;
+ lo->lo_device = NULL;
+ lo->lo_encryption = NULL;
+ lo->lo_offset = 0;
+ lo->lo_sizelimit = 0;
+ lo->lo_encrypt_key_size = 0;
+ lo->lo_flags = 0;
+ lo->lo_thread = NULL;
+ memset(lo->lo_encrypt_key, 0, LO_KEY_SIZE);
+ memset(lo->lo_crypt_name, 0, LO_NAME_SIZE);
+ memset(lo->lo_file_name, 0, LO_NAME_SIZE);
+ if (bdev)
+ invalidate_bdev(bdev);
+ set_capacity(lo->lo_disk, 0);
+ loop_sysfs_exit(lo);
+ if (bdev) {
+ bd_set_size(bdev, 0);
+ /* let user-space know about this change */
+ kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE);
+ }
+ mapping_set_gfp_mask(filp->f_mapping, gfp);
+ lo->lo_state = Lo_unbound;
+ /* This is safe: open() is still holding a reference. */
+ module_put(THIS_MODULE);
+ if (max_part > 0 && bdev)
+ ioctl_by_bdev(bdev, BLKRRPART, 0);
+ mutex_unlock(&lo->lo_ctl_mutex);
+ /*
+ * Need not hold lo_ctl_mutex to fput backing file.
+ * Calling fput holding lo_ctl_mutex triggers a circular
+ * lock dependency possibility warning as fput can take
+ * bd_mutex which is usually taken before lo_ctl_mutex.
+ */
+ fput(filp);
+ return 0;
+}
+
+static int
+loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
+{
+ int err;
+ struct loop_func_table *xfer;
+ uid_t uid = current_uid();
+
+ if (lo->lo_encrypt_key_size &&
+ lo->lo_key_owner != uid &&
+ !capable(CAP_SYS_ADMIN))
+ return -EPERM;
+ if (lo->lo_state != Lo_bound)
+ return -ENXIO;
+ if ((unsigned int) info->lo_encrypt_key_size > LO_KEY_SIZE)
+ return -EINVAL;
+
+ err = loop_release_xfer(lo);
+ if (err)
+ return err;
+
+ if (info->lo_encrypt_type) {
+ unsigned int type = info->lo_encrypt_type;
+
+ if (type >= MAX_LO_CRYPT)
+ return -EINVAL;
+ xfer = xfer_funcs[type];
+ if (xfer == NULL)
+ return -EINVAL;
+ } else
+ xfer = NULL;
+
+ err = loop_init_xfer(lo, xfer, info);
+ if (err)
+ return err;
+
+ if (lo->lo_offset != info->lo_offset ||
+ lo->lo_sizelimit != info->lo_sizelimit) {
+ lo->lo_offset = info->lo_offset;
+ lo->lo_sizelimit = info->lo_sizelimit;
+ if (figure_loop_size(lo))
+ return -EFBIG;
+ }
+
+ memcpy(lo->lo_file_name, info->lo_file_name, LO_NAME_SIZE);
+ memcpy(lo->lo_crypt_name, info->lo_crypt_name, LO_NAME_SIZE);
+ lo->lo_file_name[LO_NAME_SIZE-1] = 0;
+ lo->lo_crypt_name[LO_NAME_SIZE-1] = 0;
+
+ if (!xfer)
+ xfer = &none_funcs;
+ lo->transfer = xfer->transfer;
+ lo->ioctl = xfer->ioctl;
+
+ if ((lo->lo_flags & LO_FLAGS_AUTOCLEAR) !=
+ (info->lo_flags & LO_FLAGS_AUTOCLEAR))
+ lo->lo_flags ^= LO_FLAGS_AUTOCLEAR;
+
+ lo->lo_encrypt_key_size = info->lo_encrypt_key_size;
+ lo->lo_init[0] = info->lo_init[0];
+ lo->lo_init[1] = info->lo_init[1];
+ if (info->lo_encrypt_key_size) {
+ memcpy(lo->lo_encrypt_key, info->lo_encrypt_key,
+ info->lo_encrypt_key_size);
+ lo->lo_key_owner = uid;
+ }
+
+ return 0;
+}
+
+static int
+loop_get_status(struct loop_device *lo, struct loop_info64 *info)
+{
+ struct file *file = lo->lo_backing_file;
+ struct kstat stat;
+ int error;
+
+ if (lo->lo_state != Lo_bound)
+ return -ENXIO;
+ error = vfs_getattr(file->f_path.mnt, file->f_path.dentry, &stat);
+ if (error)
+ return error;
+ memset(info, 0, sizeof(*info));
+ info->lo_number = lo->lo_number;
+ info->lo_device = huge_encode_dev(stat.dev);
+ info->lo_inode = stat.ino;
+ info->lo_rdevice = huge_encode_dev(lo->lo_device ? stat.rdev : stat.dev);
+ info->lo_offset = lo->lo_offset;
+ info->lo_sizelimit = lo->lo_sizelimit;
+ info->lo_flags = lo->lo_flags;
+ memcpy(info->lo_file_name, lo->lo_file_name, LO_NAME_SIZE);
+ memcpy(info->lo_crypt_name, lo->lo_crypt_name, LO_NAME_SIZE);
+ info->lo_encrypt_type =
+ lo->lo_encryption ? lo->lo_encryption->number : 0;
+ if (lo->lo_encrypt_key_size && capable(CAP_SYS_ADMIN)) {
+ info->lo_encrypt_key_size = lo->lo_encrypt_key_size;
+ memcpy(info->lo_encrypt_key, lo->lo_encrypt_key,
+ lo->lo_encrypt_key_size);
+ }
+ return 0;
+}
+
+static void
+loop_info64_from_old(const struct loop_info *info, struct loop_info64 *info64)
+{
+ memset(info64, 0, sizeof(*info64));
+ info64->lo_number = info->lo_number;
+ info64->lo_device = info->lo_device;
+ info64->lo_inode = info->lo_inode;
+ info64->lo_rdevice = info->lo_rdevice;
+ info64->lo_offset = info->lo_offset;
+ info64->lo_sizelimit = 0;
+ info64->lo_encrypt_type = info->lo_encrypt_type;
+ info64->lo_encrypt_key_size = info->lo_encrypt_key_size;
+ info64->lo_flags = info->lo_flags;
+ info64->lo_init[0] = info->lo_init[0];
+ info64->lo_init[1] = info->lo_init[1];
+ if (info->lo_encrypt_type == LO_CRYPT_CRYPTOAPI)
+ memcpy(info64->lo_crypt_name, info->lo_name, LO_NAME_SIZE);
+ else
+ memcpy(info64->lo_file_name, info->lo_name, LO_NAME_SIZE);
+ memcpy(info64->lo_encrypt_key, info->lo_encrypt_key, LO_KEY_SIZE);
+}
+
+static int
+loop_info64_to_old(const struct loop_info64 *info64, struct loop_info *info)
+{
+ memset(info, 0, sizeof(*info));
+ info->lo_number = info64->lo_number;
+ info->lo_device = info64->lo_device;
+ info->lo_inode = info64->lo_inode;
+ info->lo_rdevice = info64->lo_rdevice;
+ info->lo_offset = info64->lo_offset;
+ info->lo_encrypt_type = info64->lo_encrypt_type;
+ info->lo_encrypt_key_size = info64->lo_encrypt_key_size;
+ info->lo_flags = info64->lo_flags;
+ info->lo_init[0] = info64->lo_init[0];
+ info->lo_init[1] = info64->lo_init[1];
+ if (info->lo_encrypt_type == LO_CRYPT_CRYPTOAPI)
+ memcpy(info->lo_name, info64->lo_crypt_name, LO_NAME_SIZE);
+ else
+ memcpy(info->lo_name, info64->lo_file_name, LO_NAME_SIZE);
+ memcpy(info->lo_encrypt_key, info64->lo_encrypt_key, LO_KEY_SIZE);
+
+ /* error in case values were truncated */
+ if (info->lo_device != info64->lo_device ||
+ info->lo_rdevice != info64->lo_rdevice ||
+ info->lo_inode != info64->lo_inode ||
+ info->lo_offset != info64->lo_offset)
+ return -EOVERFLOW;
+
+ return 0;
+}
+
+static int
+loop_set_status_old(struct loop_device *lo, const struct loop_info __user *arg)
+{
+ struct loop_info info;
+ struct loop_info64 info64;
+
+ if (copy_from_user(&info, arg, sizeof (struct loop_info)))
+ return -EFAULT;
+ loop_info64_from_old(&info, &info64);
+ return loop_set_status(lo, &info64);
+}
+
+static int
+loop_set_status64(struct loop_device *lo, const struct loop_info64 __user *arg)
+{
+ struct loop_info64 info64;
+
+ if (copy_from_user(&info64, arg, sizeof (struct loop_info64)))
+ return -EFAULT;
+ return loop_set_status(lo, &info64);
+}
+
+static int
+loop_get_status_old(struct loop_device *lo, struct loop_info __user *arg) {
+ struct loop_info info;
+ struct loop_info64 info64;
+ int err = 0;
+
+ if (!arg)
+ err = -EINVAL;
+ if (!err)
+ err = loop_get_status(lo, &info64);
+ if (!err)
+ err = loop_info64_to_old(&info64, &info);
+ if (!err && copy_to_user(arg, &info, sizeof(info)))
+ err = -EFAULT;
+
+ return err;
+}
+
+static int
+loop_get_status64(struct loop_device *lo, struct loop_info64 __user *arg) {
+ struct loop_info64 info64;
+ int err = 0;
+
+ if (!arg)
+ err = -EINVAL;
+ if (!err)
+ err = loop_get_status(lo, &info64);
+ if (!err && copy_to_user(arg, &info64, sizeof(info64)))
+ err = -EFAULT;
+
+ return err;
+}
+
+static int loop_set_capacity(struct loop_device *lo, struct block_device *bdev)
+{
+ int err;
+ sector_t sec;
+ loff_t sz;
+
+ err = -ENXIO;
+ if (unlikely(lo->lo_state != Lo_bound))
+ goto out;
+ err = figure_loop_size(lo);
+ if (unlikely(err))
+ goto out;
+ sec = get_capacity(lo->lo_disk);
+	/* sector_t may be too narrow for the bit-shift, so widen to loff_t first */
+ sz = sec;
+ sz <<= 9;
+ mutex_lock(&bdev->bd_mutex);
+ bd_set_size(bdev, sz);
+ /* let user-space know about the new size */
+ kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE);
+ mutex_unlock(&bdev->bd_mutex);
+
+ out:
+ return err;
+}
+
+static int lo_ioctl(struct block_device *bdev, fmode_t mode,
+ unsigned int cmd, unsigned long arg)
+{
+ struct loop_device *lo = bdev->bd_disk->private_data;
+ int err;
+
+ mutex_lock_nested(&lo->lo_ctl_mutex, 1);
+ switch (cmd) {
+ case LOOP_SET_FD:
+ err = loop_set_fd(lo, mode, bdev, arg);
+ break;
+ case LOOP_CHANGE_FD:
+ err = loop_change_fd(lo, bdev, arg);
+ break;
+ case LOOP_CLR_FD:
+ /* loop_clr_fd would have unlocked lo_ctl_mutex on success */
+ err = loop_clr_fd(lo, bdev);
+ if (!err)
+ goto out_unlocked;
+ break;
+ case LOOP_SET_STATUS:
+ err = loop_set_status_old(lo, (struct loop_info __user *) arg);
+ break;
+ case LOOP_GET_STATUS:
+ err = loop_get_status_old(lo, (struct loop_info __user *) arg);
+ break;
+ case LOOP_SET_STATUS64:
+ err = loop_set_status64(lo, (struct loop_info64 __user *) arg);
+ break;
+ case LOOP_GET_STATUS64:
+ err = loop_get_status64(lo, (struct loop_info64 __user *) arg);
+ break;
+ case LOOP_SET_CAPACITY:
+ err = -EPERM;
+ if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN))
+ err = loop_set_capacity(lo, bdev);
+ break;
+ default:
+ err = lo->ioctl ? lo->ioctl(lo, cmd, arg) : -EINVAL;
+ }
+ mutex_unlock(&lo->lo_ctl_mutex);
+
+out_unlocked:
+ return err;
+}
+
+#ifdef CONFIG_COMPAT
+struct compat_loop_info {
+ compat_int_t lo_number; /* ioctl r/o */
+ compat_dev_t lo_device; /* ioctl r/o */
+ compat_ulong_t lo_inode; /* ioctl r/o */
+ compat_dev_t lo_rdevice; /* ioctl r/o */
+ compat_int_t lo_offset;
+ compat_int_t lo_encrypt_type;
+ compat_int_t lo_encrypt_key_size; /* ioctl w/o */
+ compat_int_t lo_flags; /* ioctl r/o */
+ char lo_name[LO_NAME_SIZE];
+ unsigned char lo_encrypt_key[LO_KEY_SIZE]; /* ioctl w/o */
+ compat_ulong_t lo_init[2];
+ char reserved[4];
+};
+
+/*
+ * Transfer 32-bit compatibility structure in userspace to 64-bit loop info
+ * - noinlined to reduce stack space usage in main part of driver
+ */
+static noinline int
+loop_info64_from_compat(const struct compat_loop_info __user *arg,
+ struct loop_info64 *info64)
+{
+ struct compat_loop_info info;
+
+ if (copy_from_user(&info, arg, sizeof(info)))
+ return -EFAULT;
+
+ memset(info64, 0, sizeof(*info64));
+ info64->lo_number = info.lo_number;
+ info64->lo_device = info.lo_device;
+ info64->lo_inode = info.lo_inode;
+ info64->lo_rdevice = info.lo_rdevice;
+ info64->lo_offset = info.lo_offset;
+ info64->lo_sizelimit = 0;
+ info64->lo_encrypt_type = info.lo_encrypt_type;
+ info64->lo_encrypt_key_size = info.lo_encrypt_key_size;
+ info64->lo_flags = info.lo_flags;
+ info64->lo_init[0] = info.lo_init[0];
+ info64->lo_init[1] = info.lo_init[1];
+ if (info.lo_encrypt_type == LO_CRYPT_CRYPTOAPI)
+ memcpy(info64->lo_crypt_name, info.lo_name, LO_NAME_SIZE);
+ else
+ memcpy(info64->lo_file_name, info.lo_name, LO_NAME_SIZE);
+ memcpy(info64->lo_encrypt_key, info.lo_encrypt_key, LO_KEY_SIZE);
+ return 0;
+}
+
+/*
+ * Transfer 64-bit loop info to 32-bit compatibility structure in userspace
+ * - noinlined to reduce stack space usage in main part of driver
+ */
+static noinline int
+loop_info64_to_compat(const struct loop_info64 *info64,
+ struct compat_loop_info __user *arg)
+{
+ struct compat_loop_info info;
+
+ memset(&info, 0, sizeof(info));
+ info.lo_number = info64->lo_number;
+ info.lo_device = info64->lo_device;
+ info.lo_inode = info64->lo_inode;
+ info.lo_rdevice = info64->lo_rdevice;
+ info.lo_offset = info64->lo_offset;
+ info.lo_encrypt_type = info64->lo_encrypt_type;
+ info.lo_encrypt_key_size = info64->lo_encrypt_key_size;
+ info.lo_flags = info64->lo_flags;
+ info.lo_init[0] = info64->lo_init[0];
+ info.lo_init[1] = info64->lo_init[1];
+ if (info.lo_encrypt_type == LO_CRYPT_CRYPTOAPI)
+ memcpy(info.lo_name, info64->lo_crypt_name, LO_NAME_SIZE);
+ else
+ memcpy(info.lo_name, info64->lo_file_name, LO_NAME_SIZE);
+ memcpy(info.lo_encrypt_key, info64->lo_encrypt_key, LO_KEY_SIZE);
+
+ /* error in case values were truncated */
+ if (info.lo_device != info64->lo_device ||
+ info.lo_rdevice != info64->lo_rdevice ||
+ info.lo_inode != info64->lo_inode ||
+ info.lo_offset != info64->lo_offset ||
+ info.lo_init[0] != info64->lo_init[0] ||
+ info.lo_init[1] != info64->lo_init[1])
+ return -EOVERFLOW;
+
+ if (copy_to_user(arg, &info, sizeof(info)))
+ return -EFAULT;
+ return 0;
+}
+
+static int
+loop_set_status_compat(struct loop_device *lo,
+ const struct compat_loop_info __user *arg)
+{
+ struct loop_info64 info64;
+ int ret;
+
+ ret = loop_info64_from_compat(arg, &info64);
+ if (ret < 0)
+ return ret;
+ return loop_set_status(lo, &info64);
+}
+
+static int
+loop_get_status_compat(struct loop_device *lo,
+ struct compat_loop_info __user *arg)
+{
+ struct loop_info64 info64;
+ int err = 0;
+
+ if (!arg)
+ err = -EINVAL;
+ if (!err)
+ err = loop_get_status(lo, &info64);
+ if (!err)
+ err = loop_info64_to_compat(&info64, arg);
+ return err;
+}
+
+static int lo_compat_ioctl(struct block_device *bdev, fmode_t mode,
+ unsigned int cmd, unsigned long arg)
+{
+ struct loop_device *lo = bdev->bd_disk->private_data;
+ int err;
+
+ switch(cmd) {
+ case LOOP_SET_STATUS:
+ mutex_lock(&lo->lo_ctl_mutex);
+ err = loop_set_status_compat(
+ lo, (const struct compat_loop_info __user *) arg);
+ mutex_unlock(&lo->lo_ctl_mutex);
+ break;
+ case LOOP_GET_STATUS:
+ mutex_lock(&lo->lo_ctl_mutex);
+ err = loop_get_status_compat(
+ lo, (struct compat_loop_info __user *) arg);
+ mutex_unlock(&lo->lo_ctl_mutex);
+ break;
+ case LOOP_SET_CAPACITY:
+ case LOOP_CLR_FD:
+ case LOOP_GET_STATUS64:
+ case LOOP_SET_STATUS64:
+ arg = (unsigned long) compat_ptr(arg);
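+		/* fall through: the converted argument is passed straight to
+		 * the native ioctl handler, just like SET_FD/CHANGE_FD below */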
+ case LOOP_SET_FD:
+ case LOOP_CHANGE_FD:
+ err = lo_ioctl(bdev, mode, cmd, arg);
+ break;
+ default:
+ err = -ENOIOCTLCMD;
+ break;
+ }
+ return err;
+}
+#endif
+
+static int lo_open(struct block_device *bdev, fmode_t mode)
+{
+ struct loop_device *lo = bdev->bd_disk->private_data;
+
+ mutex_lock(&lo->lo_ctl_mutex);
+ lo->lo_refcnt++;
+ mutex_unlock(&lo->lo_ctl_mutex);
+
+ return 0;
+}
+
+static int lo_release(struct gendisk *disk, fmode_t mode)
+{
+ struct loop_device *lo = disk->private_data;
+ int err;
+
+ mutex_lock(&lo->lo_ctl_mutex);
+
+ if (--lo->lo_refcnt)
+ goto out;
+
+ if (lo->lo_flags & LO_FLAGS_AUTOCLEAR) {
+ /*
+ * In autoclear mode, stop the loop thread
+ * and remove configuration after last close.
+ */
+ err = loop_clr_fd(lo, NULL);
+ if (!err)
+ goto out_unlocked;
+ } else {
+ /*
+ * Otherwise keep thread (if running) and config,
+ * but flush possible ongoing bios in thread.
+ */
+ loop_flush(lo);
+ }
+
+out:
+ mutex_unlock(&lo->lo_ctl_mutex);
+out_unlocked:
+ return 0;
+}
+
+static const struct block_device_operations lo_fops = {
+ .owner = THIS_MODULE,
+ .open = lo_open,
+ .release = lo_release,
+ .ioctl = lo_ioctl,
+#ifdef CONFIG_COMPAT
+ .compat_ioctl = lo_compat_ioctl,
+#endif
+};
+
+/*
+ * And now the modules code and kernel interface.
+ */
+static int max_loop;
+module_param(max_loop, int, S_IRUGO);
+MODULE_PARM_DESC(max_loop, "Maximum number of loop devices");
+module_param(max_part, int, S_IRUGO);
+MODULE_PARM_DESC(max_part, "Maximum number of partitions per loop device");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_BLOCKDEV_MAJOR(LOOP_MAJOR);
+
+int loop_register_transfer(struct loop_func_table *funcs)
+{
+ unsigned int n = funcs->number;
+
+ if (n >= MAX_LO_CRYPT || xfer_funcs[n])
+ return -EINVAL;
+ xfer_funcs[n] = funcs;
+ return 0;
+}
+
+int loop_unregister_transfer(int number)
+{
+ unsigned int n = number;
+ struct loop_device *lo;
+ struct loop_func_table *xfer;
+
+ if (n == 0 || n >= MAX_LO_CRYPT || (xfer = xfer_funcs[n]) == NULL)
+ return -EINVAL;
+
+ xfer_funcs[n] = NULL;
+
+ list_for_each_entry(lo, &loop_devices, lo_list) {
+ mutex_lock(&lo->lo_ctl_mutex);
+
+ if (lo->lo_encryption == xfer)
+ loop_release_xfer(lo);
+
+ mutex_unlock(&lo->lo_ctl_mutex);
+ }
+
+ return 0;
+}
+
+EXPORT_SYMBOL(loop_register_transfer);
+EXPORT_SYMBOL(loop_unregister_transfer);
+
+static struct loop_device *loop_alloc(int i)
+{
+ struct loop_device *lo;
+ struct gendisk *disk;
+
+ lo = kzalloc(sizeof(*lo), GFP_KERNEL);
+ if (!lo)
+ goto out;
+
+ lo->lo_queue = blk_alloc_queue(GFP_KERNEL);
+ if (!lo->lo_queue)
+ goto out_free_dev;
+
+ disk = lo->lo_disk = alloc_disk(1 << part_shift);
+ if (!disk)
+ goto out_free_queue;
+
+ mutex_init(&lo->lo_ctl_mutex);
+ lo->lo_number = i;
+ lo->lo_thread = NULL;
+ init_waitqueue_head(&lo->lo_event);
+ spin_lock_init(&lo->lo_lock);
+ disk->major = LOOP_MAJOR;
+ disk->first_minor = i << part_shift;
+ disk->fops = &lo_fops;
+ disk->private_data = lo;
+ disk->queue = lo->lo_queue;
+ sprintf(disk->disk_name, "loop%d", i);
+ return lo;
+
+out_free_queue:
+ blk_cleanup_queue(lo->lo_queue);
+out_free_dev:
+ kfree(lo);
+out:
+ return NULL;
+}
+
+static void loop_free(struct loop_device *lo)
+{
+ blk_cleanup_queue(lo->lo_queue);
+ put_disk(lo->lo_disk);
+ list_del(&lo->lo_list);
+ kfree(lo);
+}
+
+static struct loop_device *loop_init_one(int i)
+{
+ struct loop_device *lo;
+
+ list_for_each_entry(lo, &loop_devices, lo_list) {
+ if (lo->lo_number == i)
+ return lo;
+ }
+
+ lo = loop_alloc(i);
+ if (lo) {
+ add_disk(lo->lo_disk);
+ list_add_tail(&lo->lo_list, &loop_devices);
+ }
+ return lo;
+}
+
+static void loop_del_one(struct loop_device *lo)
+{
+ del_gendisk(lo->lo_disk);
+ loop_free(lo);
+}
+
+static struct kobject *loop_probe(dev_t dev, int *part, void *data)
+{
+ struct loop_device *lo;
+ struct kobject *kobj;
+
+ mutex_lock(&loop_devices_mutex);
+ lo = loop_init_one(MINOR(dev) >> part_shift);
+ kobj = lo ? get_disk(lo->lo_disk) : ERR_PTR(-ENOMEM);
+ mutex_unlock(&loop_devices_mutex);
+
+ *part = 0;
+ return kobj;
+}
+
+static int __init loop_init(void)
+{
+ int i, nr;
+ unsigned long range;
+ struct loop_device *lo, *next;
+
+ /*
+	 * The loop module can now instantiate the underlying device
+	 * structure on demand, provided that a matching dev node is
+	 * accessed. However, this does not work well with user space
+	 * tools that don't know about this feature. In order not to
+	 * break any existing tools, we do the following:
+	 *
+	 * (1) if max_loop is specified, create that many devices upfront,
+	 *     and this also becomes a hard limit.
+	 * (2) if max_loop is not specified, create 8 loop devices on module
+	 *     load; users can create further loop devices by creating dev
+	 *     nodes themselves and having the kernel automatically
+	 *     instantiate the actual device on demand.
+ */
+
+ part_shift = 0;
+ if (max_part > 0) {
+ part_shift = fls(max_part);
+
+ /*
+ * Adjust max_part according to part_shift as it is exported
+		 * to user space so that users can decide the correct minor
+		 * number if they want to create more devices.
+ *
+ * Note that -1 is required because partition 0 is reserved
+ * for the whole disk.
+ */
+ max_part = (1UL << part_shift) - 1;
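+		/*
+		 * Example: max_part=15 gives part_shift=fls(15)=4, so each
+		 * loop device spans 16 minors (the whole disk plus 15
+		 * partitions) and loop device i starts at minor i << 4.
+		 */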
+ }
+
+ if ((1UL << part_shift) > DISK_MAX_PARTS)
+ return -EINVAL;
+
+ if (max_loop > 1UL << (MINORBITS - part_shift))
+ return -EINVAL;
+
+ if (max_loop) {
+ nr = max_loop;
+ range = max_loop << part_shift;
+ } else {
+ nr = 8;
+ range = 1UL << MINORBITS;
+ }
+
+ if (register_blkdev(LOOP_MAJOR, "loop"))
+ return -EIO;
+
+ for (i = 0; i < nr; i++) {
+ lo = loop_alloc(i);
+ if (!lo)
+ goto Enomem;
+ list_add_tail(&lo->lo_list, &loop_devices);
+ }
+
+ /* point of no return */
+
+ list_for_each_entry(lo, &loop_devices, lo_list)
+ add_disk(lo->lo_disk);
+
+ blk_register_region(MKDEV(LOOP_MAJOR, 0), range,
+ THIS_MODULE, loop_probe, NULL, NULL);
+
+ printk(KERN_INFO "loop: module loaded\n");
+ return 0;
+
+Enomem:
+ printk(KERN_INFO "loop: out of memory\n");
+
+ list_for_each_entry_safe(lo, next, &loop_devices, lo_list)
+ loop_free(lo);
+
+ unregister_blkdev(LOOP_MAJOR, "loop");
+ return -ENOMEM;
+}
+
+static void __exit loop_exit(void)
+{
+ unsigned long range;
+ struct loop_device *lo, *next;
+
+ range = max_loop ? max_loop << part_shift : 1UL << MINORBITS;
+
+ list_for_each_entry_safe(lo, next, &loop_devices, lo_list)
+ loop_del_one(lo);
+
+ blk_unregister_region(MKDEV(LOOP_MAJOR, 0), range);
+ unregister_blkdev(LOOP_MAJOR, "loop");
+}
+
+module_init(loop_init);
+module_exit(loop_exit);
+
+#ifndef MODULE
+static int __init max_loop_setup(char *str)
+{
+ max_loop = simple_strtol(str, NULL, 0);
+ return 1;
+}
+
+__setup("max_loop=", max_loop_setup);
+#endif
diff --git a/drivers/block/mg_disk.c b/drivers/block/mg_disk.c
new file mode 100644
index 00000000..76fa3dea
--- /dev/null
+++ b/drivers/block/mg_disk.c
@@ -0,0 +1,1108 @@
+/*
+ * drivers/block/mg_disk.c
+ *
+ * Support for the mGine m[g]flash IO mode.
+ * Based on legacy hd.c
+ *
+ * (c) 2008 mGine Co.,LTD
+ * (c) 2008 unsik Kim <donari75@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/blkdev.h>
+#include <linux/hdreg.h>
+#include <linux/ata.h>
+#include <linux/interrupt.h>
+#include <linux/delay.h>
+#include <linux/platform_device.h>
+#include <linux/gpio.h>
+#include <linux/mg_disk.h>
+#include <linux/slab.h>
+
+#define MG_RES_SEC (CONFIG_MG_DISK_RES << 1)
+
+/* name for block device */
+#define MG_DISK_NAME "mgd"
+
+#define MG_DISK_MAJ 0
+#define MG_DISK_MAX_PART 16
+#define MG_SECTOR_SIZE 512
+#define MG_MAX_SECTS 256
+
+/* Register offsets */
+#define MG_BUFF_OFFSET 0x8000
+#define MG_REG_OFFSET 0xC000
+#define MG_REG_FEATURE (MG_REG_OFFSET + 2) /* write case */
+#define MG_REG_ERROR (MG_REG_OFFSET + 2) /* read case */
+#define MG_REG_SECT_CNT (MG_REG_OFFSET + 4)
+#define MG_REG_SECT_NUM (MG_REG_OFFSET + 6)
+#define MG_REG_CYL_LOW (MG_REG_OFFSET + 8)
+#define MG_REG_CYL_HIGH (MG_REG_OFFSET + 0xA)
+#define MG_REG_DRV_HEAD (MG_REG_OFFSET + 0xC)
+#define MG_REG_COMMAND (MG_REG_OFFSET + 0xE) /* write case */
+#define MG_REG_STATUS (MG_REG_OFFSET + 0xE) /* read case */
+#define MG_REG_DRV_CTRL (MG_REG_OFFSET + 0x10)
+#define MG_REG_BURST_CTRL (MG_REG_OFFSET + 0x12)
+
+/* handy status */
+#define MG_STAT_READY (ATA_DRDY | ATA_DSC)
+#define MG_READY_OK(s) (((s) & (MG_STAT_READY | (ATA_BUSY | ATA_DF | \
+ ATA_ERR))) == MG_STAT_READY)
+
+/* error code for others */
+#define MG_ERR_NONE 0
+#define MG_ERR_TIMEOUT 0x100
+#define MG_ERR_INIT_STAT 0x101
+#define MG_ERR_TRANSLATION 0x102
+#define MG_ERR_CTRL_RST 0x103
+#define MG_ERR_INV_STAT 0x104
+#define MG_ERR_RSTOUT 0x105
+
+#define MG_MAX_ERRORS 6 /* Max read/write errors */
+
+/* command */
+#define MG_CMD_RD 0x20
+#define MG_CMD_WR 0x30
+#define MG_CMD_SLEEP 0x99
+#define MG_CMD_WAKEUP 0xC3
+#define MG_CMD_ID 0xEC
+#define MG_CMD_WR_CONF 0x3C
+#define MG_CMD_RD_CONF 0x40
+
+/* operation mode */
+#define MG_OP_CASCADE (1 << 0)
+#define MG_OP_CASCADE_SYNC_RD (1 << 1)
+#define MG_OP_CASCADE_SYNC_WR (1 << 2)
+#define MG_OP_INTERLEAVE (1 << 3)
+
+/* synchronous */
+#define MG_BURST_LAT_4 (3 << 4)
+#define MG_BURST_LAT_5 (4 << 4)
+#define MG_BURST_LAT_6 (5 << 4)
+#define MG_BURST_LAT_7 (6 << 4)
+#define MG_BURST_LAT_8 (7 << 4)
+#define MG_BURST_LEN_4 (1 << 1)
+#define MG_BURST_LEN_8 (2 << 1)
+#define MG_BURST_LEN_16 (3 << 1)
+#define MG_BURST_LEN_32 (4 << 1)
+#define MG_BURST_LEN_CONT (0 << 1)
+
+/* timeout value (unit: ms) */
+#define MG_TMAX_CONF_TO_CMD 1
+#define MG_TMAX_WAIT_RD_DRQ 10
+#define MG_TMAX_WAIT_WR_DRQ 500
+#define MG_TMAX_RST_TO_BUSY 10
+#define MG_TMAX_HDRST_TO_RDY 500
+#define MG_TMAX_SWRST_TO_RDY 500
+#define MG_TMAX_RSTOUT 3000
+
+#define MG_DEV_MASK (MG_BOOT_DEV | MG_STORAGE_DEV | MG_STORAGE_DEV_SKIP_RST)
+
+/* main structure for mflash driver */
+struct mg_host {
+ struct device *dev;
+
+ struct request_queue *breq;
+ struct request *req;
+ spinlock_t lock;
+ struct gendisk *gd;
+
+ struct timer_list timer;
+ void (*mg_do_intr) (struct mg_host *);
+
+ u16 id[ATA_ID_WORDS];
+
+ u16 cyls;
+ u16 heads;
+ u16 sectors;
+ u32 n_sectors;
+ u32 nres_sectors;
+
+ void __iomem *dev_base;
+ unsigned int irq;
+ unsigned int rst;
+ unsigned int rstout;
+
+ u32 major;
+ u32 error;
+};
+
+/*
+ * Debugging macro and defines
+ */
+#undef DO_MG_DEBUG
+#ifdef DO_MG_DEBUG
+# define MG_DBG(fmt, args...) \
+ printk(KERN_DEBUG "%s:%d "fmt, __func__, __LINE__, ##args)
+#else /* CONFIG_MG_DEBUG */
+# define MG_DBG(fmt, args...) do { } while (0)
+#endif /* CONFIG_MG_DEBUG */
+
+static void mg_request(struct request_queue *);
+
+static bool mg_end_request(struct mg_host *host, int err, unsigned int nr_bytes)
+{
+ if (__blk_end_request(host->req, err, nr_bytes))
+ return true;
+
+ host->req = NULL;
+ return false;
+}
+
+static bool mg_end_request_cur(struct mg_host *host, int err)
+{
+ return mg_end_request(host, err, blk_rq_cur_bytes(host->req));
+}
+
+static void mg_dump_status(const char *msg, unsigned int stat,
+ struct mg_host *host)
+{
+ char *name = MG_DISK_NAME;
+
+ if (host->req)
+ name = host->req->rq_disk->disk_name;
+
+ printk(KERN_ERR "%s: %s: status=0x%02x { ", name, msg, stat & 0xff);
+ if (stat & ATA_BUSY)
+ printk("Busy ");
+ if (stat & ATA_DRDY)
+ printk("DriveReady ");
+ if (stat & ATA_DF)
+ printk("WriteFault ");
+ if (stat & ATA_DSC)
+ printk("SeekComplete ");
+ if (stat & ATA_DRQ)
+ printk("DataRequest ");
+ if (stat & ATA_CORR)
+ printk("CorrectedError ");
+ if (stat & ATA_ERR)
+ printk("Error ");
+ printk("}\n");
+ if ((stat & ATA_ERR) == 0) {
+ host->error = 0;
+ } else {
+ host->error = inb((unsigned long)host->dev_base + MG_REG_ERROR);
+ printk(KERN_ERR "%s: %s: error=0x%02x { ", name, msg,
+ host->error & 0xff);
+ if (host->error & ATA_BBK)
+ printk("BadSector ");
+ if (host->error & ATA_UNC)
+ printk("UncorrectableError ");
+ if (host->error & ATA_IDNF)
+ printk("SectorIdNotFound ");
+ if (host->error & ATA_ABORTED)
+ printk("DriveStatusError ");
+ if (host->error & ATA_AMNF)
+ printk("AddrMarkNotFound ");
+ printk("}");
+ if (host->error & (ATA_BBK | ATA_UNC | ATA_IDNF | ATA_AMNF)) {
+ if (host->req)
+ printk(", sector=%u",
+ (unsigned int)blk_rq_pos(host->req));
+ }
+ printk("\n");
+ }
+}
+
+static unsigned int mg_wait(struct mg_host *host, u32 expect, u32 msec)
+{
+ u8 status;
+ unsigned long expire, cur_jiffies;
+ struct mg_drv_data *prv_data = host->dev->platform_data;
+
+ host->error = MG_ERR_NONE;
+ expire = jiffies + msecs_to_jiffies(msec);
+
+	/* These two dummy status reads prevent reading an invalid
+	 * status. A very short time (3 mflash operating clock cycles)
+	 * is needed for the busy bit to be set. Use dummy reads instead
+	 * of a busy wait, because the mflash PLL speed is machine dependent.
+	 */
+ if (prv_data->use_polling) {
+ status = inb((unsigned long)host->dev_base + MG_REG_STATUS);
+ status = inb((unsigned long)host->dev_base + MG_REG_STATUS);
+ }
+
+ status = inb((unsigned long)host->dev_base + MG_REG_STATUS);
+
+ do {
+ cur_jiffies = jiffies;
+ if (status & ATA_BUSY) {
+ if (expect == ATA_BUSY)
+ break;
+ } else {
+ /* Check the error condition! */
+ if (status & ATA_ERR) {
+ mg_dump_status("mg_wait", status, host);
+ break;
+ }
+
+ if (expect == MG_STAT_READY)
+ if (MG_READY_OK(status))
+ break;
+
+ if (expect == ATA_DRQ)
+ if (status & ATA_DRQ)
+ break;
+ }
+ if (!msec) {
+ mg_dump_status("not ready", status, host);
+ return MG_ERR_INV_STAT;
+ }
+
+ status = inb((unsigned long)host->dev_base + MG_REG_STATUS);
+ } while (time_before(cur_jiffies, expire));
+
+ if (time_after_eq(cur_jiffies, expire) && msec)
+ host->error = MG_ERR_TIMEOUT;
+
+ return host->error;
+}
+
+static unsigned int mg_wait_rstout(u32 rstout, u32 msec)
+{
+ unsigned long expire;
+
+ expire = jiffies + msecs_to_jiffies(msec);
+ while (time_before(jiffies, expire)) {
+ if (gpio_get_value(rstout) == 1)
+ return MG_ERR_NONE;
+ msleep(10);
+ }
+
+ return MG_ERR_RSTOUT;
+}
+
+static void mg_unexpected_intr(struct mg_host *host)
+{
+ u32 status = inb((unsigned long)host->dev_base + MG_REG_STATUS);
+
+ mg_dump_status("mg_unexpected_intr", status, host);
+}
+
+static irqreturn_t mg_irq(int irq, void *dev_id)
+{
+ struct mg_host *host = dev_id;
+ void (*handler)(struct mg_host *) = host->mg_do_intr;
+
+ spin_lock(&host->lock);
+
+ host->mg_do_intr = NULL;
+ del_timer(&host->timer);
+ if (!handler)
+ handler = mg_unexpected_intr;
+ handler(host);
+
+ spin_unlock(&host->lock);
+
+ return IRQ_HANDLED;
+}
+
+/* local copy of ata_id_string() */
+static void mg_id_string(const u16 *id, unsigned char *s,
+ unsigned int ofs, unsigned int len)
+{
+ unsigned int c;
+
+ BUG_ON(len & 1);
+
+ while (len > 0) {
+ c = id[ofs] >> 8;
+ *s = c;
+ s++;
+
+ c = id[ofs] & 0xff;
+ *s = c;
+ s++;
+
+ ofs++;
+ len -= 2;
+ }
+}
+
+/* local copy of ata_id_c_string() */
+static void mg_id_c_string(const u16 *id, unsigned char *s,
+ unsigned int ofs, unsigned int len)
+{
+ unsigned char *p;
+
+ mg_id_string(id, s, ofs, len - 1);
+
+ p = s + strnlen(s, len - 1);
+ while (p > s && p[-1] == ' ')
+ p--;
+ *p = '\0';
+}
+
+static int mg_get_disk_id(struct mg_host *host)
+{
+ u32 i;
+ s32 err;
+ const u16 *id = host->id;
+ struct mg_drv_data *prv_data = host->dev->platform_data;
+ char fwrev[ATA_ID_FW_REV_LEN + 1];
+ char model[ATA_ID_PROD_LEN + 1];
+ char serial[ATA_ID_SERNO_LEN + 1];
+
+ if (!prv_data->use_polling)
+ outb(ATA_NIEN, (unsigned long)host->dev_base + MG_REG_DRV_CTRL);
+
+ outb(MG_CMD_ID, (unsigned long)host->dev_base + MG_REG_COMMAND);
+ err = mg_wait(host, ATA_DRQ, MG_TMAX_WAIT_RD_DRQ);
+ if (err)
+ return err;
+
+ for (i = 0; i < (MG_SECTOR_SIZE >> 1); i++)
+ host->id[i] = le16_to_cpu(inw((unsigned long)host->dev_base +
+ MG_BUFF_OFFSET + i * 2));
+
+ outb(MG_CMD_RD_CONF, (unsigned long)host->dev_base + MG_REG_COMMAND);
+ err = mg_wait(host, MG_STAT_READY, MG_TMAX_CONF_TO_CMD);
+ if (err)
+ return err;
+
+ if ((id[ATA_ID_FIELD_VALID] & 1) == 0)
+ return MG_ERR_TRANSLATION;
+
+ host->n_sectors = ata_id_u32(id, ATA_ID_LBA_CAPACITY);
+ host->cyls = id[ATA_ID_CYLS];
+ host->heads = id[ATA_ID_HEADS];
+ host->sectors = id[ATA_ID_SECTORS];
+
+ if (MG_RES_SEC && host->heads && host->sectors) {
+ /* modify cyls, n_sectors */
+ host->cyls = (host->n_sectors - MG_RES_SEC) /
+ host->heads / host->sectors;
+ host->nres_sectors = host->n_sectors - host->cyls *
+ host->heads * host->sectors;
+ host->n_sectors -= host->nres_sectors;
+ }
+
+ mg_id_c_string(id, fwrev, ATA_ID_FW_REV, sizeof(fwrev));
+ mg_id_c_string(id, model, ATA_ID_PROD, sizeof(model));
+ mg_id_c_string(id, serial, ATA_ID_SERNO, sizeof(serial));
+ printk(KERN_INFO "mg_disk: model: %s\n", model);
+ printk(KERN_INFO "mg_disk: firm: %.8s\n", fwrev);
+ printk(KERN_INFO "mg_disk: serial: %s\n", serial);
+ printk(KERN_INFO "mg_disk: %d + reserved %d sectors\n",
+ host->n_sectors, host->nres_sectors);
+
+ if (!prv_data->use_polling)
+ outb(0, (unsigned long)host->dev_base + MG_REG_DRV_CTRL);
+
+ return err;
+}
+
+
+static int mg_disk_init(struct mg_host *host)
+{
+ struct mg_drv_data *prv_data = host->dev->platform_data;
+ s32 err;
+ u8 init_status;
+
+ /* hdd rst low */
+ gpio_set_value(host->rst, 0);
+ err = mg_wait(host, ATA_BUSY, MG_TMAX_RST_TO_BUSY);
+ if (err)
+ return err;
+
+ /* hdd rst high */
+ gpio_set_value(host->rst, 1);
+ err = mg_wait(host, MG_STAT_READY, MG_TMAX_HDRST_TO_RDY);
+ if (err)
+ return err;
+
+ /* soft reset on */
+ outb(ATA_SRST | (prv_data->use_polling ? ATA_NIEN : 0),
+ (unsigned long)host->dev_base + MG_REG_DRV_CTRL);
+ err = mg_wait(host, ATA_BUSY, MG_TMAX_RST_TO_BUSY);
+ if (err)
+ return err;
+
+ /* soft reset off */
+ outb(prv_data->use_polling ? ATA_NIEN : 0,
+ (unsigned long)host->dev_base + MG_REG_DRV_CTRL);
+ err = mg_wait(host, MG_STAT_READY, MG_TMAX_SWRST_TO_RDY);
+ if (err)
+ return err;
+
+ init_status = inb((unsigned long)host->dev_base + MG_REG_STATUS) & 0xf;
+
+ if (init_status == 0xf)
+ return MG_ERR_INIT_STAT;
+
+ return err;
+}
+
+static void mg_bad_rw_intr(struct mg_host *host)
+{
+ if (host->req)
+ if (++host->req->errors >= MG_MAX_ERRORS ||
+ host->error == MG_ERR_TIMEOUT)
+ mg_end_request_cur(host, -EIO);
+}
+
+static unsigned int mg_out(struct mg_host *host,
+ unsigned int sect_num,
+ unsigned int sect_cnt,
+ unsigned int cmd,
+ void (*intr_addr)(struct mg_host *))
+{
+ struct mg_drv_data *prv_data = host->dev->platform_data;
+
+ if (mg_wait(host, MG_STAT_READY, MG_TMAX_CONF_TO_CMD))
+ return host->error;
+
+ if (!prv_data->use_polling) {
+ host->mg_do_intr = intr_addr;
+ mod_timer(&host->timer, jiffies + 3 * HZ);
+ }
+ if (MG_RES_SEC)
+ sect_num += MG_RES_SEC;
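+	/*
+	 * Program the task-file registers: the 28-bit LBA is split across
+	 * SECT_NUM (bits 0-7), CYL_LOW (8-15), CYL_HIGH (16-23) and the low
+	 * nibble of DRV_HEAD (24-27), with the LBA and device bits OR-ed in.
+	 */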
+ outb((u8)sect_cnt, (unsigned long)host->dev_base + MG_REG_SECT_CNT);
+ outb((u8)sect_num, (unsigned long)host->dev_base + MG_REG_SECT_NUM);
+ outb((u8)(sect_num >> 8), (unsigned long)host->dev_base +
+ MG_REG_CYL_LOW);
+ outb((u8)(sect_num >> 16), (unsigned long)host->dev_base +
+ MG_REG_CYL_HIGH);
+ outb((u8)((sect_num >> 24) | ATA_LBA | ATA_DEVICE_OBS),
+ (unsigned long)host->dev_base + MG_REG_DRV_HEAD);
+ outb(cmd, (unsigned long)host->dev_base + MG_REG_COMMAND);
+ return MG_ERR_NONE;
+}
+
+static void mg_read_one(struct mg_host *host, struct request *req)
+{
+ u16 *buff = (u16 *)req->buffer;
+ u32 i;
+
+ for (i = 0; i < MG_SECTOR_SIZE >> 1; i++)
+ *buff++ = inw((unsigned long)host->dev_base + MG_BUFF_OFFSET +
+ (i << 1));
+}
+
+static void mg_read(struct request *req)
+{
+ struct mg_host *host = req->rq_disk->private_data;
+
+ if (mg_out(host, blk_rq_pos(req), blk_rq_sectors(req),
+ MG_CMD_RD, NULL) != MG_ERR_NONE)
+ mg_bad_rw_intr(host);
+
+ MG_DBG("requested %d sects (from %ld), buffer=0x%p\n",
+ blk_rq_sectors(req), blk_rq_pos(req), req->buffer);
+
+ do {
+ if (mg_wait(host, ATA_DRQ,
+ MG_TMAX_WAIT_RD_DRQ) != MG_ERR_NONE) {
+ mg_bad_rw_intr(host);
+ return;
+ }
+
+ mg_read_one(host, req);
+
+ outb(MG_CMD_RD_CONF, (unsigned long)host->dev_base +
+ MG_REG_COMMAND);
+ } while (mg_end_request(host, 0, MG_SECTOR_SIZE));
+}
+
+static void mg_write_one(struct mg_host *host, struct request *req)
+{
+ u16 *buff = (u16 *)req->buffer;
+ u32 i;
+
+ for (i = 0; i < MG_SECTOR_SIZE >> 1; i++)
+ outw(*buff++, (unsigned long)host->dev_base + MG_BUFF_OFFSET +
+ (i << 1));
+}
+
+static void mg_write(struct request *req)
+{
+ struct mg_host *host = req->rq_disk->private_data;
+ unsigned int rem = blk_rq_sectors(req);
+
+ if (mg_out(host, blk_rq_pos(req), rem,
+ MG_CMD_WR, NULL) != MG_ERR_NONE) {
+ mg_bad_rw_intr(host);
+ return;
+ }
+
+ MG_DBG("requested %d sects (from %ld), buffer=0x%p\n",
+ rem, blk_rq_pos(req), req->buffer);
+
+ if (mg_wait(host, ATA_DRQ,
+ MG_TMAX_WAIT_WR_DRQ) != MG_ERR_NONE) {
+ mg_bad_rw_intr(host);
+ return;
+ }
+
+ do {
+ mg_write_one(host, req);
+
+ outb(MG_CMD_WR_CONF, (unsigned long)host->dev_base +
+ MG_REG_COMMAND);
+
+ rem--;
+ if (rem > 1 && mg_wait(host, ATA_DRQ,
+ MG_TMAX_WAIT_WR_DRQ) != MG_ERR_NONE) {
+ mg_bad_rw_intr(host);
+ return;
+ } else if (mg_wait(host, MG_STAT_READY,
+ MG_TMAX_WAIT_WR_DRQ) != MG_ERR_NONE) {
+ mg_bad_rw_intr(host);
+ return;
+ }
+ } while (mg_end_request(host, 0, MG_SECTOR_SIZE));
+}
+
+static void mg_read_intr(struct mg_host *host)
+{
+ struct request *req = host->req;
+ u32 i;
+
+ /* check status */
+ do {
+ i = inb((unsigned long)host->dev_base + MG_REG_STATUS);
+ if (i & ATA_BUSY)
+ break;
+ if (!MG_READY_OK(i))
+ break;
+ if (i & ATA_DRQ)
+ goto ok_to_read;
+ } while (0);
+ mg_dump_status("mg_read_intr", i, host);
+ mg_bad_rw_intr(host);
+ mg_request(host->breq);
+ return;
+
+ok_to_read:
+ mg_read_one(host, req);
+
+ MG_DBG("sector %ld, remaining=%ld, buffer=0x%p\n",
+ blk_rq_pos(req), blk_rq_sectors(req) - 1, req->buffer);
+
+ /* send read confirm */
+ outb(MG_CMD_RD_CONF, (unsigned long)host->dev_base + MG_REG_COMMAND);
+
+ if (mg_end_request(host, 0, MG_SECTOR_SIZE)) {
+ /* set handler if read remains */
+ host->mg_do_intr = mg_read_intr;
+ mod_timer(&host->timer, jiffies + 3 * HZ);
+ } else /* goto next request */
+ mg_request(host->breq);
+}
+
+static void mg_write_intr(struct mg_host *host)
+{
+ struct request *req = host->req;
+ u32 i;
+ bool rem;
+
+ /* check status */
+ do {
+ i = inb((unsigned long)host->dev_base + MG_REG_STATUS);
+ if (i & ATA_BUSY)
+ break;
+ if (!MG_READY_OK(i))
+ break;
+ if ((blk_rq_sectors(req) <= 1) || (i & ATA_DRQ))
+ goto ok_to_write;
+ } while (0);
+ mg_dump_status("mg_write_intr", i, host);
+ mg_bad_rw_intr(host);
+ mg_request(host->breq);
+ return;
+
+ok_to_write:
+ if ((rem = mg_end_request(host, 0, MG_SECTOR_SIZE))) {
+ /* write 1 sector and set handler if remains */
+ mg_write_one(host, req);
+ MG_DBG("sector %ld, remaining=%ld, buffer=0x%p\n",
+ blk_rq_pos(req), blk_rq_sectors(req), req->buffer);
+ host->mg_do_intr = mg_write_intr;
+ mod_timer(&host->timer, jiffies + 3 * HZ);
+ }
+
+ /* send write confirm */
+ outb(MG_CMD_WR_CONF, (unsigned long)host->dev_base + MG_REG_COMMAND);
+
+ if (!rem)
+ mg_request(host->breq);
+}
+
+void mg_times_out(unsigned long data)
+{
+ struct mg_host *host = (struct mg_host *)data;
+ char *name;
+
+ spin_lock_irq(&host->lock);
+
+ if (!host->req)
+ goto out_unlock;
+
+ host->mg_do_intr = NULL;
+
+ name = host->req->rq_disk->disk_name;
+ printk(KERN_DEBUG "%s: timeout\n", name);
+
+ host->error = MG_ERR_TIMEOUT;
+ mg_bad_rw_intr(host);
+
+out_unlock:
+ mg_request(host->breq);
+ spin_unlock_irq(&host->lock);
+}
+
+static void mg_request_poll(struct request_queue *q)
+{
+ struct mg_host *host = q->queuedata;
+
+ while (1) {
+ if (!host->req) {
+ host->req = blk_fetch_request(q);
+ if (!host->req)
+ break;
+ }
+
+ if (unlikely(host->req->cmd_type != REQ_TYPE_FS)) {
+ mg_end_request_cur(host, -EIO);
+ continue;
+ }
+
+ if (rq_data_dir(host->req) == READ)
+ mg_read(host->req);
+ else
+ mg_write(host->req);
+ }
+}
+
+static unsigned int mg_issue_req(struct request *req,
+ struct mg_host *host,
+ unsigned int sect_num,
+ unsigned int sect_cnt)
+{
+ switch (rq_data_dir(req)) {
+ case READ:
+ if (mg_out(host, sect_num, sect_cnt, MG_CMD_RD, &mg_read_intr)
+ != MG_ERR_NONE) {
+ mg_bad_rw_intr(host);
+ return host->error;
+ }
+ break;
+ case WRITE:
+ /* TODO : handler */
+ outb(ATA_NIEN, (unsigned long)host->dev_base + MG_REG_DRV_CTRL);
+ if (mg_out(host, sect_num, sect_cnt, MG_CMD_WR, &mg_write_intr)
+ != MG_ERR_NONE) {
+ mg_bad_rw_intr(host);
+ return host->error;
+ }
+ del_timer(&host->timer);
+ mg_wait(host, ATA_DRQ, MG_TMAX_WAIT_WR_DRQ);
+ outb(0, (unsigned long)host->dev_base + MG_REG_DRV_CTRL);
+ if (host->error) {
+ mg_bad_rw_intr(host);
+ return host->error;
+ }
+ mg_write_one(host, req);
+ mod_timer(&host->timer, jiffies + 3 * HZ);
+ outb(MG_CMD_WR_CONF, (unsigned long)host->dev_base +
+ MG_REG_COMMAND);
+ break;
+ }
+ return MG_ERR_NONE;
+}
+
+/* This function is also called from IRQ context */
+static void mg_request(struct request_queue *q)
+{
+ struct mg_host *host = q->queuedata;
+ struct request *req;
+ u32 sect_num, sect_cnt;
+
+ while (1) {
+ if (!host->req) {
+ host->req = blk_fetch_request(q);
+ if (!host->req)
+ break;
+ }
+ req = host->req;
+
+ /* check unwanted request call */
+ if (host->mg_do_intr)
+ return;
+
+ del_timer(&host->timer);
+
+ sect_num = blk_rq_pos(req);
+		/* deal with whole segments */
+ sect_cnt = blk_rq_sectors(req);
+
+ /* sanity check */
+ if (sect_num >= get_capacity(req->rq_disk) ||
+ ((sect_num + sect_cnt) >
+ get_capacity(req->rq_disk))) {
+ printk(KERN_WARNING
+ "%s: bad access: sector=%d, count=%d\n",
+ req->rq_disk->disk_name,
+ sect_num, sect_cnt);
+ mg_end_request_cur(host, -EIO);
+ continue;
+ }
+
+ if (unlikely(req->cmd_type != REQ_TYPE_FS)) {
+ mg_end_request_cur(host, -EIO);
+ continue;
+ }
+
+ if (!mg_issue_req(req, host, sect_num, sect_cnt))
+ return;
+ }
+}
+
+static int mg_getgeo(struct block_device *bdev, struct hd_geometry *geo)
+{
+ struct mg_host *host = bdev->bd_disk->private_data;
+
+ geo->cylinders = (unsigned short)host->cyls;
+ geo->heads = (unsigned char)host->heads;
+ geo->sectors = (unsigned char)host->sectors;
+ return 0;
+}
+
+static const struct block_device_operations mg_disk_ops = {
+ .getgeo = mg_getgeo
+};
+
+static int mg_suspend(struct platform_device *plat_dev, pm_message_t state)
+{
+ struct mg_drv_data *prv_data = plat_dev->dev.platform_data;
+ struct mg_host *host = prv_data->host;
+
+ if (mg_wait(host, MG_STAT_READY, MG_TMAX_CONF_TO_CMD))
+ return -EIO;
+
+ if (!prv_data->use_polling)
+ outb(ATA_NIEN, (unsigned long)host->dev_base + MG_REG_DRV_CTRL);
+
+ outb(MG_CMD_SLEEP, (unsigned long)host->dev_base + MG_REG_COMMAND);
+	/* wait until mflash enters deep sleep */
+ msleep(1);
+
+ if (mg_wait(host, MG_STAT_READY, MG_TMAX_CONF_TO_CMD)) {
+ if (!prv_data->use_polling)
+ outb(0, (unsigned long)host->dev_base + MG_REG_DRV_CTRL);
+ return -EIO;
+ }
+
+ return 0;
+}
+
+static int mg_resume(struct platform_device *plat_dev)
+{
+ struct mg_drv_data *prv_data = plat_dev->dev.platform_data;
+ struct mg_host *host = prv_data->host;
+
+ if (mg_wait(host, MG_STAT_READY, MG_TMAX_CONF_TO_CMD))
+ return -EIO;
+
+ outb(MG_CMD_WAKEUP, (unsigned long)host->dev_base + MG_REG_COMMAND);
+	/* wait until mflash wakes up */
+ msleep(1);
+
+ if (mg_wait(host, MG_STAT_READY, MG_TMAX_CONF_TO_CMD))
+ return -EIO;
+
+ if (!prv_data->use_polling)
+ outb(0, (unsigned long)host->dev_base + MG_REG_DRV_CTRL);
+
+ return 0;
+}
+
+static int mg_probe(struct platform_device *plat_dev)
+{
+ struct mg_host *host;
+ struct resource *rsc;
+ struct mg_drv_data *prv_data = plat_dev->dev.platform_data;
+ int err = 0;
+
+ if (!prv_data) {
+ printk(KERN_ERR "%s:%d fail (no driver_data)\n",
+ __func__, __LINE__);
+ err = -EINVAL;
+ goto probe_err;
+ }
+
+ /* alloc mg_host */
+ host = kzalloc(sizeof(struct mg_host), GFP_KERNEL);
+ if (!host) {
+ printk(KERN_ERR "%s:%d fail (no memory for mg_host)\n",
+ __func__, __LINE__);
+ err = -ENOMEM;
+ goto probe_err;
+ }
+ host->major = MG_DISK_MAJ;
+
+	/* link host and platform data to each other */
+ prv_data->host = host;
+ host->dev = &plat_dev->dev;
+
+ /* io remap */
+ rsc = platform_get_resource(plat_dev, IORESOURCE_MEM, 0);
+ if (!rsc) {
+ printk(KERN_ERR "%s:%d platform_get_resource fail\n",
+ __func__, __LINE__);
+ err = -EINVAL;
+ goto probe_err_2;
+ }
+ host->dev_base = ioremap(rsc->start, resource_size(rsc));
+ if (!host->dev_base) {
+ printk(KERN_ERR "%s:%d ioremap fail\n",
+ __func__, __LINE__);
+ err = -EIO;
+ goto probe_err_2;
+ }
+ MG_DBG("dev_base = 0x%x\n", (u32)host->dev_base);
+
+ /* get reset pin */
+ rsc = platform_get_resource_byname(plat_dev, IORESOURCE_IO,
+ MG_RST_PIN);
+ if (!rsc) {
+ printk(KERN_ERR "%s:%d get reset pin fail\n",
+ __func__, __LINE__);
+ err = -EIO;
+ goto probe_err_3;
+ }
+ host->rst = rsc->start;
+
+ /* init rst pin */
+ err = gpio_request(host->rst, MG_RST_PIN);
+ if (err)
+ goto probe_err_3;
+ gpio_direction_output(host->rst, 1);
+
+ /* reset out pin */
+ if (!(prv_data->dev_attr & MG_DEV_MASK))
+ goto probe_err_3a;
+
+ if (prv_data->dev_attr != MG_BOOT_DEV) {
+ rsc = platform_get_resource_byname(plat_dev, IORESOURCE_IO,
+ MG_RSTOUT_PIN);
+ if (!rsc) {
+ printk(KERN_ERR "%s:%d get reset-out pin fail\n",
+ __func__, __LINE__);
+ err = -EIO;
+ goto probe_err_3a;
+ }
+ host->rstout = rsc->start;
+ err = gpio_request(host->rstout, MG_RSTOUT_PIN);
+ if (err)
+ goto probe_err_3a;
+ gpio_direction_input(host->rstout);
+ }
+
+ /* disk reset */
+ if (prv_data->dev_attr == MG_STORAGE_DEV) {
+		/* If the POR sequence has not finished yet, wait */
+ err = mg_wait_rstout(host->rstout, MG_TMAX_RSTOUT);
+ if (err)
+ goto probe_err_3b;
+ err = mg_disk_init(host);
+ if (err) {
+ printk(KERN_ERR "%s:%d fail (err code : %d)\n",
+ __func__, __LINE__, err);
+ err = -EIO;
+ goto probe_err_3b;
+ }
+ }
+
+ /* get irq resource */
+ if (!prv_data->use_polling) {
+ host->irq = platform_get_irq(plat_dev, 0);
+ if (host->irq == -ENXIO) {
+ err = host->irq;
+ goto probe_err_3b;
+ }
+ err = request_irq(host->irq, mg_irq,
+ IRQF_DISABLED | IRQF_TRIGGER_RISING,
+ MG_DEV_NAME, host);
+ if (err) {
+ printk(KERN_ERR "%s:%d fail (request_irq err=%d)\n",
+ __func__, __LINE__, err);
+ goto probe_err_3b;
+ }
+
+ }
+
+ /* get disk id */
+ err = mg_get_disk_id(host);
+ if (err) {
+ printk(KERN_ERR "%s:%d fail (err code : %d)\n",
+ __func__, __LINE__, err);
+ err = -EIO;
+ goto probe_err_4;
+ }
+
+ err = register_blkdev(host->major, MG_DISK_NAME);
+ if (err < 0) {
+ printk(KERN_ERR "%s:%d register_blkdev fail (err code : %d)\n",
+ __func__, __LINE__, err);
+ goto probe_err_4;
+ }
+ if (!host->major)
+ host->major = err;
+
+ spin_lock_init(&host->lock);
+
+ if (prv_data->use_polling)
+ host->breq = blk_init_queue(mg_request_poll, &host->lock);
+ else
+ host->breq = blk_init_queue(mg_request, &host->lock);
+
+ if (!host->breq) {
+ err = -ENOMEM;
+ printk(KERN_ERR "%s:%d (blk_init_queue) fail\n",
+ __func__, __LINE__);
+ goto probe_err_5;
+ }
+ host->breq->queuedata = host;
+
+	/* mflash is a random-access device, so the noop elevator suits it */
+ err = elevator_change(host->breq, "noop");
+ if (err) {
+ printk(KERN_ERR "%s:%d (elevator_init) fail\n",
+ __func__, __LINE__);
+ goto probe_err_6;
+ }
+ blk_queue_max_hw_sectors(host->breq, MG_MAX_SECTS);
+ blk_queue_logical_block_size(host->breq, MG_SECTOR_SIZE);
+
+ init_timer(&host->timer);
+ host->timer.function = mg_times_out;
+ host->timer.data = (unsigned long)host;
+
+ host->gd = alloc_disk(MG_DISK_MAX_PART);
+ if (!host->gd) {
+ printk(KERN_ERR "%s:%d (alloc_disk) fail\n",
+ __func__, __LINE__);
+ err = -ENOMEM;
+ goto probe_err_7;
+ }
+ host->gd->major = host->major;
+ host->gd->first_minor = 0;
+ host->gd->fops = &mg_disk_ops;
+ host->gd->queue = host->breq;
+ host->gd->private_data = host;
+ sprintf(host->gd->disk_name, MG_DISK_NAME"a");
+
+ set_capacity(host->gd, host->n_sectors);
+
+ add_disk(host->gd);
+
+ return err;
+
+probe_err_7:
+ del_timer_sync(&host->timer);
+probe_err_6:
+ blk_cleanup_queue(host->breq);
+probe_err_5:
+ unregister_blkdev(MG_DISK_MAJ, MG_DISK_NAME);
+probe_err_4:
+ if (!prv_data->use_polling)
+ free_irq(host->irq, host);
+probe_err_3b:
+ gpio_free(host->rstout);
+probe_err_3a:
+ gpio_free(host->rst);
+probe_err_3:
+ iounmap(host->dev_base);
+probe_err_2:
+ kfree(host);
+probe_err:
+ return err;
+}
+
+static int mg_remove(struct platform_device *plat_dev)
+{
+ struct mg_drv_data *prv_data = plat_dev->dev.platform_data;
+ struct mg_host *host = prv_data->host;
+ int err = 0;
+
+ /* delete timer */
+ del_timer_sync(&host->timer);
+
+ /* remove disk */
+ if (host->gd) {
+ del_gendisk(host->gd);
+ put_disk(host->gd);
+ }
+ /* remove queue */
+ if (host->breq)
+ blk_cleanup_queue(host->breq);
+
+ /* unregister blk device */
+ unregister_blkdev(host->major, MG_DISK_NAME);
+
+ /* free irq */
+ if (!prv_data->use_polling)
+ free_irq(host->irq, host);
+
+ /* free reset-out pin */
+ if (prv_data->dev_attr != MG_BOOT_DEV)
+ gpio_free(host->rstout);
+
+ /* free rst pin */
+ if (host->rst)
+ gpio_free(host->rst);
+
+ /* unmap io */
+ if (host->dev_base)
+ iounmap(host->dev_base);
+
+ /* free mg_host */
+ kfree(host);
+
+ return err;
+}
+
+static struct platform_driver mg_disk_driver = {
+ .probe = mg_probe,
+ .remove = mg_remove,
+ .suspend = mg_suspend,
+ .resume = mg_resume,
+ .driver = {
+ .name = MG_DEV_NAME,
+ .owner = THIS_MODULE,
+ }
+};
+
+/****************************************************************************
+ *
+ * Module stuff
+ *
+ ****************************************************************************/
+
+static int __init mg_init(void)
+{
+ printk(KERN_INFO "mGine mflash driver, (c) 2008 mGine Co.\n");
+ return platform_driver_register(&mg_disk_driver);
+}
+
+static void __exit mg_exit(void)
+{
+ printk(KERN_INFO "mflash driver : bye bye\n");
+ platform_driver_unregister(&mg_disk_driver);
+}
+
+module_init(mg_init);
+module_exit(mg_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("unsik Kim <donari75@gmail.com>");
+MODULE_DESCRIPTION("mGine m[g]flash device driver");
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
new file mode 100644
index 00000000..f533f337
--- /dev/null
+++ b/drivers/block/nbd.c
@@ -0,0 +1,868 @@
+/*
+ * Network block device - make block devices work over TCP
+ *
+ * Note that you cannot swap over this thing yet. It seems to work, but it
+ * deadlocks sometimes - you cannot swap over TCP in general.
+ *
+ * Copyright 1997-2000, 2008 Pavel Machek <pavel@ucw.cz>
+ * Parts copyright 2001 Steven Whitehouse <steve@chygwyn.com>
+ *
+ * This file is released under GPLv2 or later.
+ *
+ * (part of code stolen from loop.c)
+ */
+
+#include <linux/major.h>
+
+#include <linux/blkdev.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <linux/fs.h>
+#include <linux/bio.h>
+#include <linux/stat.h>
+#include <linux/errno.h>
+#include <linux/file.h>
+#include <linux/ioctl.h>
+#include <linux/mutex.h>
+#include <linux/compiler.h>
+#include <linux/err.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <net/sock.h>
+#include <linux/net.h>
+#include <linux/kthread.h>
+
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <asm/types.h>
+
+#include <linux/nbd.h>
+
+#define LO_MAGIC 0x68797548
+
+#ifdef NDEBUG
+#define dprintk(flags, fmt...)
+#else /* NDEBUG */
+#define dprintk(flags, fmt...) do { \
+ if (debugflags & (flags)) printk(KERN_DEBUG fmt); \
+} while (0)
+#define DBG_IOCTL 0x0004
+#define DBG_INIT 0x0010
+#define DBG_EXIT 0x0020
+#define DBG_BLKDEV 0x0100
+#define DBG_RX 0x0200
+#define DBG_TX 0x0400
+static unsigned int debugflags;
+#endif /* NDEBUG */
+
+static unsigned int nbds_max = 16;
+static struct nbd_device *nbd_dev;
+static int max_part;
+
+/*
+ * Use just one lock (or at most 1 per NIC). Two arguments for this:
+ * 1. Each NIC is essentially a synchronization point for all servers
+ * accessed through that NIC so there's no need to have more locks
+ * than NICs anyway.
+ * 2. More locks lead to more "Dirty cache line bouncing" which will slow
+ * down each lock to the point where they're actually slower than just
+ * a single lock.
+ * Thanks go to Jens Axboe and Al Viro for their LKML emails explaining this!
+ */
+static DEFINE_SPINLOCK(nbd_lock);
+
+#ifndef NDEBUG
+static const char *ioctl_cmd_to_ascii(int cmd)
+{
+ switch (cmd) {
+ case NBD_SET_SOCK: return "set-sock";
+ case NBD_SET_BLKSIZE: return "set-blksize";
+ case NBD_SET_SIZE: return "set-size";
+ case NBD_DO_IT: return "do-it";
+ case NBD_CLEAR_SOCK: return "clear-sock";
+ case NBD_CLEAR_QUE: return "clear-que";
+ case NBD_PRINT_DEBUG: return "print-debug";
+ case NBD_SET_SIZE_BLOCKS: return "set-size-blocks";
+ case NBD_DISCONNECT: return "disconnect";
+ case BLKROSET: return "set-read-only";
+ case BLKFLSBUF: return "flush-buffer-cache";
+ }
+ return "unknown";
+}
+
+static const char *nbdcmd_to_ascii(int cmd)
+{
+ switch (cmd) {
+ case NBD_CMD_READ: return "read";
+ case NBD_CMD_WRITE: return "write";
+ case NBD_CMD_DISC: return "disconnect";
+ }
+ return "invalid";
+}
+#endif /* NDEBUG */
+
+static void nbd_end_request(struct request *req)
+{
+ int error = req->errors ? -EIO : 0;
+ struct request_queue *q = req->q;
+ unsigned long flags;
+
+ dprintk(DBG_BLKDEV, "%s: request %p: %s\n", req->rq_disk->disk_name,
+ req, error ? "failed" : "done");
+
+ spin_lock_irqsave(q->queue_lock, flags);
+ __blk_end_request_all(req, error);
+ spin_unlock_irqrestore(q->queue_lock, flags);
+}
+
+static void sock_shutdown(struct nbd_device *lo, int lock)
+{
+	/* Forcibly shut down the socket, causing all listeners
+	 * to error out
+ *
+ * FIXME: This code is duplicated from sys_shutdown, but
+ * there should be a more generic interface rather than
+ * calling socket ops directly here */
+ if (lock)
+ mutex_lock(&lo->tx_lock);
+ if (lo->sock) {
+ printk(KERN_WARNING "%s: shutting down socket\n",
+ lo->disk->disk_name);
+ kernel_sock_shutdown(lo->sock, SHUT_RDWR);
+ lo->sock = NULL;
+ }
+ if (lock)
+ mutex_unlock(&lo->tx_lock);
+}
+
+static void nbd_xmit_timeout(unsigned long arg)
+{
+ struct task_struct *task = (struct task_struct *)arg;
+
+ printk(KERN_WARNING "nbd: killing hung xmit (%s, pid: %d)\n",
+ task->comm, task->pid);
+ force_sig(SIGKILL, task);
+}
+
+/*
+ * Send or receive packet.
+ */
+static int sock_xmit(struct nbd_device *lo, int send, void *buf, int size,
+ int msg_flags)
+{
+ struct socket *sock = lo->sock;
+ int result;
+ struct msghdr msg;
+ struct kvec iov;
+ sigset_t blocked, oldset;
+
+ if (unlikely(!sock)) {
+ printk(KERN_ERR "%s: Attempted %s on closed socket in sock_xmit\n",
+ lo->disk->disk_name, (send ? "send" : "recv"));
+ return -EINVAL;
+ }
+
+ /* Allow interception of SIGKILL only
+ * Don't allow other signals to interrupt the transmission */
+ siginitsetinv(&blocked, sigmask(SIGKILL));
+ sigprocmask(SIG_SETMASK, &blocked, &oldset);
+
+ do {
+ sock->sk->sk_allocation = GFP_NOIO;
+ iov.iov_base = buf;
+ iov.iov_len = size;
+ msg.msg_name = NULL;
+ msg.msg_namelen = 0;
+ msg.msg_control = NULL;
+ msg.msg_controllen = 0;
+ msg.msg_flags = msg_flags | MSG_NOSIGNAL;
+
+ if (send) {
+ struct timer_list ti;
+
+ if (lo->xmit_timeout) {
+ init_timer(&ti);
+ ti.function = nbd_xmit_timeout;
+ ti.data = (unsigned long)current;
+ ti.expires = jiffies + lo->xmit_timeout;
+ add_timer(&ti);
+ }
+ result = kernel_sendmsg(sock, &msg, &iov, 1, size);
+ if (lo->xmit_timeout)
+ del_timer_sync(&ti);
+ } else
+ result = kernel_recvmsg(sock, &msg, &iov, 1, size,
+ msg.msg_flags);
+
+ if (signal_pending(current)) {
+ siginfo_t info;
+ printk(KERN_WARNING "nbd (pid %d: %s) got signal %d\n",
+ task_pid_nr(current), current->comm,
+ dequeue_signal_lock(current, &current->blocked, &info));
+ result = -EINTR;
+ sock_shutdown(lo, !send);
+ break;
+ }
+
+ if (result <= 0) {
+ if (result == 0)
+ result = -EPIPE; /* short read */
+ break;
+ }
+ size -= result;
+ buf += result;
+ } while (size > 0);
+
+ sigprocmask(SIG_SETMASK, &oldset, NULL);
+
+ return result;
+}
+
+static inline int sock_send_bvec(struct nbd_device *lo, struct bio_vec *bvec,
+ int flags)
+{
+ int result;
+ void *kaddr = kmap(bvec->bv_page);
+ result = sock_xmit(lo, 1, kaddr + bvec->bv_offset, bvec->bv_len, flags);
+ kunmap(bvec->bv_page);
+ return result;
+}
+
+/* always call with the tx_lock held */
+static int nbd_send_req(struct nbd_device *lo, struct request *req)
+{
+ int result, flags;
+ struct nbd_request request;
+ unsigned long size = blk_rq_bytes(req);
+
+ request.magic = htonl(NBD_REQUEST_MAGIC);
+ request.type = htonl(nbd_cmd(req));
+ request.from = cpu_to_be64((u64)blk_rq_pos(req) << 9);
+ request.len = htonl(size);
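+	/*
+	 * The handle is simply the request pointer; the server echoes it back
+	 * verbatim in its reply, and nbd_read_stat()/nbd_find_request() use it
+	 * to match the reply to the outstanding request.
+	 */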
+ memcpy(request.handle, &req, sizeof(req));
+
+ dprintk(DBG_TX, "%s: request %p: sending control (%s@%llu,%uB)\n",
+ lo->disk->disk_name, req,
+ nbdcmd_to_ascii(nbd_cmd(req)),
+ (unsigned long long)blk_rq_pos(req) << 9,
+ blk_rq_bytes(req));
+ result = sock_xmit(lo, 1, &request, sizeof(request),
+ (nbd_cmd(req) == NBD_CMD_WRITE) ? MSG_MORE : 0);
+ if (result <= 0) {
+ printk(KERN_ERR "%s: Send control failed (result %d)\n",
+ lo->disk->disk_name, result);
+ goto error_out;
+ }
+
+ if (nbd_cmd(req) == NBD_CMD_WRITE) {
+ struct req_iterator iter;
+ struct bio_vec *bvec;
+ /*
+ * we are really probing at internals to determine
+ * whether to set MSG_MORE or not...
+ */
+ rq_for_each_segment(bvec, req, iter) {
+ flags = 0;
+ if (!rq_iter_last(req, iter))
+ flags = MSG_MORE;
+ dprintk(DBG_TX, "%s: request %p: sending %d bytes data\n",
+ lo->disk->disk_name, req, bvec->bv_len);
+ result = sock_send_bvec(lo, bvec, flags);
+ if (result <= 0) {
+ printk(KERN_ERR "%s: Send data failed (result %d)\n",
+ lo->disk->disk_name, result);
+ goto error_out;
+ }
+ }
+ }
+ return 0;
+
+error_out:
+ return -EIO;
+}
+
+static struct request *nbd_find_request(struct nbd_device *lo,
+ struct request *xreq)
+{
+ struct request *req, *tmp;
+ int err;
+
+ err = wait_event_interruptible(lo->active_wq, lo->active_req != xreq);
+ if (unlikely(err))
+ goto out;
+
+ spin_lock(&lo->queue_lock);
+ list_for_each_entry_safe(req, tmp, &lo->queue_head, queuelist) {
+ if (req != xreq)
+ continue;
+ list_del_init(&req->queuelist);
+ spin_unlock(&lo->queue_lock);
+ return req;
+ }
+ spin_unlock(&lo->queue_lock);
+
+ err = -ENOENT;
+
+out:
+ return ERR_PTR(err);
+}
+
+static inline int sock_recv_bvec(struct nbd_device *lo, struct bio_vec *bvec)
+{
+ int result;
+ void *kaddr = kmap(bvec->bv_page);
+ result = sock_xmit(lo, 0, kaddr + bvec->bv_offset, bvec->bv_len,
+ MSG_WAITALL);
+ kunmap(bvec->bv_page);
+ return result;
+}
+
+/* NULL returned = something went wrong, inform userspace */
+static struct request *nbd_read_stat(struct nbd_device *lo)
+{
+ int result;
+ struct nbd_reply reply;
+ struct request *req;
+
+ reply.magic = 0;
+ result = sock_xmit(lo, 0, &reply, sizeof(reply), MSG_WAITALL);
+ if (result <= 0) {
+ printk(KERN_ERR "%s: Receive control failed (result %d)\n",
+ lo->disk->disk_name, result);
+ goto harderror;
+ }
+
+ if (ntohl(reply.magic) != NBD_REPLY_MAGIC) {
+ printk(KERN_ERR "%s: Wrong magic (0x%lx)\n",
+ lo->disk->disk_name,
+ (unsigned long)ntohl(reply.magic));
+ result = -EPROTO;
+ goto harderror;
+ }
+
+ req = nbd_find_request(lo, *(struct request **)reply.handle);
+ if (IS_ERR(req)) {
+ result = PTR_ERR(req);
+ if (result != -ENOENT)
+ goto harderror;
+
+ printk(KERN_ERR "%s: Unexpected reply (%p)\n",
+ lo->disk->disk_name, reply.handle);
+ result = -EBADR;
+ goto harderror;
+ }
+
+ if (ntohl(reply.error)) {
+ printk(KERN_ERR "%s: Other side returned error (%d)\n",
+ lo->disk->disk_name, ntohl(reply.error));
+ req->errors++;
+ return req;
+ }
+
+ dprintk(DBG_RX, "%s: request %p: got reply\n",
+ lo->disk->disk_name, req);
+ if (nbd_cmd(req) == NBD_CMD_READ) {
+ struct req_iterator iter;
+ struct bio_vec *bvec;
+
+ rq_for_each_segment(bvec, req, iter) {
+ result = sock_recv_bvec(lo, bvec);
+ if (result <= 0) {
+ printk(KERN_ERR "%s: Receive data failed (result %d)\n",
+ lo->disk->disk_name, result);
+ req->errors++;
+ return req;
+ }
+ dprintk(DBG_RX, "%s: request %p: got %d bytes data\n",
+ lo->disk->disk_name, req, bvec->bv_len);
+ }
+ }
+ return req;
+harderror:
+ lo->harderror = result;
+ return NULL;
+}
+
+static ssize_t pid_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct gendisk *disk = dev_to_disk(dev);
+
+ return sprintf(buf, "%ld\n",
+ (long) ((struct nbd_device *)disk->private_data)->pid);
+}
+
+static struct device_attribute pid_attr = {
+ .attr = { .name = "pid", .mode = S_IRUGO},
+ .show = pid_show,
+};
+
+static int nbd_do_it(struct nbd_device *lo)
+{
+ struct request *req;
+ int ret;
+
+ BUG_ON(lo->magic != LO_MAGIC);
+
+ lo->pid = current->pid;
+ ret = sysfs_create_file(&disk_to_dev(lo->disk)->kobj, &pid_attr.attr);
+ if (ret) {
+ printk(KERN_ERR "nbd: sysfs_create_file failed!");
+ lo->pid = 0;
+ return ret;
+ }
+
+ while ((req = nbd_read_stat(lo)) != NULL)
+ nbd_end_request(req);
+
+ sysfs_remove_file(&disk_to_dev(lo->disk)->kobj, &pid_attr.attr);
+ lo->pid = 0;
+ return 0;
+}
+
+static void nbd_clear_que(struct nbd_device *lo)
+{
+ struct request *req;
+
+ BUG_ON(lo->magic != LO_MAGIC);
+
+ /*
+ * Because we have set lo->sock to NULL under the tx_lock, all
+ * modifications to the list must have completed by now. For
+ * the same reason, the active_req must be NULL.
+ *
+ * As a consequence, we don't need to take the spin lock while
+ * purging the list here.
+ */
+ BUG_ON(lo->sock);
+ BUG_ON(lo->active_req);
+
+ while (!list_empty(&lo->queue_head)) {
+ req = list_entry(lo->queue_head.next, struct request,
+ queuelist);
+ list_del_init(&req->queuelist);
+ req->errors++;
+ nbd_end_request(req);
+ }
+}
+
+
+static void nbd_handle_req(struct nbd_device *lo, struct request *req)
+{
+ if (req->cmd_type != REQ_TYPE_FS)
+ goto error_out;
+
+ nbd_cmd(req) = NBD_CMD_READ;
+ if (rq_data_dir(req) == WRITE) {
+ nbd_cmd(req) = NBD_CMD_WRITE;
+ if (lo->flags & NBD_READ_ONLY) {
+ printk(KERN_ERR "%s: Write on read-only\n",
+ lo->disk->disk_name);
+ goto error_out;
+ }
+ }
+
+ req->errors = 0;
+
+ mutex_lock(&lo->tx_lock);
+ if (unlikely(!lo->sock)) {
+ mutex_unlock(&lo->tx_lock);
+ printk(KERN_ERR "%s: Attempted send on closed socket\n",
+ lo->disk->disk_name);
+ goto error_out;
+ }
+
+ lo->active_req = req;
+
+ if (nbd_send_req(lo, req) != 0) {
+ printk(KERN_ERR "%s: Request send failed\n",
+ lo->disk->disk_name);
+ req->errors++;
+ nbd_end_request(req);
+ } else {
+ spin_lock(&lo->queue_lock);
+ list_add(&req->queuelist, &lo->queue_head);
+ spin_unlock(&lo->queue_lock);
+ }
+
+ lo->active_req = NULL;
+ mutex_unlock(&lo->tx_lock);
+ wake_up_all(&lo->active_wq);
+
+ return;
+
+error_out:
+ req->errors++;
+ nbd_end_request(req);
+}
+
+static int nbd_thread(void *data)
+{
+ struct nbd_device *lo = data;
+ struct request *req;
+
+ set_user_nice(current, -20);
+ while (!kthread_should_stop() || !list_empty(&lo->waiting_queue)) {
+ /* wait for something to do */
+ wait_event_interruptible(lo->waiting_wq,
+ kthread_should_stop() ||
+ !list_empty(&lo->waiting_queue));
+
+ /* extract request */
+ if (list_empty(&lo->waiting_queue))
+ continue;
+
+ spin_lock_irq(&lo->queue_lock);
+ req = list_entry(lo->waiting_queue.next, struct request,
+ queuelist);
+ list_del_init(&req->queuelist);
+ spin_unlock_irq(&lo->queue_lock);
+
+ /* handle request */
+ nbd_handle_req(lo, req);
+ }
+ return 0;
+}
+
+/*
+ * We always wait for the result of a write, for now.  It would be nice to
+ * make it optional in the future, e.g.
+ * if ((rq_data_dir(req) == WRITE) && (lo->flags & NBD_WRITE_NOCHK))
+ * { printk( "Warning: Ignoring result!\n"); nbd_end_request( req ); }
+ */
+
+static void do_nbd_request(struct request_queue *q)
+{
+ struct request *req;
+
+ while ((req = blk_fetch_request(q)) != NULL) {
+ struct nbd_device *lo;
+
+ spin_unlock_irq(q->queue_lock);
+
+ dprintk(DBG_BLKDEV, "%s: request %p: dequeued (flags=%x)\n",
+ req->rq_disk->disk_name, req, req->cmd_type);
+
+ lo = req->rq_disk->private_data;
+
+ BUG_ON(lo->magic != LO_MAGIC);
+
+ if (unlikely(!lo->sock)) {
+ printk(KERN_ERR "%s: Attempted send on closed socket\n",
+ lo->disk->disk_name);
+ req->errors++;
+ nbd_end_request(req);
+ spin_lock_irq(q->queue_lock);
+ continue;
+ }
+
+ spin_lock_irq(&lo->queue_lock);
+ list_add_tail(&req->queuelist, &lo->waiting_queue);
+ spin_unlock_irq(&lo->queue_lock);
+
+ wake_up(&lo->waiting_wq);
+
+ spin_lock_irq(q->queue_lock);
+ }
+}
+
+/* Must be called with tx_lock held */
+
+static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *lo,
+ unsigned int cmd, unsigned long arg)
+{
+ switch (cmd) {
+ case NBD_DISCONNECT: {
+ struct request sreq;
+
+ printk(KERN_INFO "%s: NBD_DISCONNECT\n", lo->disk->disk_name);
+
+ blk_rq_init(NULL, &sreq);
+ sreq.cmd_type = REQ_TYPE_SPECIAL;
+ nbd_cmd(&sreq) = NBD_CMD_DISC;
+ if (!lo->sock)
+ return -EINVAL;
+ nbd_send_req(lo, &sreq);
+ return 0;
+ }
+
+ case NBD_CLEAR_SOCK: {
+ struct file *file;
+
+ lo->sock = NULL;
+ file = lo->file;
+ lo->file = NULL;
+ nbd_clear_que(lo);
+ BUG_ON(!list_empty(&lo->queue_head));
+ if (file)
+ fput(file);
+ return 0;
+ }
+
+ case NBD_SET_SOCK: {
+ struct file *file;
+ if (lo->file)
+ return -EBUSY;
+ file = fget(arg);
+ if (file) {
+ struct inode *inode = file->f_path.dentry->d_inode;
+ if (S_ISSOCK(inode->i_mode)) {
+ lo->file = file;
+ lo->sock = SOCKET_I(inode);
+ if (max_part > 0)
+ bdev->bd_invalidated = 1;
+ return 0;
+ } else {
+ fput(file);
+ }
+ }
+ return -EINVAL;
+ }
+
+ case NBD_SET_BLKSIZE:
+ lo->blksize = arg;
+ lo->bytesize &= ~(lo->blksize-1);
+ bdev->bd_inode->i_size = lo->bytesize;
+ set_blocksize(bdev, lo->blksize);
+ set_capacity(lo->disk, lo->bytesize >> 9);
+ return 0;
+
+ case NBD_SET_SIZE:
+ lo->bytesize = arg & ~(lo->blksize-1);
+ bdev->bd_inode->i_size = lo->bytesize;
+ set_blocksize(bdev, lo->blksize);
+ set_capacity(lo->disk, lo->bytesize >> 9);
+ return 0;
+
+ case NBD_SET_TIMEOUT:
+ lo->xmit_timeout = arg * HZ;
+ return 0;
+
+ case NBD_SET_SIZE_BLOCKS:
+ lo->bytesize = ((u64) arg) * lo->blksize;
+ bdev->bd_inode->i_size = lo->bytesize;
+ set_blocksize(bdev, lo->blksize);
+ set_capacity(lo->disk, lo->bytesize >> 9);
+ return 0;
+
+ case NBD_DO_IT: {
+ struct task_struct *thread;
+ struct file *file;
+ int error;
+
+ if (lo->pid)
+ return -EBUSY;
+ if (!lo->file)
+ return -EINVAL;
+
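+		/*
+		 * The process issuing NBD_DO_IT becomes the receiver: it sits
+		 * in nbd_do_it() reading replies off the socket, while the
+		 * nbd_thread kthread created below pulls queued requests from
+		 * waiting_queue and sends them.  tx_lock is dropped here since
+		 * the sender path takes it around every nbd_send_req() call.
+		 */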
+ mutex_unlock(&lo->tx_lock);
+
+ thread = kthread_create(nbd_thread, lo, lo->disk->disk_name);
+ if (IS_ERR(thread)) {
+ mutex_lock(&lo->tx_lock);
+ return PTR_ERR(thread);
+ }
+ wake_up_process(thread);
+ error = nbd_do_it(lo);
+ kthread_stop(thread);
+
+ mutex_lock(&lo->tx_lock);
+ if (error)
+ return error;
+ sock_shutdown(lo, 0);
+ file = lo->file;
+ lo->file = NULL;
+ nbd_clear_que(lo);
+ printk(KERN_WARNING "%s: queue cleared\n", lo->disk->disk_name);
+ if (file)
+ fput(file);
+ lo->bytesize = 0;
+ bdev->bd_inode->i_size = 0;
+ set_capacity(lo->disk, 0);
+ if (max_part > 0)
+ ioctl_by_bdev(bdev, BLKRRPART, 0);
+ return lo->harderror;
+ }
+
+ case NBD_CLEAR_QUE:
+ /*
+ * This is for compatibility only. The queue is always cleared
+ * by NBD_DO_IT or NBD_CLEAR_SOCK.
+ */
+ BUG_ON(!lo->sock && !list_empty(&lo->queue_head));
+ return 0;
+
+ case NBD_PRINT_DEBUG:
+ printk(KERN_INFO "%s: next = %p, prev = %p, head = %p\n",
+ bdev->bd_disk->disk_name,
+ lo->queue_head.next, lo->queue_head.prev,
+ &lo->queue_head);
+ return 0;
+ }
+ return -ENOTTY;
+}
+
+static int nbd_ioctl(struct block_device *bdev, fmode_t mode,
+ unsigned int cmd, unsigned long arg)
+{
+ struct nbd_device *lo = bdev->bd_disk->private_data;
+ int error;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ BUG_ON(lo->magic != LO_MAGIC);
+
+ /* Anyone capable of this syscall can do *real bad* things */
+ dprintk(DBG_IOCTL, "%s: nbd_ioctl cmd=%s(0x%x) arg=%lu\n",
+ lo->disk->disk_name, ioctl_cmd_to_ascii(cmd), cmd, arg);
+
+ mutex_lock(&lo->tx_lock);
+ error = __nbd_ioctl(bdev, lo, cmd, arg);
+ mutex_unlock(&lo->tx_lock);
+
+ return error;
+}
+
+static const struct block_device_operations nbd_fops =
+{
+ .owner = THIS_MODULE,
+ .ioctl = nbd_ioctl,
+};
+
+/*
+ * And here should be modules and kernel interface
+ * (Just smiley confuses emacs :-)
+ */
+
+static int __init nbd_init(void)
+{
+ int err = -ENOMEM;
+ int i;
+ int part_shift;
+
+ BUILD_BUG_ON(sizeof(struct nbd_request) != 28);
+
+ if (max_part < 0) {
+ printk(KERN_CRIT "nbd: max_part must be >= 0\n");
+ return -EINVAL;
+ }
+
+ nbd_dev = kcalloc(nbds_max, sizeof(*nbd_dev), GFP_KERNEL);
+ if (!nbd_dev)
+ return -ENOMEM;
+
+ part_shift = 0;
+ if (max_part > 0) {
+ part_shift = fls(max_part);
+
+ /*
+ * Adjust max_part according to part_shift as it is exported
+		 * to user space so that users can know the maximum number of
+		 * partitions the kernel is able to manage.
+ *
+ * Note that -1 is required because partition 0 is reserved
+ * for the whole disk.
+ */
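+		/*
+		 * For instance, loading with max_part=4 gives fls(4) == 3, so
+		 * part_shift becomes 3 and max_part is re-exported as
+		 * (1 << 3) - 1 == 7; each device then spans 1 << 3 == 8 minors
+		 * (the whole disk plus up to 7 partitions).
+		 */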
+ max_part = (1UL << part_shift) - 1;
+ }
+
+ if ((1UL << part_shift) > DISK_MAX_PARTS)
+ return -EINVAL;
+
+ if (nbds_max > 1UL << (MINORBITS - part_shift))
+ return -EINVAL;
+
+ for (i = 0; i < nbds_max; i++) {
+ struct gendisk *disk = alloc_disk(1 << part_shift);
+ if (!disk)
+ goto out;
+ nbd_dev[i].disk = disk;
+ /*
+ * The new linux 2.5 block layer implementation requires
+ * every gendisk to have its very own request_queue struct.
+ * These structs are big so we dynamically allocate them.
+ */
+ disk->queue = blk_init_queue(do_nbd_request, &nbd_lock);
+ if (!disk->queue) {
+ put_disk(disk);
+ goto out;
+ }
+ /*
+ * Tell the block layer that we are not a rotational device
+ */
+ queue_flag_set_unlocked(QUEUE_FLAG_NONROT, disk->queue);
+ }
+
+ if (register_blkdev(NBD_MAJOR, "nbd")) {
+ err = -EIO;
+ goto out;
+ }
+
+ printk(KERN_INFO "nbd: registered device at major %d\n", NBD_MAJOR);
+ dprintk(DBG_INIT, "nbd: debugflags=0x%x\n", debugflags);
+
+ for (i = 0; i < nbds_max; i++) {
+ struct gendisk *disk = nbd_dev[i].disk;
+ nbd_dev[i].file = NULL;
+ nbd_dev[i].magic = LO_MAGIC;
+ nbd_dev[i].flags = 0;
+ INIT_LIST_HEAD(&nbd_dev[i].waiting_queue);
+ spin_lock_init(&nbd_dev[i].queue_lock);
+ INIT_LIST_HEAD(&nbd_dev[i].queue_head);
+ mutex_init(&nbd_dev[i].tx_lock);
+ init_waitqueue_head(&nbd_dev[i].active_wq);
+ init_waitqueue_head(&nbd_dev[i].waiting_wq);
+ nbd_dev[i].blksize = 1024;
+ nbd_dev[i].bytesize = 0;
+ disk->major = NBD_MAJOR;
+ disk->first_minor = i << part_shift;
+ disk->fops = &nbd_fops;
+ disk->private_data = &nbd_dev[i];
+ sprintf(disk->disk_name, "nbd%d", i);
+ set_capacity(disk, 0);
+ add_disk(disk);
+ }
+
+ return 0;
+out:
+ while (i--) {
+ blk_cleanup_queue(nbd_dev[i].disk->queue);
+ put_disk(nbd_dev[i].disk);
+ }
+ kfree(nbd_dev);
+ return err;
+}
+
+static void __exit nbd_cleanup(void)
+{
+ int i;
+ for (i = 0; i < nbds_max; i++) {
+ struct gendisk *disk = nbd_dev[i].disk;
+ nbd_dev[i].magic = 0;
+ if (disk) {
+ del_gendisk(disk);
+ blk_cleanup_queue(disk->queue);
+ put_disk(disk);
+ }
+ }
+ unregister_blkdev(NBD_MAJOR, "nbd");
+ kfree(nbd_dev);
+ printk(KERN_INFO "nbd: unregistered device at major %d\n", NBD_MAJOR);
+}
+
+module_init(nbd_init);
+module_exit(nbd_cleanup);
+
+MODULE_DESCRIPTION("Network Block Device");
+MODULE_LICENSE("GPL");
+
+module_param(nbds_max, int, 0444);
+MODULE_PARM_DESC(nbds_max, "number of network block devices to initialize (default: 16)");
+module_param(max_part, int, 0444);
+MODULE_PARM_DESC(max_part, "number of partitions per device (default: 0)");
+#ifndef NDEBUG
+module_param(debugflags, int, 0644);
+MODULE_PARM_DESC(debugflags, "flags for controlling debug output");
+#endif
diff --git a/drivers/block/osdblk.c b/drivers/block/osdblk.c
new file mode 100644
index 00000000..87311eba
--- /dev/null
+++ b/drivers/block/osdblk.c
@@ -0,0 +1,700 @@
+
+/*
+ osdblk.c -- Export a single SCSI OSD object as a Linux block device
+
+
+ Copyright 2009 Red Hat, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; see the file COPYING. If not, write to
+ the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+
+
+ Instructions for use
+ --------------------
+
+ 1) Map a Linux block device to an existing OSD object.
+
+ In this example, we will use partition id 1234, object id 5678,
+ OSD device /dev/osd1.
+
+ $ echo "1234 5678 /dev/osd1" > /sys/class/osdblk/add
+
+
+ 2) List all active blkdev<->object mappings.
+
+ In this example, we have performed step #1 twice, creating two blkdevs,
+ mapped to two separate OSD objects.
+
+ $ cat /sys/class/osdblk/list
+ 0 174 1234 5678 /dev/osd1
+ 1 179 1994 897123 /dev/osd0
+
+ The columns, in order, are:
+ - blkdev unique id
+ - blkdev assigned major
+ - OSD object partition id
+ - OSD object id
+ - OSD device
+
+
+ 3) Remove an active blkdev<->object mapping.
+
+ In this example, we remove the mapping with blkdev unique id 1.
+
+ $ echo 1 > /sys/class/osdblk/remove
+
+
+ NOTE: The actual creation and deletion of OSD objects is outside the scope
+ of this driver.
+
+ */
+
+#include <linux/kernel.h>
+#include <linux/device.h>
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/slab.h>
+#include <scsi/osd_initiator.h>
+#include <scsi/osd_attributes.h>
+#include <scsi/osd_sec.h>
+#include <scsi/scsi_device.h>
+
+#define DRV_NAME "osdblk"
+#define PFX DRV_NAME ": "
+
+/* #define _OSDBLK_DEBUG */
+#ifdef _OSDBLK_DEBUG
+#define OSDBLK_DEBUG(fmt, a...) \
+ printk(KERN_NOTICE "osdblk @%s:%d: " fmt, __func__, __LINE__, ##a)
+#else
+#define OSDBLK_DEBUG(fmt, a...) \
+ do { if (0) printk(fmt, ##a); } while (0)
+#endif
+
+MODULE_AUTHOR("Jeff Garzik <jeff@garzik.org>");
+MODULE_DESCRIPTION("block device inside an OSD object osdblk.ko");
+MODULE_LICENSE("GPL");
+
+struct osdblk_device;
+
+enum {
+ OSDBLK_MINORS_PER_MAJOR = 256, /* max minors per blkdev */
+ OSDBLK_MAX_REQ = 32, /* max parallel requests */
+ OSDBLK_OP_TIMEOUT = 4 * 60, /* sync OSD req timeout */
+};
+
+struct osdblk_request {
+ struct request *rq; /* blk layer request */
+ struct bio *bio; /* cloned bio */
+ struct osdblk_device *osdev; /* associated blkdev */
+};
+
+struct osdblk_device {
+ int id; /* blkdev unique id */
+
+ int major; /* blkdev assigned major */
+ struct gendisk *disk; /* blkdev's gendisk and rq */
+ struct request_queue *q;
+
+ struct osd_dev *osd; /* associated OSD */
+
+ char name[32]; /* blkdev name, e.g. osdblk34 */
+
+ spinlock_t lock; /* queue lock */
+
+ struct osd_obj_id obj; /* OSD partition, obj id */
+ uint8_t obj_cred[OSD_CAP_LEN]; /* OSD cred */
+
+ struct osdblk_request req[OSDBLK_MAX_REQ]; /* request table */
+
+ struct list_head node;
+
+ char osd_path[0]; /* OSD device path */
+};
+
+static struct class *class_osdblk; /* /sys/class/osdblk */
+static DEFINE_MUTEX(ctl_mutex); /* Serialize open/close/setup/teardown */
+static LIST_HEAD(osdblkdev_list);
+
+static const struct block_device_operations osdblk_bd_ops = {
+ .owner = THIS_MODULE,
+};
+
+static const struct osd_attr g_attr_logical_length = ATTR_DEF(
+ OSD_APAGE_OBJECT_INFORMATION, OSD_ATTR_OI_LOGICAL_LENGTH, 8);
+
+static void osdblk_make_credential(u8 cred_a[OSD_CAP_LEN],
+ const struct osd_obj_id *obj)
+{
+ osd_sec_init_nosec_doall_caps(cred_a, obj, false, true);
+}
+
+/* copied from exofs; move to libosd? */
+/*
+ * Perform a synchronous OSD operation.
+ */
+static int osd_sync_op(struct osd_request *or, int timeout, uint8_t *credential)
+{
+ int ret;
+
+ or->timeout = timeout;
+ ret = osd_finalize_request(or, 0, credential, NULL);
+ if (ret)
+ return ret;
+
+ ret = osd_execute_request(or);
+
+ /* osd_req_decode_sense(or, ret); */
+ return ret;
+}
+
+/*
+ * Perform an asynchronous OSD operation. copied from exofs; move to libosd?
+ */
+static int osd_async_op(struct osd_request *or, osd_req_done_fn *async_done,
+ void *caller_context, u8 *cred)
+{
+ int ret;
+
+ ret = osd_finalize_request(or, 0, cred, NULL);
+ if (ret)
+ return ret;
+
+ ret = osd_execute_request_async(or, async_done, caller_context);
+
+ return ret;
+}
+
+/* copied from exofs; move to libosd? */
+static int extract_attr_from_req(struct osd_request *or, struct osd_attr *attr)
+{
+ struct osd_attr cur_attr = {.attr_page = 0}; /* start with zeros */
+ void *iter = NULL;
+ int nelem;
+
+ do {
+ nelem = 1;
+ osd_req_decode_get_attr_list(or, &cur_attr, &nelem, &iter);
+ if ((cur_attr.attr_page == attr->attr_page) &&
+ (cur_attr.attr_id == attr->attr_id)) {
+ attr->len = cur_attr.len;
+ attr->val_ptr = cur_attr.val_ptr;
+ return 0;
+ }
+ } while (iter);
+
+ return -EIO;
+}
+
+static int osdblk_get_obj_size(struct osdblk_device *osdev, u64 *size_out)
+{
+ struct osd_request *or;
+ struct osd_attr attr;
+ int ret;
+
+ /* start request */
+ or = osd_start_request(osdev->osd, GFP_KERNEL);
+ if (!or)
+ return -ENOMEM;
+
+ /* create a get-attributes(length) request */
+ osd_req_get_attributes(or, &osdev->obj);
+
+ osd_req_add_get_attr_list(or, &g_attr_logical_length, 1);
+
+ /* execute op synchronously */
+ ret = osd_sync_op(or, OSDBLK_OP_TIMEOUT, osdev->obj_cred);
+ if (ret)
+ goto out;
+
+ /* extract length from returned attribute info */
+ attr = g_attr_logical_length;
+ ret = extract_attr_from_req(or, &attr);
+ if (ret)
+ goto out;
+
+ *size_out = get_unaligned_be64(attr.val_ptr);
+
+out:
+ osd_end_request(or);
+ return ret;
+
+}
+
+static void osdblk_osd_complete(struct osd_request *or, void *private)
+{
+ struct osdblk_request *orq = private;
+ struct osd_sense_info osi;
+ int ret = osd_req_decode_sense(or, &osi);
+
+ if (ret) {
+ ret = -EIO;
+ OSDBLK_DEBUG("osdblk_osd_complete with err=%d\n", ret);
+ }
+
+ /* complete OSD request */
+ osd_end_request(or);
+
+ /* complete request passed to osdblk by block layer */
+ __blk_end_request_all(orq->rq, ret);
+}
+
+static void bio_chain_put(struct bio *chain)
+{
+ struct bio *tmp;
+
+ while (chain) {
+ tmp = chain;
+ chain = chain->bi_next;
+
+ bio_put(tmp);
+ }
+}
+
+static struct bio *bio_chain_clone(struct bio *old_chain, gfp_t gfpmask)
+{
+ struct bio *tmp, *new_chain = NULL, *tail = NULL;
+
+ while (old_chain) {
+ tmp = bio_kmalloc(gfpmask, old_chain->bi_max_vecs);
+ if (!tmp)
+ goto err_out;
+
+ __bio_clone(tmp, old_chain);
+ tmp->bi_bdev = NULL;
+ gfpmask &= ~__GFP_WAIT;
+ tmp->bi_next = NULL;
+
+ if (!new_chain)
+ new_chain = tail = tmp;
+ else {
+ tail->bi_next = tmp;
+ tail = tmp;
+ }
+
+ old_chain = old_chain->bi_next;
+ }
+
+ return new_chain;
+
+err_out:
+ OSDBLK_DEBUG("bio_chain_clone with err\n");
+ bio_chain_put(new_chain);
+ return NULL;
+}
+
+static void osdblk_rq_fn(struct request_queue *q)
+{
+ struct osdblk_device *osdev = q->queuedata;
+
+ while (1) {
+ struct request *rq;
+ struct osdblk_request *orq;
+ struct osd_request *or;
+ struct bio *bio;
+ bool do_write, do_flush;
+
+ /* peek at request from block layer */
+ rq = blk_fetch_request(q);
+ if (!rq)
+ break;
+
+ /* filter out block requests we don't understand */
+ if (rq->cmd_type != REQ_TYPE_FS) {
+ blk_end_request_all(rq, 0);
+ continue;
+ }
+
+ /* deduce our operation (read, write, flush) */
+ /* I wish the block layer simplified cmd_type/cmd_flags/cmd[]
+ * into a clearly defined set of RPC commands:
+ * read, write, flush, scsi command, power mgmt req,
+ * driver-specific, etc.
+ */
+
+ do_flush = rq->cmd_flags & REQ_FLUSH;
+ do_write = (rq_data_dir(rq) == WRITE);
+
+ if (!do_flush) { /* osd_flush does not use a bio */
+ /* a bio clone to be passed down to OSD request */
+ bio = bio_chain_clone(rq->bio, GFP_ATOMIC);
+ if (!bio)
+ break;
+ } else
+ bio = NULL;
+
+ /* alloc internal OSD request, for OSD command execution */
+ or = osd_start_request(osdev->osd, GFP_ATOMIC);
+ if (!or) {
+ bio_chain_put(bio);
+ OSDBLK_DEBUG("osd_start_request with err\n");
+ break;
+ }
+
+ orq = &osdev->req[rq->tag];
+ orq->rq = rq;
+ orq->bio = bio;
+ orq->osdev = osdev;
+
+ /* init OSD command: flush, write or read */
+ if (do_flush)
+ osd_req_flush_object(or, &osdev->obj,
+ OSD_CDB_FLUSH_ALL, 0, 0);
+ else if (do_write)
+ osd_req_write(or, &osdev->obj, blk_rq_pos(rq) * 512ULL,
+ bio, blk_rq_bytes(rq));
+ else
+ osd_req_read(or, &osdev->obj, blk_rq_pos(rq) * 512ULL,
+ bio, blk_rq_bytes(rq));
+
+ OSDBLK_DEBUG("%s 0x%x bytes at 0x%llx\n",
+ do_flush ? "flush" : do_write ?
+ "write" : "read", blk_rq_bytes(rq),
+ blk_rq_pos(rq) * 512ULL);
+
+ /* begin OSD command execution */
+ if (osd_async_op(or, osdblk_osd_complete, orq,
+ osdev->obj_cred)) {
+ osd_end_request(or);
+ blk_requeue_request(q, rq);
+ bio_chain_put(bio);
+ OSDBLK_DEBUG("osd_execute_request_async with err\n");
+ break;
+ }
+
+ /* remove the special 'flush' marker, now that the command
+ * is executing
+ */
+ rq->special = NULL;
+ }
+}
+
+static void osdblk_free_disk(struct osdblk_device *osdev)
+{
+ struct gendisk *disk = osdev->disk;
+
+ if (!disk)
+ return;
+
+ if (disk->flags & GENHD_FL_UP)
+ del_gendisk(disk);
+ if (disk->queue)
+ blk_cleanup_queue(disk->queue);
+ put_disk(disk);
+}
+
+static int osdblk_init_disk(struct osdblk_device *osdev)
+{
+ struct gendisk *disk;
+ struct request_queue *q;
+ int rc;
+ u64 obj_size = 0;
+
+ /* contact OSD, request size info about the object being mapped */
+ rc = osdblk_get_obj_size(osdev, &obj_size);
+ if (rc)
+ return rc;
+
+ /* create gendisk info */
+ disk = alloc_disk(OSDBLK_MINORS_PER_MAJOR);
+ if (!disk)
+ return -ENOMEM;
+
+ sprintf(disk->disk_name, DRV_NAME "%d", osdev->id);
+ disk->major = osdev->major;
+ disk->first_minor = 0;
+ disk->fops = &osdblk_bd_ops;
+ disk->private_data = osdev;
+
+ /* init rq */
+ q = blk_init_queue(osdblk_rq_fn, &osdev->lock);
+ if (!q) {
+ put_disk(disk);
+ return -ENOMEM;
+ }
+
+ /* switch queue to TCQ mode; allocate tag map */
+ rc = blk_queue_init_tags(q, OSDBLK_MAX_REQ, NULL);
+ if (rc) {
+ blk_cleanup_queue(q);
+ put_disk(disk);
+ return rc;
+ }
+
+ /* Set our limits to the lower device limits, because osdblk cannot
+	 * sleep when allocating a lower request and therefore cannot do
+	 * bounce buffering.
+ */
+ blk_queue_stack_limits(q, osd_request_queue(osdev->osd));
+
+ blk_queue_prep_rq(q, blk_queue_start_tag);
+ blk_queue_flush(q, REQ_FLUSH);
+
+ disk->queue = q;
+
+ q->queuedata = osdev;
+
+ osdev->disk = disk;
+ osdev->q = q;
+
+ /* finally, announce the disk to the world */
+ set_capacity(disk, obj_size / 512ULL);
+ add_disk(disk);
+
+ printk(KERN_INFO "%s: Added of size 0x%llx\n",
+ disk->disk_name, (unsigned long long)obj_size);
+
+ return 0;
+}
+
+/********************************************************************
+ * /sys/class/osdblk/
+ * add map OSD object to blkdev
+ * remove unmap OSD object
+ * list show mappings
+ *******************************************************************/
+
+static void class_osdblk_release(struct class *cls)
+{
+ kfree(cls);
+}
+
+static ssize_t class_osdblk_list(struct class *c,
+ struct class_attribute *attr,
+ char *data)
+{
+ int n = 0;
+ struct list_head *tmp;
+
+ mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
+
+ list_for_each(tmp, &osdblkdev_list) {
+ struct osdblk_device *osdev;
+
+ osdev = list_entry(tmp, struct osdblk_device, node);
+
+ n += sprintf(data+n, "%d %d %llu %llu %s\n",
+ osdev->id,
+ osdev->major,
+ osdev->obj.partition,
+ osdev->obj.id,
+ osdev->osd_path);
+ }
+
+ mutex_unlock(&ctl_mutex);
+ return n;
+}
+
+static ssize_t class_osdblk_add(struct class *c,
+ struct class_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct osdblk_device *osdev;
+ ssize_t rc;
+ int irc, new_id = 0;
+ struct list_head *tmp;
+
+ if (!try_module_get(THIS_MODULE))
+ return -ENODEV;
+
+ /* new osdblk_device object */
+ osdev = kzalloc(sizeof(*osdev) + strlen(buf) + 1, GFP_KERNEL);
+ if (!osdev) {
+ rc = -ENOMEM;
+ goto err_out_mod;
+ }
+
+ /* static osdblk_device initialization */
+ spin_lock_init(&osdev->lock);
+ INIT_LIST_HEAD(&osdev->node);
+
+ /* generate unique id: find highest unique id, add one */
+
+ mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
+
+ list_for_each(tmp, &osdblkdev_list) {
+ struct osdblk_device *osdev;
+
+ osdev = list_entry(tmp, struct osdblk_device, node);
+ if (osdev->id > new_id)
+ new_id = osdev->id + 1;
+ }
+
+ osdev->id = new_id;
+
+ /* add to global list */
+ list_add_tail(&osdev->node, &osdblkdev_list);
+
+ mutex_unlock(&ctl_mutex);
+
+ /* parse add command */
+ if (sscanf(buf, "%llu %llu %s", &osdev->obj.partition, &osdev->obj.id,
+ osdev->osd_path) != 3) {
+ rc = -EINVAL;
+ goto err_out_slot;
+ }
+
+ /* initialize rest of new object */
+ sprintf(osdev->name, DRV_NAME "%d", osdev->id);
+
+ /* contact requested OSD */
+ osdev->osd = osduld_path_lookup(osdev->osd_path);
+ if (IS_ERR(osdev->osd)) {
+ rc = PTR_ERR(osdev->osd);
+ goto err_out_slot;
+ }
+
+ /* build OSD credential */
+ osdblk_make_credential(osdev->obj_cred, &osdev->obj);
+
+ /* register our block device */
+ irc = register_blkdev(0, osdev->name);
+ if (irc < 0) {
+ rc = irc;
+ goto err_out_osd;
+ }
+
+ osdev->major = irc;
+
+ /* set up and announce blkdev mapping */
+ rc = osdblk_init_disk(osdev);
+ if (rc)
+ goto err_out_blkdev;
+
+ return count;
+
+err_out_blkdev:
+ unregister_blkdev(osdev->major, osdev->name);
+err_out_osd:
+ osduld_put_device(osdev->osd);
+err_out_slot:
+ mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
+ list_del_init(&osdev->node);
+ mutex_unlock(&ctl_mutex);
+
+ kfree(osdev);
+err_out_mod:
+ OSDBLK_DEBUG("Error adding device %s\n", buf);
+ module_put(THIS_MODULE);
+ return rc;
+}
+
+static ssize_t class_osdblk_remove(struct class *c,
+ struct class_attribute *attr,
+ const char *buf,
+ size_t count)
+{
+ struct osdblk_device *osdev = NULL;
+ int target_id, rc;
+ unsigned long ul;
+ struct list_head *tmp;
+
+ rc = strict_strtoul(buf, 10, &ul);
+ if (rc)
+ return rc;
+
+ /* convert to int; abort if we lost anything in the conversion */
+ target_id = (int) ul;
+ if (target_id != ul)
+ return -EINVAL;
+
+ /* remove object from list immediately */
+ mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
+
+ list_for_each(tmp, &osdblkdev_list) {
+ osdev = list_entry(tmp, struct osdblk_device, node);
+ if (osdev->id == target_id) {
+ list_del_init(&osdev->node);
+ break;
+ }
+ osdev = NULL;
+ }
+
+ mutex_unlock(&ctl_mutex);
+
+ if (!osdev)
+ return -ENOENT;
+
+ /* clean up and free blkdev and associated OSD connection */
+ osdblk_free_disk(osdev);
+ unregister_blkdev(osdev->major, osdev->name);
+ osduld_put_device(osdev->osd);
+ kfree(osdev);
+
+ /* release module ref */
+ module_put(THIS_MODULE);
+
+ return count;
+}
+
+static struct class_attribute class_osdblk_attrs[] = {
+ __ATTR(add, 0200, NULL, class_osdblk_add),
+ __ATTR(remove, 0200, NULL, class_osdblk_remove),
+ __ATTR(list, 0444, class_osdblk_list, NULL),
+ __ATTR_NULL
+};
+
+static int osdblk_sysfs_init(void)
+{
+ int ret = 0;
+
+ /*
+ * create control files in sysfs
+ * /sys/class/osdblk/...
+ */
+ class_osdblk = kzalloc(sizeof(*class_osdblk), GFP_KERNEL);
+ if (!class_osdblk)
+ return -ENOMEM;
+
+ class_osdblk->name = DRV_NAME;
+ class_osdblk->owner = THIS_MODULE;
+ class_osdblk->class_release = class_osdblk_release;
+ class_osdblk->class_attrs = class_osdblk_attrs;
+
+ ret = class_register(class_osdblk);
+ if (ret) {
+ kfree(class_osdblk);
+ class_osdblk = NULL;
+ printk(PFX "failed to create class osdblk\n");
+ return ret;
+ }
+
+ return 0;
+}
+
+static void osdblk_sysfs_cleanup(void)
+{
+ if (class_osdblk)
+ class_destroy(class_osdblk);
+ class_osdblk = NULL;
+}
+
+static int __init osdblk_init(void)
+{
+ int rc;
+
+ rc = osdblk_sysfs_init();
+ if (rc)
+ return rc;
+
+ return 0;
+}
+
+static void __exit osdblk_exit(void)
+{
+ osdblk_sysfs_cleanup();
+}
+
+module_init(osdblk_init);
+module_exit(osdblk_exit);
+
diff --git a/drivers/block/paride/Kconfig b/drivers/block/paride/Kconfig
new file mode 100644
index 00000000..28cf3082
--- /dev/null
+++ b/drivers/block/paride/Kconfig
@@ -0,0 +1,300 @@
+#
+# PARIDE configuration
+#
+# PARIDE doesn't need PARPORT, but if PARPORT is configured as a module,
+# PARIDE must also be a module.
+# PARIDE only supports PC style parports. Tough for USB or other parports...
+
+comment "Parallel IDE high-level drivers"
+ depends on PARIDE
+
+config PARIDE_PD
+ tristate "Parallel port IDE disks"
+ depends on PARIDE
+ help
+ This option enables the high-level driver for IDE-type disk devices
+ connected through a parallel port. If you chose to build PARIDE
+ support into your kernel, you may answer Y here to build in the
+ parallel port IDE driver, otherwise you should answer M to build
+ it as a loadable module. The module will be called pd. You
+ must also have at least one parallel port protocol driver in your
+ system. Among the devices supported by this driver are the SyQuest
+ EZ-135, EZ-230 and SparQ drives, the Avatar Shark and the backpack
+ hard drives from MicroSolutions.
+
+config PARIDE_PCD
+ tristate "Parallel port ATAPI CD-ROMs"
+ depends on PARIDE
+ ---help---
+ This option enables the high-level driver for ATAPI CD-ROM devices
+ connected through a parallel port. If you chose to build PARIDE
+ support into your kernel, you may answer Y here to build in the
+ parallel port ATAPI CD-ROM driver, otherwise you should answer M to
+ build it as a loadable module. The module will be called pcd. You
+ must also have at least one parallel port protocol driver in your
+ system. Among the devices supported by this driver are the
+ MicroSolutions backpack CD-ROM drives and the Freecom Power CD. If
+ you have such a CD-ROM drive, you should also say Y or M to "ISO
+ 9660 CD-ROM file system support" below, because that's the file
+ system used on CD-ROMs.
+
+config PARIDE_PF
+ tristate "Parallel port ATAPI disks"
+ depends on PARIDE
+ help
+ This option enables the high-level driver for ATAPI disk devices
+ connected through a parallel port. If you chose to build PARIDE
+ support into your kernel, you may answer Y here to build in the
+ parallel port ATAPI disk driver, otherwise you should answer M
+ to build it as a loadable module. The module will be called pf.
+ You must also have at least one parallel port protocol driver in
+ your system. Among the devices supported by this driver are the
+ MicroSolutions backpack PD/CD drive and the Imation Superdisk
+ LS-120 drive.
+
+config PARIDE_PT
+ tristate "Parallel port ATAPI tapes"
+ depends on PARIDE
+ help
+ This option enables the high-level driver for ATAPI tape devices
+ connected through a parallel port. If you chose to build PARIDE
+ support into your kernel, you may answer Y here to build in the
+ parallel port ATAPI tape driver, otherwise you should answer M
+ to build it as a loadable module. The module will be called pt.
+ You must also have at least one parallel port protocol driver in
+ your system. Among the devices supported by this driver is the
+ parallel port version of the HP 5GB drive.
+
+config PARIDE_PG
+ tristate "Parallel port generic ATAPI devices"
+ depends on PARIDE
+ ---help---
+ This option enables a special high-level driver for generic ATAPI
+ devices connected through a parallel port. The driver allows user
+ programs, such as cdrtools, to send ATAPI commands directly to a
+ device.
+
+ If you chose to build PARIDE support into your kernel, you may
+ answer Y here to build in the parallel port generic ATAPI driver,
+ otherwise you should answer M to build it as a loadable module. The
+ module will be called pg.
+
+ You must also have at least one parallel port protocol driver in
+ your system.
+
+ This driver implements an API loosely related to the generic SCSI
+ driver. See <file:include/linux/pg.h> for details.
+
+ You can obtain the most recent version of cdrtools from
+ <ftp://ftp.berlios.de/pub/cdrecord/>. Versions 1.6.1a3 and
+ later fully support this driver.
+
+comment "Parallel IDE protocol modules"
+ depends on PARIDE
+
+config PARIDE_ATEN
+ tristate "ATEN EH-100 protocol"
+ depends on PARIDE
+ help
+ This option enables support for the ATEN EH-100 parallel port IDE
+ protocol. This protocol is used in some inexpensive low performance
+ parallel port kits made in Hong Kong. If you chose to build PARIDE
+ support into your kernel, you may answer Y here to build in the
+ protocol driver, otherwise you should answer M to build it as a
+ loadable module. The module will be called aten. You must also
+ have a high-level driver for the type of device that you want to
+ support.
+
+config PARIDE_BPCK
+ tristate "MicroSolutions backpack (Series 5) protocol"
+ depends on PARIDE
+ ---help---
+ This option enables support for the Micro Solutions BACKPACK
+ parallel port Series 5 IDE protocol. (Most BACKPACK drives made
+ before 1999 were Series 5.) Series 5 drives will NOT always have the
+ Series noted on the bottom of the drive. Series 6 drives will.
+
+ In other words, if your BACKPACK drive doesn't say "Series 6" on the
+ bottom, enable this option.
+
+ If you chose to build PARIDE support into your kernel, you may
+ answer Y here to build in the protocol driver, otherwise you should
+ answer M to build it as a loadable module. The module will be
+ called bpck. You must also have a high-level driver for the type
+ of device that you want to support.
+
+config PARIDE_BPCK6
+ tristate "MicroSolutions backpack (Series 6) protocol"
+ depends on PARIDE && !64BIT
+ ---help---
+ This option enables support for the Micro Solutions BACKPACK
+ parallel port Series 6 IDE protocol. (Most BACKPACK drives made
+ after 1999 were Series 6.) Series 6 drives will have the Series noted
+ on the bottom of the drive. Series 5 drives don't always have it
+ noted.
+
+ In other words, if your BACKPACK drive says "Series 6" on the
+ bottom, enable this option.
+
+ If you chose to build PARIDE support into your kernel, you may
+ answer Y here to build in the protocol driver, otherwise you should
+ answer M to build it as a loadable module. The module will be
+ called bpck6. You must also have a high-level driver for the type
+ of device that you want to support.
+
+config PARIDE_COMM
+ tristate "DataStor Commuter protocol"
+ depends on PARIDE
+ help
+ This option enables support for the Commuter parallel port IDE
+ protocol from DataStor. If you chose to build PARIDE support
+ into your kernel, you may answer Y here to build in the protocol
+ driver, otherwise you should answer M to build it as a loadable
+ module. The module will be called comm. You must also have
+ a high-level driver for the type of device that you want to support.
+
+config PARIDE_DSTR
+ tristate "DataStor EP-2000 protocol"
+ depends on PARIDE
+ help
+ This option enables support for the EP-2000 parallel port IDE
+ protocol from DataStor. If you chose to build PARIDE support
+ into your kernel, you may answer Y here to build in the protocol
+ driver, otherwise you should answer M to build it as a loadable
+ module. The module will be called dstr. You must also have
+ a high-level driver for the type of device that you want to support.
+
+config PARIDE_FIT2
+ tristate "FIT TD-2000 protocol"
+ depends on PARIDE
+ help
+ This option enables support for the TD-2000 parallel port IDE
+ protocol from Fidelity International Technology. This is a simple
+ (low speed) adapter that is used in some portable hard drives. If
+ you chose to build PARIDE support into your kernel, you may answer Y
+ here to build in the protocol driver, otherwise you should answer M
+ to build it as a loadable module. The module will be called fit2.
+ You must also have a high-level driver for the type of device that
+ you want to support.
+
+config PARIDE_FIT3
+ tristate "FIT TD-3000 protocol"
+ depends on PARIDE
+ help
+ This option enables support for the TD-3000 parallel port IDE
+ protocol from Fidelity International Technology. This protocol is
+ used in newer models of their portable disk, CD-ROM and PD/CD
+ devices. If you chose to build PARIDE support into your kernel, you
+ may answer Y here to build in the protocol driver, otherwise you
+ should answer M to build it as a loadable module. The module will be
+ called fit3. You must also have a high-level driver for the type
+ of device that you want to support.
+
+config PARIDE_EPAT
+ tristate "Shuttle EPAT/EPEZ protocol"
+ depends on PARIDE
+ help
+ This option enables support for the EPAT parallel port IDE protocol.
+ EPAT is a parallel port IDE adapter manufactured by Shuttle
+ Technology and widely used in devices from major vendors such as
+ Hewlett-Packard, SyQuest, Imation and Avatar. If you chose to build
+ PARIDE support into your kernel, you may answer Y here to build in
+ the protocol driver, otherwise you should answer M to build it as a
+ loadable module. The module will be called epat. You must also
+ have a high-level driver for the type of device that you want to
+ support.
+
+config PARIDE_EPATC8
+ bool "Support c7/c8 chips (EXPERIMENTAL)"
+ depends on PARIDE_EPAT && EXPERIMENTAL
+ help
+ This option enables support for the newer Shuttle EP1284 (aka c7 and
+ c8) chip. You need this if you are using any recent Imation SuperDisk
+ (LS-120) drive.
+
+config PARIDE_EPIA
+ tristate "Shuttle EPIA protocol"
+ depends on PARIDE
+ help
+ This option enables support for the (obsolete) EPIA parallel port
+ IDE protocol from Shuttle Technology. This adapter can still be
+ found in some no-name kits. If you chose to build PARIDE support
+ into your kernel, you may answer Y here to build in the protocol
+ driver, otherwise you should answer M to build it as a loadable
+ module. The module will be called epia. You must also have a
+ high-level driver for the type of device that you want to support.
+
+config PARIDE_FRIQ
+ tristate "Freecom IQ ASIC-2 protocol"
+ depends on PARIDE
+ help
+ This option enables support for version 2 of the Freecom IQ parallel
+ port IDE adapter. This adapter is used by the Maxell Superdisk
+ drive. If you chose to build PARIDE support into your kernel, you
+ may answer Y here to build in the protocol driver, otherwise you
+ should answer M to build it as a loadable module. The module will be
+ called friq. You must also have a high-level driver for the type
+ of device that you want to support.
+
+config PARIDE_FRPW
+ tristate "FreeCom power protocol"
+ depends on PARIDE
+ help
+ This option enables support for the Freecom power parallel port IDE
+ protocol. If you chose to build PARIDE support into your kernel, you
+ may answer Y here to build in the protocol driver, otherwise you
+ should answer M to build it as a loadable module. The module will be
+ called frpw. You must also have a high-level driver for the type
+ of device that you want to support.
+
+config PARIDE_KBIC
+ tristate "KingByte KBIC-951A/971A protocols"
+ depends on PARIDE
+ help
+ This option enables support for the KBIC-951A and KBIC-971A parallel
+ port IDE protocols from KingByte Information Corp. KingByte's
+ adapters appear in many no-name portable disk and CD-ROM products,
+ especially in Europe. If you chose to build PARIDE support into your
+ kernel, you may answer Y here to build in the protocol driver,
+ otherwise you should answer M to build it as a loadable module. The
+ module will be called kbic. You must also have a high-level driver
+ for the type of device that you want to support.
+
+config PARIDE_KTTI
+ tristate "KT PHd protocol"
+ depends on PARIDE
+ help
+ This option enables support for the "PHd" parallel port IDE protocol
+ from KT Technology. This is a simple (low speed) adapter that is
+ used in some 2.5" portable hard drives. If you chose to build PARIDE
+ support into your kernel, you may answer Y here to build in the
+ protocol driver, otherwise you should answer M to build it as a
+ loadable module. The module will be called ktti. You must also
+ have a high-level driver for the type of device that you want to
+ support.
+
+config PARIDE_ON20
+ tristate "OnSpec 90c20 protocol"
+ depends on PARIDE
+ help
+ This option enables support for the (obsolete) 90c20 parallel port
+ IDE protocol from OnSpec (often marketed under the ValuStore brand
+ name). If you chose to build PARIDE support into your kernel, you
+ may answer Y here to build in the protocol driver, otherwise you
+ should answer M to build it as a loadable module. The module will
+ be called on20. You must also have a high-level driver for the
+ type of device that you want to support.
+
+config PARIDE_ON26
+ tristate "OnSpec 90c26 protocol"
+ depends on PARIDE
+ help
+ This option enables support for the 90c26 parallel port IDE protocol
+ from OnSpec Electronics (often marketed under the ValuStore brand
+ name). If you chose to build PARIDE support into your kernel, you
+ may answer Y here to build in the protocol driver, otherwise you
+ should answer M to build it as a loadable module. The module will be
+ called on26. You must also have a high-level driver for the type
+ of device that you want to support.
+
+#
diff --git a/drivers/block/paride/Makefile b/drivers/block/paride/Makefile
new file mode 100644
index 00000000..a539e004
--- /dev/null
+++ b/drivers/block/paride/Makefile
@@ -0,0 +1,28 @@
+#
+# Makefile for Parallel port IDE device drivers.
+#
+# 7 October 2000, Bartlomiej Zolnierkiewicz <bkz@linux-ide.org>
+# Rewritten to use lists instead of if-statements.
+#
+
+obj-$(CONFIG_PARIDE) += paride.o
+obj-$(CONFIG_PARIDE_ATEN) += aten.o
+obj-$(CONFIG_PARIDE_BPCK) += bpck.o
+obj-$(CONFIG_PARIDE_COMM) += comm.o
+obj-$(CONFIG_PARIDE_DSTR) += dstr.o
+obj-$(CONFIG_PARIDE_KBIC) += kbic.o
+obj-$(CONFIG_PARIDE_EPAT) += epat.o
+obj-$(CONFIG_PARIDE_EPIA) += epia.o
+obj-$(CONFIG_PARIDE_FRPW) += frpw.o
+obj-$(CONFIG_PARIDE_FRIQ) += friq.o
+obj-$(CONFIG_PARIDE_FIT2) += fit2.o
+obj-$(CONFIG_PARIDE_FIT3) += fit3.o
+obj-$(CONFIG_PARIDE_ON20) += on20.o
+obj-$(CONFIG_PARIDE_ON26) += on26.o
+obj-$(CONFIG_PARIDE_KTTI) += ktti.o
+obj-$(CONFIG_PARIDE_BPCK6) += bpck6.o
+obj-$(CONFIG_PARIDE_PD) += pd.o
+obj-$(CONFIG_PARIDE_PCD) += pcd.o
+obj-$(CONFIG_PARIDE_PF) += pf.o
+obj-$(CONFIG_PARIDE_PT) += pt.o
+obj-$(CONFIG_PARIDE_PG) += pg.o
diff --git a/drivers/block/paride/Transition-notes b/drivers/block/paride/Transition-notes
new file mode 100644
index 00000000..70374907
--- /dev/null
+++ b/drivers/block/paride/Transition-notes
@@ -0,0 +1,128 @@
+Lemma 1:
+ If ps_tq is scheduled, ps_tq_active is 1. ps_tq_int() can be called
+ only when ps_tq_active is 1.
+Proof: All assignments to ps_tq_active and all scheduling of ps_tq happen
+ under ps_spinlock. There are three places where that can happen:
+ one in ps_set_intr() (A) and two in ps_tq_int() (B and C).
+ Consider the sequence of these events. A can not be preceded by
+ anything except B, since it is under if (!ps_tq_active) under
+ ps_spinlock. C is always preceded by B, since we can't reach it
+ other than through B and we don't drop ps_spinlock between them.
+ IOW, the sequence is A?(BA|BC|B)*. OTOH, number of B can not exceed
+ the sum of numbers of A and C, since each call of ps_tq_int() is
+ the result of ps_tq execution. Therefore, the sequence starts with
+ A and each B is preceded by either A or C. Moments when we enter
+ ps_tq_int() are sandwiched between {A,C} and B in that sequence,
+ since at any time number of B can not exceed the number of these
+ moments which, in turn, can not exceed the number of A and C.
+ In other words, the sequence of events is (A or C set ps_tq_active to
+ 1 and schedule ps_tq, ps_tq is executed, ps_tq_int() is entered,
+ B resets ps_tq_active)*.
+
+
+Consider the following area:
+ * in do_pd_request1(): to calls of pi_do_claimed() and return in
+ case when pd_req is NULL.
+ * in next_request(): to call of do_pd_request1()
+ * in do_pd_read(): to call of ps_set_intr()
+ * in do_pd_read_start(): to calls of pi_do_claimed(), next_request()
+and ps_set_intr()
+ * in do_pd_read_drq(): to calls of pi_do_claimed() and next_request()
+ * in do_pd_write(): to call of ps_set_intr()
+ * in do_pd_write_start(): to calls of pi_do_claimed(), next_request()
+and ps_set_intr()
+ * in do_pd_write_done(): to calls of pi_do_claimed() and next_request()
+ * in ps_set_intr(): to check for ps_tq_active and to scheduling
+ ps_tq if ps_tq_active was 0.
+ * in ps_tq_int(): from the moment when we get ps_spinlock() to the
+ return, call of con() or scheduling ps_tq.
+ * in pi_schedule_claimed() when called from pi_do_claimed() called from
+ pd.c, everything until returning 1 or setting ->claim_cont
+ on the path that returns 0
+ * in pi_do_claimed() when called from pd.c, everything until the call
+ of pi_do_claimed() plus everything until the call of cont() if
+ pi_do_claimed() has returned 1.
+ * in pi_wake_up() called for PIA that belongs to pd.c, everything from
+ the moment when pi_spinlock has been acquired.
+
+Lemma 2:
+ 1) at any time at most one thread of execution can be in that area or
+ be preempted there.
+ 2) When there is such a thread, pd_busy is set or pd_lock is held by
+ that thread.
+ 3) When there is such a thread, ps_tq_active is 0 or ps_spinlock is
+ held by that thread.
+ 4) When there is such a thread, all PIA belonging to pd.c have NULL
+ ->claim_cont or pi_spinlock is held by thread in question.
+
+Proof: consider the first moment when the above is not true.
+
+(1) can become not true if some thread enters that area while another is there.
+ a) do_pd_request1() can be called from next_request() or do_pd_request()
+ In the first case the thread was already in the area. In the second,
+ the thread was holding pd_lock and found pd_busy not set, which would
+ mean that (2) was already not true.
+ b) ps_set_intr() and pi_schedule_claimed() can be called only from the
+ area.
+ c) pi_do_claimed() is called by pd.c only from the area.
+ d) ps_tq_int() can enter the area only when the thread is holding
+ ps_spinlock and ps_tq_active is 1 (due to Lemma 1). It means that
+ (3) was already not true.
+ e) do_pd_{read,write}* could be called only from the area. The only
+ case that needs consideration is call from pi_wake_up() and there
+ we would have to be called for the PIA that got ->claimed_cont
+ from pd.c. That could happen only if pi_do_claimed() had been
+ called from pd.c for that PIA, which happens only for PIA belonging
+ to pd.c.
+ f) pi_wake_up() can enter the area only when the thread is holding
+ pi_spinlock and ->claimed_cont is non-NULL for PIA belonging to
+ pd.c. It means that (4) was already not true.
+
+(2) can become not true only when pd_lock is released by the thread in question.
+ Indeed, pd_busy is reset only in the area and thread that resets
+ it is holding pd_lock. The only place within the area where we
+ release pd_lock is in pd_next_buf() (called from within the area).
+ But that code does not reset pd_busy, so pd_busy would have to be
+ 0 when pd_next_buf() had acquired pd_lock. If it became 0 while
+ we were acquiring the lock, (1) would be already false, since
+ the thread that had reset it would be in the area simultaneously.
+ If it was 0 before we tried to acquire pd_lock, (2) would be
+ already false.
+
+For similar reasons, (3) can become not true only when ps_spinlock is released
+by the thread in question. However, all such places within the area are right
+after resetting ps_tq_active to 0.
+
+(4) is done the same way - all places where we release pi_spinlock within
+the area are either after resetting ->claimed_cont to NULL while holding
+pi_spinlock, or after not touching ->claimed_cont since acquiring pi_spinlock
+also in the area. The only place where ->claimed_cont is made non-NULL is
+in the area, under pi_spinlock and we do not release it until after leaving
+the area.
+
+QED.
+
+
+Corollary 1: ps_tq_active can be killed. Indeed, the only place where we
+check its value is in ps_set_intr() and if it had been non-zero at that
+point, we would have violated either (2.1) (if it was set while ps_set_intr()
+was acquiring ps_spinlock) or (2.3) (if it was set when we started to
+acquire ps_spinlock).
+
+Corollary 2: ps_spinlock can be killed. Indeed, Lemma 1 and Lemma 2 show
+that the only possible contention is between scheduling ps_tq followed by
+immediate release of spinlock and beginning of execution of ps_tq on
+another CPU.
+
+Corollary 3: assignment to pd_busy in do_pd_read_start() and do_pd_write_start()
+can be killed. Indeed, we are not holding pd_lock and thus pd_busy is already
+1 here.
+
+Corollary 4: in ps_tq_int() uses of con can be replaced with uses of
+ps_continuation, since the latter is changed only from the area.
+We don't need to reset it to NULL, since we are guaranteed that there
+will be a call of ps_set_intr() before we look at ps_continuation again.
+We can remove the check for ps_continuation being NULL for the same
+reason - the value is guaranteed to be set by the last ps_set_intr() and
+we never pass it NULL. Assignments at the beginning of ps_set_intr()
+can be taken to callers as long as they remain within the area.
diff --git a/drivers/block/paride/aten.c b/drivers/block/paride/aten.c
new file mode 100644
index 00000000..26954655
--- /dev/null
+++ b/drivers/block/paride/aten.c
@@ -0,0 +1,162 @@
+/*
+ aten.c (c) 1997-8 Grant R. Guenther <grant@torque.net>
+ Under the terms of the GNU General Public License.
+
+ aten.c is a low-level protocol driver for the ATEN EH-100
+ parallel port adapter. The EH-100 supports 4-bit and 8-bit
+ modes only. There is also an EH-132 which supports EPP mode
+ transfers. The EH-132 is not yet supported.
+
+*/
+
+/* Changes:
+
+ 1.01 GRG 1998.05.05 init_proto, release_proto
+
+*/
+
+#define ATEN_VERSION "1.01"
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/kernel.h>
+#include <linux/wait.h>
+#include <linux/types.h>
+#include <asm/io.h>
+
+#include "paride.h"
+
+#define j44(a,b) ((((a>>4)&0x0f)|(b&0xf0))^0x88)
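+
+/* j44 rebuilds one data byte from two nibble reads made through the
+   status port: the low nibble comes from a, the high nibble from b, and
+   the xor with 0x88 undoes the hardware inversion of the BUSY line in
+   each half. */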
+
+/* cont = 0 - access the IDE register file
+ cont = 1 - access the IDE command set
+*/
+
+static int cont_map[2] = { 0x08, 0x20 };
+
+static void aten_write_regr( PIA *pi, int cont, int regr, int val)
+
+{ int r;
+
+ r = regr + cont_map[cont] + 0x80;
+
+ w0(r); w2(0xe); w2(6); w0(val); w2(7); w2(6); w2(0xc);
+}
+
+static int aten_read_regr( PIA *pi, int cont, int regr )
+
+{ int a, b, r;
+
+ r = regr + cont_map[cont] + 0x40;
+
+ switch (pi->mode) {
+
+ case 0: w0(r); w2(0xe); w2(6);
+ w2(7); w2(6); w2(0);
+ a = r1(); w0(0x10); b = r1(); w2(0xc);
+ return j44(a,b);
+
+ case 1: r |= 0x10;
+ w0(r); w2(0xe); w2(6); w0(0xff);
+ w2(0x27); w2(0x26); w2(0x20);
+ a = r0();
+ w2(0x26); w2(0xc);
+ return a;
+ }
+ return -1;
+}
+
+static void aten_read_block( PIA *pi, char * buf, int count )
+
+{ int k, a, b, c, d;
+
+ switch (pi->mode) {
+
+ case 0: w0(0x48); w2(0xe); w2(6);
+ for (k=0;k<count/2;k++) {
+ w2(7); w2(6); w2(2);
+ a = r1(); w0(0x58); b = r1();
+ w2(0); d = r1(); w0(0x48); c = r1();
+ buf[2*k] = j44(c,d);
+ buf[2*k+1] = j44(a,b);
+ }
+ w2(0xc);
+ break;
+
+ case 1: w0(0x58); w2(0xe); w2(6);
+ for (k=0;k<count/2;k++) {
+ w2(0x27); w2(0x26); w2(0x22);
+ a = r0(); w2(0x20); b = r0();
+ buf[2*k] = b; buf[2*k+1] = a;
+ }
+ w2(0x26); w2(0xc);
+ break;
+ }
+}
+
+static void aten_write_block( PIA *pi, char * buf, int count )
+
+{ int k;
+
+ w0(0x88); w2(0xe); w2(6);
+ for (k=0;k<count/2;k++) {
+ w0(buf[2*k+1]); w2(0xe); w2(6);
+ w0(buf[2*k]); w2(7); w2(6);
+ }
+ w2(0xc);
+}
+
+static void aten_connect ( PIA *pi )
+
+{ pi->saved_r0 = r0();
+ pi->saved_r2 = r2();
+ w2(0xc);
+}
+
+static void aten_disconnect ( PIA *pi )
+
+{ w0(pi->saved_r0);
+ w2(pi->saved_r2);
+}
+
+static void aten_log_adapter( PIA *pi, char * scratch, int verbose )
+
+{ char *mode_string[2] = {"4-bit","8-bit"};
+
+ printk("%s: aten %s, ATEN EH-100 at 0x%x, ",
+ pi->device,ATEN_VERSION,pi->port);
+ printk("mode %d (%s), delay %d\n",pi->mode,
+ mode_string[pi->mode],pi->delay);
+
+}
+
+static struct pi_protocol aten = {
+ .owner = THIS_MODULE,
+ .name = "aten",
+ .max_mode = 2,
+ .epp_first = 2,
+ .default_delay = 1,
+ .max_units = 1,
+ .write_regr = aten_write_regr,
+ .read_regr = aten_read_regr,
+ .write_block = aten_write_block,
+ .read_block = aten_read_block,
+ .connect = aten_connect,
+ .disconnect = aten_disconnect,
+ .log_adapter = aten_log_adapter,
+};
+
+static int __init aten_init(void)
+{
+ return paride_register(&aten);
+}
+
+static void __exit aten_exit(void)
+{
+ paride_unregister( &aten );
+}
+
+MODULE_LICENSE("GPL");
+module_init(aten_init)
+module_exit(aten_exit)
diff --git a/drivers/block/paride/bpck.c b/drivers/block/paride/bpck.c
new file mode 100644
index 00000000..4f27e739
--- /dev/null
+++ b/drivers/block/paride/bpck.c
@@ -0,0 +1,477 @@
+/*
+ bpck.c (c) 1996-8 Grant R. Guenther <grant@torque.net>
+ Under the terms of the GNU General Public License.
+
+ bpck.c is a low-level protocol driver for the MicroSolutions
+ "backpack" parallel port IDE adapter.
+
+*/
+
+/* Changes:
+
+ 1.01 GRG 1998.05.05 init_proto, release_proto, pi->delay
+ 1.02 GRG 1998.08.15 default pi->delay returned to 4
+
+*/
+
+#define BPCK_VERSION "1.02"
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/wait.h>
+#include <asm/io.h>
+
+#include "paride.h"
+
+#undef r2
+#undef w2
+
+#define PC pi->private
+#define r2() (PC=(in_p(2) & 0xff))
+#define w2(byte) {out_p(2,byte); PC = byte;}
+#define t2(pat) {PC ^= pat; out_p(2,PC);}
+#define e2() {PC &= 0xfe; out_p(2,PC);}
+#define o2() {PC |= 1; out_p(2,PC);}
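+
+/* These replacements for r2()/w2() keep a shadow copy of the control
+   port value in pi->private (PC), so t2() can toggle individual control
+   bits and e2()/o2() can clear or set bit 0 without re-reading the
+   port. */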
+
+#define j44(l,h) (((l>>3)&0x7)|((l>>4)&0x8)|((h<<1)&0x70)|(h&0x80))
+
+/* cont = 0 - access the IDE register file
+ cont = 1 - access the IDE command set
+ cont = 2 - use internal bpck register addressing
+*/
+
+static int cont_map[3] = { 0x40, 0x48, 0 };
+
+static int bpck_read_regr( PIA *pi, int cont, int regr )
+
+{ int r, l, h;
+
+ r = regr + cont_map[cont];
+
+ switch (pi->mode) {
+
+ case 0: w0(r & 0xf); w0(r); t2(2); t2(4);
+ l = r1();
+ t2(4);
+ h = r1();
+ return j44(l,h);
+
+ case 1: w0(r & 0xf); w0(r); t2(2);
+ e2(); t2(0x20);
+ t2(4); h = r0();
+ t2(1); t2(0x20);
+ return h;
+
+ case 2:
+ case 3:
+ case 4: w0(r); w2(9); w2(0); w2(0x20);
+ h = r4();
+ w2(0);
+ return h;
+
+ }
+ return -1;
+}
+
+static void bpck_write_regr( PIA *pi, int cont, int regr, int val )
+
+{ int r;
+
+ r = regr + cont_map[cont];
+
+ switch (pi->mode) {
+
+ case 0:
+ case 1: w0(r);
+ t2(2);
+ w0(val);
+ o2(); t2(4); t2(1);
+ break;
+
+ case 2:
+ case 3:
+ case 4: w0(r); w2(9); w2(0);
+ w0(val); w2(1); w2(3); w2(0);
+ break;
+
+ }
+}
+
+/* These macros access the bpck registers in native addressing */
+
+#define WR(r,v) bpck_write_regr(pi,2,r,v)
+#define RR(r) (bpck_read_regr(pi,2,r))
+
+static void bpck_write_block( PIA *pi, char * buf, int count )
+
+{ int i;
+
+ switch (pi->mode) {
+
+ case 0: WR(4,0x40);
+ w0(0x40); t2(2); t2(1);
+ for (i=0;i<count;i++) { w0(buf[i]); t2(4); }
+ WR(4,0);
+ break;
+
+ case 1: WR(4,0x50);
+ w0(0x40); t2(2); t2(1);
+ for (i=0;i<count;i++) { w0(buf[i]); t2(4); }
+ WR(4,0x10);
+ break;
+
+ case 2: WR(4,0x48);
+ w0(0x40); w2(9); w2(0); w2(1);
+ for (i=0;i<count;i++) w4(buf[i]);
+ w2(0);
+ WR(4,8);
+ break;
+
+ case 3: WR(4,0x48);
+ w0(0x40); w2(9); w2(0); w2(1);
+ for (i=0;i<count/2;i++) w4w(((u16 *)buf)[i]);
+ w2(0);
+ WR(4,8);
+ break;
+
+ case 4: WR(4,0x48);
+ w0(0x40); w2(9); w2(0); w2(1);
+ for (i=0;i<count/4;i++) w4l(((u32 *)buf)[i]);
+ w2(0);
+ WR(4,8);
+ break;
+ }
+}
+
+static void bpck_read_block( PIA *pi, char * buf, int count )
+
+{ int i, l, h;
+
+ switch (pi->mode) {
+
+ case 0: WR(4,0x40);
+ w0(0x40); t2(2);
+ for (i=0;i<count;i++) {
+ t2(4); l = r1();
+ t2(4); h = r1();
+ buf[i] = j44(l,h);
+ }
+ WR(4,0);
+ break;
+
+ case 1: WR(4,0x50);
+ w0(0x40); t2(2); t2(0x20);
+ for(i=0;i<count;i++) { t2(4); buf[i] = r0(); }
+ t2(1); t2(0x20);
+ WR(4,0x10);
+ break;
+
+ case 2: WR(4,0x48);
+ w0(0x40); w2(9); w2(0); w2(0x20);
+ for (i=0;i<count;i++) buf[i] = r4();
+ w2(0);
+ WR(4,8);
+ break;
+
+ case 3: WR(4,0x48);
+ w0(0x40); w2(9); w2(0); w2(0x20);
+ for (i=0;i<count/2;i++) ((u16 *)buf)[i] = r4w();
+ w2(0);
+ WR(4,8);
+ break;
+
+ case 4: WR(4,0x48);
+ w0(0x40); w2(9); w2(0); w2(0x20);
+ for (i=0;i<count/4;i++) ((u32 *)buf)[i] = r4l();
+ w2(0);
+ WR(4,8);
+ break;
+
+ }
+}
+
+static int bpck_probe_unit ( PIA *pi )
+
+{ int o1, o0, f7, id;
+ int t, s;
+
+ id = pi->unit;
+ s = 0;
+ w2(4); w2(0xe); r2(); t2(2);
+ o1 = r1()&0xf8;
+ o0 = r0();
+ w0(255-id); w2(4); w0(id);
+ t2(8); t2(8); t2(8);
+ t2(2); t = r1()&0xf8;
+ f7 = ((id % 8) == 7);
+ if ((f7) || (t != o1)) { t2(2); s = r1()&0xf8; }
+ if ((t == o1) && ((!f7) || (s == o1))) {
+ w2(0x4c); w0(o0);
+ return 0;
+ }
+ t2(8); w0(0); t2(2); w2(0x4c); w0(o0);
+ return 1;
+}
+
+static void bpck_connect ( PIA *pi )
+
+{ pi->saved_r0 = r0();
+ w0(0xff-pi->unit); w2(4); w0(pi->unit);
+ t2(8); t2(8); t2(8);
+ t2(2); t2(2);
+
+ switch (pi->mode) {
+
+ case 0: t2(8); WR(4,0);
+ break;
+
+ case 1: t2(8); WR(4,0x10);
+ break;
+
+ case 2:
+ case 3:
+ case 4: w2(0); WR(4,8);
+ break;
+
+ }
+
+ WR(5,8);
+
+ if (pi->devtype == PI_PCD) {
+ WR(0x46,0x10); /* fiddle with ESS logic ??? */
+ WR(0x4c,0x38);
+ WR(0x4d,0x88);
+ WR(0x46,0xa0);
+ WR(0x41,0);
+ WR(0x4e,8);
+ }
+}
+
+static void bpck_disconnect ( PIA *pi )
+
+{ w0(0);
+ if (pi->mode >= 2) { w2(9); w2(0); } else t2(2);
+ w2(0x4c); w0(pi->saved_r0);
+}
+
+static void bpck_force_spp ( PIA *pi )
+
+/* This fakes the EPP protocol to turn off EPP ... */
+
+{ pi->saved_r0 = r0();
+ w0(0xff-pi->unit); w2(4); w0(pi->unit);
+ t2(8); t2(8); t2(8);
+ t2(2); t2(2);
+
+ w2(0);
+ w0(4); w2(9); w2(0);
+ w0(0); w2(1); w2(3); w2(0);
+ w0(0); w2(9); w2(0);
+ w2(0x4c); w0(pi->saved_r0);
+}
+
+#define TEST_LEN 16
+
+static int bpck_test_proto( PIA *pi, char * scratch, int verbose )
+
+{ int i, e, l, h, om;
+ char buf[TEST_LEN];
+
+ bpck_force_spp(pi);
+
+ switch (pi->mode) {
+
+ case 0: bpck_connect(pi);
+ WR(0x13,0x7f);
+ w0(0x13); t2(2);
+ for(i=0;i<TEST_LEN;i++) {
+ t2(4); l = r1();
+ t2(4); h = r1();
+ buf[i] = j44(l,h);
+ }
+ bpck_disconnect(pi);
+ break;
+
+ case 1: bpck_connect(pi);
+ WR(0x13,0x7f);
+ w0(0x13); t2(2); t2(0x20);
+ for(i=0;i<TEST_LEN;i++) { t2(4); buf[i] = r0(); }
+ t2(1); t2(0x20);
+ bpck_disconnect(pi);
+ break;
+
+ case 2:
+ case 3:
+ case 4: om = pi->mode;
+ pi->mode = 0;
+ bpck_connect(pi);
+ WR(7,3);
+ WR(4,8);
+ bpck_disconnect(pi);
+
+ pi->mode = om;
+ bpck_connect(pi);
+ w0(0x13); w2(9); w2(1); w0(0); w2(3); w2(0); w2(0xe0);
+
+ switch (pi->mode) {
+ case 2: for (i=0;i<TEST_LEN;i++) buf[i] = r4();
+ break;
+ case 3: for (i=0;i<TEST_LEN/2;i++) ((u16 *)buf)[i] = r4w();
+ break;
+ case 4: for (i=0;i<TEST_LEN/4;i++) ((u32 *)buf)[i] = r4l();
+ break;
+ }
+
+ w2(0);
+ WR(7,0);
+ bpck_disconnect(pi);
+
+ break;
+
+ }
+
+ if (verbose) {
+ printk("%s: bpck: 0x%x unit %d mode %d: ",
+ pi->device,pi->port,pi->unit,pi->mode);
+ for (i=0;i<TEST_LEN;i++) printk("%3d",buf[i]);
+ printk("\n");
+ }
+
+ e = 0;
+ for (i=0;i<TEST_LEN;i++) if (buf[i] != (i+1)) e++;
+ return e;
+}
+
+static void bpck_read_eeprom ( PIA *pi, char * buf )
+
+{ int i,j,k,n,p,v,f, om, od;
+
+ bpck_force_spp(pi);
+
+ om = pi->mode; od = pi->delay;
+ pi->mode = 0; pi->delay = 6;
+
+ bpck_connect(pi);
+
+ n = 0;
+ WR(4,0);
+ for (i=0;i<64;i++) {
+ WR(6,8);
+ WR(6,0xc);
+ p = 0x100;
+ for (k=0;k<9;k++) {
+ f = (((i + 0x180) & p) != 0) * 2;
+ WR(6,f+0xc);
+ WR(6,f+0xd);
+ WR(6,f+0xc);
+ p = (p >> 1);
+ }
+ for (j=0;j<2;j++) {
+ v = 0;
+ for (k=0;k<8;k++) {
+ WR(6,0xc);
+ WR(6,0xd);
+ WR(6,0xc);
+ f = RR(0);
+ v = 2*v + (f == 0x84);
+ }
+ buf[2*i+1-j] = v;
+ }
+ }
+ WR(6,8);
+ WR(6,0);
+ WR(5,8);
+
+ bpck_disconnect(pi);
+
+ if (om >= 2) {
+ bpck_connect(pi);
+ WR(7,3);
+ WR(4,8);
+ bpck_disconnect(pi);
+ }
+
+ pi->mode = om; pi->delay = od;
+}
+
+static int bpck_test_port ( PIA *pi ) /* check for 8-bit port */
+
+{ int i, r, m;
+
+ w2(0x2c); i = r0(); w0(255-i); r = r0(); w0(i);
+ m = -1;
+ if (r == i) m = 2;
+ if (r == (255-i)) m = 0;
+
+ w2(0xc); i = r0(); w0(255-i); r = r0(); w0(i);
+ if (r != (255-i)) m = -1;
+
+ if (m == 0) { w2(6); w2(0xc); r = r0(); w0(0xaa); w0(r); w0(0xaa); }
+ if (m == 2) { w2(0x26); w2(0xc); }
+
+ if (m == -1) return 0;
+ return 5;
+}
+
+static void bpck_log_adapter( PIA *pi, char * scratch, int verbose )
+
+{ char *mode_string[5] = { "4-bit","8-bit","EPP-8",
+ "EPP-16","EPP-32" };
+
+#ifdef DUMP_EEPROM
+ int i;
+#endif
+
+ bpck_read_eeprom(pi,scratch);
+
+#ifdef DUMP_EEPROM
+ if (verbose) {
+ for(i=0;i<128;i++)
+ if ((scratch[i] < ' ') || (scratch[i] > '~'))
+ scratch[i] = '.';
+ printk("%s: bpck EEPROM: %64.64s\n",pi->device,scratch);
+ printk("%s: %64.64s\n",pi->device,&scratch[64]);
+ }
+#endif
+
+ printk("%s: bpck %s, backpack %8.8s unit %d",
+ pi->device,BPCK_VERSION,&scratch[110],pi->unit);
+ printk(" at 0x%x, mode %d (%s), delay %d\n",pi->port,
+ pi->mode,mode_string[pi->mode],pi->delay);
+}
+
+static struct pi_protocol bpck = {
+ .owner = THIS_MODULE,
+ .name = "bpck",
+ .max_mode = 5,
+ .epp_first = 2,
+ .default_delay = 4,
+ .max_units = 255,
+ .write_regr = bpck_write_regr,
+ .read_regr = bpck_read_regr,
+ .write_block = bpck_write_block,
+ .read_block = bpck_read_block,
+ .connect = bpck_connect,
+ .disconnect = bpck_disconnect,
+ .test_port = bpck_test_port,
+ .probe_unit = bpck_probe_unit,
+ .test_proto = bpck_test_proto,
+ .log_adapter = bpck_log_adapter,
+};
+
+static int __init bpck_init(void)
+{
+ return paride_register(&bpck);
+}
+
+static void __exit bpck_exit(void)
+{
+ paride_unregister(&bpck);
+}
+
+MODULE_LICENSE("GPL");
+module_init(bpck_init)
+module_exit(bpck_exit)
diff --git a/drivers/block/paride/bpck6.c b/drivers/block/paride/bpck6.c
new file mode 100644
index 00000000..ad124525
--- /dev/null
+++ b/drivers/block/paride/bpck6.c
@@ -0,0 +1,268 @@
+/*
+ backpack.c (c) 2001 Micro Solutions Inc.
+ Released under the terms of the GNU General Public license
+
+ backpack.c is a low-level protocol driver for the Micro Solutions
+ "BACKPACK" parallel port IDE adapter
+ (Works on Series 6 drives)
+
+ Written by: Ken Hahn (linux-dev@micro-solutions.com)
+ Clive Turvey (linux-dev@micro-solutions.com)
+
+*/
+
+/*
+ This is Ken's linux wrapper for the PPC library
+ Version 1.0.0 is the backpack driver for which source is not available
+ Version 2.0.0 is the first to have source released
+ Version 2.0.1 is the "Cox-ified" source code
+ Version 2.0.2 - fixed version string usage, and made ppc functions static
+*/
+
+
+/* PARAMETERS */
+static int verbose; /* set this to 1 to see debugging messages and whatnot */
+
+#define BACKPACK_VERSION "2.0.2"
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <asm/io.h>
+#include <linux/parport.h>
+
+#include "ppc6lnx.c"
+#include "paride.h"
+
+
+
+#define PPCSTRUCT(pi) ((Interface *)(pi->private))
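+
+/* pi->private holds the ppc6lnx Interface state for this unit; it is
+   allocated in bpck6_init_proto() and freed in bpck6_release_proto(). */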
+
+/****************************************************************/
+/*
+ ATAPI CDROM DRIVE REGISTERS
+*/
+#define ATAPI_DATA 0 /* data port */
+#define ATAPI_ERROR 1 /* error register (read) */
+#define ATAPI_FEATURES 1 /* feature register (write) */
+#define ATAPI_INT_REASON 2 /* interrupt reason register */
+#define ATAPI_COUNT_LOW 4 /* byte count register (low) */
+#define ATAPI_COUNT_HIGH 5 /* byte count register (high) */
+#define ATAPI_DRIVE_SEL 6 /* drive select register */
+#define ATAPI_STATUS 7 /* status port (read) */
+#define ATAPI_COMMAND 7 /* command port (write) */
+#define ATAPI_ALT_STATUS 0x0e /* alternate status reg (read) */
+#define ATAPI_DEVICE_CONTROL 0x0e /* device control (write) */
+/****************************************************************/
+
+static int bpck6_read_regr(PIA *pi, int cont, int reg)
+{
+ unsigned int out;
+
+ /* check for bad settings */
+ if (reg<0 || reg>7 || cont<0 || cont>2)
+ {
+ return(-1);
+ }
+ out=ppc6_rd_port(PPCSTRUCT(pi),cont?reg|8:reg);
+ return(out);
+}
+
+static void bpck6_write_regr(PIA *pi, int cont, int reg, int val)
+{
+ /* check for bad settings */
+ if (reg>=0 && reg<=7 && cont>=0 && cont<=1)
+ {
+ ppc6_wr_port(PPCSTRUCT(pi),cont?reg|8:reg,(u8)val);
+ }
+}
+
+static void bpck6_write_block( PIA *pi, char * buf, int len )
+{
+ ppc6_wr_port16_blk(PPCSTRUCT(pi),ATAPI_DATA,buf,(u32)len>>1);
+}
+
+static void bpck6_read_block( PIA *pi, char * buf, int len )
+{
+ ppc6_rd_port16_blk(PPCSTRUCT(pi),ATAPI_DATA,buf,(u32)len>>1);
+}
+
+static void bpck6_connect ( PIA *pi )
+{
+ if(verbose)
+ {
+ printk(KERN_DEBUG "connect\n");
+ }
+
+ if(pi->mode >=2)
+ {
+ PPCSTRUCT(pi)->mode=4+pi->mode-2;
+ }
+ else if(pi->mode==1)
+ {
+ PPCSTRUCT(pi)->mode=3;
+ }
+ else
+ {
+ PPCSTRUCT(pi)->mode=1;
+ }
+
+ ppc6_open(PPCSTRUCT(pi));
+ ppc6_wr_extout(PPCSTRUCT(pi),0x3);
+}
+
+static void bpck6_disconnect ( PIA *pi )
+{
+ if(verbose)
+ {
+ printk("disconnect\n");
+ }
+ ppc6_wr_extout(PPCSTRUCT(pi),0x0);
+ ppc6_close(PPCSTRUCT(pi));
+}
+
+static int bpck6_test_port ( PIA *pi ) /* check for 8-bit port */
+{
+ if(verbose)
+ {
+ printk(KERN_DEBUG "PARPORT indicates modes=%x for lp=0x%lx\n",
+ ((struct pardevice*)(pi->pardev))->port->modes,
+ ((struct pardevice *)(pi->pardev))->port->base);
+ }
+
+ /*copy over duplicate stuff.. initialize state info*/
+ PPCSTRUCT(pi)->ppc_id=pi->unit;
+ PPCSTRUCT(pi)->lpt_addr=pi->port;
+
+ /* look at the parport device to see what modes we can use */
+ if(((struct pardevice *)(pi->pardev))->port->modes &
+ (PARPORT_MODE_EPP)
+ )
+ {
+ return 5; /* Can do EPP*/
+ }
+ else if(((struct pardevice *)(pi->pardev))->port->modes &
+ (PARPORT_MODE_TRISTATE)
+ )
+ {
+ return 2;
+ }
+ else /*Just flat SPP*/
+ {
+ return 1;
+ }
+}
+
+static int bpck6_probe_unit ( PIA *pi )
+{
+ int out;
+
+ if(verbose)
+ {
+ printk(KERN_DEBUG "PROBE UNIT %x on port:%x\n",pi->unit,pi->port);
+ }
+
+ /*SET PPC UNIT NUMBER*/
+ PPCSTRUCT(pi)->ppc_id=pi->unit;
+
+ /*LOWER DOWN TO UNIDIRECTIONAL*/
+ PPCSTRUCT(pi)->mode=1;
+
+ out=ppc6_open(PPCSTRUCT(pi));
+
+ if(verbose)
+ {
+ printk(KERN_DEBUG "ppc_open returned %2x\n",out);
+ }
+
+ if(out)
+ {
+ ppc6_close(PPCSTRUCT(pi));
+ if(verbose)
+ {
+ printk(KERN_DEBUG "leaving probe\n");
+ }
+ return(1);
+ }
+ else
+ {
+ if(verbose)
+ {
+ printk(KERN_DEBUG "Failed open\n");
+ }
+ return(0);
+ }
+}
+
+static void bpck6_log_adapter( PIA *pi, char * scratch, int verbose )
+{
+ char *mode_string[5]=
+ {"4-bit","8-bit","EPP-8","EPP-16","EPP-32"};
+
+ printk("%s: BACKPACK Protocol Driver V"BACKPACK_VERSION"\n",pi->device);
+ printk("%s: Copyright 2001 by Micro Solutions, Inc., DeKalb IL.\n",pi->device);
+ printk("%s: BACKPACK %s, Micro Solutions BACKPACK Drive at 0x%x\n",
+ pi->device,BACKPACK_VERSION,pi->port);
+ printk("%s: Unit: %d Mode:%d (%s) Delay %d\n",pi->device,
+ pi->unit,pi->mode,mode_string[pi->mode],pi->delay);
+}
+
+static int bpck6_init_proto(PIA *pi)
+{
+ Interface *p = kzalloc(sizeof(Interface), GFP_KERNEL);
+
+ if (p) {
+ pi->private = (unsigned long)p;
+ return 0;
+ }
+
+ printk(KERN_ERR "%s: ERROR COULDN'T ALLOCATE MEMORY\n", pi->device);
+ return -1;
+}
+
+static void bpck6_release_proto(PIA *pi)
+{
+ kfree((void *)(pi->private));
+}
+
+static struct pi_protocol bpck6 = {
+ .owner = THIS_MODULE,
+ .name = "bpck6",
+ .max_mode = 5,
+ .epp_first = 2, /* 2-5 use epp (need 8 ports) */
+ .max_units = 255,
+ .write_regr = bpck6_write_regr,
+ .read_regr = bpck6_read_regr,
+ .write_block = bpck6_write_block,
+ .read_block = bpck6_read_block,
+ .connect = bpck6_connect,
+ .disconnect = bpck6_disconnect,
+ .test_port = bpck6_test_port,
+ .probe_unit = bpck6_probe_unit,
+ .log_adapter = bpck6_log_adapter,
+ .init_proto = bpck6_init_proto,
+ .release_proto = bpck6_release_proto,
+};
+
+static int __init bpck6_init(void)
+{
+ printk(KERN_INFO "bpck6: BACKPACK Protocol Driver V"BACKPACK_VERSION"\n");
+ printk(KERN_INFO "bpck6: Copyright 2001 by Micro Solutions, Inc., DeKalb IL. USA\n");
+ if(verbose)
+ printk(KERN_DEBUG "bpck6: verbose debug enabled.\n");
+ return paride_register(&bpck6);
+}
+
+static void __exit bpck6_exit(void)
+{
+ paride_unregister(&bpck6);
+}
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Micro Solutions Inc.");
+MODULE_DESCRIPTION("BACKPACK Protocol module, compatible with PARIDE");
+module_param(verbose, bool, 0644);
+module_init(bpck6_init)
+module_exit(bpck6_exit)
diff --git a/drivers/block/paride/comm.c b/drivers/block/paride/comm.c
new file mode 100644
index 00000000..9bcd3549
--- /dev/null
+++ b/drivers/block/paride/comm.c
@@ -0,0 +1,218 @@
+/*
+ comm.c (c) 1997-8 Grant R. Guenther <grant@torque.net>
+ Under the terms of the GNU General Public License.
+
+ comm.c is a low-level protocol driver for some older models
+ of the DataStor "Commuter" parallel to IDE adapter. Some of
+ the parallel port devices marketed by Arista currently
+ use this adapter.
+*/
+
+/* Changes:
+
+ 1.01 GRG 1998.05.05 init_proto, release_proto
+
+*/
+
+#define COMM_VERSION "1.01"
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/wait.h>
+#include <asm/io.h>
+
+#include "paride.h"
+
+/* mode codes:  0  nybble reads, 8-bit writes
+                1  8-bit reads and writes
+                2  8-bit EPP mode
+                3  EPP-16
+                4  EPP-32
+*/
+
+#define j44(a,b) (((a>>3)&0x0f)|((b<<1)&0xf0))
+
+#define P1 w2(5);w2(0xd);w2(0xd);w2(5);w2(4);
+#define P2 w2(5);w2(7);w2(7);w2(5);w2(4);
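+
+/* P1 and P2 are control port strobe sequences; the register access
+   routines below use P1 after writing a register address to the data
+   port and P2 after writing a data byte. */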
+
+/* cont = 0 - access the IDE register file
+ cont = 1 - access the IDE command set
+*/
+
+static int cont_map[2] = { 0x08, 0x10 };
+
+static int comm_read_regr( PIA *pi, int cont, int regr )
+
+{ int l, h, r;
+
+ r = regr + cont_map[cont];
+
+ switch (pi->mode) {
+
+ case 0: w0(r); P1; w0(0);
+ w2(6); l = r1(); w0(0x80); h = r1(); w2(4);
+ return j44(l,h);
+
+ case 1: w0(r+0x20); P1;
+ w0(0); w2(0x26); h = r0(); w2(4);
+ return h;
+
+ case 2:
+ case 3:
+ case 4: w3(r+0x20); (void)r1();
+ w2(0x24); h = r4(); w2(4);
+ return h;
+
+ }
+ return -1;
+}
+
+static void comm_write_regr( PIA *pi, int cont, int regr, int val )
+
+{ int r;
+
+ r = regr + cont_map[cont];
+
+ switch (pi->mode) {
+
+ case 0:
+ case 1: w0(r); P1; w0(val); P2;
+ break;
+
+ case 2:
+ case 3:
+ case 4: w3(r); (void)r1(); w4(val);
+ break;
+ }
+}
+
+static void comm_connect ( PIA *pi )
+
+{ pi->saved_r0 = r0();
+ pi->saved_r2 = r2();
+ w2(4); w0(0xff); w2(6);
+ w2(4); w0(0xaa); w2(6);
+ w2(4); w0(0x00); w2(6);
+ w2(4); w0(0x87); w2(6);
+ w2(4); w0(0xe0); w2(0xc); w2(0xc); w2(4);
+}
+
+static void comm_disconnect ( PIA *pi )
+
+{ w2(0); w2(0); w2(0); w2(4);
+ w0(pi->saved_r0);
+ w2(pi->saved_r2);
+}
+
+static void comm_read_block( PIA *pi, char * buf, int count )
+
+{ int i, l, h;
+
+ switch (pi->mode) {
+
+ case 0: w0(0x48); P1;
+ for(i=0;i<count;i++) {
+ w0(0); w2(6); l = r1();
+ w0(0x80); h = r1(); w2(4);
+ buf[i] = j44(l,h);
+ }
+ break;
+
+ case 1: w0(0x68); P1; w0(0);
+ for(i=0;i<count;i++) {
+ w2(0x26); buf[i] = r0(); w2(0x24);
+ }
+ w2(4);
+ break;
+
+ case 2: w3(0x68); (void)r1(); w2(0x24);
+ for (i=0;i<count;i++) buf[i] = r4();
+ w2(4);
+ break;
+
+ case 3: w3(0x68); (void)r1(); w2(0x24);
+ for (i=0;i<count/2;i++) ((u16 *)buf)[i] = r4w();
+ w2(4);
+ break;
+
+ case 4: w3(0x68); (void)r1(); w2(0x24);
+ for (i=0;i<count/4;i++) ((u32 *)buf)[i] = r4l();
+ w2(4);
+ break;
+
+ }
+}
+
+/* NB: Watch out for the byte swapped writes ! */
+
+static void comm_write_block( PIA *pi, char * buf, int count )
+
+{ int k;
+
+ switch (pi->mode) {
+
+ case 0:
+ case 1: w0(0x68); P1;
+ for (k=0;k<count;k++) {
+ w2(5); w0(buf[k^1]); w2(7);
+ }
+ w2(5); w2(4);
+ break;
+
+ case 2: w3(0x48); (void)r1();
+ for (k=0;k<count;k++) w4(buf[k^1]);
+ break;
+
+ case 3: w3(0x48); (void)r1();
+ for (k=0;k<count/2;k++) w4w(pi_swab16(buf,k));
+ break;
+
+ case 4: w3(0x48); (void)r1();
+ for (k=0;k<count/4;k++) w4l(pi_swab32(buf,k));
+ break;
+
+
+ }
+}
+
+static void comm_log_adapter( PIA *pi, char * scratch, int verbose )
+
+{ char *mode_string[5] = {"4-bit","8-bit","EPP-8","EPP-16","EPP-32"};
+
+ printk("%s: comm %s, DataStor Commuter at 0x%x, ",
+ pi->device,COMM_VERSION,pi->port);
+ printk("mode %d (%s), delay %d\n",pi->mode,
+ mode_string[pi->mode],pi->delay);
+
+}
+
+static struct pi_protocol comm = {
+ .owner = THIS_MODULE,
+ .name = "comm",
+ .max_mode = 5,
+ .epp_first = 2,
+ .default_delay = 1,
+ .max_units = 1,
+ .write_regr = comm_write_regr,
+ .read_regr = comm_read_regr,
+ .write_block = comm_write_block,
+ .read_block = comm_read_block,
+ .connect = comm_connect,
+ .disconnect = comm_disconnect,
+ .log_adapter = comm_log_adapter,
+};
+
+static int __init comm_init(void)
+{
+ return paride_register(&comm);
+}
+
+static void __exit comm_exit(void)
+{
+ paride_unregister(&comm);
+}
+
+MODULE_LICENSE("GPL");
+module_init(comm_init)
+module_exit(comm_exit)
diff --git a/drivers/block/paride/dstr.c b/drivers/block/paride/dstr.c
new file mode 100644
index 00000000..accc5c77
--- /dev/null
+++ b/drivers/block/paride/dstr.c
@@ -0,0 +1,233 @@
+/*
+ dstr.c (c) 1997-8 Grant R. Guenther <grant@torque.net>
+ Under the terms of the GNU General Public License.
+
+ dstr.c is a low-level protocol driver for the
+ DataStor EP2000 parallel to IDE adapter chip.
+
+*/
+
+/* Changes:
+
+ 1.01 GRG 1998.05.06 init_proto, release_proto
+
+*/
+
+#define DSTR_VERSION "1.01"
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/wait.h>
+#include <asm/io.h>
+
+#include "paride.h"
+
+/* mode codes: 0 nybble reads, 8-bit writes
+ 1 8-bit reads and writes
+ 2 8-bit EPP mode
+ 3 EPP-16
+ 4 EPP-32
+*/
+
+#define j44(a,b) (((a>>3)&0x07)|((~a>>4)&0x08)|((b<<1)&0x70)|((~b)&0x80))
+
+#define P1 w2(5);w2(0xd);w2(5);w2(4);
+#define P2 w2(5);w2(7);w2(5);w2(4);
+#define P3 w2(6);w2(4);w2(6);w2(4);
+
+/* cont = 0 - access the IDE register file
+ cont = 1 - access the IDE command set
+*/
+
+static int cont_map[2] = { 0x20, 0x40 };
+
+static int dstr_read_regr( PIA *pi, int cont, int regr )
+
+{ int a, b, r;
+
+ r = regr + cont_map[cont];
+
+ w0(0x81); P1;
+ if (pi->mode) { w0(0x11); } else { w0(1); }
+ P2; w0(r); P1;
+
+ switch (pi->mode) {
+
+ case 0: w2(6); a = r1(); w2(4); w2(6); b = r1(); w2(4);
+ return j44(a,b);
+
+ case 1: w0(0); w2(0x26); a = r0(); w2(4);
+ return a;
+
+ case 2:
+ case 3:
+ case 4: w2(0x24); a = r4(); w2(4);
+ return a;
+
+ }
+ return -1;
+}
+
+static void dstr_write_regr( PIA *pi, int cont, int regr, int val )
+
+{ int r;
+
+ r = regr + cont_map[cont];
+
+ w0(0x81); P1;
+ if (pi->mode >= 2) { w0(0x11); } else { w0(1); }
+ P2; w0(r); P1;
+
+ switch (pi->mode) {
+
+ case 0:
+ case 1: w0(val); w2(5); w2(7); w2(5); w2(4);
+ break;
+
+ case 2:
+ case 3:
+ case 4: w4(val);
+ break;
+ }
+}
+
+#define CCP(x) w0(0xff);w2(0xc);w2(4);\
+ w0(0xaa);w0(0x55);w0(0);w0(0xff);w0(0x87);w0(0x78);\
+ w0(x);w2(5);w2(4);
+
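+/* CCP(x) strobes 0xff, then writes the EP2000 wake-up pattern
+   (0xaa 0x55 0x00 0xff 0x87 0x78) and the command byte x to the data
+   port; dstr_connect() selects with CCP(0xe0) and dstr_disconnect()
+   deselects with CCP(0x30). */
+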
+static void dstr_connect ( PIA *pi )
+
+{ pi->saved_r0 = r0();
+ pi->saved_r2 = r2();
+ w2(4); CCP(0xe0); w0(0xff);
+}
+
+static void dstr_disconnect ( PIA *pi )
+
+{ CCP(0x30);
+ w0(pi->saved_r0);
+ w2(pi->saved_r2);
+}
+
+static void dstr_read_block( PIA *pi, char * buf, int count )
+
+{ int k, a, b;
+
+ w0(0x81); P1;
+ if (pi->mode) { w0(0x19); } else { w0(9); }
+ P2; w0(0x82); P1; P3; w0(0x20); P1;
+
+ switch (pi->mode) {
+
+ case 0: for (k=0;k<count;k++) {
+ w2(6); a = r1(); w2(4);
+ w2(6); b = r1(); w2(4);
+ buf[k] = j44(a,b);
+ }
+ break;
+
+ case 1: w0(0);
+ for (k=0;k<count;k++) {
+ w2(0x26); buf[k] = r0(); w2(0x24);
+ }
+ w2(4);
+ break;
+
+ case 2: w2(0x24);
+ for (k=0;k<count;k++) buf[k] = r4();
+ w2(4);
+ break;
+
+ case 3: w2(0x24);
+ for (k=0;k<count/2;k++) ((u16 *)buf)[k] = r4w();
+ w2(4);
+ break;
+
+ case 4: w2(0x24);
+ for (k=0;k<count/4;k++) ((u32 *)buf)[k] = r4l();
+ w2(4);
+ break;
+
+ }
+}
+
+static void dstr_write_block( PIA *pi, char * buf, int count )
+
+{ int k;
+
+ w0(0x81); P1;
+ if (pi->mode) { w0(0x19); } else { w0(9); }
+ P2; w0(0x82); P1; P3; w0(0x20); P1;
+
+ switch (pi->mode) {
+
+ case 0:
+ case 1: for (k=0;k<count;k++) {
+ w2(5); w0(buf[k]); w2(7);
+ }
+ w2(5); w2(4);
+ break;
+
+ case 2: w2(0xc5);
+ for (k=0;k<count;k++) w4(buf[k]);
+ w2(0xc4);
+ break;
+
+ case 3: w2(0xc5);
+ for (k=0;k<count/2;k++) w4w(((u16 *)buf)[k]);
+ w2(0xc4);
+ break;
+
+ case 4: w2(0xc5);
+ for (k=0;k<count/4;k++) w4l(((u32 *)buf)[k]);
+ w2(0xc4);
+ break;
+
+ }
+}
+
+
+static void dstr_log_adapter( PIA *pi, char * scratch, int verbose )
+
+{ char *mode_string[5] = {"4-bit","8-bit","EPP-8",
+ "EPP-16","EPP-32"};
+
+ printk("%s: dstr %s, DataStor EP2000 at 0x%x, ",
+ pi->device,DSTR_VERSION,pi->port);
+ printk("mode %d (%s), delay %d\n",pi->mode,
+ mode_string[pi->mode],pi->delay);
+
+}
+
+static struct pi_protocol dstr = {
+ .owner = THIS_MODULE,
+ .name = "dstr",
+ .max_mode = 5,
+ .epp_first = 2,
+ .default_delay = 1,
+ .max_units = 1,
+ .write_regr = dstr_write_regr,
+ .read_regr = dstr_read_regr,
+ .write_block = dstr_write_block,
+ .read_block = dstr_read_block,
+ .connect = dstr_connect,
+ .disconnect = dstr_disconnect,
+ .log_adapter = dstr_log_adapter,
+};
+
+static int __init dstr_init(void)
+{
+ return paride_register(&dstr);
+}
+
+static void __exit dstr_exit(void)
+{
+ paride_unregister(&dstr);
+}
+
+MODULE_LICENSE("GPL");
+module_init(dstr_init)
+module_exit(dstr_exit)
diff --git a/drivers/block/paride/epat.c b/drivers/block/paride/epat.c
new file mode 100644
index 00000000..1bcdff77
--- /dev/null
+++ b/drivers/block/paride/epat.c
@@ -0,0 +1,340 @@
+/*
+ epat.c (c) 1997-8 Grant R. Guenther <grant@torque.net>
+ Under the terms of the GNU General Public License.
+
+ This is the low level protocol driver for the EPAT parallel
+ to IDE adapter from Shuttle Technologies. This adapter is
+ used in many popular parallel port disk products such as the
+ SyQuest EZ drives, the Avatar Shark and the Imation SuperDisk.
+
+*/
+
+/* Changes:
+
+ 1.01 GRG 1998.05.06 init_proto, release_proto
+ 1.02 Joshua b. Jore CPP(renamed), epat_connect, epat_disconnect
+
+*/
+
+#define EPAT_VERSION "1.02"
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/wait.h>
+#include <asm/io.h>
+
+#include "paride.h"
+
+#define j44(a,b) (((a>>4)&0x0f)+(b&0xf0))
+#define j53(a,b) (((a>>3)&0x1f)+((b<<4)&0xe0))
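+
+/* j44 assembles a byte from two 4-bit status reads (mode 0); j53
+   assembles it from a 5-bit status read and a 3-bit control port read
+   (mode 1, the "5/3" mode). */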
+
+static int epatc8;
+
+module_param(epatc8, int, 0);
+MODULE_PARM_DESC(epatc8, "support for the Shuttle EP1284 chip, "
+ "used in any recent Imation SuperDisk (LS-120) drive.");
+
+/* cont = 0 IDE register file
+ cont = 1 IDE control registers
+ cont = 2 internal EPAT registers
+*/
+
+static int cont_map[3] = { 0x18, 0x10, 0 };
+
+static void epat_write_regr( PIA *pi, int cont, int regr, int val)
+
+{ int r;
+
+ r = regr + cont_map[cont];
+
+ switch (pi->mode) {
+
+ case 0:
+ case 1:
+ case 2: w0(0x60+r); w2(1); w0(val); w2(4);
+ break;
+
+ case 3:
+ case 4:
+ case 5: w3(0x40+r); w4(val);
+ break;
+
+ }
+}
+
+static int epat_read_regr( PIA *pi, int cont, int regr )
+
+{ int a, b, r;
+
+ r = regr + cont_map[cont];
+
+ switch (pi->mode) {
+
+ case 0: w0(r); w2(1); w2(3);
+ a = r1(); w2(4); b = r1();
+ return j44(a,b);
+
+ case 1: w0(0x40+r); w2(1); w2(4);
+ a = r1(); b = r2(); w0(0xff);
+ return j53(a,b);
+
+ case 2: w0(0x20+r); w2(1); w2(0x25);
+ a = r0(); w2(4);
+ return a;
+
+ case 3:
+ case 4:
+ case 5: w3(r); w2(0x24); a = r4(); w2(4);
+ return a;
+
+ }
+ return -1; /* never gets here */
+}
+
+static void epat_read_block( PIA *pi, char * buf, int count )
+
+{ int k, ph, a, b;
+
+ switch (pi->mode) {
+
+ case 0: w0(7); w2(1); w2(3); w0(0xff);
+ ph = 0;
+ for(k=0;k<count;k++) {
+ if (k == count-1) w0(0xfd);
+ w2(6+ph); a = r1();
+ if (a & 8) b = a;
+ else { w2(4+ph); b = r1(); }
+ buf[k] = j44(a,b);
+ ph = 1 - ph;
+ }
+ w0(0); w2(4);
+ break;
+
+ case 1: w0(0x47); w2(1); w2(5); w0(0xff);
+ ph = 0;
+ for(k=0;k<count;k++) {
+ if (k == count-1) w0(0xfd);
+ w2(4+ph);
+ a = r1(); b = r2();
+ buf[k] = j53(a,b);
+ ph = 1 - ph;
+ }
+ w0(0); w2(4);
+ break;
+
+ case 2: w0(0x27); w2(1); w2(0x25); w0(0);
+ ph = 0;
+ for(k=0;k<count-1;k++) {
+ w2(0x24+ph);
+ buf[k] = r0();
+ ph = 1 - ph;
+ }
+ w2(0x26); w2(0x27); buf[count-1] = r0();
+ w2(0x25); w2(4);
+ break;
+
+ case 3: w3(0x80); w2(0x24);
+ for(k=0;k<count-1;k++) buf[k] = r4();
+ w2(4); w3(0xa0); w2(0x24); buf[count-1] = r4();
+ w2(4);
+ break;
+
+ case 4: w3(0x80); w2(0x24);
+ for(k=0;k<(count/2)-1;k++) ((u16 *)buf)[k] = r4w();
+ buf[count-2] = r4();
+ w2(4); w3(0xa0); w2(0x24); buf[count-1] = r4();
+ w2(4);
+ break;
+
+ case 5: w3(0x80); w2(0x24);
+ for(k=0;k<(count/4)-1;k++) ((u32 *)buf)[k] = r4l();
+ for(k=count-4;k<count-1;k++) buf[k] = r4();
+ w2(4); w3(0xa0); w2(0x24); buf[count-1] = r4();
+ w2(4);
+ break;
+
+ }
+}
+
+static void epat_write_block( PIA *pi, char * buf, int count )
+
+{ int ph, k;
+
+ switch (pi->mode) {
+
+ case 0:
+ case 1:
+ case 2: w0(0x67); w2(1); w2(5);
+ ph = 0;
+ for(k=0;k<count;k++) {
+ w0(buf[k]);
+ w2(4+ph);
+ ph = 1 - ph;
+ }
+ w2(7); w2(4);
+ break;
+
+ case 3: w3(0xc0);
+ for(k=0;k<count;k++) w4(buf[k]);
+ w2(4);
+ break;
+
+ case 4: w3(0xc0);
+ for(k=0;k<(count/2);k++) w4w(((u16 *)buf)[k]);
+ w2(4);
+ break;
+
+ case 5: w3(0xc0);
+ for(k=0;k<(count/4);k++) w4l(((u32 *)buf)[k]);
+ w2(4);
+ break;
+
+ }
+}
+
+/* these macros access the EPAT registers in native addressing */
+
+#define WR(r,v) epat_write_regr(pi,2,r,v)
+#define RR(r) (epat_read_regr(pi,2,r))
+
+/* and these access the IDE task file */
+
+#define WRi(r,v) epat_write_regr(pi,0,r,v)
+#define RRi(r) (epat_read_regr(pi,0,r))
+
+/* FIXME: the CPP stuff should be fixed to handle multiple EPATs on a chain */
+
+#define CPP(x) w2(4);w0(0x22);w0(0xaa);w0(0x55);w0(0);w0(0xff);\
+ w0(0x87);w0(0x78);w0(x);w2(4);w2(5);w2(4);w0(0xff);
+
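+/* CPP(x) writes the chip's select sequence (0x22 0xaa 0x55 0x00 0xff
+   0x87 0x78) to the data port followed by the command byte x;
+   epat_connect() issues CPP(0) and CPP(0xe0) to initialize and attach,
+   epat_disconnect() issues CPP(0x30). */
+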
+static void epat_connect ( PIA *pi )
+
+{ pi->saved_r0 = r0();
+ pi->saved_r2 = r2();
+
+ /* Initialize the chip */
+ CPP(0);
+
+ if (epatc8) {
+ CPP(0x40);CPP(0xe0);
+ w0(0);w2(1);w2(4);
+ WR(0x8,0x12);WR(0xc,0x14);WR(0x12,0x10);
+ WR(0xe,0xf);WR(0xf,4);
+ /* WR(0xe,0xa);WR(0xf,4); */
+ WR(0xe,0xd);WR(0xf,0);
+ /* CPP(0x30); */
+ }
+
+ /* Connect to the chip */
+ CPP(0xe0);
+ w0(0);w2(1);w2(4); /* Idle into SPP */
+ if (pi->mode >= 3) {
+ w0(0);w2(1);w2(4);w2(0xc);
+ /* Request EPP */
+ w0(0x40);w2(6);w2(7);w2(4);w2(0xc);w2(4);
+ }
+
+ if (!epatc8) {
+ WR(8,0x10); WR(0xc,0x14); WR(0xa,0x38); WR(0x12,0x10);
+ }
+}
+
+static void epat_disconnect (PIA *pi)
+{ CPP(0x30);
+ w0(pi->saved_r0);
+ w2(pi->saved_r2);
+}
+
+static int epat_test_proto( PIA *pi, char * scratch, int verbose )
+
+{ int k, j, f, cc;
+ int e[2] = {0,0};
+
+ epat_connect(pi);
+ cc = RR(0xd);
+ epat_disconnect(pi);
+
+ epat_connect(pi);
+ for (j=0;j<2;j++) {
+ WRi(6,0xa0+j*0x10);
+ for (k=0;k<256;k++) {
+ WRi(2,k^0xaa);
+ WRi(3,k^0x55);
+ if (RRi(2) != (k^0xaa)) e[j]++;
+ }
+ }
+ epat_disconnect(pi);
+
+ f = 0;
+ epat_connect(pi);
+ WR(0x13,1); WR(0x13,0); WR(0xa,0x11);
+ epat_read_block(pi,scratch,512);
+
+ for (k=0;k<256;k++) {
+ if ((scratch[2*k] & 0xff) != k) f++;
+ if ((scratch[2*k+1] & 0xff) != (0xff-k)) f++;
+ }
+ epat_disconnect(pi);
+
+ if (verbose) {
+ printk("%s: epat: port 0x%x, mode %d, ccr %x, test=(%d,%d,%d)\n",
+ pi->device,pi->port,pi->mode,cc,e[0],e[1],f);
+ }
+
+ return (e[0] && e[1]) || f;
+}
+
+static void epat_log_adapter( PIA *pi, char * scratch, int verbose )
+
+{ int ver;
+ char *mode_string[6] =
+ {"4-bit","5/3","8-bit","EPP-8","EPP-16","EPP-32"};
+
+ epat_connect(pi);
+ WR(0xa,0x38); /* read the version code */
+ ver = RR(0xb);
+ epat_disconnect(pi);
+
+ printk("%s: epat %s, Shuttle EPAT chip %x at 0x%x, ",
+ pi->device,EPAT_VERSION,ver,pi->port);
+ printk("mode %d (%s), delay %d\n",pi->mode,
+ mode_string[pi->mode],pi->delay);
+
+}
+
+static struct pi_protocol epat = {
+ .owner = THIS_MODULE,
+ .name = "epat",
+ .max_mode = 6,
+ .epp_first = 3,
+ .default_delay = 1,
+ .max_units = 1,
+ .write_regr = epat_write_regr,
+ .read_regr = epat_read_regr,
+ .write_block = epat_write_block,
+ .read_block = epat_read_block,
+ .connect = epat_connect,
+ .disconnect = epat_disconnect,
+ .test_proto = epat_test_proto,
+ .log_adapter = epat_log_adapter,
+};
+
+static int __init epat_init(void)
+{
+#ifdef CONFIG_PARIDE_EPATC8
+ epatc8 = 1;
+#endif
+ return paride_register(&epat);
+}
+
+static void __exit epat_exit(void)
+{
+ paride_unregister(&epat);
+}
+
+MODULE_LICENSE("GPL");
+module_init(epat_init)
+module_exit(epat_exit)
diff --git a/drivers/block/paride/epia.c b/drivers/block/paride/epia.c
new file mode 100644
index 00000000..fb0e782d
--- /dev/null
+++ b/drivers/block/paride/epia.c
@@ -0,0 +1,316 @@
+/*
+ epia.c (c) 1997-8 Grant R. Guenther <grant@torque.net>
+ Under the terms of the GNU General Public License.
+
+ epia.c is a low-level protocol driver for Shuttle Technologies
+ EPIA parallel to IDE adapter chip. This device is now obsolete
+ and has been replaced with the EPAT chip, which is supported
+ by epat.c, however, some devices based on EPIA are still
+ available.
+
+*/
+
+/* Changes:
+
+ 1.01 GRG 1998.05.06 init_proto, release_proto
+ 1.02 GRG 1998.06.17 support older versions of EPIA
+
+*/
+
+#define EPIA_VERSION "1.02"
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/wait.h>
+#include <asm/io.h>
+
+#include "paride.h"
+
+/* mode codes: 0 nybble reads on port 1, 8-bit writes
+ 1 5/3 reads on ports 1 & 2, 8-bit writes
+ 2 8-bit reads and writes
+ 3 8-bit EPP mode
+ 4 16-bit EPP
+ 5 32-bit EPP
+*/
+
+#define j44(a,b) (((a>>4)&0x0f)+(b&0xf0))
+#define j53(a,b) (((a>>3)&0x1f)+((b<<4)&0xe0))
+
+/* cont = 0 IDE register file
+ cont = 1 IDE control registers
+*/
+
+static int cont_map[2] = { 0, 0x80 };
+
+static int epia_read_regr( PIA *pi, int cont, int regr )
+
+{ int a, b, r;
+
+ regr += cont_map[cont];
+
+ switch (pi->mode) {
+
+ case 0: r = regr^0x39;
+ w0(r); w2(1); w2(3); w0(r);
+ a = r1(); w2(1); b = r1(); w2(4);
+ return j44(a,b);
+
+ case 1: r = regr^0x31;
+ w0(r); w2(1); w0(r&0x37);
+ w2(3); w2(5); w0(r|0xf0);
+ a = r1(); b = r2(); w2(4);
+ return j53(a,b);
+
+ case 2: r = regr^0x29;
+ w0(r); w2(1); w2(0X21); w2(0x23);
+ a = r0(); w2(4);
+ return a;
+
+ case 3:
+ case 4:
+ case 5: w3(regr); w2(0x24); a = r4(); w2(4);
+ return a;
+
+ }
+ return -1;
+}
+
+static void epia_write_regr( PIA *pi, int cont, int regr, int val)
+
+{ int r;
+
+ regr += cont_map[cont];
+
+ switch (pi->mode) {
+
+ case 0:
+ case 1:
+ case 2: r = regr^0x19;
+ w0(r); w2(1); w0(val); w2(3); w2(4);
+ break;
+
+ case 3:
+ case 4:
+ case 5: r = regr^0x40;
+ w3(r); w4(val); w2(4);
+ break;
+ }
+}
+
+#define WR(r,v) epia_write_regr(pi,0,r,v)
+#define RR(r) (epia_read_regr(pi,0,r))
+
+/* The use of register 0x84 is entirely unclear - it seems to control
+ some EPP counters ... currently we know about 3 different block
+ sizes: the standard 512 byte reads and writes, 12 byte writes and
+ 2048 byte reads (the last two being used in the CDrom drivers.
+*/
+
+static void epia_connect ( PIA *pi )
+
+{ pi->saved_r0 = r0();
+ pi->saved_r2 = r2();
+
+ w2(4); w0(0xa0); w0(0x50); w0(0xc0); w0(0x30); w0(0xa0); w0(0);
+ w2(1); w2(4);
+ if (pi->mode >= 3) {
+ w0(0xa); w2(1); w2(4); w0(0x82); w2(4); w2(0xc); w2(4);
+ w2(0x24); w2(0x26); w2(4);
+ }
+ WR(0x86,8);
+}
+
+static void epia_disconnect ( PIA *pi )
+
+{ /* WR(0x84,0x10); */
+ w0(pi->saved_r0);
+ w2(1); w2(4);
+ w0(pi->saved_r0);
+ w2(pi->saved_r2);
+}
+
+static void epia_read_block( PIA *pi, char * buf, int count )
+
+{ int k, ph, a, b;
+
+ switch (pi->mode) {
+
+ case 0: w0(0x81); w2(1); w2(3); w0(0xc1);
+ ph = 1;
+ for (k=0;k<count;k++) {
+ w2(2+ph); a = r1();
+ w2(4+ph); b = r1();
+ buf[k] = j44(a,b);
+ ph = 1 - ph;
+ }
+ w0(0); w2(4);
+ break;
+
+ case 1: w0(0x91); w2(1); w0(0x10); w2(3);
+ w0(0x51); w2(5); w0(0xd1);
+ ph = 1;
+ for (k=0;k<count;k++) {
+ w2(4+ph);
+ a = r1(); b = r2();
+ buf[k] = j53(a,b);
+ ph = 1 - ph;
+ }
+ w0(0); w2(4);
+ break;
+
+ case 2: w0(0x89); w2(1); w2(0x23); w2(0x21);
+ ph = 1;
+ for (k=0;k<count;k++) {
+ w2(0x24+ph);
+ buf[k] = r0();
+ ph = 1 - ph;
+ }
+ w2(6); w2(4);
+ break;
+
+ case 3: if (count > 512) WR(0x84,3);
+ w3(0); w2(0x24);
+ for (k=0;k<count;k++) buf[k] = r4();
+ w2(4); WR(0x84,0);
+ break;
+
+ case 4: if (count > 512) WR(0x84,3);
+ w3(0); w2(0x24);
+ for (k=0;k<count/2;k++) ((u16 *)buf)[k] = r4w();
+ w2(4); WR(0x84,0);
+ break;
+
+ case 5: if (count > 512) WR(0x84,3);
+ w3(0); w2(0x24);
+ for (k=0;k<count/4;k++) ((u32 *)buf)[k] = r4l();
+ w2(4); WR(0x84,0);
+ break;
+
+ }
+}
+
+static void epia_write_block( PIA *pi, char * buf, int count )
+
+{ int ph, k, last, d;
+
+ switch (pi->mode) {
+
+ case 0:
+ case 1:
+ case 2: w0(0xa1); w2(1); w2(3); w2(1); w2(5);
+ ph = 0; last = 0x8000;
+ for (k=0;k<count;k++) {
+ d = buf[k];
+ if (d != last) { last = d; w0(d); }
+ w2(4+ph);
+ ph = 1 - ph;
+ }
+ w2(7); w2(4);
+ break;
+
+ case 3: if (count < 512) WR(0x84,1);
+ w3(0x40);
+ for (k=0;k<count;k++) w4(buf[k]);
+ if (count < 512) WR(0x84,0);
+ break;
+
+ case 4: if (count < 512) WR(0x84,1);
+ w3(0x40);
+ for (k=0;k<count/2;k++) w4w(((u16 *)buf)[k]);
+ if (count < 512) WR(0x84,0);
+ break;
+
+ case 5: if (count < 512) WR(0x84,1);
+ w3(0x40);
+ for (k=0;k<count/4;k++) w4l(((u32 *)buf)[k]);
+ if (count < 512) WR(0x84,0);
+ break;
+
+ }
+
+}
+
+static int epia_test_proto( PIA *pi, char * scratch, int verbose )
+
+{ int j, k, f;
+ int e[2] = {0,0};
+
+ epia_connect(pi);
+ for (j=0;j<2;j++) {
+ WR(6,0xa0+j*0x10);
+ for (k=0;k<256;k++) {
+ WR(2,k^0xaa);
+ WR(3,k^0x55);
+ if (RR(2) != (k^0xaa)) e[j]++;
+ }
+ WR(2,1); WR(3,1);
+ }
+ epia_disconnect(pi);
+
+ f = 0;
+ epia_connect(pi);
+ WR(0x84,8);
+ epia_read_block(pi,scratch,512);
+ for (k=0;k<256;k++) {
+ if ((scratch[2*k] & 0xff) != ((k+1) & 0xff)) f++;
+ if ((scratch[2*k+1] & 0xff) != ((-2-k) & 0xff)) f++;
+ }
+ WR(0x84,0);
+ epia_disconnect(pi);
+
+ if (verbose) {
+ printk("%s: epia: port 0x%x, mode %d, test=(%d,%d,%d)\n",
+ pi->device,pi->port,pi->mode,e[0],e[1],f);
+ }
+
+ return (e[0] && e[1]) || f;
+
+}
+
+
+static void epia_log_adapter( PIA *pi, char * scratch, int verbose )
+
+{ char *mode_string[6] = {"4-bit","5/3","8-bit",
+ "EPP-8","EPP-16","EPP-32"};
+
+ printk("%s: epia %s, Shuttle EPIA at 0x%x, ",
+ pi->device,EPIA_VERSION,pi->port);
+ printk("mode %d (%s), delay %d\n",pi->mode,
+ mode_string[pi->mode],pi->delay);
+
+}
+
+static struct pi_protocol epia = {
+ .owner = THIS_MODULE,
+ .name = "epia",
+ .max_mode = 6,
+ .epp_first = 3,
+ .default_delay = 1,
+ .max_units = 1,
+ .write_regr = epia_write_regr,
+ .read_regr = epia_read_regr,
+ .write_block = epia_write_block,
+ .read_block = epia_read_block,
+ .connect = epia_connect,
+ .disconnect = epia_disconnect,
+ .test_proto = epia_test_proto,
+ .log_adapter = epia_log_adapter,
+};
+
+static int __init epia_init(void)
+{
+ return paride_register(&epia);
+}
+
+static void __exit epia_exit(void)
+{
+ paride_unregister(&epia);
+}
+
+MODULE_LICENSE("GPL");
+module_init(epia_init)
+module_exit(epia_exit)
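[Editor's note] The 4-bit modes above read each data byte as two nybbles on the parallel-port status lines and stitch them back together with the j44()/j53() macros defined near the top of epia.c. The following stand-alone sketch (not part of the commit; the status values are made up) shows that arithmetic in isolation:

    #include <stdio.h>

    /* Copies of epia.c's nybble-merging macros, reproduced here only for
       illustration: j44() joins two 4-bit reads, j53() joins a 5-bit and a
       3-bit read. */
    #define j44(a,b) (((a>>4)&0x0f)+(b&0xf0))
    #define j53(a,b) (((a>>3)&0x1f)+((b<<4)&0xe0))

    int main(void)
    {
            int a = 0x78, b = 0xa8;   /* hypothetical raw status-port reads */

            /* low nybble from bits 7..4 of the first read, high nybble from
               bits 7..4 of the second read: 0x07 + 0xa0 = 0xa7 */
            printf("j44 = 0x%02x\n", j44(a, b));

            /* five bits from the first read, three from the second:
               0x0f + 0x80 = 0x8f */
            printf("j53 = 0x%02x\n", j53(a, b));
            return 0;
    }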
diff --git a/drivers/block/paride/fit2.c b/drivers/block/paride/fit2.c
new file mode 100644
index 00000000..38128375
--- /dev/null
+++ b/drivers/block/paride/fit2.c
@@ -0,0 +1,151 @@
+/*
+ fit2.c (c) 1998 Grant R. Guenther <grant@torque.net>
+ Under the terms of the GNU General Public License.
+
+ fit2.c is a low-level protocol driver for the older version
+ of the Fidelity International Technology parallel port adapter.
+ This adapter is used in their TransDisk 2000 and older TransDisk
+ 3000 portable hard-drives. As far as I can tell, this device
+ supports 4-bit mode _only_.
+
+ Newer models of the FIT products use an enhanced protocol.
+ The "fit3" protocol module should support current drives.
+
+*/
+
+#define FIT2_VERSION "1.0"
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/wait.h>
+#include <asm/io.h>
+
+#include "paride.h"
+
+#define j44(a,b) (((a>>4)&0x0f)|(b&0xf0))
+
+/* cont = 0 - access the IDE register file
+ cont = 1 - access the IDE command set
+
+NB: The FIT adapter does not appear to use the control registers.
+So, we map ALT_STATUS to STATUS and NO-OP writes to the device
+control register - this means that IDE reset will not work on these
+devices.
+
+*/
+
+static void fit2_write_regr( PIA *pi, int cont, int regr, int val)
+
+{ if (cont == 1) return;
+ w2(0xc); w0(regr); w2(4); w0(val); w2(5); w0(0); w2(4);
+}
+
+static int fit2_read_regr( PIA *pi, int cont, int regr )
+
+{ int a, b, r;
+
+ if (cont) {
+ if (regr != 6) return 0xff;
+ r = 7;
+ } else r = regr + 0x10;
+
+ w2(0xc); w0(r); w2(4); w2(5);
+ w0(0); a = r1();
+ w0(1); b = r1();
+ w2(4);
+
+ return j44(a,b);
+
+}
+
+static void fit2_read_block( PIA *pi, char * buf, int count )
+
+{ int k, a, b, c, d;
+
+ w2(0xc); w0(0x10);
+
+ for (k=0;k<count/4;k++) {
+
+ w2(4); w2(5);
+ w0(0); a = r1(); w0(1); b = r1();
+ w0(3); c = r1(); w0(2); d = r1();
+ buf[4*k+0] = j44(a,b);
+ buf[4*k+1] = j44(d,c);
+
+ w2(4); w2(5);
+ a = r1(); w0(3); b = r1();
+ w0(1); c = r1(); w0(0); d = r1();
+ buf[4*k+2] = j44(d,c);
+ buf[4*k+3] = j44(a,b);
+
+ }
+
+ w2(4);
+
+}
+
+static void fit2_write_block( PIA *pi, char * buf, int count )
+
+{ int k;
+
+
+ w2(0xc); w0(0);
+ for (k=0;k<count/2;k++) {
+ w2(4); w0(buf[2*k]);
+ w2(5); w0(buf[2*k+1]);
+ }
+ w2(4);
+}
+
+static void fit2_connect ( PIA *pi )
+
+{ pi->saved_r0 = r0();
+ pi->saved_r2 = r2();
+ w2(0xcc);
+}
+
+static void fit2_disconnect ( PIA *pi )
+
+{ w0(pi->saved_r0);
+ w2(pi->saved_r2);
+}
+
+static void fit2_log_adapter( PIA *pi, char * scratch, int verbose )
+
+{ printk("%s: fit2 %s, FIT 2000 adapter at 0x%x, delay %d\n",
+ pi->device,FIT2_VERSION,pi->port,pi->delay);
+
+}
+
+static struct pi_protocol fit2 = {
+ .owner = THIS_MODULE,
+ .name = "fit2",
+ .max_mode = 1,
+ .epp_first = 2,
+ .default_delay = 1,
+ .max_units = 1,
+ .write_regr = fit2_write_regr,
+ .read_regr = fit2_read_regr,
+ .write_block = fit2_write_block,
+ .read_block = fit2_read_block,
+ .connect = fit2_connect,
+ .disconnect = fit2_disconnect,
+ .log_adapter = fit2_log_adapter,
+};
+
+static int __init fit2_init(void)
+{
+ return paride_register(&fit2);
+}
+
+static void __exit fit2_exit(void)
+{
+ paride_unregister(&fit2);
+}
+
+MODULE_LICENSE("GPL");
+module_init(fit2_init)
+module_exit(fit2_exit)
diff --git a/drivers/block/paride/fit3.c b/drivers/block/paride/fit3.c
new file mode 100644
index 00000000..275d2694
--- /dev/null
+++ b/drivers/block/paride/fit3.c
@@ -0,0 +1,211 @@
+/*
+ fit3.c (c) 1998 Grant R. Guenther <grant@torque.net>
+ Under the terms of the GNU General Public License.
+
+ fit3.c is a low-level protocol driver for newer models
+ of the Fidelity International Technology parallel port adapter.
+ This adapter is used in their TransDisk 3000 portable
+ hard-drives, as well as CD-ROM, PD-CD and other devices.
+
+ The TD-2000 and certain older devices use a different protocol.
+ Try the fit2 protocol module with them.
+
+ NB: The FIT adapters do not appear to support the control
+ registers. So, we map ALT_STATUS to STATUS and NO-OP writes
+ to the device control register - this means that IDE reset
+ will not work on these devices.
+
+*/
+
+#define FIT3_VERSION "1.0"
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/wait.h>
+#include <asm/io.h>
+
+#include "paride.h"
+
+#define j44(a,b) (((a>>3)&0x0f)|((b<<1)&0xf0))
+
+#define w7(byte) {out_p(7,byte);}
+#define r7() (in_p(7) & 0xff)
+
+/* cont = 0 - access the IDE register file
+ cont = 1 - access the IDE command set
+
+*/
+
+static void fit3_write_regr( PIA *pi, int cont, int regr, int val)
+
+{ if (cont == 1) return;
+
+ switch (pi->mode) {
+
+ case 0:
+ case 1: w2(0xc); w0(regr); w2(0x8); w2(0xc);
+ w0(val); w2(0xd);
+ w0(0); w2(0xc);
+ break;
+
+ case 2: w2(0xc); w0(regr); w2(0x8); w2(0xc);
+ w4(val); w4(0);
+ w2(0xc);
+ break;
+
+ }
+}
+
+static int fit3_read_regr( PIA *pi, int cont, int regr )
+
+{ int a, b;
+
+ if (cont) {
+ if (regr != 6) return 0xff;
+ regr = 7;
+ }
+
+ switch (pi->mode) {
+
+ case 0: w2(0xc); w0(regr + 0x10); w2(0x8); w2(0xc);
+ w2(0xd); a = r1();
+ w2(0xf); b = r1();
+ w2(0xc);
+ return j44(a,b);
+
+ case 1: w2(0xc); w0(regr + 0x90); w2(0x8); w2(0xc);
+ w2(0xec); w2(0xee); w2(0xef); a = r0();
+ w2(0xc);
+ return a;
+
+ case 2: w2(0xc); w0(regr + 0x90); w2(0x8); w2(0xc);
+ w2(0xec);
+ a = r4(); b = r4();
+ w2(0xc);
+ return a;
+
+ }
+ return -1;
+
+}
+
+static void fit3_read_block( PIA *pi, char * buf, int count )
+
+{ int k, a, b, c, d;
+
+ switch (pi->mode) {
+
+ case 0: w2(0xc); w0(0x10); w2(0x8); w2(0xc);
+ for (k=0;k<count/2;k++) {
+ w2(0xd); a = r1();
+ w2(0xf); b = r1();
+ w2(0xc); c = r1();
+ w2(0xe); d = r1();
+ buf[2*k ] = j44(a,b);
+ buf[2*k+1] = j44(c,d);
+ }
+ w2(0xc);
+ break;
+
+ case 1: w2(0xc); w0(0x90); w2(0x8); w2(0xc);
+ w2(0xec); w2(0xee);
+ for (k=0;k<count/2;k++) {
+ w2(0xef); a = r0();
+ w2(0xee); b = r0();
+ buf[2*k ] = a;
+ buf[2*k+1] = b;
+ }
+ w2(0xec);
+ w2(0xc);
+ break;
+
+ case 2: w2(0xc); w0(0x90); w2(0x8); w2(0xc);
+ w2(0xec);
+ for (k=0;k<count;k++) buf[k] = r4();
+ w2(0xc);
+ break;
+
+ }
+}
+
+static void fit3_write_block( PIA *pi, char * buf, int count )
+
+{ int k;
+
+ switch (pi->mode) {
+
+ case 0:
+ case 1: w2(0xc); w0(0); w2(0x8); w2(0xc);
+ for (k=0;k<count/2;k++) {
+ w0(buf[2*k ]); w2(0xd);
+ w0(buf[2*k+1]); w2(0xc);
+ }
+ break;
+
+ case 2: w2(0xc); w0(0); w2(0x8); w2(0xc);
+ for (k=0;k<count;k++) w4(buf[k]);
+ w2(0xc);
+ break;
+ }
+}
+
+static void fit3_connect ( PIA *pi )
+
+{ pi->saved_r0 = r0();
+ pi->saved_r2 = r2();
+ w2(0xc); w0(0); w2(0xa);
+ if (pi->mode == 2) {
+ w2(0xc); w0(0x9); w2(0x8); w2(0xc);
+ }
+}
+
+static void fit3_disconnect ( PIA *pi )
+
+{ w2(0xc); w0(0xa); w2(0x8); w2(0xc);
+ w0(pi->saved_r0);
+ w2(pi->saved_r2);
+}
+
+static void fit3_log_adapter( PIA *pi, char * scratch, int verbose )
+
+{ char *mode_string[3] = {"4-bit","8-bit","EPP"};
+
+ printk("%s: fit3 %s, FIT 3000 adapter at 0x%x, "
+ "mode %d (%s), delay %d\n",
+ pi->device,FIT3_VERSION,pi->port,
+ pi->mode,mode_string[pi->mode],pi->delay);
+
+}
+
+static struct pi_protocol fit3 = {
+ .owner = THIS_MODULE,
+ .name = "fit3",
+ .max_mode = 3,
+ .epp_first = 2,
+ .default_delay = 1,
+ .max_units = 1,
+ .write_regr = fit3_write_regr,
+ .read_regr = fit3_read_regr,
+ .write_block = fit3_write_block,
+ .read_block = fit3_read_block,
+ .connect = fit3_connect,
+ .disconnect = fit3_disconnect,
+ .log_adapter = fit3_log_adapter,
+};
+
+static int __init fit3_init(void)
+{
+ return paride_register(&fit3);
+}
+
+static void __exit fit3_exit(void)
+{
+ paride_unregister(&fit3);
+}
+
+MODULE_LICENSE("GPL");
+module_init(fit3_init)
+module_exit(fit3_exit)
diff --git a/drivers/block/paride/friq.c b/drivers/block/paride/friq.c
new file mode 100644
index 00000000..4f2ba244
--- /dev/null
+++ b/drivers/block/paride/friq.c
@@ -0,0 +1,276 @@
+/*
+ friq.c (c) 1998 Grant R. Guenther <grant@torque.net>
+ Under the terms of the GNU General Public License
+
+ friq.c is a low-level protocol driver for the Freecom "IQ"
+ parallel port IDE adapter. Early versions of this adapter
+ use the 'frpw' protocol.
+
+ Freecom uses this adapter in a battery powered external
+ CD-ROM drive. It is also used in LS-120 drives by
+ Maxell and Panasonic, and other devices.
+
+ The battery powered drive requires software support to
+ control the power to the drive. This module enables the
+ drive power when the high level driver (pcd) is loaded
+ and disables it when the module is unloaded. Note, if
+ the friq module is built in to the kernel, the power
+ will never be switched off, so other means should be
+ used to conserve battery power.
+
+*/
+
+/* Changes:
+
+ 1.01 GRG 1998.12.20 Added support for soft power switch
+*/
+
+#define FRIQ_VERSION "1.01"
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/wait.h>
+#include <asm/io.h>
+
+#include "paride.h"
+
+#define CMD(x) w2(4);w0(0xff);w0(0xff);w0(0x73);w0(0x73);\
+ w0(0xc9);w0(0xc9);w0(0x26);w0(0x26);w0(x);w0(x);
+
+#define j44(l,h) (((l>>4)&0x0f)|(h&0xf0))
+
+/* cont = 0 - access the IDE register file
+ cont = 1 - access the IDE command set
+*/
+
+static int cont_map[2] = { 0x08, 0x10 };
+
+static int friq_read_regr( PIA *pi, int cont, int regr )
+
+{ int h,l,r;
+
+ r = regr + cont_map[cont];
+
+ CMD(r);
+ w2(6); l = r1();
+ w2(4); h = r1();
+ w2(4);
+
+ return j44(l,h);
+
+}
+
+static void friq_write_regr( PIA *pi, int cont, int regr, int val)
+
+{ int r;
+
+ r = regr + cont_map[cont];
+
+ CMD(r);
+ w0(val);
+ w2(5);w2(7);w2(5);w2(4);
+}
+
+static void friq_read_block_int( PIA *pi, char * buf, int count, int regr )
+
+{ int h, l, k, ph;
+
+ switch(pi->mode) {
+
+ case 0: CMD(regr);
+ for (k=0;k<count;k++) {
+ w2(6); l = r1();
+ w2(4); h = r1();
+ buf[k] = j44(l,h);
+ }
+ w2(4);
+ break;
+
+ case 1: ph = 2;
+ CMD(regr+0xc0);
+ w0(0xff);
+ for (k=0;k<count;k++) {
+ w2(0xa4 + ph);
+ buf[k] = r0();
+ ph = 2 - ph;
+ }
+ w2(0xac); w2(0xa4); w2(4);
+ break;
+
+ case 2: CMD(regr+0x80);
+ for (k=0;k<count-2;k++) buf[k] = r4();
+ w2(0xac); w2(0xa4);
+ buf[count-2] = r4();
+ buf[count-1] = r4();
+ w2(4);
+ break;
+
+ case 3: CMD(regr+0x80);
+ for (k=0;k<(count/2)-1;k++) ((u16 *)buf)[k] = r4w();
+ w2(0xac); w2(0xa4);
+ buf[count-2] = r4();
+ buf[count-1] = r4();
+ w2(4);
+ break;
+
+ case 4: CMD(regr+0x80);
+ for (k=0;k<(count/4)-1;k++) ((u32 *)buf)[k] = r4l();
+ buf[count-4] = r4();
+ buf[count-3] = r4();
+ w2(0xac); w2(0xa4);
+ buf[count-2] = r4();
+ buf[count-1] = r4();
+ w2(4);
+ break;
+
+ }
+}
+
+static void friq_read_block( PIA *pi, char * buf, int count)
+
+{ friq_read_block_int(pi,buf,count,0x08);
+}
+
+static void friq_write_block( PIA *pi, char * buf, int count )
+
+{ int k;
+
+ switch(pi->mode) {
+
+ case 0:
+ case 1: CMD(8); w2(5);
+ for (k=0;k<count;k++) {
+ w0(buf[k]);
+ w2(7);w2(5);
+ }
+ w2(4);
+ break;
+
+ case 2: CMD(0xc8); w2(5);
+ for (k=0;k<count;k++) w4(buf[k]);
+ w2(4);
+ break;
+
+ case 3: CMD(0xc8); w2(5);
+ for (k=0;k<count/2;k++) w4w(((u16 *)buf)[k]);
+ w2(4);
+ break;
+
+ case 4: CMD(0xc8); w2(5);
+ for (k=0;k<count/4;k++) w4l(((u32 *)buf)[k]);
+ w2(4);
+ break;
+ }
+}
+
+static void friq_connect ( PIA *pi )
+
+{ pi->saved_r0 = r0();
+ pi->saved_r2 = r2();
+ w2(4);
+}
+
+static void friq_disconnect ( PIA *pi )
+
+{ CMD(0x20);
+ w0(pi->saved_r0);
+ w2(pi->saved_r2);
+}
+
+static int friq_test_proto( PIA *pi, char * scratch, int verbose )
+
+{ int j, k, r;
+ int e[2] = {0,0};
+
+ pi->saved_r0 = r0();
+ w0(0xff); udelay(20); CMD(0x3d); /* turn the power on */
+ udelay(500);
+ w0(pi->saved_r0);
+
+ friq_connect(pi);
+ for (j=0;j<2;j++) {
+ friq_write_regr(pi,0,6,0xa0+j*0x10);
+ for (k=0;k<256;k++) {
+ friq_write_regr(pi,0,2,k^0xaa);
+ friq_write_regr(pi,0,3,k^0x55);
+ if (friq_read_regr(pi,0,2) != (k^0xaa)) e[j]++;
+ }
+ }
+ friq_disconnect(pi);
+
+ friq_connect(pi);
+ friq_read_block_int(pi,scratch,512,0x10);
+ r = 0;
+ for (k=0;k<128;k++) if (scratch[k] != k) r++;
+ friq_disconnect(pi);
+
+ if (verbose) {
+ printk("%s: friq: port 0x%x, mode %d, test=(%d,%d,%d)\n",
+ pi->device,pi->port,pi->mode,e[0],e[1],r);
+ }
+
+ return (r || (e[0] && e[1]));
+}
+
+
+static void friq_log_adapter( PIA *pi, char * scratch, int verbose )
+
+{ char *mode_string[6] = {"4-bit","8-bit",
+ "EPP-8","EPP-16","EPP-32"};
+
+ printk("%s: friq %s, Freecom IQ ASIC-2 adapter at 0x%x, ", pi->device,
+ FRIQ_VERSION,pi->port);
+ printk("mode %d (%s), delay %d\n",pi->mode,
+ mode_string[pi->mode],pi->delay);
+
+ pi->private = 1;
+ friq_connect(pi);
+ CMD(0x9e); /* disable sleep timer */
+ friq_disconnect(pi);
+
+}
+
+static void friq_release_proto( PIA *pi)
+{
+ if (pi->private) { /* turn off the power */
+ friq_connect(pi);
+ CMD(0x1d); CMD(0x1e);
+ friq_disconnect(pi);
+ pi->private = 0;
+ }
+}
+
+static struct pi_protocol friq = {
+ .owner = THIS_MODULE,
+ .name = "friq",
+ .max_mode = 5,
+ .epp_first = 2,
+ .default_delay = 1,
+ .max_units = 1,
+ .write_regr = friq_write_regr,
+ .read_regr = friq_read_regr,
+ .write_block = friq_write_block,
+ .read_block = friq_read_block,
+ .connect = friq_connect,
+ .disconnect = friq_disconnect,
+ .test_proto = friq_test_proto,
+ .log_adapter = friq_log_adapter,
+ .release_proto = friq_release_proto,
+};
+
+static int __init friq_init(void)
+{
+ return paride_register(&friq);
+}
+
+static void __exit friq_exit(void)
+{
+ paride_unregister(&friq);
+}
+
+MODULE_LICENSE("GPL");
+module_init(friq_init)
+module_exit(friq_exit)
diff --git a/drivers/block/paride/frpw.c b/drivers/block/paride/frpw.c
new file mode 100644
index 00000000..c3cde364
--- /dev/null
+++ b/drivers/block/paride/frpw.c
@@ -0,0 +1,313 @@
+/*
+ frpw.c (c) 1996-8 Grant R. Guenther <grant@torque.net>
+ Under the terms of the GNU General Public License
+
+ frpw.c is a low-level protocol driver for the Freecom "Power"
+ parallel port IDE adapter.
+
+ Some applications of this adapter may require a "printer" reset
+ prior to loading the driver. This can be done by loading and
+ unloading the "lp" driver, or it can be done by this driver
+ if you define FRPW_HARD_RESET. The latter is not recommended
+ as it may upset devices on other ports.
+
+*/
+
+/* Changes:
+
+ 1.01 GRG 1998.05.06 init_proto, release_proto
+ fix chip detect
+ added EPP-16 and EPP-32
+ 1.02 GRG 1998.09.23 added hard reset to initialisation process
+ 1.03 GRG 1998.12.14 made hard reset conditional
+
+*/
+
+#define FRPW_VERSION "1.03"
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/wait.h>
+#include <asm/io.h>
+
+#include "paride.h"
+
+#define cec4 w2(0xc);w2(0xe);w2(0xe);w2(0xc);w2(4);w2(4);w2(4);
+#define j44(l,h) (((l>>4)&0x0f)|(h&0xf0))
+
+/* cont = 0 - access the IDE register file
+ cont = 1 - access the IDE command set
+*/
+
+static int cont_map[2] = { 0x08, 0x10 };
+
+static int frpw_read_regr( PIA *pi, int cont, int regr )
+
+{ int h,l,r;
+
+ r = regr + cont_map[cont];
+
+ w2(4);
+ w0(r); cec4;
+ w2(6); l = r1();
+ w2(4); h = r1();
+ w2(4);
+
+ return j44(l,h);
+
+}
+
+static void frpw_write_regr( PIA *pi, int cont, int regr, int val)
+
+{ int r;
+
+ r = regr + cont_map[cont];
+
+ w2(4); w0(r); cec4;
+ w0(val);
+ w2(5);w2(7);w2(5);w2(4);
+}
+
+static void frpw_read_block_int( PIA *pi, char * buf, int count, int regr )
+
+{ int h, l, k, ph;
+
+ switch(pi->mode) {
+
+ case 0: w2(4); w0(regr); cec4;
+ for (k=0;k<count;k++) {
+ w2(6); l = r1();
+ w2(4); h = r1();
+ buf[k] = j44(l,h);
+ }
+ w2(4);
+ break;
+
+ case 1: ph = 2;
+ w2(4); w0(regr + 0xc0); cec4;
+ w0(0xff);
+ for (k=0;k<count;k++) {
+ w2(0xa4 + ph);
+ buf[k] = r0();
+ ph = 2 - ph;
+ }
+ w2(0xac); w2(0xa4); w2(4);
+ break;
+
+ case 2: w2(4); w0(regr + 0x80); cec4;
+ for (k=0;k<count;k++) buf[k] = r4();
+ w2(0xac); w2(0xa4);
+ w2(4);
+ break;
+
+ case 3: w2(4); w0(regr + 0x80); cec4;
+ for (k=0;k<count-2;k++) buf[k] = r4();
+ w2(0xac); w2(0xa4);
+ buf[count-2] = r4();
+ buf[count-1] = r4();
+ w2(4);
+ break;
+
+ case 4: w2(4); w0(regr + 0x80); cec4;
+ for (k=0;k<(count/2)-1;k++) ((u16 *)buf)[k] = r4w();
+ w2(0xac); w2(0xa4);
+ buf[count-2] = r4();
+ buf[count-1] = r4();
+ w2(4);
+ break;
+
+ case 5: w2(4); w0(regr + 0x80); cec4;
+ for (k=0;k<(count/4)-1;k++) ((u32 *)buf)[k] = r4l();
+ buf[count-4] = r4();
+ buf[count-3] = r4();
+ w2(0xac); w2(0xa4);
+ buf[count-2] = r4();
+ buf[count-1] = r4();
+ w2(4);
+ break;
+
+ }
+}
+
+static void frpw_read_block( PIA *pi, char * buf, int count)
+
+{ frpw_read_block_int(pi,buf,count,0x08);
+}
+
+static void frpw_write_block( PIA *pi, char * buf, int count )
+
+{ int k;
+
+ switch(pi->mode) {
+
+ case 0:
+ case 1:
+ case 2: w2(4); w0(8); cec4; w2(5);
+ for (k=0;k<count;k++) {
+ w0(buf[k]);
+ w2(7);w2(5);
+ }
+ w2(4);
+ break;
+
+ case 3: w2(4); w0(0xc8); cec4; w2(5);
+ for (k=0;k<count;k++) w4(buf[k]);
+ w2(4);
+ break;
+
+ case 4: w2(4); w0(0xc8); cec4; w2(5);
+ for (k=0;k<count/2;k++) w4w(((u16 *)buf)[k]);
+ w2(4);
+ break;
+
+ case 5: w2(4); w0(0xc8); cec4; w2(5);
+ for (k=0;k<count/4;k++) w4l(((u32 *)buf)[k]);
+ w2(4);
+ break;
+ }
+}
+
+static void frpw_connect ( PIA *pi )
+
+{ pi->saved_r0 = r0();
+ pi->saved_r2 = r2();
+ w2(4);
+}
+
+static void frpw_disconnect ( PIA *pi )
+
+{ w2(4); w0(0x20); cec4;
+ w0(pi->saved_r0);
+ w2(pi->saved_r2);
+}
+
+/* Stub logic to see if PNP string is available - used to distinguish
+ between the Xilinx and ASIC implementations of the Freecom adapter.
+*/
+
+static int frpw_test_pnp ( PIA *pi )
+
+/* returns chip_type: 0 = Xilinx, 1 = ASIC */
+
+{ int olddelay, a, b;
+
+#ifdef FRPW_HARD_RESET
+ w0(0); w2(8); udelay(50); w2(0xc); /* parallel bus reset */
+ mdelay(1500);
+#endif
+
+ olddelay = pi->delay;
+ pi->delay = 10;
+
+ pi->saved_r0 = r0();
+ pi->saved_r2 = r2();
+
+ w2(4); w0(4); w2(6); w2(7);
+ a = r1() & 0xff; w2(4); b = r1() & 0xff;
+ w2(0xc); w2(0xe); w2(4);
+
+ pi->delay = olddelay;
+ w0(pi->saved_r0);
+ w2(pi->saved_r2);
+
+ return ((~a&0x40) && (b&0x40));
+}
+
+/* We use the pi->private to remember the result of the PNP test.
+ To make this work, private = port*2 + chip. Yes, I know it's
+ a hack :-(
+*/
+
+static int frpw_test_proto( PIA *pi, char * scratch, int verbose )
+
+{ int j, k, r;
+ int e[2] = {0,0};
+
+ if ((pi->private>>1) != pi->port)
+ pi->private = frpw_test_pnp(pi) + 2*pi->port;
+
+ if (((pi->private%2) == 0) && (pi->mode > 2)) {
+ if (verbose)
+ printk("%s: frpw: Xilinx does not support mode %d\n",
+ pi->device, pi->mode);
+ return 1;
+ }
+
+ if (((pi->private%2) == 1) && (pi->mode == 2)) {
+ if (verbose)
+ printk("%s: frpw: ASIC does not support mode 2\n",
+ pi->device);
+ return 1;
+ }
+
+ frpw_connect(pi);
+ for (j=0;j<2;j++) {
+ frpw_write_regr(pi,0,6,0xa0+j*0x10);
+ for (k=0;k<256;k++) {
+ frpw_write_regr(pi,0,2,k^0xaa);
+ frpw_write_regr(pi,0,3,k^0x55);
+ if (frpw_read_regr(pi,0,2) != (k^0xaa)) e[j]++;
+ }
+ }
+ frpw_disconnect(pi);
+
+ frpw_connect(pi);
+ frpw_read_block_int(pi,scratch,512,0x10);
+ r = 0;
+ for (k=0;k<128;k++) if (scratch[k] != k) r++;
+ frpw_disconnect(pi);
+
+ if (verbose) {
+ printk("%s: frpw: port 0x%x, chip %ld, mode %d, test=(%d,%d,%d)\n",
+ pi->device,pi->port,(pi->private%2),pi->mode,e[0],e[1],r);
+ }
+
+ return (r || (e[0] && e[1]));
+}
+
+
+static void frpw_log_adapter( PIA *pi, char * scratch, int verbose )
+
+{ char *mode_string[6] = {"4-bit","8-bit","EPP",
+ "EPP-8","EPP-16","EPP-32"};
+
+ printk("%s: frpw %s, Freecom (%s) adapter at 0x%x, ", pi->device,
+ FRPW_VERSION,((pi->private%2) == 0)?"Xilinx":"ASIC",pi->port);
+ printk("mode %d (%s), delay %d\n",pi->mode,
+ mode_string[pi->mode],pi->delay);
+
+}
+
+static struct pi_protocol frpw = {
+ .owner = THIS_MODULE,
+ .name = "frpw",
+ .max_mode = 6,
+ .epp_first = 2,
+ .default_delay = 2,
+ .max_units = 1,
+ .write_regr = frpw_write_regr,
+ .read_regr = frpw_read_regr,
+ .write_block = frpw_write_block,
+ .read_block = frpw_read_block,
+ .connect = frpw_connect,
+ .disconnect = frpw_disconnect,
+ .test_proto = frpw_test_proto,
+ .log_adapter = frpw_log_adapter,
+};
+
+static int __init frpw_init(void)
+{
+ return paride_register(&frpw);
+}
+
+static void __exit frpw_exit(void)
+{
+ paride_unregister(&frpw);
+}
+
+MODULE_LICENSE("GPL");
+module_init(frpw_init)
+module_exit(frpw_exit)
diff --git a/drivers/block/paride/kbic.c b/drivers/block/paride/kbic.c
new file mode 100644
index 00000000..35999c41
--- /dev/null
+++ b/drivers/block/paride/kbic.c
@@ -0,0 +1,305 @@
+/*
+ kbic.c (c) 1997-8 Grant R. Guenther <grant@torque.net>
+ Under the terms of the GNU General Public License.
+
+ This is a low-level driver for the KBIC-951A and KBIC-971A
+ parallel to IDE adapter chips from KingByte Information Systems.
+
+   The chips are almost identical; however, the wakeup code
+ required for the 971A interferes with the correct operation of
+ the 951A, so this driver registers itself twice, once for
+ each chip.
+
+*/
+
+/* Changes:
+
+ 1.01 GRG 1998.05.06 init_proto, release_proto
+
+*/
+
+#define KBIC_VERSION "1.01"
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/wait.h>
+#include <asm/io.h>
+
+#include "paride.h"
+
+#define r12w() (delay_p,inw(pi->port+1)&0xffff)
+
+#define j44(a,b) ((((a>>4)&0x0f)|(b&0xf0))^0x88)
+#define j53(w) (((w>>3)&0x1f)|((w>>4)&0xe0))
+
+
+/* cont = 0 - access the IDE register file
+ cont = 1 - access the IDE command set
+*/
+
+static int cont_map[2] = { 0x80, 0x40 };
+
+static int kbic_read_regr( PIA *pi, int cont, int regr )
+
+{ int a, b, s;
+
+ s = cont_map[cont];
+
+ switch (pi->mode) {
+
+ case 0: w0(regr|0x18|s); w2(4); w2(6); w2(4); w2(1); w0(8);
+ a = r1(); w0(0x28); b = r1(); w2(4);
+ return j44(a,b);
+
+ case 1: w0(regr|0x38|s); w2(4); w2(6); w2(4); w2(5); w0(8);
+ a = r12w(); w2(4);
+ return j53(a);
+
+ case 2: w0(regr|0x08|s); w2(4); w2(6); w2(4); w2(0xa5); w2(0xa1);
+ a = r0(); w2(4);
+ return a;
+
+ case 3:
+ case 4:
+ case 5: w0(0x20|s); w2(4); w2(6); w2(4); w3(regr);
+ a = r4(); b = r4(); w2(4); w2(0); w2(4);
+ return a;
+
+ }
+ return -1;
+}
+
+static void kbic_write_regr( PIA *pi, int cont, int regr, int val)
+
+{ int s;
+
+ s = cont_map[cont];
+
+ switch (pi->mode) {
+
+ case 0:
+ case 1:
+ case 2: w0(regr|0x10|s); w2(4); w2(6); w2(4);
+ w0(val); w2(5); w2(4);
+ break;
+
+ case 3:
+ case 4:
+ case 5: w0(0x20|s); w2(4); w2(6); w2(4); w3(regr);
+ w4(val); w4(val);
+ w2(4); w2(0); w2(4);
+ break;
+
+ }
+}
+
+static void k951_connect ( PIA *pi )
+
+{ pi->saved_r0 = r0();
+ pi->saved_r2 = r2();
+ w2(4);
+}
+
+static void k951_disconnect ( PIA *pi )
+
+{ w0(pi->saved_r0);
+ w2(pi->saved_r2);
+}
+
+#define CCP(x) w2(0xc4);w0(0xaa);w0(0x55);w0(0);w0(0xff);w0(0x87);\
+ w0(0x78);w0(x);w2(0xc5);w2(0xc4);w0(0xff);
+
+static void k971_connect ( PIA *pi )
+
+{ pi->saved_r0 = r0();
+ pi->saved_r2 = r2();
+ CCP(0x20);
+ w2(4);
+}
+
+static void k971_disconnect ( PIA *pi )
+
+{ CCP(0x30);
+ w0(pi->saved_r0);
+ w2(pi->saved_r2);
+}
+
+/* counts must be congruent to 0 MOD 4, but all known applications
+ have this property.
+*/
+
+static void kbic_read_block( PIA *pi, char * buf, int count )
+
+{ int k, a, b;
+
+ switch (pi->mode) {
+
+ case 0: w0(0x98); w2(4); w2(6); w2(4);
+ for (k=0;k<count/2;k++) {
+ w2(1); w0(8); a = r1();
+ w0(0x28); b = r1();
+ buf[2*k] = j44(a,b);
+ w2(5); b = r1();
+ w0(8); a = r1();
+ buf[2*k+1] = j44(a,b);
+ w2(4);
+ }
+ break;
+
+ case 1: w0(0xb8); w2(4); w2(6); w2(4);
+ for (k=0;k<count/4;k++) {
+ w0(0xb8);
+ w2(4); w2(5);
+ w0(8); buf[4*k] = j53(r12w());
+ w0(0xb8); buf[4*k+1] = j53(r12w());
+ w2(4); w2(5);
+ buf[4*k+3] = j53(r12w());
+ w0(8); buf[4*k+2] = j53(r12w());
+ }
+ w2(4);
+ break;
+
+ case 2: w0(0x88); w2(4); w2(6); w2(4);
+ for (k=0;k<count/2;k++) {
+ w2(0xa0); w2(0xa1); buf[2*k] = r0();
+ w2(0xa5); buf[2*k+1] = r0();
+ }
+ w2(4);
+ break;
+
+ case 3: w0(0xa0); w2(4); w2(6); w2(4); w3(0);
+ for (k=0;k<count;k++) buf[k] = r4();
+ w2(4); w2(0); w2(4);
+ break;
+
+ case 4: w0(0xa0); w2(4); w2(6); w2(4); w3(0);
+ for (k=0;k<count/2;k++) ((u16 *)buf)[k] = r4w();
+ w2(4); w2(0); w2(4);
+ break;
+
+ case 5: w0(0xa0); w2(4); w2(6); w2(4); w3(0);
+ for (k=0;k<count/4;k++) ((u32 *)buf)[k] = r4l();
+ w2(4); w2(0); w2(4);
+ break;
+
+
+ }
+}
+
+static void kbic_write_block( PIA *pi, char * buf, int count )
+
+{ int k;
+
+ switch (pi->mode) {
+
+ case 0:
+ case 1:
+ case 2: w0(0x90); w2(4); w2(6); w2(4);
+ for(k=0;k<count/2;k++) {
+ w0(buf[2*k+1]); w2(0); w2(4);
+ w0(buf[2*k]); w2(5); w2(4);
+ }
+ break;
+
+ case 3: w0(0xa0); w2(4); w2(6); w2(4); w3(0);
+ for(k=0;k<count/2;k++) {
+ w4(buf[2*k+1]);
+ w4(buf[2*k]);
+ }
+ w2(4); w2(0); w2(4);
+ break;
+
+ case 4: w0(0xa0); w2(4); w2(6); w2(4); w3(0);
+ for(k=0;k<count/2;k++) w4w(pi_swab16(buf,k));
+ w2(4); w2(0); w2(4);
+ break;
+
+ case 5: w0(0xa0); w2(4); w2(6); w2(4); w3(0);
+ for(k=0;k<count/4;k++) w4l(pi_swab32(buf,k));
+ w2(4); w2(0); w2(4);
+ break;
+
+ }
+
+}
+
+static void kbic_log_adapter( PIA *pi, char * scratch,
+ int verbose, char * chip )
+
+{ char *mode_string[6] = {"4-bit","5/3","8-bit",
+ "EPP-8","EPP_16","EPP-32"};
+
+ printk("%s: kbic %s, KingByte %s at 0x%x, ",
+ pi->device,KBIC_VERSION,chip,pi->port);
+ printk("mode %d (%s), delay %d\n",pi->mode,
+ mode_string[pi->mode],pi->delay);
+
+}
+
+static void k951_log_adapter( PIA *pi, char * scratch, int verbose )
+
+{ kbic_log_adapter(pi,scratch,verbose,"KBIC-951A");
+}
+
+static void k971_log_adapter( PIA *pi, char * scratch, int verbose )
+
+{ kbic_log_adapter(pi,scratch,verbose,"KBIC-971A");
+}
+
+static struct pi_protocol k951 = {
+ .owner = THIS_MODULE,
+ .name = "k951",
+ .max_mode = 6,
+ .epp_first = 3,
+ .default_delay = 1,
+ .max_units = 1,
+ .write_regr = kbic_write_regr,
+ .read_regr = kbic_read_regr,
+ .write_block = kbic_write_block,
+ .read_block = kbic_read_block,
+ .connect = k951_connect,
+ .disconnect = k951_disconnect,
+ .log_adapter = k951_log_adapter,
+};
+
+static struct pi_protocol k971 = {
+ .owner = THIS_MODULE,
+ .name = "k971",
+ .max_mode = 6,
+ .epp_first = 3,
+ .default_delay = 1,
+ .max_units = 1,
+ .write_regr = kbic_write_regr,
+ .read_regr = kbic_read_regr,
+ .write_block = kbic_write_block,
+ .read_block = kbic_read_block,
+ .connect = k971_connect,
+ .disconnect = k971_disconnect,
+ .log_adapter = k971_log_adapter,
+};
+
+static int __init kbic_init(void)
+{
+ int rv;
+
+ rv = paride_register(&k951);
+ if (rv < 0)
+ return rv;
+ rv = paride_register(&k971);
+ if (rv < 0)
+ paride_unregister(&k951);
+ return rv;
+}
+
+static void __exit kbic_exit(void)
+{
+ paride_unregister(&k951);
+ paride_unregister(&k971);
+}
+
+MODULE_LICENSE("GPL");
+module_init(kbic_init)
+module_exit(kbic_exit)
diff --git a/drivers/block/paride/ktti.c b/drivers/block/paride/ktti.c
new file mode 100644
index 00000000..117ab0e8
--- /dev/null
+++ b/drivers/block/paride/ktti.c
@@ -0,0 +1,128 @@
+/*
+ ktti.c (c) 1998 Grant R. Guenther <grant@torque.net>
+ Under the terms of the GNU General Public License.
+
+ ktti.c is a low-level protocol driver for the KT Technology
+ parallel port adapter. This adapter is used in the "PHd"
+ portable hard-drives. As far as I can tell, this device
+ supports 4-bit mode _only_.
+
+*/
+
+#define KTTI_VERSION "1.0"
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/wait.h>
+#include <asm/io.h>
+
+#include "paride.h"
+
+#define j44(a,b) (((a>>4)&0x0f)|(b&0xf0))
+
+/* cont = 0 - access the IDE register file
+ cont = 1 - access the IDE command set
+*/
+
+static int cont_map[2] = { 0x10, 0x08 };
+
+static void ktti_write_regr( PIA *pi, int cont, int regr, int val)
+
+{ int r;
+
+ r = regr + cont_map[cont];
+
+ w0(r); w2(0xb); w2(0xa); w2(3); w2(6);
+ w0(val); w2(3); w0(0); w2(6); w2(0xb);
+}
+
+static int ktti_read_regr( PIA *pi, int cont, int regr )
+
+{ int a, b, r;
+
+ r = regr + cont_map[cont];
+
+ w0(r); w2(0xb); w2(0xa); w2(9); w2(0xc); w2(9);
+ a = r1(); w2(0xc); b = r1(); w2(9); w2(0xc); w2(9);
+ return j44(a,b);
+
+}
+
+static void ktti_read_block( PIA *pi, char * buf, int count )
+
+{ int k, a, b;
+
+ for (k=0;k<count/2;k++) {
+ w0(0x10); w2(0xb); w2(0xa); w2(9); w2(0xc); w2(9);
+ a = r1(); w2(0xc); b = r1(); w2(9);
+ buf[2*k] = j44(a,b);
+ a = r1(); w2(0xc); b = r1(); w2(9);
+ buf[2*k+1] = j44(a,b);
+ }
+}
+
+static void ktti_write_block( PIA *pi, char * buf, int count )
+
+{ int k;
+
+ for (k=0;k<count/2;k++) {
+ w0(0x10); w2(0xb); w2(0xa); w2(3); w2(6);
+ w0(buf[2*k]); w2(3);
+ w0(buf[2*k+1]); w2(6);
+ w2(0xb);
+ }
+}
+
+static void ktti_connect ( PIA *pi )
+
+{ pi->saved_r0 = r0();
+ pi->saved_r2 = r2();
+ w2(0xb); w2(0xa); w0(0); w2(3); w2(6);
+}
+
+static void ktti_disconnect ( PIA *pi )
+
+{ w2(0xb); w2(0xa); w0(0xa0); w2(3); w2(4);
+ w0(pi->saved_r0);
+ w2(pi->saved_r2);
+}
+
+static void ktti_log_adapter( PIA *pi, char * scratch, int verbose )
+
+{ printk("%s: ktti %s, KT adapter at 0x%x, delay %d\n",
+ pi->device,KTTI_VERSION,pi->port,pi->delay);
+
+}
+
+static struct pi_protocol ktti = {
+ .owner = THIS_MODULE,
+ .name = "ktti",
+ .max_mode = 1,
+ .epp_first = 2,
+ .default_delay = 1,
+ .max_units = 1,
+ .write_regr = ktti_write_regr,
+ .read_regr = ktti_read_regr,
+ .write_block = ktti_write_block,
+ .read_block = ktti_read_block,
+ .connect = ktti_connect,
+ .disconnect = ktti_disconnect,
+ .log_adapter = ktti_log_adapter,
+};
+
+static int __init ktti_init(void)
+{
+ return paride_register(&ktti);
+}
+
+static void __exit ktti_exit(void)
+{
+ paride_unregister(&ktti);
+}
+
+MODULE_LICENSE("GPL");
+module_init(ktti_init)
+module_exit(ktti_exit)
diff --git a/drivers/block/paride/mkd b/drivers/block/paride/mkd
new file mode 100644
index 00000000..971f099b
--- /dev/null
+++ b/drivers/block/paride/mkd
@@ -0,0 +1,30 @@
+#!/bin/bash
+#
+# mkd -- a script to create the device special files for the PARIDE subsystem
+#
+# block devices: pd (45), pcd (46), pf (47)
+# character devices: pt (96), pg (97)
+#
+function mkdev {
+ mknod $1 $2 $3 $4 ; chmod 0660 $1 ; chown root:disk $1
+}
+#
+function pd {
+ D=$( printf \\$( printf "x%03x" $[ $1 + 97 ] ) )
+ mkdev pd$D b 45 $[ $1 * 16 ]
+ for P in 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
+ do mkdev pd$D$P b 45 $[ $1 * 16 + $P ]
+ done
+}
+#
+cd /dev
+#
+for u in 0 1 2 3 ; do pd $u ; done
+for u in 0 1 2 3 ; do mkdev pcd$u b 46 $u ; done
+for u in 0 1 2 3 ; do mkdev pf$u b 47 $u ; done
+for u in 0 1 2 3 ; do mkdev pt$u c 96 $u ; done
+for u in 0 1 2 3 ; do mkdev npt$u c 96 $[ $u + 128 ] ; done
+for u in 0 1 2 3 ; do mkdev pg$u c 97 $u ; done
+#
+# end of mkd
+
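[Editor's note] Concretely, running the script above as root creates, for each of the four supported units: pda through pdd with partitions pda1..pda15 and so on (block major 45, minors unit*16 .. unit*16+15), pcd0..pcd3 on major 46, pf0..pf3 on major 47, pt0..pt3 plus npt0..npt3 (minors 128-131) on character major 96, and pg0..pg3 on major 97. The nested printf in the pd function simply turns the unit number 0-3 into the drive letter a-d.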
diff --git a/drivers/block/paride/on20.c b/drivers/block/paride/on20.c
new file mode 100644
index 00000000..0173697a
--- /dev/null
+++ b/drivers/block/paride/on20.c
@@ -0,0 +1,153 @@
+/*
+ on20.c (c) 1996-8 Grant R. Guenther <grant@torque.net>
+ Under the terms of the GNU General Public License.
+
+ on20.c is a low-level protocol driver for the
+ Onspec 90c20 parallel to IDE adapter.
+*/
+
+/* Changes:
+
+ 1.01 GRG 1998.05.06 init_proto, release_proto
+
+*/
+
+#define ON20_VERSION "1.01"
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/wait.h>
+#include <asm/io.h>
+
+#include "paride.h"
+
+#define op(f) w2(4);w0(f);w2(5);w2(0xd);w2(5);w2(0xd);w2(5);w2(4);
+#define vl(v) w2(4);w0(v);w2(5);w2(7);w2(5);w2(4);
+
+#define j44(a,b) (((a>>4)&0x0f)|(b&0xf0))
+
+/* cont = 0 - access the IDE register file
+ cont = 1 - access the IDE command set
+*/
+
+static int on20_read_regr( PIA *pi, int cont, int regr )
+
+{ int h,l, r ;
+
+ r = (regr<<2) + 1 + cont;
+
+ op(1); vl(r); op(0);
+
+ switch (pi->mode) {
+
+ case 0: w2(4); w2(6); l = r1();
+ w2(4); w2(6); h = r1();
+ w2(4); w2(6); w2(4); w2(6); w2(4);
+ return j44(l,h);
+
+ case 1: w2(4); w2(0x26); r = r0();
+ w2(4); w2(0x26); w2(4);
+ return r;
+
+ }
+ return -1;
+}
+
+static void on20_write_regr( PIA *pi, int cont, int regr, int val )
+
+{ int r;
+
+ r = (regr<<2) + 1 + cont;
+
+ op(1); vl(r);
+ op(0); vl(val);
+ op(0); vl(val);
+}
+
+static void on20_connect ( PIA *pi)
+
+{ pi->saved_r0 = r0();
+ pi->saved_r2 = r2();
+
+ w2(4);w0(0);w2(0xc);w2(4);w2(6);w2(4);w2(6);w2(4);
+ if (pi->mode) { op(2); vl(8); op(2); vl(9); }
+ else { op(2); vl(0); op(2); vl(8); }
+}
+
+static void on20_disconnect ( PIA *pi )
+
+{ w2(4);w0(7);w2(4);w2(0xc);w2(4);
+ w0(pi->saved_r0);
+ w2(pi->saved_r2);
+}
+
+static void on20_read_block( PIA *pi, char * buf, int count )
+
+{ int k, l, h;
+
+ op(1); vl(1); op(0);
+
+ for (k=0;k<count;k++)
+ if (pi->mode) {
+ w2(4); w2(0x26); buf[k] = r0();
+ } else {
+ w2(6); l = r1(); w2(4);
+ w2(6); h = r1(); w2(4);
+ buf[k] = j44(l,h);
+ }
+ w2(4);
+}
+
+static void on20_write_block( PIA *pi, char * buf, int count )
+
+{ int k;
+
+ op(1); vl(1); op(0);
+
+ for (k=0;k<count;k++) { w2(5); w0(buf[k]); w2(7); }
+ w2(4);
+}
+
+static void on20_log_adapter( PIA *pi, char * scratch, int verbose )
+
+{ char *mode_string[2] = {"4-bit","8-bit"};
+
+ printk("%s: on20 %s, OnSpec 90c20 at 0x%x, ",
+ pi->device,ON20_VERSION,pi->port);
+ printk("mode %d (%s), delay %d\n",pi->mode,
+ mode_string[pi->mode],pi->delay);
+
+}
+
+static struct pi_protocol on20 = {
+ .owner = THIS_MODULE,
+ .name = "on20",
+ .max_mode = 2,
+ .epp_first = 2,
+ .default_delay = 1,
+ .max_units = 1,
+ .write_regr = on20_write_regr,
+ .read_regr = on20_read_regr,
+ .write_block = on20_write_block,
+ .read_block = on20_read_block,
+ .connect = on20_connect,
+ .disconnect = on20_disconnect,
+ .log_adapter = on20_log_adapter,
+};
+
+static int __init on20_init(void)
+{
+ return paride_register(&on20);
+}
+
+static void __exit on20_exit(void)
+{
+ paride_unregister(&on20);
+}
+
+MODULE_LICENSE("GPL");
+module_init(on20_init)
+module_exit(on20_exit)
diff --git a/drivers/block/paride/on26.c b/drivers/block/paride/on26.c
new file mode 100644
index 00000000..95ba2569
--- /dev/null
+++ b/drivers/block/paride/on26.c
@@ -0,0 +1,319 @@
+/*
+ on26.c (c) 1997-8 Grant R. Guenther <grant@torque.net>
+ Under the terms of the GNU General Public License.
+
+ on26.c is a low-level protocol driver for the
+ OnSpec 90c26 parallel to IDE adapter chip.
+
+*/
+
+/* Changes:
+
+ 1.01 GRG 1998.05.06 init_proto, release_proto
+ 1.02 GRG 1998.09.23 updates for the -E rev chip
+ 1.03 GRG 1998.12.14 fix for slave drives
+ 1.04 GRG 1998.12.20 yet another bug fix
+
+*/
+
+#define ON26_VERSION "1.04"
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/wait.h>
+#include <asm/io.h>
+
+#include "paride.h"
+
+/* mode codes: 0 nybble reads, 8-bit writes
+ 1 8-bit reads and writes
+ 2 8-bit EPP mode
+ 3 EPP-16
+ 4 EPP-32
+*/
+
+#define j44(a,b) (((a>>4)&0x0f)|(b&0xf0))
+
+#define P1 w2(5);w2(0xd);w2(5);w2(0xd);w2(5);w2(4);
+#define P2 w2(5);w2(7);w2(5);w2(4);
+
+/* cont = 0 - access the IDE register file
+ cont = 1 - access the IDE command set
+*/
+
+static int on26_read_regr( PIA *pi, int cont, int regr )
+
+{ int a, b, r;
+
+ r = (regr<<2) + 1 + cont;
+
+ switch (pi->mode) {
+
+ case 0: w0(1); P1; w0(r); P2; w0(0); P1;
+ w2(6); a = r1(); w2(4);
+ w2(6); b = r1(); w2(4);
+ w2(6); w2(4); w2(6); w2(4);
+ return j44(a,b);
+
+ case 1: w0(1); P1; w0(r); P2; w0(0); P1;
+ w2(0x26); a = r0(); w2(4); w2(0x26); w2(4);
+ return a;
+
+ case 2:
+ case 3:
+ case 4: w3(1); w3(1); w2(5); w4(r); w2(4);
+ w3(0); w3(0); w2(0x24); a = r4(); w2(4);
+ w2(0x24); (void)r4(); w2(4);
+ return a;
+
+ }
+ return -1;
+}
+
+static void on26_write_regr( PIA *pi, int cont, int regr, int val )
+
+{ int r;
+
+ r = (regr<<2) + 1 + cont;
+
+ switch (pi->mode) {
+
+ case 0:
+ case 1: w0(1); P1; w0(r); P2; w0(0); P1;
+ w0(val); P2; w0(val); P2;
+ break;
+
+ case 2:
+ case 3:
+ case 4: w3(1); w3(1); w2(5); w4(r); w2(4);
+ w3(0); w3(0);
+ w2(5); w4(val); w2(4);
+ w2(5); w4(val); w2(4);
+ break;
+ }
+}
+
+#define CCP(x) w0(0xfe);w0(0xaa);w0(0x55);w0(0);w0(0xff);\
+ w0(0x87);w0(0x78);w0(x);w2(4);w2(5);w2(4);w0(0xff);
+
+static void on26_connect ( PIA *pi )
+
+{ int x;
+
+ pi->saved_r0 = r0();
+ pi->saved_r2 = r2();
+
+ CCP(0x20);
+ x = 8; if (pi->mode) x = 9;
+
+ w0(2); P1; w0(8); P2;
+ w0(2); P1; w0(x); P2;
+}
+
+static void on26_disconnect ( PIA *pi )
+
+{ if (pi->mode >= 2) { w3(4); w3(4); w3(4); w3(4); }
+ else { w0(4); P1; w0(4); P1; }
+ CCP(0x30);
+ w0(pi->saved_r0);
+ w2(pi->saved_r2);
+}
+
+#define RESET_WAIT 200
+
+static int on26_test_port( PIA *pi) /* hard reset */
+
+{ int i, m, d, x=0, y=0;
+
+ pi->saved_r0 = r0();
+ pi->saved_r2 = r2();
+
+ d = pi->delay;
+ m = pi->mode;
+ pi->delay = 5;
+ pi->mode = 0;
+
+ w2(0xc);
+
+ CCP(0x30); CCP(0);
+
+ w0(0xfe);w0(0xaa);w0(0x55);w0(0);w0(0xff);
+ i = ((r1() & 0xf0) << 4); w0(0x87);
+ i |= (r1() & 0xf0); w0(0x78);
+ w0(0x20);w2(4);w2(5);
+ i |= ((r1() & 0xf0) >> 4);
+ w2(4);w0(0xff);
+
+ if (i == 0xb5f) {
+
+ w0(2); P1; w0(0); P2;
+ w0(3); P1; w0(0); P2;
+ w0(2); P1; w0(8); P2; udelay(100);
+ w0(2); P1; w0(0xa); P2; udelay(100);
+ w0(2); P1; w0(8); P2; udelay(1000);
+
+ on26_write_regr(pi,0,6,0xa0);
+
+ for (i=0;i<RESET_WAIT;i++) {
+ on26_write_regr(pi,0,6,0xa0);
+ x = on26_read_regr(pi,0,7);
+ on26_write_regr(pi,0,6,0xb0);
+ y = on26_read_regr(pi,0,7);
+ if (!((x&0x80)||(y&0x80))) break;
+ mdelay(100);
+ }
+
+ if (i == RESET_WAIT)
+ printk("on26: Device reset failed (%x,%x)\n",x,y);
+
+ w0(4); P1; w0(4); P1;
+ }
+
+ CCP(0x30);
+
+ pi->delay = d;
+ pi->mode = m;
+ w0(pi->saved_r0);
+ w2(pi->saved_r2);
+
+ return 5;
+}
+
+
+static void on26_read_block( PIA *pi, char * buf, int count )
+
+{ int k, a, b;
+
+ switch (pi->mode) {
+
+ case 0: w0(1); P1; w0(1); P2; w0(2); P1; w0(0x18); P2; w0(0); P1;
+ udelay(10);
+ for (k=0;k<count;k++) {
+ w2(6); a = r1();
+ w2(4); b = r1();
+ buf[k] = j44(a,b);
+ }
+ w0(2); P1; w0(8); P2;
+ break;
+
+ case 1: w0(1); P1; w0(1); P2; w0(2); P1; w0(0x19); P2; w0(0); P1;
+ udelay(10);
+ for (k=0;k<count/2;k++) {
+ w2(0x26); buf[2*k] = r0();
+ w2(0x24); buf[2*k+1] = r0();
+ }
+ w0(2); P1; w0(9); P2;
+ break;
+
+ case 2: w3(1); w3(1); w2(5); w4(1); w2(4);
+ w3(0); w3(0); w2(0x24);
+ udelay(10);
+ for (k=0;k<count;k++) buf[k] = r4();
+ w2(4);
+ break;
+
+ case 3: w3(1); w3(1); w2(5); w4(1); w2(4);
+ w3(0); w3(0); w2(0x24);
+ udelay(10);
+ for (k=0;k<count/2;k++) ((u16 *)buf)[k] = r4w();
+ w2(4);
+ break;
+
+ case 4: w3(1); w3(1); w2(5); w4(1); w2(4);
+ w3(0); w3(0); w2(0x24);
+ udelay(10);
+ for (k=0;k<count/4;k++) ((u32 *)buf)[k] = r4l();
+ w2(4);
+ break;
+
+ }
+}
+
+static void on26_write_block( PIA *pi, char * buf, int count )
+
+{ int k;
+
+ switch (pi->mode) {
+
+ case 0:
+ case 1: w0(1); P1; w0(1); P2;
+ w0(2); P1; w0(0x18+pi->mode); P2; w0(0); P1;
+ udelay(10);
+ for (k=0;k<count/2;k++) {
+ w2(5); w0(buf[2*k]);
+ w2(7); w0(buf[2*k+1]);
+ }
+ w2(5); w2(4);
+ w0(2); P1; w0(8+pi->mode); P2;
+ break;
+
+ case 2: w3(1); w3(1); w2(5); w4(1); w2(4);
+ w3(0); w3(0); w2(0xc5);
+ udelay(10);
+ for (k=0;k<count;k++) w4(buf[k]);
+ w2(0xc4);
+ break;
+
+ case 3: w3(1); w3(1); w2(5); w4(1); w2(4);
+ w3(0); w3(0); w2(0xc5);
+ udelay(10);
+ for (k=0;k<count/2;k++) w4w(((u16 *)buf)[k]);
+ w2(0xc4);
+ break;
+
+ case 4: w3(1); w3(1); w2(5); w4(1); w2(4);
+ w3(0); w3(0); w2(0xc5);
+ udelay(10);
+ for (k=0;k<count/4;k++) w4l(((u32 *)buf)[k]);
+ w2(0xc4);
+ break;
+
+ }
+
+}
+
+static void on26_log_adapter( PIA *pi, char * scratch, int verbose )
+
+{ char *mode_string[5] = {"4-bit","8-bit","EPP-8",
+ "EPP-16","EPP-32"};
+
+ printk("%s: on26 %s, OnSpec 90c26 at 0x%x, ",
+ pi->device,ON26_VERSION,pi->port);
+ printk("mode %d (%s), delay %d\n",pi->mode,
+ mode_string[pi->mode],pi->delay);
+
+}
+
+static struct pi_protocol on26 = {
+ .owner = THIS_MODULE,
+ .name = "on26",
+ .max_mode = 5,
+ .epp_first = 2,
+ .default_delay = 1,
+ .max_units = 1,
+ .write_regr = on26_write_regr,
+ .read_regr = on26_read_regr,
+ .write_block = on26_write_block,
+ .read_block = on26_read_block,
+ .connect = on26_connect,
+ .disconnect = on26_disconnect,
+ .test_port = on26_test_port,
+ .log_adapter = on26_log_adapter,
+};
+
+static int __init on26_init(void)
+{
+ return paride_register(&on26);
+}
+
+static void __exit on26_exit(void)
+{
+ paride_unregister(&on26);
+}
+
+MODULE_LICENSE("GPL");
+module_init(on26_init)
+module_exit(on26_exit)
diff --git a/drivers/block/paride/paride.c b/drivers/block/paride/paride.c
new file mode 100644
index 00000000..48c50f11
--- /dev/null
+++ b/drivers/block/paride/paride.c
@@ -0,0 +1,434 @@
+/*
+ paride.c (c) 1997-8 Grant R. Guenther <grant@torque.net>
+ Under the terms of the GNU General Public License.
+
+ This is the base module for the family of device drivers
+ that support parallel port IDE devices.
+
+*/
+
+/* Changes:
+
+ 1.01 GRG 1998.05.03 Use spinlocks
+ 1.02 GRG 1998.05.05 init_proto, release_proto, ktti
+ 1.03 GRG 1998.08.15 eliminate compiler warning
+ 1.04 GRG 1998.11.28 added support for FRIQ
+ 1.05 TMW 2000.06.06 use parport_find_number instead of
+ parport_enumerate
+ 1.06 TMW 2001.03.26 more sane parport-or-not resource management
+*/
+
+#define PI_VERSION "1.06"
+
+#include <linux/module.h>
+#include <linux/kmod.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/ioport.h>
+#include <linux/string.h>
+#include <linux/spinlock.h>
+#include <linux/wait.h>
+#include <linux/sched.h> /* TASK_* */
+#include <linux/parport.h>
+
+#include "paride.h"
+
+MODULE_LICENSE("GPL");
+
+#define MAX_PROTOS 32
+
+static struct pi_protocol *protocols[MAX_PROTOS];
+
+static DEFINE_SPINLOCK(pi_spinlock);
+
+void pi_write_regr(PIA * pi, int cont, int regr, int val)
+{
+ pi->proto->write_regr(pi, cont, regr, val);
+}
+
+EXPORT_SYMBOL(pi_write_regr);
+
+int pi_read_regr(PIA * pi, int cont, int regr)
+{
+ return pi->proto->read_regr(pi, cont, regr);
+}
+
+EXPORT_SYMBOL(pi_read_regr);
+
+void pi_write_block(PIA * pi, char *buf, int count)
+{
+ pi->proto->write_block(pi, buf, count);
+}
+
+EXPORT_SYMBOL(pi_write_block);
+
+void pi_read_block(PIA * pi, char *buf, int count)
+{
+ pi->proto->read_block(pi, buf, count);
+}
+
+EXPORT_SYMBOL(pi_read_block);
+
+static void pi_wake_up(void *p)
+{
+ PIA *pi = (PIA *) p;
+ unsigned long flags;
+ void (*cont) (void) = NULL;
+
+ spin_lock_irqsave(&pi_spinlock, flags);
+
+ if (pi->claim_cont && !parport_claim(pi->pardev)) {
+ cont = pi->claim_cont;
+ pi->claim_cont = NULL;
+ pi->claimed = 1;
+ }
+
+ spin_unlock_irqrestore(&pi_spinlock, flags);
+
+ wake_up(&(pi->parq));
+
+ if (cont)
+ cont();
+}
+
+int pi_schedule_claimed(PIA * pi, void (*cont) (void))
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&pi_spinlock, flags);
+ if (pi->pardev && parport_claim(pi->pardev)) {
+ pi->claim_cont = cont;
+ spin_unlock_irqrestore(&pi_spinlock, flags);
+ return 0;
+ }
+ pi->claimed = 1;
+ spin_unlock_irqrestore(&pi_spinlock, flags);
+ return 1;
+}
+EXPORT_SYMBOL(pi_schedule_claimed);
+
+void pi_do_claimed(PIA * pi, void (*cont) (void))
+{
+ if (pi_schedule_claimed(pi, cont))
+ cont();
+}
+
+EXPORT_SYMBOL(pi_do_claimed);
+
+static void pi_claim(PIA * pi)
+{
+ if (pi->claimed)
+ return;
+ pi->claimed = 1;
+ if (pi->pardev)
+ wait_event(pi->parq,
+ !parport_claim((struct pardevice *) pi->pardev));
+}
+
+static void pi_unclaim(PIA * pi)
+{
+ pi->claimed = 0;
+ if (pi->pardev)
+ parport_release((struct pardevice *) (pi->pardev));
+}
+
+void pi_connect(PIA * pi)
+{
+ pi_claim(pi);
+ pi->proto->connect(pi);
+}
+
+EXPORT_SYMBOL(pi_connect);
+
+void pi_disconnect(PIA * pi)
+{
+ pi->proto->disconnect(pi);
+ pi_unclaim(pi);
+}
+
+EXPORT_SYMBOL(pi_disconnect);
+
+static void pi_unregister_parport(PIA * pi)
+{
+ if (pi->pardev) {
+ parport_unregister_device((struct pardevice *) (pi->pardev));
+ pi->pardev = NULL;
+ }
+}
+
+void pi_release(PIA * pi)
+{
+ pi_unregister_parport(pi);
+ if (pi->proto->release_proto)
+ pi->proto->release_proto(pi);
+ module_put(pi->proto->owner);
+}
+
+EXPORT_SYMBOL(pi_release);
+
+static int default_test_proto(PIA * pi, char *scratch, int verbose)
+{
+ int j, k;
+ int e[2] = { 0, 0 };
+
+ pi->proto->connect(pi);
+
+ for (j = 0; j < 2; j++) {
+ pi_write_regr(pi, 0, 6, 0xa0 + j * 0x10);
+ for (k = 0; k < 256; k++) {
+ pi_write_regr(pi, 0, 2, k ^ 0xaa);
+ pi_write_regr(pi, 0, 3, k ^ 0x55);
+ if (pi_read_regr(pi, 0, 2) != (k ^ 0xaa))
+ e[j]++;
+ }
+ }
+ pi->proto->disconnect(pi);
+
+ if (verbose)
+ printk("%s: %s: port 0x%x, mode %d, test=(%d,%d)\n",
+ pi->device, pi->proto->name, pi->port,
+ pi->mode, e[0], e[1]);
+
+ return (e[0] && e[1]); /* not here if both > 0 */
+}
+
+static int pi_test_proto(PIA * pi, char *scratch, int verbose)
+{
+ int res;
+
+ pi_claim(pi);
+ if (pi->proto->test_proto)
+ res = pi->proto->test_proto(pi, scratch, verbose);
+ else
+ res = default_test_proto(pi, scratch, verbose);
+ pi_unclaim(pi);
+
+ return res;
+}
+
+int paride_register(PIP * pr)
+{
+ int k;
+
+ for (k = 0; k < MAX_PROTOS; k++)
+ if (protocols[k] && !strcmp(pr->name, protocols[k]->name)) {
+ printk("paride: %s protocol already registered\n",
+ pr->name);
+ return -1;
+ }
+ k = 0;
+ while ((k < MAX_PROTOS) && (protocols[k]))
+ k++;
+ if (k == MAX_PROTOS) {
+ printk("paride: protocol table full\n");
+ return -1;
+ }
+ protocols[k] = pr;
+ pr->index = k;
+ printk("paride: %s registered as protocol %d\n", pr->name, k);
+ return 0;
+}
+
+EXPORT_SYMBOL(paride_register);
+
+void paride_unregister(PIP * pr)
+{
+ if (!pr)
+ return;
+ if (protocols[pr->index] != pr) {
+ printk("paride: %s not registered\n", pr->name);
+ return;
+ }
+ protocols[pr->index] = NULL;
+}
+
+EXPORT_SYMBOL(paride_unregister);
+
+static int pi_register_parport(PIA * pi, int verbose)
+{
+ struct parport *port;
+
+ port = parport_find_base(pi->port);
+ if (!port)
+ return 0;
+
+ pi->pardev = parport_register_device(port,
+ pi->device, NULL,
+ pi_wake_up, NULL, 0, (void *) pi);
+ parport_put_port(port);
+ if (!pi->pardev)
+ return 0;
+
+ init_waitqueue_head(&pi->parq);
+
+ if (verbose)
+ printk("%s: 0x%x is %s\n", pi->device, pi->port, port->name);
+
+ pi->parname = (char *) port->name;
+
+ return 1;
+}
+
+static int pi_probe_mode(PIA * pi, int max, char *scratch, int verbose)
+{
+ int best, range;
+
+ if (pi->mode != -1) {
+ if (pi->mode >= max)
+ return 0;
+ range = 3;
+ if (pi->mode >= pi->proto->epp_first)
+ range = 8;
+ if ((range == 8) && (pi->port % 8))
+ return 0;
+ pi->reserved = range;
+ return (!pi_test_proto(pi, scratch, verbose));
+ }
+ best = -1;
+ for (pi->mode = 0; pi->mode < max; pi->mode++) {
+ range = 3;
+ if (pi->mode >= pi->proto->epp_first)
+ range = 8;
+ if ((range == 8) && (pi->port % 8))
+ break;
+ pi->reserved = range;
+ if (!pi_test_proto(pi, scratch, verbose))
+ best = pi->mode;
+ }
+ pi->mode = best;
+ return (best > -1);
+}
+
+static int pi_probe_unit(PIA * pi, int unit, char *scratch, int verbose)
+{
+ int max, s, e;
+
+ s = unit;
+ e = s + 1;
+
+ if (s == -1) {
+ s = 0;
+ e = pi->proto->max_units;
+ }
+
+ if (!pi_register_parport(pi, verbose))
+ return 0;
+
+ if (pi->proto->test_port) {
+ pi_claim(pi);
+ max = pi->proto->test_port(pi);
+ pi_unclaim(pi);
+ } else
+ max = pi->proto->max_mode;
+
+ if (pi->proto->probe_unit) {
+ pi_claim(pi);
+ for (pi->unit = s; pi->unit < e; pi->unit++)
+ if (pi->proto->probe_unit(pi)) {
+ pi_unclaim(pi);
+ if (pi_probe_mode(pi, max, scratch, verbose))
+ return 1;
+ pi_unregister_parport(pi);
+ return 0;
+ }
+ pi_unclaim(pi);
+ pi_unregister_parport(pi);
+ return 0;
+ }
+
+ if (!pi_probe_mode(pi, max, scratch, verbose)) {
+ pi_unregister_parport(pi);
+ return 0;
+ }
+ return 1;
+
+}
+
+int pi_init(PIA * pi, int autoprobe, int port, int mode,
+ int unit, int protocol, int delay, char *scratch,
+ int devtype, int verbose, char *device)
+{
+ int p, k, s, e;
+ int lpts[7] = { 0x3bc, 0x378, 0x278, 0x268, 0x27c, 0x26c, 0 };
+
+ s = protocol;
+ e = s + 1;
+
+ if (!protocols[0])
+ request_module("paride_protocol");
+
+ if (autoprobe) {
+ s = 0;
+ e = MAX_PROTOS;
+ } else if ((s < 0) || (s >= MAX_PROTOS) || (port <= 0) ||
+ (!protocols[s]) || (unit < 0) ||
+ (unit >= protocols[s]->max_units)) {
+ printk("%s: Invalid parameters\n", device);
+ return 0;
+ }
+
+ for (p = s; p < e; p++) {
+ struct pi_protocol *proto = protocols[p];
+ if (!proto)
+ continue;
+ /* still racy */
+ if (!try_module_get(proto->owner))
+ continue;
+ pi->proto = proto;
+ pi->private = 0;
+ if (proto->init_proto && proto->init_proto(pi) < 0) {
+ pi->proto = NULL;
+ module_put(proto->owner);
+ continue;
+ }
+ if (delay == -1)
+ pi->delay = pi->proto->default_delay;
+ else
+ pi->delay = delay;
+ pi->devtype = devtype;
+ pi->device = device;
+
+ pi->parname = NULL;
+ pi->pardev = NULL;
+ init_waitqueue_head(&pi->parq);
+ pi->claimed = 0;
+ pi->claim_cont = NULL;
+
+ pi->mode = mode;
+ if (port != -1) {
+ pi->port = port;
+ if (pi_probe_unit(pi, unit, scratch, verbose))
+ break;
+ pi->port = 0;
+ } else {
+ k = 0;
+ while ((pi->port = lpts[k++]))
+ if (pi_probe_unit
+ (pi, unit, scratch, verbose))
+ break;
+ if (pi->port)
+ break;
+ }
+ if (pi->proto->release_proto)
+ pi->proto->release_proto(pi);
+ module_put(proto->owner);
+ }
+
+ if (!pi->port) {
+ if (autoprobe)
+ printk("%s: Autoprobe failed\n", device);
+ else
+ printk("%s: Adapter not found\n", device);
+ return 0;
+ }
+
+ if (pi->parname)
+ printk("%s: Sharing %s at 0x%x\n", pi->device,
+ pi->parname, pi->port);
+
+ pi->proto->log_adapter(pi, scratch, verbose);
+
+ return 1;
+}
+
+EXPORT_SYMBOL(pi_init);
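[Editor's note] Because the underlying parport may be shared (with lp, for example), the high-level drivers do not start transfers directly; they hand a continuation to pi_do_claimed(), which either runs it at once or parks it in pi->claim_cont for pi_wake_up() to run when the port becomes free. A hypothetical caller fragment, with all names invented for illustration:

    #include "paride.h"

    /* Assume my_pia was set up earlier with pi_init() and my_buf is a
       512-byte transfer buffer. */
    static struct pi_adapter my_pia;
    static char my_buf[512];

    /* Continuation: runs only once the parallel port has been claimed. */
    static void my_do_read(void)
    {
            pi_connect(&my_pia);                 /* adapter-specific connect  */
            pi_read_block(&my_pia, my_buf, 512); /* one 512-byte block        */
            pi_disconnect(&my_pia);              /* also releases the parport */
    }

    static void my_start_read(void)
    {
            /* Calls my_do_read() immediately if the port is free, otherwise
               stores it as the claim continuation to be run later. */
            pi_do_claimed(&my_pia, my_do_read);
    }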
diff --git a/drivers/block/paride/paride.h b/drivers/block/paride/paride.h
new file mode 100644
index 00000000..2bddbf45
--- /dev/null
+++ b/drivers/block/paride/paride.h
@@ -0,0 +1,170 @@
+#ifndef __DRIVERS_PARIDE_H__
+#define __DRIVERS_PARIDE_H__
+
+/*
+ paride.h (c) 1997-8 Grant R. Guenther <grant@torque.net>
+ Under the terms of the GPL.
+
+ This file defines the interface between the high-level parallel
+ IDE device drivers (pd, pf, pcd, pt) and the adapter chips.
+
+*/
+
+/* Changes:
+
+ 1.01 GRG 1998.05.05 init_proto, release_proto
+*/
+
+#define PARIDE_H_VERSION "1.01"
+
+/* Some adapters need to know what kind of device they are in
+
+ Values for devtype:
+*/
+
+#define PI_PD 0 /* IDE disk */
+#define PI_PCD 1 /* ATAPI CDrom */
+#define PI_PF 2 /* ATAPI disk */
+#define PI_PT 3 /* ATAPI tape */
+#define PI_PG 4 /* ATAPI generic */
+
+/* The paride module contains no state; instead, the drivers allocate
+ a pi_adapter data structure and pass it to paride in every operation.
+
+*/
+
+struct pi_adapter {
+
+ struct pi_protocol *proto; /* adapter protocol */
+ int port; /* base address of parallel port */
+ int mode; /* transfer mode in use */
+ int delay; /* adapter delay setting */
+ int devtype; /* device type: PI_PD etc. */
+ char *device; /* name of driver */
+ int unit; /* unit number for chained adapters */
+ int saved_r0; /* saved port state */
+ int saved_r2; /* saved port state */
+ int reserved; /* number of ports reserved */
+ unsigned long private; /* for protocol module */
+
+ wait_queue_head_t parq; /* semaphore for parport sharing */
+ void *pardev; /* pointer to pardevice */
+ char *parname; /* parport name */
+ int claimed; /* parport has already been claimed */
+ void (*claim_cont)(void); /* continuation for parport wait */
+};
+
+typedef struct pi_adapter PIA;
+
+/* functions exported by paride to the high level drivers */
+
+extern int pi_init(PIA *pi,
+ int autoprobe, /* 1 to autoprobe */
+ int port, /* base port address */
+ int mode, /* -1 for autoprobe */
+ int unit, /* unit number, if supported */
+ int protocol, /* protocol to use */
+ int delay, /* -1 to use adapter specific default */
+ char * scratch, /* address of 512 byte buffer */
+ int devtype, /* device type: PI_PD, PI_PCD, etc ... */
+ int verbose, /* log verbose data while probing */
+ char *device /* name of the driver */
+ ); /* returns 0 on failure, 1 on success */
+
+extern void pi_release(PIA *pi);
+
+/* registers are addressed as (cont,regr)
+
+ cont: 0 for command register file, 1 for control register(s)
+ regr: 0-7 for register number.
+
+*/
+
+extern void pi_write_regr(PIA *pi, int cont, int regr, int val);
+
+extern int pi_read_regr(PIA *pi, int cont, int regr);
+
+extern void pi_write_block(PIA *pi, char * buf, int count);
+
+extern void pi_read_block(PIA *pi, char * buf, int count);
+
+extern void pi_connect(PIA *pi);
+
+extern void pi_disconnect(PIA *pi);
+
+extern void pi_do_claimed(PIA *pi, void (*cont)(void));
+extern int pi_schedule_claimed(PIA *pi, void (*cont)(void));
+
+/* macros and functions exported to the protocol modules */
+
+#define delay_p (pi->delay?udelay(pi->delay):(void)0)
+#define out_p(offs,byte) outb(byte,pi->port+offs); delay_p;
+#define in_p(offs) (delay_p,inb(pi->port+offs))
+
+#define w0(byte) {out_p(0,byte);}
+#define r0() (in_p(0) & 0xff)
+#define w1(byte) {out_p(1,byte);}
+#define r1() (in_p(1) & 0xff)
+#define w2(byte) {out_p(2,byte);}
+#define r2() (in_p(2) & 0xff)
+#define w3(byte) {out_p(3,byte);}
+#define w4(byte) {out_p(4,byte);}
+#define r4() (in_p(4) & 0xff)
+#define w4w(data) {outw(data,pi->port+4); delay_p;}
+#define w4l(data) {outl(data,pi->port+4); delay_p;}
+#define r4w() (delay_p,inw(pi->port+4)&0xffff)
+#define r4l() (delay_p,inl(pi->port+4)&0xffffffff)
+
+static inline u16 pi_swab16( char *b, int k)
+
+{ union { u16 u; char t[2]; } r;
+
+ r.t[0]=b[2*k+1]; r.t[1]=b[2*k];
+ return r.u;
+}
+
+static inline u32 pi_swab32( char *b, int k)
+
+{ union { u32 u; char f[4]; } r;
+
+ r.f[0]=b[4*k+1]; r.f[1]=b[4*k];
+ r.f[2]=b[4*k+3]; r.f[3]=b[4*k+2];
+ return r.u;
+}
+
+struct pi_protocol {
+
+ char name[8]; /* name for this protocol */
+ int index; /* index into protocol table */
+
+ int max_mode; /* max mode number */
+ int epp_first; /* modes >= this use 8 ports */
+
+ int default_delay; /* delay parameter if not specified */
+ int max_units; /* max chained units probed for */
+
+ void (*write_regr)(PIA *,int,int,int);
+ int (*read_regr)(PIA *,int,int);
+ void (*write_block)(PIA *,char *,int);
+ void (*read_block)(PIA *,char *,int);
+
+ void (*connect)(PIA *);
+ void (*disconnect)(PIA *);
+
+ int (*test_port)(PIA *);
+ int (*probe_unit)(PIA *);
+ int (*test_proto)(PIA *,char *,int);
+ void (*log_adapter)(PIA *,char *,int);
+
+ int (*init_proto)(PIA *);
+ void (*release_proto)(PIA *);
+ struct module *owner;
+};
+
+typedef struct pi_protocol PIP;
+
+extern int paride_register( PIP * );
+extern void paride_unregister ( PIP * );
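+
+/* A protocol module typically fills in a static struct pi_protocol (name,
+   transfer callbacks, THIS_MODULE as owner) and calls paride_register()
+   from its module init and paride_unregister() from its module exit.
+   This is a sketch of the expected usage, not a quote from any one
+   protocol module. */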
+
+#endif /* __DRIVERS_PARIDE_H__ */
+/* end of paride.h */
diff --git a/drivers/block/paride/pcd.c b/drivers/block/paride/pcd.c
new file mode 100644
index 00000000..46b8136c
--- /dev/null
+++ b/drivers/block/paride/pcd.c
@@ -0,0 +1,992 @@
+/*
+ pcd.c (c) 1997-8 Grant R. Guenther <grant@torque.net>
+ Under the terms of the GNU General Public License.
+
+ This is a high-level driver for parallel port ATAPI CD-ROM
+ drives based on chips supported by the paride module.
+
+ By default, the driver will autoprobe for a single parallel
+ port ATAPI CD-ROM drive, but if individual drive parameters are
+ specified, the driver can handle up to 4 drives.
+
+ The behaviour of the pcd driver can be altered by setting
+ some parameters from the insmod command line. The following
+ parameters are adjustable:
+
+ drive0 These four arguments can be arrays of
+ drive1 1-6 integers as follows:
+ drive2
+ drive3 <prt>,<pro>,<uni>,<mod>,<slv>,<dly>
+
+ Where,
+
+ <prt> is the base of the parallel port address for
+ the corresponding drive. (required)
+
+ <pro> is the protocol number for the adapter that
+ supports this drive. These numbers are
+ logged by 'paride' when the protocol modules
+ are initialised. (0 if not given)
+
+ <uni> for those adapters that support chained
+ devices, this is the unit selector for the
+ chain of devices on the given port. It should
+ be zero for devices that don't support chaining.
+ (0 if not given)
+
+ <mod> this can be -1 to choose the best mode, or one
+ of the mode numbers supported by the adapter.
+ (-1 if not given)
+
+ <slv> ATAPI CD-ROMs can be jumpered to master or slave.
+ Set this to 0 to choose the master drive, 1 to
+ choose the slave, -1 (the default) to choose the
+ first drive found.
+
+ <dly> some parallel ports require the driver to
+ go more slowly. -1 sets a default value that
+ should work with the chosen protocol. Otherwise,
+ set this to a small integer, the larger it is
+ the slower the port i/o. In some cases, setting
+ this to zero will speed up the device. (default -1)
+
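+ As an illustration (the port address is only an example), a module
+ load that puts one drive on a port at 0x378, using protocol 0 and
+ leaving the mode, master/slave selection and delay at their
+ defaults, would be:
+
+ insmod pcd drive0=0x378,0,0,-1,-1,-1
+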
+ major You may use this parameter to override the
+ default major number (46) that this driver
+ will use. Be sure to change the device
+ name as well.
+
+ name This parameter is a character string that
+ contains the name the kernel will use for this
+ device (in /proc output, for instance).
+ (default "pcd")
+
+ verbose This parameter controls the amount of logging
+ that the driver will do. Set it to 0 for
+ normal operation, 1 to see autoprobe progress
+ messages, or 2 to see additional debugging
+ output. (default 0)
+
+ nice This parameter controls the driver's use of
+ idle CPU time, at the expense of some speed.
+
+ If this driver is built into the kernel, you can use the
+ following kernel command line parameters, with the same values
+ as the corresponding module parameters listed above:
+
+ pcd.drive0
+ pcd.drive1
+ pcd.drive2
+ pcd.drive3
+ pcd.nice
+
+ In addition, you can use the parameter pcd.disable to disable
+ the driver entirely.
+
+*/
+
+/* Changes:
+
+ 1.01 GRG 1998.01.24 Added test unit ready support
+ 1.02 GRG 1998.05.06 Changes to pcd_completion, ready_wait,
+ and loosen interpretation of ATAPI
+ standard for clearing error status.
+ Use spinlocks. Eliminate sti().
+ 1.03 GRG 1998.06.16 Eliminated an Ugh
+ 1.04 GRG 1998.08.15 Added extra debugging, improvements to
+ pcd_completion, use HZ in loop timing
+ 1.05 GRG 1998.08.16 Conformed to "Uniform CD-ROM" standard
+ 1.06 GRG 1998.08.19 Added audio ioctl support
+ 1.07 GRG 1998.09.24 Increased reset timeout, added jumbo support
+
+*/
+
+#define PCD_VERSION "1.07"
+#define PCD_MAJOR 46
+#define PCD_NAME "pcd"
+#define PCD_UNITS 4
+
+/* Here are things one can override from the insmod command.
+ Most are autoprobed by paride unless set here. Verbose is off
+ by default.
+
+*/
+
+static int verbose = 0;
+static int major = PCD_MAJOR;
+static char *name = PCD_NAME;
+static int nice = 0;
+static int disable = 0;
+
+static int drive0[6] = { 0, 0, 0, -1, -1, -1 };
+static int drive1[6] = { 0, 0, 0, -1, -1, -1 };
+static int drive2[6] = { 0, 0, 0, -1, -1, -1 };
+static int drive3[6] = { 0, 0, 0, -1, -1, -1 };
+
+static int (*drives[4])[6] = {&drive0, &drive1, &drive2, &drive3};
+static int pcd_drive_count;
+
+enum {D_PRT, D_PRO, D_UNI, D_MOD, D_SLV, D_DLY};
+
+/* end of parameters */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/kernel.h>
+#include <linux/delay.h>
+#include <linux/cdrom.h>
+#include <linux/spinlock.h>
+#include <linux/blkdev.h>
+#include <linux/mutex.h>
+#include <asm/uaccess.h>
+
+static DEFINE_MUTEX(pcd_mutex);
+static DEFINE_SPINLOCK(pcd_lock);
+
+module_param(verbose, bool, 0644);
+module_param(major, int, 0);
+module_param(name, charp, 0);
+module_param(nice, int, 0);
+module_param_array(drive0, int, NULL, 0);
+module_param_array(drive1, int, NULL, 0);
+module_param_array(drive2, int, NULL, 0);
+module_param_array(drive3, int, NULL, 0);
+
+#include "paride.h"
+#include "pseudo.h"
+
+#define PCD_RETRIES 5
+#define PCD_TMO 800 /* timeout in jiffies */
+#define PCD_DELAY 50 /* spin delay in uS */
+#define PCD_READY_TMO 20 /* in seconds */
+#define PCD_RESET_TMO 100 /* in tenths of a second */
+
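+/* number of PCD_DELAY-microsecond polls that span the PCD_TMO jiffy timeout */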
+#define PCD_SPIN (1000000*PCD_TMO)/(HZ*PCD_DELAY)
+
+#define IDE_ERR 0x01
+#define IDE_DRQ 0x08
+#define IDE_READY 0x40
+#define IDE_BUSY 0x80
+
+static int pcd_open(struct cdrom_device_info *cdi, int purpose);
+static void pcd_release(struct cdrom_device_info *cdi);
+static int pcd_drive_status(struct cdrom_device_info *cdi, int slot_nr);
+static unsigned int pcd_check_events(struct cdrom_device_info *cdi,
+ unsigned int clearing, int slot_nr);
+static int pcd_tray_move(struct cdrom_device_info *cdi, int position);
+static int pcd_lock_door(struct cdrom_device_info *cdi, int lock);
+static int pcd_drive_reset(struct cdrom_device_info *cdi);
+static int pcd_get_mcn(struct cdrom_device_info *cdi, struct cdrom_mcn *mcn);
+static int pcd_audio_ioctl(struct cdrom_device_info *cdi,
+ unsigned int cmd, void *arg);
+static int pcd_packet(struct cdrom_device_info *cdi,
+ struct packet_command *cgc);
+
+static int pcd_detect(void);
+static void pcd_probe_capabilities(void);
+static void do_pcd_read_drq(void);
+static void do_pcd_request(struct request_queue * q);
+static void do_pcd_read(void);
+
+struct pcd_unit {
+ struct pi_adapter pia; /* interface to paride layer */
+ struct pi_adapter *pi;
+ int drive; /* master/slave */
+ int last_sense; /* result of last request sense */
+ int changed; /* media change seen */
+ int present; /* does this unit exist ? */
+ char *name; /* pcd0, pcd1, etc */
+ struct cdrom_device_info info; /* uniform cdrom interface */
+ struct gendisk *disk;
+};
+
+static struct pcd_unit pcd[PCD_UNITS];
+
+static char pcd_scratch[64];
+static char pcd_buffer[2048]; /* raw block buffer */
+static int pcd_bufblk = -1; /* block in buffer, in CD units,
+ -1 for nothing there. See also
+ pcd_current.
+ */
+
+/* the variables below are used mainly in the I/O request engine, which
+ processes only one request at a time.
+*/
+
+static struct pcd_unit *pcd_current; /* current request's drive */
+static struct request *pcd_req;
+static int pcd_retries; /* retries on current request */
+static int pcd_busy; /* request being processed ? */
+static int pcd_sector; /* address of next requested sector */
+static int pcd_count; /* number of blocks still to do */
+static char *pcd_buf; /* buffer for request in progress */
+
+/* kernel glue structures */
+
+static int pcd_block_open(struct block_device *bdev, fmode_t mode)
+{
+ struct pcd_unit *cd = bdev->bd_disk->private_data;
+ int ret;
+
+ mutex_lock(&pcd_mutex);
+ ret = cdrom_open(&cd->info, bdev, mode);
+ mutex_unlock(&pcd_mutex);
+
+ return ret;
+}
+
+static int pcd_block_release(struct gendisk *disk, fmode_t mode)
+{
+ struct pcd_unit *cd = disk->private_data;
+ mutex_lock(&pcd_mutex);
+ cdrom_release(&cd->info, mode);
+ mutex_unlock(&pcd_mutex);
+ return 0;
+}
+
+static int pcd_block_ioctl(struct block_device *bdev, fmode_t mode,
+ unsigned cmd, unsigned long arg)
+{
+ struct pcd_unit *cd = bdev->bd_disk->private_data;
+ int ret;
+
+ mutex_lock(&pcd_mutex);
+ ret = cdrom_ioctl(&cd->info, bdev, mode, cmd, arg);
+ mutex_unlock(&pcd_mutex);
+
+ return ret;
+}
+
+static unsigned int pcd_block_check_events(struct gendisk *disk,
+ unsigned int clearing)
+{
+ struct pcd_unit *cd = disk->private_data;
+ return cdrom_check_events(&cd->info, clearing);
+}
+
+static const struct block_device_operations pcd_bdops = {
+ .owner = THIS_MODULE,
+ .open = pcd_block_open,
+ .release = pcd_block_release,
+ .ioctl = pcd_block_ioctl,
+ .check_events = pcd_block_check_events,
+};
+
+static struct cdrom_device_ops pcd_dops = {
+ .open = pcd_open,
+ .release = pcd_release,
+ .drive_status = pcd_drive_status,
+ .check_events = pcd_check_events,
+ .tray_move = pcd_tray_move,
+ .lock_door = pcd_lock_door,
+ .get_mcn = pcd_get_mcn,
+ .reset = pcd_drive_reset,
+ .audio_ioctl = pcd_audio_ioctl,
+ .generic_packet = pcd_packet,
+ .capability = CDC_CLOSE_TRAY | CDC_OPEN_TRAY | CDC_LOCK |
+ CDC_MCN | CDC_MEDIA_CHANGED | CDC_RESET |
+ CDC_PLAY_AUDIO | CDC_GENERIC_PACKET | CDC_CD_R |
+ CDC_CD_RW,
+};
+
+static void pcd_init_units(void)
+{
+ struct pcd_unit *cd;
+ int unit;
+
+ pcd_drive_count = 0;
+ for (unit = 0, cd = pcd; unit < PCD_UNITS; unit++, cd++) {
+ struct gendisk *disk = alloc_disk(1);
+ if (!disk)
+ continue;
+ cd->disk = disk;
+ cd->pi = &cd->pia;
+ cd->present = 0;
+ cd->last_sense = 0;
+ cd->changed = 1;
+ cd->drive = (*drives[unit])[D_SLV];
+ if ((*drives[unit])[D_PRT])
+ pcd_drive_count++;
+
+ cd->name = &cd->info.name[0];
+ snprintf(cd->name, sizeof(cd->info.name), "%s%d", name, unit);
+ cd->info.ops = &pcd_dops;
+ cd->info.handle = cd;
+ cd->info.speed = 0;
+ cd->info.capacity = 1;
+ cd->info.mask = 0;
+ disk->major = major;
+ disk->first_minor = unit;
+ strcpy(disk->disk_name, cd->name); /* cd->name is short enough for disk_name */
+ disk->fops = &pcd_bdops;
+ disk->flags = GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE;
+ }
+}
+
+static int pcd_open(struct cdrom_device_info *cdi, int purpose)
+{
+ struct pcd_unit *cd = cdi->handle;
+ if (!cd->present)
+ return -ENODEV;
+ return 0;
+}
+
+static void pcd_release(struct cdrom_device_info *cdi)
+{
+}
+
+static inline int status_reg(struct pcd_unit *cd)
+{
+ return pi_read_regr(cd->pi, 1, 6);
+}
+
+static inline int read_reg(struct pcd_unit *cd, int reg)
+{
+ return pi_read_regr(cd->pi, 0, reg);
+}
+
+static inline void write_reg(struct pcd_unit *cd, int reg, int val)
+{
+ pi_write_regr(cd->pi, 0, reg, val);
+}
+
+static int pcd_wait(struct pcd_unit *cd, int go, int stop, char *fun, char *msg)
+{
+ int j, r, e, s, p;
+
+ j = 0;
+ while ((((r = status_reg(cd)) & go) || (stop && (!(r & stop))))
+ && (j++ < PCD_SPIN))
+ udelay(PCD_DELAY);
+
+ if ((r & (IDE_ERR & stop)) || (j > PCD_SPIN)) {
+ s = read_reg(cd, 7);
+ e = read_reg(cd, 1);
+ p = read_reg(cd, 2);
+ if (j > PCD_SPIN)
+ e |= 0x100;
+ if (fun)
+ printk("%s: %s %s: alt=0x%x stat=0x%x err=0x%x"
+ " loop=%d phase=%d\n",
+ cd->name, fun, msg, r, s, e, j, p);
+ return (s << 8) + r;
+ }
+ return 0;
+}
+
+static int pcd_command(struct pcd_unit *cd, char *cmd, int dlen, char *fun)
+{
+ pi_connect(cd->pi);
+
+ write_reg(cd, 6, 0xa0 + 0x10 * cd->drive);
+
+ if (pcd_wait(cd, IDE_BUSY | IDE_DRQ, 0, fun, "before command")) {
+ pi_disconnect(cd->pi);
+ return -1;
+ }
+
+ write_reg(cd, 4, dlen % 256);
+ write_reg(cd, 5, dlen / 256);
+ write_reg(cd, 7, 0xa0); /* ATAPI packet command */
+
+ if (pcd_wait(cd, IDE_BUSY, IDE_DRQ, fun, "command DRQ")) {
+ pi_disconnect(cd->pi);
+ return -1;
+ }
+
+ if (read_reg(cd, 2) != 1) {
+ printk("%s: %s: command phase error\n", cd->name, fun);
+ pi_disconnect(cd->pi);
+ return -1;
+ }
+
+ pi_write_block(cd->pi, cmd, 12);
+
+ return 0;
+}
+
+static int pcd_completion(struct pcd_unit *cd, char *buf, char *fun)
+{
+ int r, d, p, n, k, j;
+
+ r = -1;
+ k = 0;
+ j = 0;
+
+ if (!pcd_wait(cd, IDE_BUSY, IDE_DRQ | IDE_READY | IDE_ERR,
+ fun, "completion")) {
+ r = 0;
+ while (read_reg(cd, 7) & IDE_DRQ) {
+ d = read_reg(cd, 4) + 256 * read_reg(cd, 5);
+ n = (d + 3) & 0xfffc;
+ p = read_reg(cd, 2) & 3;
+
+ if ((p == 2) && (n > 0) && (j == 0)) {
+ pi_read_block(cd->pi, buf, n);
+ if (verbose > 1)
+ printk("%s: %s: Read %d bytes\n",
+ cd->name, fun, n);
+ r = 0;
+ j++;
+ } else {
+ if (verbose > 1)
+ printk
+ ("%s: %s: Unexpected phase %d, d=%d, k=%d\n",
+ cd->name, fun, p, d, k);
+ if (verbose < 2)
+ printk_once(
+ "%s: WARNING: ATAPI phase errors\n",
+ cd->name);
+ mdelay(1);
+ }
+ if (k++ > PCD_TMO) {
+ printk("%s: Stuck DRQ\n", cd->name);
+ break;
+ }
+ if (pcd_wait
+ (cd, IDE_BUSY, IDE_DRQ | IDE_READY | IDE_ERR, fun,
+ "completion")) {
+ r = -1;
+ break;
+ }
+ }
+ }
+
+ pi_disconnect(cd->pi);
+
+ return r;
+}
+
+static void pcd_req_sense(struct pcd_unit *cd, char *fun)
+{
+ char rs_cmd[12] = { 0x03, 0, 0, 0, 16, 0, 0, 0, 0, 0, 0, 0 };
+ char buf[16];
+ int r, c;
+
+ r = pcd_command(cd, rs_cmd, 16, "Request sense");
+ mdelay(1);
+ if (!r)
+ pcd_completion(cd, buf, "Request sense");
+
+ cd->last_sense = -1;
+ c = 2;
+ if (!r) {
+ if (fun)
+ printk("%s: %s: Sense key: %x, ASC: %x, ASQ: %x\n",
+ cd->name, fun, buf[2] & 0xf, buf[12], buf[13]);
+ c = buf[2] & 0xf;
+ cd->last_sense =
+ c | ((buf[12] & 0xff) << 8) | ((buf[13] & 0xff) << 16);
+ }
+ if ((c == 2) || (c == 6))
+ cd->changed = 1;
+}
+
+static int pcd_atapi(struct pcd_unit *cd, char *cmd, int dlen, char *buf, char *fun)
+{
+ int r;
+
+ r = pcd_command(cd, cmd, dlen, fun);
+ mdelay(1);
+ if (!r)
+ r = pcd_completion(cd, buf, fun);
+ if (r)
+ pcd_req_sense(cd, fun);
+
+ return r;
+}
+
+static int pcd_packet(struct cdrom_device_info *cdi, struct packet_command *cgc)
+{
+ return pcd_atapi(cdi->handle, cgc->cmd, cgc->buflen, cgc->buffer,
+ "generic packet");
+}
+
+#define DBMSG(msg) ((verbose>1)?(msg):NULL)
+
+static unsigned int pcd_check_events(struct cdrom_device_info *cdi,
+ unsigned int clearing, int slot_nr)
+{
+ struct pcd_unit *cd = cdi->handle;
+ int res = cd->changed;
+ if (res)
+ cd->changed = 0;
+ return res ? DISK_EVENT_MEDIA_CHANGE : 0;
+}
+
+static int pcd_lock_door(struct cdrom_device_info *cdi, int lock)
+{
+ char un_cmd[12] = { 0x1e, 0, 0, 0, lock, 0, 0, 0, 0, 0, 0, 0 };
+
+ return pcd_atapi(cdi->handle, un_cmd, 0, pcd_scratch,
+ lock ? "lock door" : "unlock door");
+}
+
+static int pcd_tray_move(struct cdrom_device_info *cdi, int position)
+{
+ char ej_cmd[12] = { 0x1b, 0, 0, 0, 3 - position, 0, 0, 0, 0, 0, 0, 0 };
+
+ return pcd_atapi(cdi->handle, ej_cmd, 0, pcd_scratch,
+ position ? "eject" : "close tray");
+}
+
+static void pcd_sleep(int cs)
+{
+ schedule_timeout_interruptible(cs);
+}
+
+static int pcd_reset(struct pcd_unit *cd)
+{
+ int i, k, flg;
+ int expect[5] = { 1, 1, 1, 0x14, 0xeb };
+
+ pi_connect(cd->pi);
+ write_reg(cd, 6, 0xa0 + 0x10 * cd->drive);
+ write_reg(cd, 7, 8);
+
+ pcd_sleep(20 * HZ / 1000); /* delay a bit */
+
+ k = 0;
+ while ((k++ < PCD_RESET_TMO) && (status_reg(cd) & IDE_BUSY))
+ pcd_sleep(HZ / 10);
+
+ flg = 1;
+ for (i = 0; i < 5; i++)
+ flg &= (read_reg(cd, i + 1) == expect[i]);
+
+ if (verbose) {
+ printk("%s: Reset (%d) signature = ", cd->name, k);
+ for (i = 0; i < 5; i++)
+ printk("%3x", read_reg(cd, i + 1));
+ if (!flg)
+ printk(" (incorrect)");
+ printk("\n");
+ }
+
+ pi_disconnect(cd->pi);
+ return flg - 1;
+}
+
+static int pcd_drive_reset(struct cdrom_device_info *cdi)
+{
+ return pcd_reset(cdi->handle);
+}
+
+static int pcd_ready_wait(struct pcd_unit *cd, int tmo)
+{
+ char tr_cmd[12] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+ int k, p;
+
+ k = 0;
+ while (k < tmo) {
+ cd->last_sense = 0;
+ pcd_atapi(cd, tr_cmd, 0, NULL, DBMSG("test unit ready"));
+ p = cd->last_sense;
+ if (!p)
+ return 0;
+ if (!(((p & 0xffff) == 0x0402) || ((p & 0xff) == 6)))
+ return p;
+ k++;
+ pcd_sleep(HZ);
+ }
+ return 0x000020; /* timeout */
+}
+
+static int pcd_drive_status(struct cdrom_device_info *cdi, int slot_nr)
+{
+ char rc_cmd[12] = { 0x25, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+ struct pcd_unit *cd = cdi->handle;
+
+ if (pcd_ready_wait(cd, PCD_READY_TMO))
+ return CDS_DRIVE_NOT_READY;
+ if (pcd_atapi(cd, rc_cmd, 8, pcd_scratch, DBMSG("check media")))
+ return CDS_NO_DISC;
+ return CDS_DISC_OK;
+}
+
+static int pcd_identify(struct pcd_unit *cd, char *id)
+{
+ int k, s;
+ char id_cmd[12] = { 0x12, 0, 0, 0, 36, 0, 0, 0, 0, 0, 0, 0 };
+
+ pcd_bufblk = -1;
+
+ s = pcd_atapi(cd, id_cmd, 36, pcd_buffer, "identify");
+
+ if (s)
+ return -1;
+ if ((pcd_buffer[0] & 0x1f) != 5) {
+ if (verbose)
+ printk("%s: %s is not a CD-ROM\n",
+ cd->name, cd->drive ? "Slave" : "Master");
+ return -1;
+ }
+ memcpy(id, pcd_buffer + 16, 16);
+ id[16] = 0;
+ k = 16;
+ while ((k >= 0) && (id[k] <= 0x20)) {
+ id[k] = 0;
+ k--;
+ }
+
+ printk("%s: %s: %s\n", cd->name, cd->drive ? "Slave" : "Master", id);
+
+ return 0;
+}
+
+/*
+ * returns 0, with id set if drive is detected
+ * -1, if drive detection failed
+ */
+static int pcd_probe(struct pcd_unit *cd, int ms, char *id)
+{
+ if (ms == -1) {
+ for (cd->drive = 0; cd->drive <= 1; cd->drive++)
+ if (!pcd_reset(cd) && !pcd_identify(cd, id))
+ return 0;
+ } else {
+ cd->drive = ms;
+ if (!pcd_reset(cd) && !pcd_identify(cd, id))
+ return 0;
+ }
+ return -1;
+}
+
+static void pcd_probe_capabilities(void)
+{
+ int unit, r;
+ char buffer[32];
+ char cmd[12] = { 0x5a, 1 << 3, 0x2a, 0, 0, 0, 0, 18, 0, 0, 0, 0 };
+ struct pcd_unit *cd;
+
+ for (unit = 0, cd = pcd; unit < PCD_UNITS; unit++, cd++) {
+ if (!cd->present)
+ continue;
+ r = pcd_atapi(cd, cmd, 18, buffer, "mode sense capabilities");
+ if (r)
+ continue;
+ /* we should now have the cap page */
+ if ((buffer[11] & 1) == 0)
+ cd->info.mask |= CDC_CD_R;
+ if ((buffer[11] & 2) == 0)
+ cd->info.mask |= CDC_CD_RW;
+ if ((buffer[12] & 1) == 0)
+ cd->info.mask |= CDC_PLAY_AUDIO;
+ if ((buffer[14] & 1) == 0)
+ cd->info.mask |= CDC_LOCK;
+ if ((buffer[14] & 8) == 0)
+ cd->info.mask |= CDC_OPEN_TRAY;
+ if ((buffer[14] >> 6) == 0)
+ cd->info.mask |= CDC_CLOSE_TRAY;
+ }
+}
+
+static int pcd_detect(void)
+{
+ char id[18];
+ int k, unit;
+ struct pcd_unit *cd;
+
+ printk("%s: %s version %s, major %d, nice %d\n",
+ name, name, PCD_VERSION, major, nice);
+
+ k = 0;
+ if (pcd_drive_count == 0) { /* nothing spec'd - so autoprobe for 1 */
+ cd = pcd;
+ if (pi_init(cd->pi, 1, -1, -1, -1, -1, -1, pcd_buffer,
+ PI_PCD, verbose, cd->name)) {
+ if (!pcd_probe(cd, -1, id) && cd->disk) {
+ cd->present = 1;
+ k++;
+ } else
+ pi_release(cd->pi);
+ }
+ } else {
+ for (unit = 0, cd = pcd; unit < PCD_UNITS; unit++, cd++) {
+ int *conf = *drives[unit];
+ if (!conf[D_PRT])
+ continue;
+ if (!pi_init(cd->pi, 0, conf[D_PRT], conf[D_MOD],
+ conf[D_UNI], conf[D_PRO], conf[D_DLY],
+ pcd_buffer, PI_PCD, verbose, cd->name))
+ continue;
+ if (!pcd_probe(cd, conf[D_SLV], id) && cd->disk) {
+ cd->present = 1;
+ k++;
+ } else
+ pi_release(cd->pi);
+ }
+ }
+ if (k)
+ return 0;
+
+ printk("%s: No CD-ROM drive found\n", name);
+ for (unit = 0, cd = pcd; unit < PCD_UNITS; unit++, cd++)
+ put_disk(cd->disk);
+ return -1;
+}
+
+/* I/O request processing */
+static struct request_queue *pcd_queue;
+
+static void do_pcd_request(struct request_queue * q)
+{
+ if (pcd_busy)
+ return;
+ while (1) {
+ if (!pcd_req) {
+ pcd_req = blk_fetch_request(q);
+ if (!pcd_req)
+ return;
+ }
+
+ if (rq_data_dir(pcd_req) == READ) {
+ struct pcd_unit *cd = pcd_req->rq_disk->private_data;
+ if (cd != pcd_current)
+ pcd_bufblk = -1;
+ pcd_current = cd;
+ pcd_sector = blk_rq_pos(pcd_req);
+ pcd_count = blk_rq_cur_sectors(pcd_req);
+ pcd_buf = pcd_req->buffer;
+ pcd_busy = 1;
+ ps_set_intr(do_pcd_read, NULL, 0, nice);
+ return;
+ } else {
+ __blk_end_request_all(pcd_req, -EIO);
+ pcd_req = NULL;
+ }
+ }
+}
+
+static inline void next_request(int err)
+{
+ unsigned long saved_flags;
+
+ spin_lock_irqsave(&pcd_lock, saved_flags);
+ if (!__blk_end_request_cur(pcd_req, err))
+ pcd_req = NULL;
+ pcd_busy = 0;
+ do_pcd_request(pcd_queue);
+ spin_unlock_irqrestore(&pcd_lock, saved_flags);
+}
+
+static int pcd_ready(void)
+{
+ return (((status_reg(pcd_current) & (IDE_BUSY | IDE_DRQ)) == IDE_DRQ));
+}
+
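+/* copy as many 512-byte sectors as possible out of the cached 2048-byte
+   CD frame: pcd_buffer holds frame pcd_bufblk, pcd_sector/4 selects the
+   frame and pcd_sector%4 the sector within it */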
+static void pcd_transfer(void)
+{
+
+ while (pcd_count && (pcd_sector / 4 == pcd_bufblk)) {
+ int o = (pcd_sector % 4) * 512;
+ memcpy(pcd_buf, pcd_buffer + o, 512);
+ pcd_count--;
+ pcd_buf += 512;
+ pcd_sector++;
+ }
+}
+
+static void pcd_start(void)
+{
+ int b, i;
+ char rd_cmd[12] = { 0xa8, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 };
+
+ pcd_bufblk = pcd_sector / 4;
+ b = pcd_bufblk;
+ for (i = 0; i < 4; i++) {
+ rd_cmd[5 - i] = b & 0xff;
+ b = b >> 8;
+ }
+
+ if (pcd_command(pcd_current, rd_cmd, 2048, "read block")) {
+ pcd_bufblk = -1;
+ next_request(-EIO);
+ return;
+ }
+
+ mdelay(1);
+
+ ps_set_intr(do_pcd_read_drq, pcd_ready, PCD_TMO, nice);
+}
+
+static void do_pcd_read(void)
+{
+ pcd_busy = 1;
+ pcd_retries = 0;
+ pcd_transfer();
+ if (!pcd_count) {
+ next_request(0);
+ return;
+ }
+
+ pi_do_claimed(pcd_current->pi, pcd_start);
+}
+
+static void do_pcd_read_drq(void)
+{
+ unsigned long saved_flags;
+
+ if (pcd_completion(pcd_current, pcd_buffer, "read block")) {
+ if (pcd_retries < PCD_RETRIES) {
+ mdelay(1);
+ pcd_retries++;
+ pi_do_claimed(pcd_current->pi, pcd_start);
+ return;
+ }
+ pcd_bufblk = -1;
+ next_request(-EIO);
+ return;
+ }
+
+ do_pcd_read();
+ spin_lock_irqsave(&pcd_lock, saved_flags);
+ do_pcd_request(pcd_queue);
+ spin_unlock_irqrestore(&pcd_lock, saved_flags);
+}
+
+/* the audio_ioctl stuff is adapted from sr_ioctl.c */
+
+static int pcd_audio_ioctl(struct cdrom_device_info *cdi, unsigned int cmd, void *arg)
+{
+ struct pcd_unit *cd = cdi->handle;
+
+ switch (cmd) {
+
+ case CDROMREADTOCHDR:
+
+ {
+ char cmd[12] =
+ { GPCMD_READ_TOC_PMA_ATIP, 0, 0, 0, 0, 0, 0, 0, 12,
+ 0, 0, 0 };
+ struct cdrom_tochdr *tochdr =
+ (struct cdrom_tochdr *) arg;
+ char buffer[32];
+ int r;
+
+ r = pcd_atapi(cd, cmd, 12, buffer, "read toc header");
+
+ tochdr->cdth_trk0 = buffer[2];
+ tochdr->cdth_trk1 = buffer[3];
+
+ return r ? -EIO : 0;
+ }
+
+ case CDROMREADTOCENTRY:
+
+ {
+ char cmd[12] =
+ { GPCMD_READ_TOC_PMA_ATIP, 0, 0, 0, 0, 0, 0, 0, 12,
+ 0, 0, 0 };
+
+ struct cdrom_tocentry *tocentry =
+ (struct cdrom_tocentry *) arg;
+ unsigned char buffer[32];
+ int r;
+
+ cmd[1] =
+ (tocentry->cdte_format == CDROM_MSF ? 0x02 : 0);
+ cmd[6] = tocentry->cdte_track;
+
+ r = pcd_atapi(cd, cmd, 12, buffer, "read toc entry");
+
+ tocentry->cdte_ctrl = buffer[5] & 0xf;
+ tocentry->cdte_adr = buffer[5] >> 4;
+ tocentry->cdte_datamode =
+ (tocentry->cdte_ctrl & 0x04) ? 1 : 0;
+ if (tocentry->cdte_format == CDROM_MSF) {
+ tocentry->cdte_addr.msf.minute = buffer[9];
+ tocentry->cdte_addr.msf.second = buffer[10];
+ tocentry->cdte_addr.msf.frame = buffer[11];
+ } else
+ tocentry->cdte_addr.lba =
+ (((((buffer[8] << 8) + buffer[9]) << 8)
+ + buffer[10]) << 8) + buffer[11];
+
+ return r ? -EIO : 0;
+ }
+
+ default:
+
+ return -ENOSYS;
+ }
+}
+
+static int pcd_get_mcn(struct cdrom_device_info *cdi, struct cdrom_mcn *mcn)
+{
+ char cmd[12] =
+ { GPCMD_READ_SUBCHANNEL, 0, 0x40, 2, 0, 0, 0, 0, 24, 0, 0, 0 };
+ char buffer[32];
+
+ if (pcd_atapi(cdi->handle, cmd, 24, buffer, "get mcn"))
+ return -EIO;
+
+ memcpy(mcn->medium_catalog_number, buffer + 9, 13);
+ mcn->medium_catalog_number[13] = 0;
+
+ return 0;
+}
+
+static int __init pcd_init(void)
+{
+ struct pcd_unit *cd;
+ int unit;
+
+ if (disable)
+ return -EINVAL;
+
+ pcd_init_units();
+
+ if (pcd_detect())
+ return -ENODEV;
+
+ /* get the atapi capabilities page */
+ pcd_probe_capabilities();
+
+ if (register_blkdev(major, name)) {
+ for (unit = 0, cd = pcd; unit < PCD_UNITS; unit++, cd++)
+ put_disk(cd->disk);
+ return -EBUSY;
+ }
+
+ pcd_queue = blk_init_queue(do_pcd_request, &pcd_lock);
+ if (!pcd_queue) {
+ unregister_blkdev(major, name);
+ for (unit = 0, cd = pcd; unit < PCD_UNITS; unit++, cd++)
+ put_disk(cd->disk);
+ return -ENOMEM;
+ }
+
+ for (unit = 0, cd = pcd; unit < PCD_UNITS; unit++, cd++) {
+ if (cd->present) {
+ register_cdrom(&cd->info);
+ cd->disk->private_data = cd;
+ cd->disk->queue = pcd_queue;
+ add_disk(cd->disk);
+ }
+ }
+
+ return 0;
+}
+
+static void __exit pcd_exit(void)
+{
+ struct pcd_unit *cd;
+ int unit;
+
+ for (unit = 0, cd = pcd; unit < PCD_UNITS; unit++, cd++) {
+ if (cd->present) {
+ del_gendisk(cd->disk);
+ pi_release(cd->pi);
+ unregister_cdrom(&cd->info);
+ }
+ put_disk(cd->disk);
+ }
+ blk_cleanup_queue(pcd_queue);
+ unregister_blkdev(major, name);
+}
+
+MODULE_LICENSE("GPL");
+module_init(pcd_init)
+module_exit(pcd_exit)
diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c
new file mode 100644
index 00000000..869e7676
--- /dev/null
+++ b/drivers/block/paride/pd.c
@@ -0,0 +1,957 @@
+/*
+ pd.c (c) 1997-8 Grant R. Guenther <grant@torque.net>
+ Under the terms of the GNU General Public License.
+
+ This is the high-level driver for parallel port IDE hard
+ drives based on chips supported by the paride module.
+
+ By default, the driver will autoprobe for a single parallel
+ port IDE drive, but if individual drive parameters are
+ specified, the driver can handle up to 4 drives.
+
+ The behaviour of the pd driver can be altered by setting
+ some parameters from the insmod command line. The following
+ parameters are adjustable:
+
+ drive0 These four arguments can be arrays of
+ drive1 1-8 integers as follows:
+ drive2
+ drive3 <prt>,<pro>,<uni>,<mod>,<geo>,<sby>,<dly>,<slv>
+
+ Where,
+
+ <prt> is the base of the parallel port address for
+ the corresponding drive. (required)
+
+ <pro> is the protocol number for the adapter that
+ supports this drive. These numbers are
+ logged by 'paride' when the protocol modules
+ are initialised. (0 if not given)
+
+ <uni> for those adapters that support chained
+ devices, this is the unit selector for the
+ chain of devices on the given port. It should
+ be zero for devices that don't support chaining.
+ (0 if not given)
+
+ <mod> this can be -1 to choose the best mode, or one
+ of the mode numbers supported by the adapter.
+ (-1 if not given)
+
+ <geo> this defaults to 0 to indicate that the driver
+ should use the CHS geometry provided by the drive
+ itself. If set to 1, the driver will provide
+ a logical geometry with 64 heads and 32 sectors
+ per track, to be consistent with most SCSI
+ drivers. (0 if not given)
+
+ <sby> set this to zero to disable the power saving
+ standby mode, if needed. (1 if not given)
+
+ <dly> some parallel ports require the driver to
+ go more slowly. -1 sets a default value that
+ should work with the chosen protocol. Otherwise,
+ set this to a small integer, the larger it is
+ the slower the port i/o. In some cases, setting
+ this to zero will speed up the device. (default -1)
+
+ <slv> IDE disks can be jumpered to master or slave.
+ Set this to 0 to choose the master drive, 1 to
+ choose the slave, -1 (the default) to choose the
+ first drive found.
+
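+ Since only <prt> is required, a minimal module load for a drive on a
+ port at 0x378 (the address is only an example) can be just
+
+ insmod pd drive0=0x378
+
+ with the remaining values taking the defaults listed above.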
+
+ major You may use this parameter to override the
+ default major number (45) that this driver
+ will use. Be sure to change the device
+ name as well.
+
+ name This parameter is a character string that
+ contains the name the kernel will use for this
+ device (in /proc output, for instance).
+ (default "pd")
+
+ cluster The driver will attempt to aggregate requests
+ for adjacent blocks into larger multi-block
+ clusters. The maximum cluster size (in 512
+ byte sectors) is set with this parameter.
+ (default 64)
+
+ verbose This parameter controls the amount of logging
+ that the driver will do. Set it to 0 for
+ normal operation, 1 to see autoprobe progress
+ messages, or 2 to see additional debugging
+ output. (default 0)
+
+ nice This parameter controls the driver's use of
+ idle CPU time, at the expense of some speed.
+
+ If this driver is built into the kernel, you can use the
+ following kernel command line parameters, with the same values
+ as the corresponding module parameters listed above:
+
+ pd.drive0
+ pd.drive1
+ pd.drive2
+ pd.drive3
+ pd.cluster
+ pd.nice
+
+ In addition, you can use the parameter pd.disable to disable
+ the driver entirely.
+
+*/
+
+/* Changes:
+
+ 1.01 GRG 1997.01.24 Restored pd_reset()
+ Added eject ioctl
+ 1.02 GRG 1998.05.06 SMP spinlock changes,
+ Added slave support
+ 1.03 GRG 1998.06.16 Eliminate an Ugh.
+ 1.04 GRG 1998.08.15 Extra debugging, use HZ in loop timing
+ 1.05 GRG 1998.09.24 Added jumbo support
+
+*/
+
+#define PD_VERSION "1.05"
+#define PD_MAJOR 45
+#define PD_NAME "pd"
+#define PD_UNITS 4
+
+/* Here are things one can override from the insmod command.
+ Most are autoprobed by paride unless set here. Verbose is off
+ by default.
+
+*/
+
+static int verbose = 0;
+static int major = PD_MAJOR;
+static char *name = PD_NAME;
+static int cluster = 64;
+static int nice = 0;
+static int disable = 0;
+
+static int drive0[8] = { 0, 0, 0, -1, 0, 1, -1, -1 };
+static int drive1[8] = { 0, 0, 0, -1, 0, 1, -1, -1 };
+static int drive2[8] = { 0, 0, 0, -1, 0, 1, -1, -1 };
+static int drive3[8] = { 0, 0, 0, -1, 0, 1, -1, -1 };
+
+static int (*drives[4])[8] = {&drive0, &drive1, &drive2, &drive3};
+
+enum {D_PRT, D_PRO, D_UNI, D_MOD, D_GEO, D_SBY, D_DLY, D_SLV};
+
+/* end of parameters */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/gfp.h>
+#include <linux/fs.h>
+#include <linux/delay.h>
+#include <linux/hdreg.h>
+#include <linux/cdrom.h> /* for the eject ioctl */
+#include <linux/blkdev.h>
+#include <linux/blkpg.h>
+#include <linux/kernel.h>
+#include <linux/mutex.h>
+#include <asm/uaccess.h>
+#include <linux/workqueue.h>
+
+static DEFINE_MUTEX(pd_mutex);
+static DEFINE_SPINLOCK(pd_lock);
+
+module_param(verbose, bool, 0);
+module_param(major, int, 0);
+module_param(name, charp, 0);
+module_param(cluster, int, 0);
+module_param(nice, int, 0);
+module_param_array(drive0, int, NULL, 0);
+module_param_array(drive1, int, NULL, 0);
+module_param_array(drive2, int, NULL, 0);
+module_param_array(drive3, int, NULL, 0);
+
+#include "paride.h"
+
+#define PD_BITS 4
+
+/* numbers for "SCSI" geometry */
+
+#define PD_LOG_HEADS 64
+#define PD_LOG_SECTS 32
+
+#define PD_ID_OFF 54
+#define PD_ID_LEN 14
+
+#define PD_MAX_RETRIES 5
+#define PD_TMO 800 /* interrupt timeout in jiffies */
+#define PD_SPIN_DEL 50 /* spin delay in micro-seconds */
+
+#define PD_SPIN (1000000*PD_TMO)/(HZ*PD_SPIN_DEL)
+
+#define STAT_ERR 0x00001
+#define STAT_INDEX 0x00002
+#define STAT_ECC 0x00004
+#define STAT_DRQ 0x00008
+#define STAT_SEEK 0x00010
+#define STAT_WRERR 0x00020
+#define STAT_READY 0x00040
+#define STAT_BUSY 0x00080
+
+#define ERR_AMNF 0x00100
+#define ERR_TK0NF 0x00200
+#define ERR_ABRT 0x00400
+#define ERR_MCR 0x00800
+#define ERR_IDNF 0x01000
+#define ERR_MC 0x02000
+#define ERR_UNC 0x04000
+#define ERR_TMO 0x10000
+
+#define IDE_READ 0x20
+#define IDE_WRITE 0x30
+#define IDE_READ_VRFY 0x40
+#define IDE_INIT_DEV_PARMS 0x91
+#define IDE_STANDBY 0x96
+#define IDE_ACKCHANGE 0xdb
+#define IDE_DOORLOCK 0xde
+#define IDE_DOORUNLOCK 0xdf
+#define IDE_IDENTIFY 0xec
+#define IDE_EJECT 0xed
+
+#define PD_NAMELEN 8
+
+struct pd_unit {
+ struct pi_adapter pia; /* interface to paride layer */
+ struct pi_adapter *pi;
+ int access; /* count of active opens ... */
+ int capacity; /* Size of this volume in sectors */
+ int heads; /* physical geometry */
+ int sectors;
+ int cylinders;
+ int can_lba;
+ int drive; /* master=0 slave=1 */
+ int changed; /* Have we seen a disk change ? */
+ int removable; /* removable media device ? */
+ int standby;
+ int alt_geom;
+ char name[PD_NAMELEN]; /* pda, pdb, etc ... */
+ struct gendisk *gd;
+};
+
+static struct pd_unit pd[PD_UNITS];
+
+static char pd_scratch[512]; /* scratch block buffer */
+
+static char *pd_errs[17] = { "ERR", "INDEX", "ECC", "DRQ", "SEEK", "WRERR",
+ "READY", "BUSY", "AMNF", "TK0NF", "ABRT", "MCR",
+ "IDNF", "MC", "UNC", "???", "TMO"
+};
+
+static inline int status_reg(struct pd_unit *disk)
+{
+ return pi_read_regr(disk->pi, 1, 6);
+}
+
+static inline int read_reg(struct pd_unit *disk, int reg)
+{
+ return pi_read_regr(disk->pi, 0, reg);
+}
+
+static inline void write_status(struct pd_unit *disk, int val)
+{
+ pi_write_regr(disk->pi, 1, 6, val);
+}
+
+static inline void write_reg(struct pd_unit *disk, int reg, int val)
+{
+ pi_write_regr(disk->pi, 0, reg, val);
+}
+
+static inline u8 DRIVE(struct pd_unit *disk)
+{
+ return 0xa0+0x10*disk->drive;
+}
+
+/* ide command interface */
+
+static void pd_print_error(struct pd_unit *disk, char *msg, int status)
+{
+ int i;
+
+ printk("%s: %s: status = 0x%x =", disk->name, msg, status);
+ for (i = 0; i < ARRAY_SIZE(pd_errs); i++)
+ if (status & (1 << i))
+ printk(" %s", pd_errs[i]);
+ printk("\n");
+}
+
+static void pd_reset(struct pd_unit *disk)
+{ /* called only for MASTER drive */
+ write_status(disk, 4);
+ udelay(50);
+ write_status(disk, 0);
+ udelay(250);
+}
+
+#define DBMSG(msg) ((verbose>1)?(msg):NULL)
+
+static int pd_wait_for(struct pd_unit *disk, int w, char *msg)
+{ /* polled wait */
+ int k, r, e;
+
+ k = 0;
+ while (k < PD_SPIN) {
+ r = status_reg(disk);
+ k++;
+ if (((r & w) == w) && !(r & STAT_BUSY))
+ break;
+ udelay(PD_SPIN_DEL);
+ }
+ e = (read_reg(disk, 1) << 8) + read_reg(disk, 7);
+ if (k >= PD_SPIN)
+ e |= ERR_TMO;
+ if ((e & (STAT_ERR | ERR_TMO)) && (msg != NULL))
+ pd_print_error(disk, msg, e);
+ return e;
+}
+
+static void pd_send_command(struct pd_unit *disk, int n, int s, int h, int c0, int c1, int func)
+{
+ write_reg(disk, 6, DRIVE(disk) + h);
+ write_reg(disk, 1, 0); /* the IDE task file */
+ write_reg(disk, 2, n);
+ write_reg(disk, 3, s);
+ write_reg(disk, 4, c0);
+ write_reg(disk, 5, c1);
+ write_reg(disk, 7, func);
+
+ udelay(1);
+}
+
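+/* load the IDE task file for a transfer: a 28-bit LBA is split across the
+   sector, cylinder and head registers when the drive supports LBA,
+   otherwise the block number is translated to classic CHS values */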
+static void pd_ide_command(struct pd_unit *disk, int func, int block, int count)
+{
+ int c1, c0, h, s;
+
+ if (disk->can_lba) {
+ s = block & 255;
+ c0 = (block >>= 8) & 255;
+ c1 = (block >>= 8) & 255;
+ h = ((block >>= 8) & 15) + 0x40;
+ } else {
+ s = (block % disk->sectors) + 1;
+ h = (block /= disk->sectors) % disk->heads;
+ c0 = (block /= disk->heads) % 256;
+ c1 = (block >>= 8);
+ }
+ pd_send_command(disk, count, s, h, c0, c1, func);
+}
+
+/* The i/o request engine */
+
+enum action {Fail = 0, Ok = 1, Hold, Wait};
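+/* Ok and Fail complete the current request; Hold keeps the port claimed and
+   re-runs the same phase later from the workqueue; Wait drops the claim so
+   the port is re-negotiated before the phase is retried. */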
+
+static struct request *pd_req; /* current request */
+static enum action (*phase)(void);
+
+static void run_fsm(void);
+
+static void ps_tq_int(struct work_struct *work);
+
+static DECLARE_DELAYED_WORK(fsm_tq, ps_tq_int);
+
+static void schedule_fsm(void)
+{
+ if (!nice)
+ schedule_delayed_work(&fsm_tq, 0);
+ else
+ schedule_delayed_work(&fsm_tq, nice-1);
+}
+
+static void ps_tq_int(struct work_struct *work)
+{
+ run_fsm();
+}
+
+static enum action do_pd_io_start(void);
+static enum action pd_special(void);
+static enum action do_pd_read_start(void);
+static enum action do_pd_write_start(void);
+static enum action do_pd_read_drq(void);
+static enum action do_pd_write_done(void);
+
+static struct request_queue *pd_queue;
+static int pd_claimed;
+
+static struct pd_unit *pd_current; /* current request's drive */
+static PIA *pi_current; /* current request's PIA */
+
+static void run_fsm(void)
+{
+ while (1) {
+ enum action res;
+ unsigned long saved_flags;
+ int stop = 0;
+
+ if (!phase) {
+ pd_current = pd_req->rq_disk->private_data;
+ pi_current = pd_current->pi;
+ phase = do_pd_io_start;
+ }
+
+ switch (pd_claimed) {
+ case 0:
+ pd_claimed = 1;
+ if (!pi_schedule_claimed(pi_current, run_fsm))
+ return;
+ case 1:
+ pd_claimed = 2;
+ pi_current->proto->connect(pi_current);
+ }
+
+ switch(res = phase()) {
+ case Ok: case Fail:
+ pi_disconnect(pi_current);
+ pd_claimed = 0;
+ phase = NULL;
+ spin_lock_irqsave(&pd_lock, saved_flags);
+ if (!__blk_end_request_cur(pd_req,
+ res == Ok ? 0 : -EIO)) {
+ pd_req = blk_fetch_request(pd_queue);
+ if (!pd_req)
+ stop = 1;
+ }
+ spin_unlock_irqrestore(&pd_lock, saved_flags);
+ if (stop)
+ return;
+ case Hold:
+ schedule_fsm();
+ return;
+ case Wait:
+ pi_disconnect(pi_current);
+ pd_claimed = 0;
+ }
+ }
+}
+
+static int pd_retries = 0; /* i/o error retry count */
+static int pd_block; /* address of next requested block */
+static int pd_count; /* number of blocks still to do */
+static int pd_run; /* sectors in current cluster */
+static int pd_cmd; /* current command READ/WRITE */
+static char *pd_buf; /* buffer for request in progress */
+
+static enum action do_pd_io_start(void)
+{
+ if (pd_req->cmd_type == REQ_TYPE_SPECIAL) {
+ phase = pd_special;
+ return pd_special();
+ }
+
+ pd_cmd = rq_data_dir(pd_req);
+ if (pd_cmd == READ || pd_cmd == WRITE) {
+ pd_block = blk_rq_pos(pd_req);
+ pd_count = blk_rq_cur_sectors(pd_req);
+ if (pd_block + pd_count > get_capacity(pd_req->rq_disk))
+ return Fail;
+ pd_run = blk_rq_sectors(pd_req);
+ pd_buf = pd_req->buffer;
+ pd_retries = 0;
+ if (pd_cmd == READ)
+ return do_pd_read_start();
+ else
+ return do_pd_write_start();
+ }
+ return Fail;
+}
+
+static enum action pd_special(void)
+{
+ enum action (*func)(struct pd_unit *) = pd_req->special;
+ return func(pd_current);
+}
+
+static int pd_next_buf(void)
+{
+ unsigned long saved_flags;
+
+ pd_count--;
+ pd_run--;
+ pd_buf += 512;
+ pd_block++;
+ if (!pd_run)
+ return 1;
+ if (pd_count)
+ return 0;
+ spin_lock_irqsave(&pd_lock, saved_flags);
+ __blk_end_request_cur(pd_req, 0);
+ pd_count = blk_rq_cur_sectors(pd_req);
+ pd_buf = pd_req->buffer;
+ spin_unlock_irqrestore(&pd_lock, saved_flags);
+ return 0;
+}
+
+static unsigned long pd_timeout;
+
+static enum action do_pd_read_start(void)
+{
+ if (pd_wait_for(pd_current, STAT_READY, "do_pd_read") & STAT_ERR) {
+ if (pd_retries < PD_MAX_RETRIES) {
+ pd_retries++;
+ return Wait;
+ }
+ return Fail;
+ }
+ pd_ide_command(pd_current, IDE_READ, pd_block, pd_run);
+ phase = do_pd_read_drq;
+ pd_timeout = jiffies + PD_TMO;
+ return Hold;
+}
+
+static enum action do_pd_write_start(void)
+{
+ if (pd_wait_for(pd_current, STAT_READY, "do_pd_write") & STAT_ERR) {
+ if (pd_retries < PD_MAX_RETRIES) {
+ pd_retries++;
+ return Wait;
+ }
+ return Fail;
+ }
+ pd_ide_command(pd_current, IDE_WRITE, pd_block, pd_run);
+ while (1) {
+ if (pd_wait_for(pd_current, STAT_DRQ, "do_pd_write_drq") & STAT_ERR) {
+ if (pd_retries < PD_MAX_RETRIES) {
+ pd_retries++;
+ return Wait;
+ }
+ return Fail;
+ }
+ pi_write_block(pd_current->pi, pd_buf, 512);
+ if (pd_next_buf())
+ break;
+ }
+ phase = do_pd_write_done;
+ pd_timeout = jiffies + PD_TMO;
+ return Hold;
+}
+
+static inline int pd_ready(void)
+{
+ return !(status_reg(pd_current) & STAT_BUSY);
+}
+
+static enum action do_pd_read_drq(void)
+{
+ if (!pd_ready() && !time_after_eq(jiffies, pd_timeout))
+ return Hold;
+
+ while (1) {
+ if (pd_wait_for(pd_current, STAT_DRQ, "do_pd_read_drq") & STAT_ERR) {
+ if (pd_retries < PD_MAX_RETRIES) {
+ pd_retries++;
+ phase = do_pd_read_start;
+ return Wait;
+ }
+ return Fail;
+ }
+ pi_read_block(pd_current->pi, pd_buf, 512);
+ if (pd_next_buf())
+ break;
+ }
+ return Ok;
+}
+
+static enum action do_pd_write_done(void)
+{
+ if (!pd_ready() && !time_after_eq(jiffies, pd_timeout))
+ return Hold;
+
+ if (pd_wait_for(pd_current, STAT_READY, "do_pd_write_done") & STAT_ERR) {
+ if (pd_retries < PD_MAX_RETRIES) {
+ pd_retries++;
+ phase = do_pd_write_start;
+ return Wait;
+ }
+ return Fail;
+ }
+ return Ok;
+}
+
+/* special io requests */
+
+/* According to the ATA standard, the default CHS geometry should be
+ available following a reset. Some Western Digital drives come up
+ in a mode where only LBA addresses are accepted until the device
+ parameters are initialised.
+*/
+
+static void pd_init_dev_parms(struct pd_unit *disk)
+{
+ pd_wait_for(disk, 0, DBMSG("before init_dev_parms"));
+ pd_send_command(disk, disk->sectors, 0, disk->heads - 1, 0, 0,
+ IDE_INIT_DEV_PARMS);
+ udelay(300);
+ pd_wait_for(disk, 0, "Initialise device parameters");
+}
+
+static enum action pd_door_lock(struct pd_unit *disk)
+{
+ if (!(pd_wait_for(disk, STAT_READY, "Lock") & STAT_ERR)) {
+ pd_send_command(disk, 1, 0, 0, 0, 0, IDE_DOORLOCK);
+ pd_wait_for(disk, STAT_READY, "Lock done");
+ }
+ return Ok;
+}
+
+static enum action pd_door_unlock(struct pd_unit *disk)
+{
+ if (!(pd_wait_for(disk, STAT_READY, "Lock") & STAT_ERR)) {
+ pd_send_command(disk, 1, 0, 0, 0, 0, IDE_DOORUNLOCK);
+ pd_wait_for(disk, STAT_READY, "Lock done");
+ }
+ return Ok;
+}
+
+static enum action pd_eject(struct pd_unit *disk)
+{
+ pd_wait_for(disk, 0, DBMSG("before unlock on eject"));
+ pd_send_command(disk, 1, 0, 0, 0, 0, IDE_DOORUNLOCK);
+ pd_wait_for(disk, 0, DBMSG("after unlock on eject"));
+ pd_wait_for(disk, 0, DBMSG("before eject"));
+ pd_send_command(disk, 0, 0, 0, 0, 0, IDE_EJECT);
+ pd_wait_for(disk, 0, DBMSG("after eject"));
+ return Ok;
+}
+
+static enum action pd_media_check(struct pd_unit *disk)
+{
+ int r = pd_wait_for(disk, STAT_READY, DBMSG("before media_check"));
+ if (!(r & STAT_ERR)) {
+ pd_send_command(disk, 1, 1, 0, 0, 0, IDE_READ_VRFY);
+ r = pd_wait_for(disk, STAT_READY, DBMSG("RDY after READ_VRFY"));
+ } else
+ disk->changed = 1; /* say changed if other error */
+ if (r & ERR_MC) {
+ disk->changed = 1;
+ pd_send_command(disk, 1, 0, 0, 0, 0, IDE_ACKCHANGE);
+ pd_wait_for(disk, STAT_READY, DBMSG("RDY after ACKCHANGE"));
+ pd_send_command(disk, 1, 1, 0, 0, 0, IDE_READ_VRFY);
+ r = pd_wait_for(disk, STAT_READY, DBMSG("RDY after VRFY"));
+ }
+ return Ok;
+}
+
+static void pd_standby_off(struct pd_unit *disk)
+{
+ pd_wait_for(disk, 0, DBMSG("before STANDBY"));
+ pd_send_command(disk, 0, 0, 0, 0, 0, IDE_STANDBY);
+ pd_wait_for(disk, 0, DBMSG("after STANDBY"));
+}
+
+static enum action pd_identify(struct pd_unit *disk)
+{
+ int j;
+ char id[PD_ID_LEN + 1];
+
+/* WARNING: here there may be dragons. reset() applies to both drives,
+ but we call it only on probing the MASTER. This should allow most
+ common configurations to work, but be warned that a reset can clear
+ settings on the SLAVE drive.
+*/
+
+ if (disk->drive == 0)
+ pd_reset(disk);
+
+ write_reg(disk, 6, DRIVE(disk));
+ pd_wait_for(disk, 0, DBMSG("before IDENT"));
+ pd_send_command(disk, 1, 0, 0, 0, 0, IDE_IDENTIFY);
+
+ if (pd_wait_for(disk, STAT_DRQ, DBMSG("IDENT DRQ")) & STAT_ERR)
+ return Fail;
+ pi_read_block(disk->pi, pd_scratch, 512);
+ disk->can_lba = pd_scratch[99] & 2;
+ disk->sectors = le16_to_cpu(*(__le16 *) (pd_scratch + 12));
+ disk->heads = le16_to_cpu(*(__le16 *) (pd_scratch + 6));
+ disk->cylinders = le16_to_cpu(*(__le16 *) (pd_scratch + 2));
+ if (disk->can_lba)
+ disk->capacity = le32_to_cpu(*(__le32 *) (pd_scratch + 120));
+ else
+ disk->capacity = disk->sectors * disk->heads * disk->cylinders;
+
+ for (j = 0; j < PD_ID_LEN; j++)
+ id[j ^ 1] = pd_scratch[j + PD_ID_OFF];
+ j = PD_ID_LEN - 1;
+ while ((j >= 0) && (id[j] <= 0x20))
+ j--;
+ j++;
+ id[j] = 0;
+
+ disk->removable = pd_scratch[0] & 0x80;
+
+ printk("%s: %s, %s, %d blocks [%dM], (%d/%d/%d), %s media\n",
+ disk->name, id,
+ disk->drive ? "slave" : "master",
+ disk->capacity, disk->capacity / 2048,
+ disk->cylinders, disk->heads, disk->sectors,
+ disk->removable ? "removable" : "fixed");
+
+ if (disk->capacity)
+ pd_init_dev_parms(disk);
+ if (!disk->standby)
+ pd_standby_off(disk);
+
+ return Ok;
+}
+
+/* end of io request engine */
+
+static void do_pd_request(struct request_queue * q)
+{
+ if (pd_req)
+ return;
+ pd_req = blk_fetch_request(q);
+ if (!pd_req)
+ return;
+
+ schedule_fsm();
+}
+
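+/* driver-internal commands (identify, door lock/unlock, eject, media check)
+   are issued as REQ_TYPE_SPECIAL requests whose ->special field carries the
+   enum action handler that pd_special() runs from the request engine */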
+static int pd_special_command(struct pd_unit *disk,
+ enum action (*func)(struct pd_unit *disk))
+{
+ struct request *rq;
+ int err = 0;
+
+ rq = blk_get_request(disk->gd->queue, READ, __GFP_WAIT);
+
+ rq->cmd_type = REQ_TYPE_SPECIAL;
+ rq->special = func;
+
+ err = blk_execute_rq(disk->gd->queue, disk->gd, rq, 0);
+
+ blk_put_request(rq);
+ return err;
+}
+
+/* kernel glue structures */
+
+static int pd_open(struct block_device *bdev, fmode_t mode)
+{
+ struct pd_unit *disk = bdev->bd_disk->private_data;
+
+ mutex_lock(&pd_mutex);
+ disk->access++;
+
+ if (disk->removable) {
+ pd_special_command(disk, pd_media_check);
+ pd_special_command(disk, pd_door_lock);
+ }
+ mutex_unlock(&pd_mutex);
+ return 0;
+}
+
+static int pd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
+{
+ struct pd_unit *disk = bdev->bd_disk->private_data;
+
+ if (disk->alt_geom) {
+ geo->heads = PD_LOG_HEADS;
+ geo->sectors = PD_LOG_SECTS;
+ geo->cylinders = disk->capacity / (geo->heads * geo->sectors);
+ } else {
+ geo->heads = disk->heads;
+ geo->sectors = disk->sectors;
+ geo->cylinders = disk->cylinders;
+ }
+
+ return 0;
+}
+
+static int pd_ioctl(struct block_device *bdev, fmode_t mode,
+ unsigned int cmd, unsigned long arg)
+{
+ struct pd_unit *disk = bdev->bd_disk->private_data;
+
+ switch (cmd) {
+ case CDROMEJECT:
+ mutex_lock(&pd_mutex);
+ if (disk->access == 1)
+ pd_special_command(disk, pd_eject);
+ mutex_unlock(&pd_mutex);
+ return 0;
+ default:
+ return -EINVAL;
+ }
+}
+
+static int pd_release(struct gendisk *p, fmode_t mode)
+{
+ struct pd_unit *disk = p->private_data;
+
+ mutex_lock(&pd_mutex);
+ if (!--disk->access && disk->removable)
+ pd_special_command(disk, pd_door_unlock);
+ mutex_unlock(&pd_mutex);
+
+ return 0;
+}
+
+static unsigned int pd_check_events(struct gendisk *p, unsigned int clearing)
+{
+ struct pd_unit *disk = p->private_data;
+ int r;
+ if (!disk->removable)
+ return 0;
+ pd_special_command(disk, pd_media_check);
+ r = disk->changed;
+ disk->changed = 0;
+ return r ? DISK_EVENT_MEDIA_CHANGE : 0;
+}
+
+static int pd_revalidate(struct gendisk *p)
+{
+ struct pd_unit *disk = p->private_data;
+ if (pd_special_command(disk, pd_identify) == 0)
+ set_capacity(p, disk->capacity);
+ else
+ set_capacity(p, 0);
+ return 0;
+}
+
+static const struct block_device_operations pd_fops = {
+ .owner = THIS_MODULE,
+ .open = pd_open,
+ .release = pd_release,
+ .ioctl = pd_ioctl,
+ .getgeo = pd_getgeo,
+ .check_events = pd_check_events,
+ .revalidate_disk= pd_revalidate
+};
+
+/* probing */
+
+static void pd_probe_drive(struct pd_unit *disk)
+{
+ struct gendisk *p = alloc_disk(1 << PD_BITS);
+ if (!p)
+ return;
+ strcpy(p->disk_name, disk->name);
+ p->fops = &pd_fops;
+ p->major = major;
+ p->first_minor = (disk - pd) << PD_BITS;
+ disk->gd = p;
+ p->private_data = disk;
+ p->queue = pd_queue;
+
+ if (disk->drive == -1) {
+ for (disk->drive = 0; disk->drive <= 1; disk->drive++)
+ if (pd_special_command(disk, pd_identify) == 0)
+ return;
+ } else if (pd_special_command(disk, pd_identify) == 0)
+ return;
+ disk->gd = NULL;
+ put_disk(p);
+}
+
+static int pd_detect(void)
+{
+ int found = 0, unit, pd_drive_count = 0;
+ struct pd_unit *disk;
+
+ for (unit = 0; unit < PD_UNITS; unit++) {
+ int *parm = *drives[unit];
+ struct pd_unit *disk = pd + unit;
+ disk->pi = &disk->pia;
+ disk->access = 0;
+ disk->changed = 1;
+ disk->capacity = 0;
+ disk->drive = parm[D_SLV];
+ snprintf(disk->name, PD_NAMELEN, "%s%c", name, 'a'+unit);
+ disk->alt_geom = parm[D_GEO];
+ disk->standby = parm[D_SBY];
+ if (parm[D_PRT])
+ pd_drive_count++;
+ }
+
+ if (pd_drive_count == 0) { /* nothing spec'd - so autoprobe for 1 */
+ disk = pd;
+ if (pi_init(disk->pi, 1, -1, -1, -1, -1, -1, pd_scratch,
+ PI_PD, verbose, disk->name)) {
+ pd_probe_drive(disk);
+ if (!disk->gd)
+ pi_release(disk->pi);
+ }
+
+ } else {
+ for (unit = 0, disk = pd; unit < PD_UNITS; unit++, disk++) {
+ int *parm = *drives[unit];
+ if (!parm[D_PRT])
+ continue;
+ if (pi_init(disk->pi, 0, parm[D_PRT], parm[D_MOD],
+ parm[D_UNI], parm[D_PRO], parm[D_DLY],
+ pd_scratch, PI_PD, verbose, disk->name)) {
+ pd_probe_drive(disk);
+ if (!disk->gd)
+ pi_release(disk->pi);
+ }
+ }
+ }
+ for (unit = 0, disk = pd; unit < PD_UNITS; unit++, disk++) {
+ if (disk->gd) {
+ set_capacity(disk->gd, disk->capacity);
+ add_disk(disk->gd);
+ found = 1;
+ }
+ }
+ if (!found)
+ printk("%s: no valid drive found\n", name);
+ return found;
+}
+
+static int __init pd_init(void)
+{
+ if (disable)
+ goto out1;
+
+ pd_queue = blk_init_queue(do_pd_request, &pd_lock);
+ if (!pd_queue)
+ goto out1;
+
+ blk_queue_max_hw_sectors(pd_queue, cluster);
+
+ if (register_blkdev(major, name))
+ goto out2;
+
+ printk("%s: %s version %s, major %d, cluster %d, nice %d\n",
+ name, name, PD_VERSION, major, cluster, nice);
+ if (!pd_detect())
+ goto out3;
+
+ return 0;
+
+out3:
+ unregister_blkdev(major, name);
+out2:
+ blk_cleanup_queue(pd_queue);
+out1:
+ return -ENODEV;
+}
+
+static void __exit pd_exit(void)
+{
+ struct pd_unit *disk;
+ int unit;
+ unregister_blkdev(major, name);
+ for (unit = 0, disk = pd; unit < PD_UNITS; unit++, disk++) {
+ struct gendisk *p = disk->gd;
+ if (p) {
+ disk->gd = NULL;
+ del_gendisk(p);
+ put_disk(p);
+ pi_release(disk->pi);
+ }
+ }
+ blk_cleanup_queue(pd_queue);
+}
+
+MODULE_LICENSE("GPL");
+module_init(pd_init)
+module_exit(pd_exit)
diff --git a/drivers/block/paride/pf.c b/drivers/block/paride/pf.c
new file mode 100644
index 00000000..f21b520e
--- /dev/null
+++ b/drivers/block/paride/pf.c
@@ -0,0 +1,1006 @@
+/*
+ pf.c (c) 1997-8 Grant R. Guenther <grant@torque.net>
+ Under the terms of the GNU General Public License.
+
+ This is the high-level driver for parallel port ATAPI disk
+ drives based on chips supported by the paride module.
+
+ By default, the driver will autoprobe for a single parallel
+ port ATAPI disk drive, but if individual drive parameters are
+ specified, the driver can handle up to 4 drives.
+
+ The behaviour of the pf driver can be altered by setting
+ some parameters from the insmod command line. The following
+ parameters are adjustable:
+
+ drive0 These four arguments can be arrays of
+ drive1 1-7 integers as follows:
+ drive2
+ drive3 <prt>,<pro>,<uni>,<mod>,<slv>,<lun>,<dly>
+
+ Where,
+
+ <prt> is the base of the parallel port address for
+ the corresponding drive. (required)
+
+ <pro> is the protocol number for the adapter that
+ supports this drive. These numbers are
+ logged by 'paride' when the protocol modules
+ are initialised. (0 if not given)
+
+ <uni> for those adapters that support chained
+ devices, this is the unit selector for the
+ chain of devices on the given port. It should
+ be zero for devices that don't support chaining.
+ (0 if not given)
+
+ <mod> this can be -1 to choose the best mode, or one
+ of the mode numbers supported by the adapter.
+ (-1 if not given)
+
+ <slv> ATAPI drives can be jumpered to master or slave.
+ Set this to 0 to choose the master drive, 1 to
+ choose the slave, -1 (the default) to choose the
+ first drive found.
+
+ <lun> Some ATAPI devices support multiple LUNs.
+ One example is the ATAPI PD/CD drive from
+ Matshita/Panasonic. This device has a
+ CD drive on LUN 0 and a PD drive on LUN 1.
+ By default, the driver will search for the
+ first LUN with a supported device. Set
+ this parameter to force it to use a specific
+ LUN. (default -1)
+
+ <dly> some parallel ports require the driver to
+ go more slowly. -1 sets a default value that
+ should work with the chosen protocol. Otherwise,
+ set this to a small integer, the larger it is
+ the slower the port i/o. In some cases, setting
+ this to zero will speed up the device. (default -1)
+
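+ For example (the port address is only an illustration), forcing the
+ driver to use LUN 1 on a drive at 0x378, with everything else left
+ at its default, would be:
+
+ insmod pf drive0=0x378,0,0,-1,-1,1,-1
+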
+ major You may use this parameter to override the
+ default major number (47) that this driver
+ will use. Be sure to change the device
+ name as well.
+
+ name This parameter is a character string that
+ contains the name the kernel will use for this
+ device (in /proc output, for instance).
+ (default "pf").
+
+ cluster The driver will attempt to aggregate requests
+ for adjacent blocks into larger multi-block
+ clusters. The maximum cluster size (in 512
+ byte sectors) is set with this parameter.
+ (default 64)
+
+ verbose This parameter controls the amount of logging
+ that the driver will do. Set it to 0 for
+ normal operation, 1 to see autoprobe progress
+ messages, or 2 to see additional debugging
+ output. (default 0)
+
+ nice This parameter controls the driver's use of
+ idle CPU time, at the expense of some speed.
+
+ If this driver is built into the kernel, you can use the
+ following command line parameters, with the same values
+ as the corresponding module parameters listed above:
+
+ pf.drive0
+ pf.drive1
+ pf.drive2
+ pf.drive3
+ pf.cluster
+ pf.nice
+
+ In addition, you can use the parameter pf.disable to disable
+ the driver entirely.
+
+*/
+
+/* Changes:
+
+ 1.01 GRG 1998.05.03 Changes for SMP. Eliminate sti().
+ Fix for drives that don't clear STAT_ERR
+ until after next CDB delivered.
+ Small change in pf_completion to round
+ up transfer size.
+ 1.02 GRG 1998.06.16 Eliminated an Ugh
+ 1.03 GRG 1998.08.16 Use HZ in loop timings, extra debugging
+ 1.04 GRG 1998.09.24 Added jumbo support
+
+*/
+
+#define PF_VERSION "1.04"
+#define PF_MAJOR 47
+#define PF_NAME "pf"
+#define PF_UNITS 4
+
+/* Here are things one can override from the insmod command.
+ Most are autoprobed by paride unless set here. Verbose is off
+ by default.
+
+*/
+
+static int verbose = 0;
+static int major = PF_MAJOR;
+static char *name = PF_NAME;
+static int cluster = 64;
+static int nice = 0;
+static int disable = 0;
+
+static int drive0[7] = { 0, 0, 0, -1, -1, -1, -1 };
+static int drive1[7] = { 0, 0, 0, -1, -1, -1, -1 };
+static int drive2[7] = { 0, 0, 0, -1, -1, -1, -1 };
+static int drive3[7] = { 0, 0, 0, -1, -1, -1, -1 };
+
+static int (*drives[4])[7] = {&drive0, &drive1, &drive2, &drive3};
+static int pf_drive_count;
+
+enum {D_PRT, D_PRO, D_UNI, D_MOD, D_SLV, D_LUN, D_DLY};
+
+/* end of parameters */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/fs.h>
+#include <linux/delay.h>
+#include <linux/hdreg.h>
+#include <linux/cdrom.h>
+#include <linux/spinlock.h>
+#include <linux/blkdev.h>
+#include <linux/blkpg.h>
+#include <linux/mutex.h>
+#include <asm/uaccess.h>
+
+static DEFINE_MUTEX(pf_mutex);
+static DEFINE_SPINLOCK(pf_spin_lock);
+
+module_param(verbose, bool, 0644);
+module_param(major, int, 0);
+module_param(name, charp, 0);
+module_param(cluster, int, 0);
+module_param(nice, int, 0);
+module_param_array(drive0, int, NULL, 0);
+module_param_array(drive1, int, NULL, 0);
+module_param_array(drive2, int, NULL, 0);
+module_param_array(drive3, int, NULL, 0);
+
+#include "paride.h"
+#include "pseudo.h"
+
+/* constants for faking geometry numbers */
+
+#define PF_FD_MAX 8192 /* use FD geometry under this size */
+#define PF_FD_HDS 2
+#define PF_FD_SPT 18
+#define PF_HD_HDS 64
+#define PF_HD_SPT 32
+
+#define PF_MAX_RETRIES 5
+#define PF_TMO 800 /* interrupt timeout in jiffies */
+#define PF_SPIN_DEL 50 /* spin delay in micro-seconds */
+
+#define PF_SPIN (1000000*PF_TMO)/(HZ*PF_SPIN_DEL)
+
+#define STAT_ERR 0x00001
+#define STAT_INDEX 0x00002
+#define STAT_ECC 0x00004
+#define STAT_DRQ 0x00008
+#define STAT_SEEK 0x00010
+#define STAT_WRERR 0x00020
+#define STAT_READY 0x00040
+#define STAT_BUSY 0x00080
+
+#define ATAPI_REQ_SENSE 0x03
+#define ATAPI_LOCK 0x1e
+#define ATAPI_DOOR 0x1b
+#define ATAPI_MODE_SENSE 0x5a
+#define ATAPI_CAPACITY 0x25
+#define ATAPI_IDENTIFY 0x12
+#define ATAPI_READ_10 0x28
+#define ATAPI_WRITE_10 0x2a
+
+static int pf_open(struct block_device *bdev, fmode_t mode);
+static void do_pf_request(struct request_queue * q);
+static int pf_ioctl(struct block_device *bdev, fmode_t mode,
+ unsigned int cmd, unsigned long arg);
+static int pf_getgeo(struct block_device *bdev, struct hd_geometry *geo);
+
+static int pf_release(struct gendisk *disk, fmode_t mode);
+
+static int pf_detect(void);
+static void do_pf_read(void);
+static void do_pf_read_start(void);
+static void do_pf_write(void);
+static void do_pf_write_start(void);
+static void do_pf_read_drq(void);
+static void do_pf_write_done(void);
+
+#define PF_NM 0
+#define PF_RO 1
+#define PF_RW 2
+
+#define PF_NAMELEN 8
+
+struct pf_unit {
+ struct pi_adapter pia; /* interface to paride layer */
+ struct pi_adapter *pi;
+ int removable; /* removable media device ? */
+ int media_status; /* media present ? WP ? */
+ int drive; /* drive */
+ int lun;
+ int access; /* count of active opens ... */
+ int present; /* device present ? */
+ char name[PF_NAMELEN]; /* pf0, pf1, ... */
+ struct gendisk *disk;
+};
+
+static struct pf_unit units[PF_UNITS];
+
+static int pf_identify(struct pf_unit *pf);
+static void pf_lock(struct pf_unit *pf, int func);
+static void pf_eject(struct pf_unit *pf);
+static unsigned int pf_check_events(struct gendisk *disk,
+ unsigned int clearing);
+
+static char pf_scratch[512]; /* scratch block buffer */
+
+/* the variables below are used mainly in the I/O request engine, which
+ processes only one request at a time.
+*/
+
+static int pf_retries = 0; /* i/o error retry count */
+static int pf_busy = 0; /* request being processed ? */
+static struct request *pf_req; /* current request */
+static int pf_block; /* address of next requested block */
+static int pf_count; /* number of blocks still to do */
+static int pf_run; /* sectors in current cluster */
+static int pf_cmd; /* current command READ/WRITE */
+static struct pf_unit *pf_current;/* unit of current request */
+static int pf_mask; /* stopper for pseudo-int */
+static char *pf_buf; /* buffer for request in progress */
+
+/* kernel glue structures */
+
+static const struct block_device_operations pf_fops = {
+ .owner = THIS_MODULE,
+ .open = pf_open,
+ .release = pf_release,
+ .ioctl = pf_ioctl,
+ .getgeo = pf_getgeo,
+ .check_events = pf_check_events,
+};
+
+static void __init pf_init_units(void)
+{
+ struct pf_unit *pf;
+ int unit;
+
+ pf_drive_count = 0;
+ for (unit = 0, pf = units; unit < PF_UNITS; unit++, pf++) {
+ struct gendisk *disk = alloc_disk(1);
+ if (!disk)
+ continue;
+ pf->disk = disk;
+ pf->pi = &pf->pia;
+ pf->media_status = PF_NM;
+ pf->drive = (*drives[unit])[D_SLV];
+ pf->lun = (*drives[unit])[D_LUN];
+ snprintf(pf->name, PF_NAMELEN, "%s%d", name, unit);
+ disk->major = major;
+ disk->first_minor = unit;
+ strcpy(disk->disk_name, pf->name);
+ disk->fops = &pf_fops;
+ if (!(*drives[unit])[D_PRT])
+ pf_drive_count++;
+ }
+}
+
+static int pf_open(struct block_device *bdev, fmode_t mode)
+{
+ struct pf_unit *pf = bdev->bd_disk->private_data;
+ int ret;
+
+ mutex_lock(&pf_mutex);
+ pf_identify(pf);
+
+ ret = -ENODEV;
+ if (pf->media_status == PF_NM)
+ goto out;
+
+ ret = -EROFS;
+ if ((pf->media_status == PF_RO) && (mode & FMODE_WRITE))
+ goto out;
+
+ ret = 0;
+ pf->access++;
+ if (pf->removable)
+ pf_lock(pf, 1);
+out:
+ mutex_unlock(&pf_mutex);
+ return ret;
+}
+
+static int pf_getgeo(struct block_device *bdev, struct hd_geometry *geo)
+{
+ struct pf_unit *pf = bdev->bd_disk->private_data;
+ sector_t capacity = get_capacity(pf->disk);
+
+ if (capacity < PF_FD_MAX) {
+ geo->cylinders = sector_div(capacity, PF_FD_HDS * PF_FD_SPT);
+ geo->heads = PF_FD_HDS;
+ geo->sectors = PF_FD_SPT;
+ } else {
+ geo->cylinders = sector_div(capacity, PF_HD_HDS * PF_HD_SPT);
+ geo->heads = PF_HD_HDS;
+ geo->sectors = PF_HD_SPT;
+ }
+
+ return 0;
+}
+
+static int pf_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, unsigned long arg)
+{
+ struct pf_unit *pf = bdev->bd_disk->private_data;
+
+ if (cmd != CDROMEJECT)
+ return -EINVAL;
+
+ if (pf->access != 1)
+ return -EBUSY;
+ mutex_lock(&pf_mutex);
+ pf_eject(pf);
+ mutex_unlock(&pf_mutex);
+
+ return 0;
+}
+
+static int pf_release(struct gendisk *disk, fmode_t mode)
+{
+ struct pf_unit *pf = disk->private_data;
+
+ mutex_lock(&pf_mutex);
+ if (pf->access <= 0) {
+ mutex_unlock(&pf_mutex);
+ return -EINVAL;
+ }
+
+ pf->access--;
+
+ if (!pf->access && pf->removable)
+ pf_lock(pf, 0);
+
+ mutex_unlock(&pf_mutex);
+ return 0;
+
+}
+
+static unsigned int pf_check_events(struct gendisk *disk, unsigned int clearing)
+{
+ return DISK_EVENT_MEDIA_CHANGE;
+}
+
+static inline int status_reg(struct pf_unit *pf)
+{
+ return pi_read_regr(pf->pi, 1, 6);
+}
+
+static inline int read_reg(struct pf_unit *pf, int reg)
+{
+ return pi_read_regr(pf->pi, 0, reg);
+}
+
+static inline void write_reg(struct pf_unit *pf, int reg, int val)
+{
+ pi_write_regr(pf->pi, 0, reg, val);
+}
+
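+/* poll the status register until all 'go' bits clear and, if 'stop' is
+   non-zero, one of the 'stop' bits is set; returns 0 on success, or
+   (error register << 8) | status if the drive flags an error or the
+   spin budget (PF_SPIN polls) is exhausted */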
+static int pf_wait(struct pf_unit *pf, int go, int stop, char *fun, char *msg)
+{
+ int j, r, e, s, p;
+
+ j = 0;
+ while ((((r = status_reg(pf)) & go) || (stop && (!(r & stop))))
+ && (j++ < PF_SPIN))
+ udelay(PF_SPIN_DEL);
+
+ if ((r & (STAT_ERR & stop)) || (j > PF_SPIN)) {
+ s = read_reg(pf, 7);
+ e = read_reg(pf, 1);
+ p = read_reg(pf, 2);
+ if (j > PF_SPIN)
+ e |= 0x100;
+ if (fun)
+ printk("%s: %s %s: alt=0x%x stat=0x%x err=0x%x"
+ " loop=%d phase=%d\n",
+ pf->name, fun, msg, r, s, e, j, p);
+ return (e << 8) + s;
+ }
+ return 0;
+}
+
+static int pf_command(struct pf_unit *pf, char *cmd, int dlen, char *fun)
+{
+ pi_connect(pf->pi);
+
+ write_reg(pf, 6, 0xa0+0x10*pf->drive);
+
+ if (pf_wait(pf, STAT_BUSY | STAT_DRQ, 0, fun, "before command")) {
+ pi_disconnect(pf->pi);
+ return -1;
+ }
+
+ write_reg(pf, 4, dlen % 256);
+ write_reg(pf, 5, dlen / 256);
+ write_reg(pf, 7, 0xa0); /* ATAPI packet command */
+
+ if (pf_wait(pf, STAT_BUSY, STAT_DRQ, fun, "command DRQ")) {
+ pi_disconnect(pf->pi);
+ return -1;
+ }
+
+ if (read_reg(pf, 2) != 1) {
+ printk("%s: %s: command phase error\n", pf->name, fun);
+ pi_disconnect(pf->pi);
+ return -1;
+ }
+
+ pi_write_block(pf->pi, cmd, 12);
+
+ return 0;
+}
+
+static int pf_completion(struct pf_unit *pf, char *buf, char *fun)
+{
+ int r, s, n;
+
+ r = pf_wait(pf, STAT_BUSY, STAT_DRQ | STAT_READY | STAT_ERR,
+ fun, "completion");
+
+ if ((read_reg(pf, 2) & 2) && (read_reg(pf, 7) & STAT_DRQ)) {
+ n = (((read_reg(pf, 4) + 256 * read_reg(pf, 5)) +
+ 3) & 0xfffc);
+ pi_read_block(pf->pi, buf, n);
+ }
+
+ s = pf_wait(pf, STAT_BUSY, STAT_READY | STAT_ERR, fun, "data done");
+
+ pi_disconnect(pf->pi);
+
+ return (r ? r : s);
+}
+
+static void pf_req_sense(struct pf_unit *pf, int quiet)
+{
+ char rs_cmd[12] =
+ { ATAPI_REQ_SENSE, pf->lun << 5, 0, 0, 16, 0, 0, 0, 0, 0, 0, 0 };
+ char buf[16];
+ int r;
+
+ r = pf_command(pf, rs_cmd, 16, "Request sense");
+ mdelay(1);
+ if (!r)
+ pf_completion(pf, buf, "Request sense");
+
+ if ((!r) && (!quiet))
+ printk("%s: Sense key: %x, ASC: %x, ASQ: %x\n",
+ pf->name, buf[2] & 0xf, buf[12], buf[13]);
+}
+
+static int pf_atapi(struct pf_unit *pf, char *cmd, int dlen, char *buf, char *fun)
+{
+ int r;
+
+ r = pf_command(pf, cmd, dlen, fun);
+ mdelay(1);
+ if (!r)
+ r = pf_completion(pf, buf, fun);
+ if (r)
+ pf_req_sense(pf, !fun);
+
+ return r;
+}
+
+static void pf_lock(struct pf_unit *pf, int func)
+{
+ char lo_cmd[12] = { ATAPI_LOCK, pf->lun << 5, 0, 0, func, 0, 0, 0, 0, 0, 0, 0 };
+
+ pf_atapi(pf, lo_cmd, 0, pf_scratch, func ? "lock" : "unlock");
+}
+
+static void pf_eject(struct pf_unit *pf)
+{
+ char ej_cmd[12] = { ATAPI_DOOR, pf->lun << 5, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0 };
+
+ pf_lock(pf, 0);
+ pf_atapi(pf, ej_cmd, 0, pf_scratch, "eject");
+}
+
+#define PF_RESET_TMO 30 /* in tenths of a second */
+
+static void pf_sleep(int cs)
+{
+ schedule_timeout_interruptible(cs);
+}
+
+/* the ATAPI standard actually specifies the contents of all 7 registers
+ after a reset, but the specification is ambiguous concerning the last
+ two bytes, and different drives interpret the standard differently.
+ */
+
+static int pf_reset(struct pf_unit *pf)
+{
+ int i, k, flg;
+ int expect[5] = { 1, 1, 1, 0x14, 0xeb };
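+	/* expected contents of registers 1-5 after reset: error = 1,
+	   sector count = 1, sector = 1, and 0x14/0xeb in the cylinder
+	   registers - the standard ATAPI signature */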
+
+ pi_connect(pf->pi);
+ write_reg(pf, 6, 0xa0+0x10*pf->drive);
+ write_reg(pf, 7, 8);
+
+ pf_sleep(20 * HZ / 1000);
+
+ k = 0;
+ while ((k++ < PF_RESET_TMO) && (status_reg(pf) & STAT_BUSY))
+ pf_sleep(HZ / 10);
+
+ flg = 1;
+ for (i = 0; i < 5; i++)
+ flg &= (read_reg(pf, i + 1) == expect[i]);
+
+ if (verbose) {
+ printk("%s: Reset (%d) signature = ", pf->name, k);
+ for (i = 0; i < 5; i++)
+ printk("%3x", read_reg(pf, i + 1));
+ if (!flg)
+ printk(" (incorrect)");
+ printk("\n");
+ }
+
+ pi_disconnect(pf->pi);
+ return flg - 1;
+}
+
+static void pf_mode_sense(struct pf_unit *pf)
+{
+ char ms_cmd[12] =
+ { ATAPI_MODE_SENSE, pf->lun << 5, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0 };
+ char buf[8];
+
+ pf_atapi(pf, ms_cmd, 8, buf, "mode sense");
+ pf->media_status = PF_RW;
+ if (buf[3] & 0x80)
+ pf->media_status = PF_RO;
+}
+
+static void xs(char *buf, char *targ, int offs, int len)
+{
+ int j, k, l;
+
+ j = 0;
+ l = 0;
+ for (k = 0; k < len; k++)
+ if ((buf[k + offs] != 0x20) || (buf[k + offs] != l))
+ l = targ[j++] = buf[k + offs];
+ if (l == 0x20)
+ j--;
+ targ[j] = 0;
+}
+
+static int xl(char *buf, int offs)
+{
+ int v, k;
+
+ v = 0;
+ for (k = 0; k < 4; k++)
+ v = v * 256 + (buf[k + offs] & 0xff);
+ return v;
+}
+
+static void pf_get_capacity(struct pf_unit *pf)
+{
+ char rc_cmd[12] = { ATAPI_CAPACITY, pf->lun << 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+ char buf[8];
+ int bs;
+
+ if (pf_atapi(pf, rc_cmd, 8, buf, "get capacity")) {
+ pf->media_status = PF_NM;
+ return;
+ }
+ set_capacity(pf->disk, xl(buf, 0) + 1);
+ bs = xl(buf, 4);
+ if (bs != 512) {
+ set_capacity(pf->disk, 0);
+ if (verbose)
+ printk("%s: Drive %d, LUN %d,"
+ " unsupported block size %d\n",
+ pf->name, pf->drive, pf->lun, bs);
+ }
+}
+
+static int pf_identify(struct pf_unit *pf)
+{
+ int dt, s;
+ char *ms[2] = { "master", "slave" };
+ char mf[10], id[18];
+ char id_cmd[12] =
+ { ATAPI_IDENTIFY, pf->lun << 5, 0, 0, 36, 0, 0, 0, 0, 0, 0, 0 };
+ char buf[36];
+
+ s = pf_atapi(pf, id_cmd, 36, buf, "identify");
+ if (s)
+ return -1;
+
+ dt = buf[0] & 0x1f;
+ if ((dt != 0) && (dt != 7)) {
+ if (verbose)
+ printk("%s: Drive %d, LUN %d, unsupported type %d\n",
+ pf->name, pf->drive, pf->lun, dt);
+ return -1;
+ }
+
+ xs(buf, mf, 8, 8);
+ xs(buf, id, 16, 16);
+
+ pf->removable = (buf[1] & 0x80);
+
+ pf_mode_sense(pf);
+ pf_mode_sense(pf);
+ pf_mode_sense(pf);
+
+ pf_get_capacity(pf);
+
+ printk("%s: %s %s, %s LUN %d, type %d",
+ pf->name, mf, id, ms[pf->drive], pf->lun, dt);
+ if (pf->removable)
+ printk(", removable");
+ if (pf->media_status == PF_NM)
+ printk(", no media\n");
+ else {
+ if (pf->media_status == PF_RO)
+ printk(", RO");
+ printk(", %llu blocks\n",
+ (unsigned long long)get_capacity(pf->disk));
+ }
+ return 0;
+}
+
+/* returns 0, with id set if drive is detected
+ -1, if drive detection failed
+*/
+static int pf_probe(struct pf_unit *pf)
+{
+ if (pf->drive == -1) {
+ for (pf->drive = 0; pf->drive <= 1; pf->drive++)
+ if (!pf_reset(pf)) {
+ if (pf->lun != -1)
+ return pf_identify(pf);
+ else
+ for (pf->lun = 0; pf->lun < 8; pf->lun++)
+ if (!pf_identify(pf))
+ return 0;
+ }
+ } else {
+ if (pf_reset(pf))
+ return -1;
+ if (pf->lun != -1)
+ return pf_identify(pf);
+ for (pf->lun = 0; pf->lun < 8; pf->lun++)
+ if (!pf_identify(pf))
+ return 0;
+ }
+ return -1;
+}
+
+static int pf_detect(void)
+{
+ struct pf_unit *pf = units;
+ int k, unit;
+
+ printk("%s: %s version %s, major %d, cluster %d, nice %d\n",
+ name, name, PF_VERSION, major, cluster, nice);
+
+ k = 0;
+ if (pf_drive_count == 0) {
+ if (pi_init(pf->pi, 1, -1, -1, -1, -1, -1, pf_scratch, PI_PF,
+ verbose, pf->name)) {
+ if (!pf_probe(pf) && pf->disk) {
+ pf->present = 1;
+ k++;
+ } else
+ pi_release(pf->pi);
+ }
+
+ } else
+ for (unit = 0; unit < PF_UNITS; unit++, pf++) {
+ int *conf = *drives[unit];
+ if (!conf[D_PRT])
+ continue;
+ if (pi_init(pf->pi, 0, conf[D_PRT], conf[D_MOD],
+ conf[D_UNI], conf[D_PRO], conf[D_DLY],
+ pf_scratch, PI_PF, verbose, pf->name)) {
+ if (pf->disk && !pf_probe(pf)) {
+ pf->present = 1;
+ k++;
+ } else
+ pi_release(pf->pi);
+ }
+ }
+ if (k)
+ return 0;
+
+ printk("%s: No ATAPI disk detected\n", name);
+ for (pf = units, unit = 0; unit < PF_UNITS; pf++, unit++)
+ put_disk(pf->disk);
+ return -1;
+}
+
+/* The i/o request engine */
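+
+/* do_pf_request() pulls one request at a time off the queue and kicks
+   off the transfer; the work itself runs from the paride pseudo-
+   interrupt callbacks (do_pf_read/do_pf_write -> do_pf_*_start ->
+   do_pf_read_drq/do_pf_write_done), and next_request() completes the
+   current request and restarts the queue. */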
+
+static int pf_start(struct pf_unit *pf, int cmd, int b, int c)
+{
+ int i;
+ char io_cmd[12] = { cmd, pf->lun << 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+
+ for (i = 0; i < 4; i++) {
+ io_cmd[5 - i] = b & 0xff;
+ b = b >> 8;
+ }
+
+ io_cmd[8] = c & 0xff;
+ io_cmd[7] = (c >> 8) & 0xff;
+
+ i = pf_command(pf, io_cmd, c * 512, "start i/o");
+
+ mdelay(1);
+
+ return i;
+}
+
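+/* true once the drive has dropped BUSY and raised every bit currently
+   expected in pf_mask (STAT_DRQ while a transfer is pending, 0 while
+   waiting for a write to finish) */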
+static int pf_ready(void)
+{
+ return (((status_reg(pf_current) & (STAT_BUSY | pf_mask)) == pf_mask));
+}
+
+static struct request_queue *pf_queue;
+
+static void pf_end_request(int err)
+{
+ if (pf_req && !__blk_end_request_cur(pf_req, err))
+ pf_req = NULL;
+}
+
+static void do_pf_request(struct request_queue * q)
+{
+ if (pf_busy)
+ return;
+repeat:
+ if (!pf_req) {
+ pf_req = blk_fetch_request(q);
+ if (!pf_req)
+ return;
+ }
+
+ pf_current = pf_req->rq_disk->private_data;
+ pf_block = blk_rq_pos(pf_req);
+ pf_run = blk_rq_sectors(pf_req);
+ pf_count = blk_rq_cur_sectors(pf_req);
+
+ if (pf_block + pf_count > get_capacity(pf_req->rq_disk)) {
+ pf_end_request(-EIO);
+ goto repeat;
+ }
+
+ pf_cmd = rq_data_dir(pf_req);
+ pf_buf = pf_req->buffer;
+ pf_retries = 0;
+
+ pf_busy = 1;
+ if (pf_cmd == READ)
+ pi_do_claimed(pf_current->pi, do_pf_read);
+ else if (pf_cmd == WRITE)
+ pi_do_claimed(pf_current->pi, do_pf_write);
+ else {
+ pf_busy = 0;
+ pf_end_request(-EIO);
+ goto repeat;
+ }
+}
+
+static int pf_next_buf(void)
+{
+ unsigned long saved_flags;
+
+ pf_count--;
+ pf_run--;
+ pf_buf += 512;
+ pf_block++;
+ if (!pf_run)
+ return 1;
+ if (!pf_count) {
+ spin_lock_irqsave(&pf_spin_lock, saved_flags);
+ pf_end_request(0);
+ spin_unlock_irqrestore(&pf_spin_lock, saved_flags);
+ if (!pf_req)
+ return 1;
+ pf_count = blk_rq_cur_sectors(pf_req);
+ pf_buf = pf_req->buffer;
+ }
+ return 0;
+}
+
+static inline void next_request(int err)
+{
+ unsigned long saved_flags;
+
+ spin_lock_irqsave(&pf_spin_lock, saved_flags);
+ pf_end_request(err);
+ pf_busy = 0;
+ do_pf_request(pf_queue);
+ spin_unlock_irqrestore(&pf_spin_lock, saved_flags);
+}
+
+/* detach from the calling context - in case the spinlock is held */
+static void do_pf_read(void)
+{
+ ps_set_intr(do_pf_read_start, NULL, 0, nice);
+}
+
+static void do_pf_read_start(void)
+{
+ pf_busy = 1;
+
+ if (pf_start(pf_current, ATAPI_READ_10, pf_block, pf_run)) {
+ pi_disconnect(pf_current->pi);
+ if (pf_retries < PF_MAX_RETRIES) {
+ pf_retries++;
+ pi_do_claimed(pf_current->pi, do_pf_read_start);
+ return;
+ }
+ next_request(-EIO);
+ return;
+ }
+ pf_mask = STAT_DRQ;
+ ps_set_intr(do_pf_read_drq, pf_ready, PF_TMO, nice);
+}
+
+static void do_pf_read_drq(void)
+{
+ while (1) {
+ if (pf_wait(pf_current, STAT_BUSY, STAT_DRQ | STAT_ERR,
+ "read block", "completion") & STAT_ERR) {
+ pi_disconnect(pf_current->pi);
+ if (pf_retries < PF_MAX_RETRIES) {
+ pf_req_sense(pf_current, 0);
+ pf_retries++;
+ pi_do_claimed(pf_current->pi, do_pf_read_start);
+ return;
+ }
+ next_request(-EIO);
+ return;
+ }
+ pi_read_block(pf_current->pi, pf_buf, 512);
+ if (pf_next_buf())
+ break;
+ }
+ pi_disconnect(pf_current->pi);
+ next_request(0);
+}
+
+static void do_pf_write(void)
+{
+ ps_set_intr(do_pf_write_start, NULL, 0, nice);
+}
+
+static void do_pf_write_start(void)
+{
+ pf_busy = 1;
+
+ if (pf_start(pf_current, ATAPI_WRITE_10, pf_block, pf_run)) {
+ pi_disconnect(pf_current->pi);
+ if (pf_retries < PF_MAX_RETRIES) {
+ pf_retries++;
+ pi_do_claimed(pf_current->pi, do_pf_write_start);
+ return;
+ }
+ next_request(-EIO);
+ return;
+ }
+
+ while (1) {
+ if (pf_wait(pf_current, STAT_BUSY, STAT_DRQ | STAT_ERR,
+ "write block", "data wait") & STAT_ERR) {
+ pi_disconnect(pf_current->pi);
+ if (pf_retries < PF_MAX_RETRIES) {
+ pf_retries++;
+ pi_do_claimed(pf_current->pi, do_pf_write_start);
+ return;
+ }
+ next_request(-EIO);
+ return;
+ }
+ pi_write_block(pf_current->pi, pf_buf, 512);
+ if (pf_next_buf())
+ break;
+ }
+ pf_mask = 0;
+ ps_set_intr(do_pf_write_done, pf_ready, PF_TMO, nice);
+}
+
+static void do_pf_write_done(void)
+{
+ if (pf_wait(pf_current, STAT_BUSY, 0, "write block", "done") & STAT_ERR) {
+ pi_disconnect(pf_current->pi);
+ if (pf_retries < PF_MAX_RETRIES) {
+ pf_retries++;
+ pi_do_claimed(pf_current->pi, do_pf_write_start);
+ return;
+ }
+ next_request(-EIO);
+ return;
+ }
+ pi_disconnect(pf_current->pi);
+ next_request(0);
+}
+
+static int __init pf_init(void)
+{ /* preliminary initialisation */
+ struct pf_unit *pf;
+ int unit;
+
+ if (disable)
+ return -EINVAL;
+
+ pf_init_units();
+
+ if (pf_detect())
+ return -ENODEV;
+ pf_busy = 0;
+
+ if (register_blkdev(major, name)) {
+ for (pf = units, unit = 0; unit < PF_UNITS; pf++, unit++)
+ put_disk(pf->disk);
+ return -EBUSY;
+ }
+ pf_queue = blk_init_queue(do_pf_request, &pf_spin_lock);
+ if (!pf_queue) {
+ unregister_blkdev(major, name);
+ for (pf = units, unit = 0; unit < PF_UNITS; pf++, unit++)
+ put_disk(pf->disk);
+ return -ENOMEM;
+ }
+
+ blk_queue_max_segments(pf_queue, cluster);
+
+ for (pf = units, unit = 0; unit < PF_UNITS; pf++, unit++) {
+ struct gendisk *disk = pf->disk;
+
+ if (!pf->present)
+ continue;
+ disk->private_data = pf;
+ disk->queue = pf_queue;
+ add_disk(disk);
+ }
+ return 0;
+}
+
+static void __exit pf_exit(void)
+{
+ struct pf_unit *pf;
+ int unit;
+ unregister_blkdev(major, name);
+ for (pf = units, unit = 0; unit < PF_UNITS; pf++, unit++) {
+ if (!pf->present)
+ continue;
+ del_gendisk(pf->disk);
+ put_disk(pf->disk);
+ pi_release(pf->pi);
+ }
+ blk_cleanup_queue(pf_queue);
+}
+
+MODULE_LICENSE("GPL");
+module_init(pf_init)
+module_exit(pf_exit)
diff --git a/drivers/block/paride/pg.c b/drivers/block/paride/pg.c
new file mode 100644
index 00000000..6b9a2000
--- /dev/null
+++ b/drivers/block/paride/pg.c
@@ -0,0 +1,724 @@
+/*
+ pg.c (c) 1998 Grant R. Guenther <grant@torque.net>
+ Under the terms of the GNU General Public License.
+
+ The pg driver provides a simple character device interface for
+ sending ATAPI commands to a device. With the exception of the
+ ATAPI reset operation, all operations are performed by a pair
+ of read and write operations to the appropriate /dev/pgN device.
+ A write operation delivers a command and any outbound data in
+ a single buffer. Normally, the write will succeed unless the
+ device is offline or malfunctioning, or there is already another
+ command pending. If the write succeeds, it should be followed
+ immediately by a read operation, to obtain any returned data and
+ status information. A read will fail if there is no operation
+ in progress.
+
+ As a special case, the device can be reset with a write operation,
+ and in this case, no following read is expected, or permitted.
+
+ There are no ioctl() operations. Any single operation
+ may transfer at most PG_MAX_DATA bytes. Note that the driver must
+ copy the data through an internal buffer. In keeping with all
+ current ATAPI devices, command packets are assumed to be exactly
+ 12 bytes in length.
+
+ To permit future changes to this interface, the headers in the
+ read and write buffers contain a single character "magic" flag.
+ Currently this flag must be the character "P".
+
+ By default, the driver will autoprobe for a single parallel
+ port ATAPI device, but if their individual parameters are
+ specified, the driver can handle up to 4 devices.
+
+ To use this device, you must have the following device
+ special files defined:
+
+ /dev/pg0 c 97 0
+ /dev/pg1 c 97 1
+ /dev/pg2 c 97 2
+ /dev/pg3 c 97 3
+
+ (You'll need to change the 97 to something else if you use
+ the 'major' parameter to install the driver on a different
+ major number.)
+
+ The behaviour of the pg driver can be altered by setting
+ some parameters from the insmod command line. The following
+ parameters are adjustable:
+
+ drive0 These four arguments can be arrays of
+ drive1 1-6 integers as follows:
+ drive2
+ drive3 <prt>,<pro>,<uni>,<mod>,<slv>,<dly>
+
+ Where,
+
+ <prt> is the base of the parallel port address for
+ the corresponding drive. (required)
+
+ <pro> is the protocol number for the adapter that
+ supports this drive. These numbers are
+ logged by 'paride' when the protocol modules
+ are initialised. (0 if not given)
+
+ <uni> for those adapters that support chained
+ devices, this is the unit selector for the
+ chain of devices on the given port. It should
+ be zero for devices that don't support chaining.
+ (0 if not given)
+
+ <mod> this can be -1 to choose the best mode, or one
+ of the mode numbers supported by the adapter.
+ (-1 if not given)
+
+ <slv> ATAPI devices can be jumpered to master or slave.
+ Set this to 0 to choose the master drive, 1 to
+ choose the slave, -1 (the default) to choose the
+ first drive found.
+
+ <dly> some parallel ports require the driver to
+ go more slowly. -1 sets a default value that
+ should work with the chosen protocol. Otherwise,
+ set this to a small integer, the larger it is
+ the slower the port i/o. In some cases, setting
+ this to zero will speed up the device. (default -1)
+
+ major You may use this parameter to override the
+ default major number (97) that this driver
+ will use. Be sure to change the device
+ name as well.
+
+ name This parameter is a character string that
+ contains the name the kernel will use for this
+ device (in /proc output, for instance).
+ (default "pg").
+
+ verbose This parameter controls the amount of logging
+ that is done by the driver. Set it to 0 for
+ quiet operation, to 1 to enable progress
+ messages while the driver probes for devices,
+ or to 2 for full debug logging. (default 0)
+
+ If this driver is built into the kernel, you can use
+ the following command line parameters, with the same values
+ as the corresponding module parameters listed above:
+
+ pg.drive0
+ pg.drive1
+ pg.drive2
+ pg.drive3
+
+ In addition, you can use the parameter pg.disable to disable
+ the driver entirely.
+
+*/
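+
+/* A minimal user-space sketch of the write/read protocol described
+   above. It is an illustration only, not part of the driver: it sends
+   an ATAPI TEST UNIT READY packet (opcode 0x00) through /dev/pg0 and
+   reads back the status header. The structures and PG_* constants come
+   from <linux/pg.h>; the device node, timeout and error handling are
+   assumptions made for the example.
+
+	#include <stdio.h>
+	#include <string.h>
+	#include <fcntl.h>
+	#include <unistd.h>
+	#include <linux/pg.h>
+
+	int main(void)
+	{
+		struct pg_write_hdr whdr;
+		struct pg_read_hdr rhdr;
+		int fd = open("/dev/pg0", O_RDWR);
+
+		if (fd < 0)
+			return 1;
+		memset(&whdr, 0, sizeof(whdr));
+		whdr.magic = PG_MAGIC;		// must be 'P'
+		whdr.func = PG_COMMAND;		// send a packet command
+		whdr.dlen = 0;			// no data phase expected
+		whdr.timeout = 10;		// seconds
+		whdr.packet[0] = 0x00;		// ATAPI TEST UNIT READY
+		if (write(fd, &whdr, sizeof(whdr)) < 0)	// deliver command
+			return 1;
+		if (read(fd, &rhdr, sizeof(rhdr)) < 0)	// collect result
+			return 1;
+		printf("sense key %d, took %d s\n", rhdr.scsi, rhdr.duration);
+		close(fd);
+		return 0;
+	}
+*/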
+
+/* Changes:
+
+ 1.01 GRG 1998.06.16 Bug fixes
+ 1.02 GRG 1998.09.24 Added jumbo support
+
+*/
+
+#define PG_VERSION "1.02"
+#define PG_MAJOR 97
+#define PG_NAME "pg"
+#define PG_UNITS 4
+
+#ifndef PI_PG
+#define PI_PG 4
+#endif
+
+/* Here are things one can override from the insmod command.
+ Most are autoprobed by paride unless set here. Verbose is 0
+ by default.
+
+*/
+
+static int verbose = 0;
+static int major = PG_MAJOR;
+static char *name = PG_NAME;
+static int disable = 0;
+
+static int drive0[6] = { 0, 0, 0, -1, -1, -1 };
+static int drive1[6] = { 0, 0, 0, -1, -1, -1 };
+static int drive2[6] = { 0, 0, 0, -1, -1, -1 };
+static int drive3[6] = { 0, 0, 0, -1, -1, -1 };
+
+static int (*drives[4])[6] = {&drive0, &drive1, &drive2, &drive3};
+static int pg_drive_count;
+
+enum {D_PRT, D_PRO, D_UNI, D_MOD, D_SLV, D_DLY};
+
+/* end of parameters */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/fs.h>
+#include <linux/delay.h>
+#include <linux/slab.h>
+#include <linux/mtio.h>
+#include <linux/pg.h>
+#include <linux/device.h>
+#include <linux/sched.h> /* current, TASK_* */
+#include <linux/mutex.h>
+#include <linux/jiffies.h>
+
+#include <asm/uaccess.h>
+
+module_param(verbose, bool, 0644);
+module_param(major, int, 0);
+module_param(name, charp, 0);
+module_param_array(drive0, int, NULL, 0);
+module_param_array(drive1, int, NULL, 0);
+module_param_array(drive2, int, NULL, 0);
+module_param_array(drive3, int, NULL, 0);
+
+#include "paride.h"
+
+#define PG_SPIN_DEL 50 /* spin delay in micro-seconds */
+#define PG_SPIN 200
+#define PG_TMO HZ
+#define PG_RESET_TMO 10*HZ
+
+#define STAT_ERR 0x01
+#define STAT_INDEX 0x02
+#define STAT_ECC 0x04
+#define STAT_DRQ 0x08
+#define STAT_SEEK 0x10
+#define STAT_WRERR 0x20
+#define STAT_READY 0x40
+#define STAT_BUSY 0x80
+
+#define ATAPI_IDENTIFY 0x12
+
+static DEFINE_MUTEX(pg_mutex);
+static int pg_open(struct inode *inode, struct file *file);
+static int pg_release(struct inode *inode, struct file *file);
+static ssize_t pg_read(struct file *filp, char __user *buf,
+ size_t count, loff_t * ppos);
+static ssize_t pg_write(struct file *filp, const char __user *buf,
+ size_t count, loff_t * ppos);
+static int pg_detect(void);
+
+#define PG_NAMELEN 8
+
+struct pg {
+ struct pi_adapter pia; /* interface to paride layer */
+ struct pi_adapter *pi;
+ int busy; /* write done, read expected */
+ int start; /* jiffies at command start */
+ int dlen; /* transfer size requested */
+ unsigned long timeout; /* timeout requested */
+ int status; /* last sense key */
+ int drive; /* drive */
+ unsigned long access; /* count of active opens ... */
+ int present; /* device present ? */
+ char *bufptr;
+ char name[PG_NAMELEN]; /* pg0, pg1, ... */
+};
+
+static struct pg devices[PG_UNITS];
+
+static int pg_identify(struct pg *dev, int log);
+
+static char pg_scratch[512]; /* scratch block buffer */
+
+static struct class *pg_class;
+
+/* kernel glue structures */
+
+static const struct file_operations pg_fops = {
+ .owner = THIS_MODULE,
+ .read = pg_read,
+ .write = pg_write,
+ .open = pg_open,
+ .release = pg_release,
+ .llseek = noop_llseek,
+};
+
+static void pg_init_units(void)
+{
+ int unit;
+
+ pg_drive_count = 0;
+ for (unit = 0; unit < PG_UNITS; unit++) {
+ int *parm = *drives[unit];
+ struct pg *dev = &devices[unit];
+ dev->pi = &dev->pia;
+ clear_bit(0, &dev->access);
+ dev->busy = 0;
+ dev->present = 0;
+ dev->bufptr = NULL;
+ dev->drive = parm[D_SLV];
+ snprintf(dev->name, PG_NAMELEN, "%s%c", name, 'a'+unit);
+ if (parm[D_PRT])
+ pg_drive_count++;
+ }
+}
+
+static inline int status_reg(struct pg *dev)
+{
+ return pi_read_regr(dev->pi, 1, 6);
+}
+
+static inline int read_reg(struct pg *dev, int reg)
+{
+ return pi_read_regr(dev->pi, 0, reg);
+}
+
+static inline void write_reg(struct pg *dev, int reg, int val)
+{
+ pi_write_regr(dev->pi, 0, reg, val);
+}
+
+static inline u8 DRIVE(struct pg *dev)
+{
+ return 0xa0+0x10*dev->drive;
+}
+
+static void pg_sleep(int cs)
+{
+ schedule_timeout_interruptible(cs);
+}
+
+static int pg_wait(struct pg *dev, int go, int stop, unsigned long tmo, char *msg)
+{
+ int j, r, e, s, p, to;
+
+ dev->status = 0;
+
+ j = 0;
+ while ((((r = status_reg(dev)) & go) || (stop && (!(r & stop))))
+ && time_before(jiffies, tmo)) {
+ if (j++ < PG_SPIN)
+ udelay(PG_SPIN_DEL);
+ else
+ pg_sleep(1);
+ }
+
+ to = time_after_eq(jiffies, tmo);
+
+ if ((r & (STAT_ERR & stop)) || to) {
+ s = read_reg(dev, 7);
+ e = read_reg(dev, 1);
+ p = read_reg(dev, 2);
+ if (verbose > 1)
+ printk("%s: %s: stat=0x%x err=0x%x phase=%d%s\n",
+ dev->name, msg, s, e, p, to ? " timeout" : "");
+ if (to)
+ e |= 0x100;
+ dev->status = (e >> 4) & 0xff;
+ return -1;
+ }
+ return 0;
+}
+
+static int pg_command(struct pg *dev, char *cmd, int dlen, unsigned long tmo)
+{
+ int k;
+
+ pi_connect(dev->pi);
+
+ write_reg(dev, 6, DRIVE(dev));
+
+ if (pg_wait(dev, STAT_BUSY | STAT_DRQ, 0, tmo, "before command"))
+ goto fail;
+
+ write_reg(dev, 4, dlen % 256);
+ write_reg(dev, 5, dlen / 256);
+ write_reg(dev, 7, 0xa0); /* ATAPI packet command */
+
+ if (pg_wait(dev, STAT_BUSY, STAT_DRQ, tmo, "command DRQ"))
+ goto fail;
+
+ if (read_reg(dev, 2) != 1) {
+ printk("%s: command phase error\n", dev->name);
+ goto fail;
+ }
+
+ pi_write_block(dev->pi, cmd, 12);
+
+ if (verbose > 1) {
+ printk("%s: Command sent, dlen=%d packet= ", dev->name, dlen);
+ for (k = 0; k < 12; k++)
+ printk("%02x ", cmd[k] & 0xff);
+ printk("\n");
+ }
+ return 0;
+fail:
+ pi_disconnect(dev->pi);
+ return -1;
+}
+
+static int pg_completion(struct pg *dev, char *buf, unsigned long tmo)
+{
+ int r, d, n, p;
+
+ r = pg_wait(dev, STAT_BUSY, STAT_DRQ | STAT_READY | STAT_ERR,
+ tmo, "completion");
+
+ dev->dlen = 0;
+
+ while (read_reg(dev, 7) & STAT_DRQ) {
+ d = (read_reg(dev, 4) + 256 * read_reg(dev, 5));
+ n = ((d + 3) & 0xfffc);
+ p = read_reg(dev, 2) & 3;
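+		/* the low two bits of register 2 give the ATAPI interrupt
+		   reason: 0 means the host must supply data, 2 means the
+		   device has data for the host */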
+ if (p == 0)
+ pi_write_block(dev->pi, buf, n);
+ if (p == 2)
+ pi_read_block(dev->pi, buf, n);
+ if (verbose > 1)
+ printk("%s: %s %d bytes\n", dev->name,
+ p ? "Read" : "Write", n);
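+		/* dlen accumulates bytes written as positive and bytes read
+		   as negative; pg_read() flips the sign back before copying
+		   the data out */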
+ dev->dlen += (1 - p) * d;
+ buf += d;
+ r = pg_wait(dev, STAT_BUSY, STAT_DRQ | STAT_READY | STAT_ERR,
+ tmo, "completion");
+ }
+
+ pi_disconnect(dev->pi);
+
+ return r;
+}
+
+static int pg_reset(struct pg *dev)
+{
+ int i, k, err;
+ int expect[5] = { 1, 1, 1, 0x14, 0xeb };
+ int got[5];
+
+ pi_connect(dev->pi);
+ write_reg(dev, 6, DRIVE(dev));
+ write_reg(dev, 7, 8);
+
+ pg_sleep(20 * HZ / 1000);
+
+ k = 0;
+ while ((k++ < PG_RESET_TMO) && (status_reg(dev) & STAT_BUSY))
+ pg_sleep(1);
+
+ for (i = 0; i < 5; i++)
+ got[i] = read_reg(dev, i + 1);
+
+ err = memcmp(expect, got, sizeof(got)) ? -1 : 0;
+
+ if (verbose) {
+ printk("%s: Reset (%d) signature = ", dev->name, k);
+ for (i = 0; i < 5; i++)
+ printk("%3x", got[i]);
+ if (err)
+ printk(" (incorrect)");
+ printk("\n");
+ }
+
+ pi_disconnect(dev->pi);
+ return err;
+}
+
+static void xs(char *buf, char *targ, int len)
+{
+ char l = '\0';
+ int k;
+
+ for (k = 0; k < len; k++) {
+ char c = *buf++;
+ if (c != ' ' && c != l)
+ l = *targ++ = c;
+ }
+ if (l == ' ')
+ targ--;
+ *targ = '\0';
+}
+
+static int pg_identify(struct pg *dev, int log)
+{
+ int s;
+ char *ms[2] = { "master", "slave" };
+ char mf[10], id[18];
+ char id_cmd[12] = { ATAPI_IDENTIFY, 0, 0, 0, 36, 0, 0, 0, 0, 0, 0, 0 };
+ char buf[36];
+
+ s = pg_command(dev, id_cmd, 36, jiffies + PG_TMO);
+ if (s)
+ return -1;
+ s = pg_completion(dev, buf, jiffies + PG_TMO);
+ if (s)
+ return -1;
+
+ if (log) {
+ xs(buf + 8, mf, 8);
+ xs(buf + 16, id, 16);
+ printk("%s: %s %s, %s\n", dev->name, mf, id, ms[dev->drive]);
+ }
+
+ return 0;
+}
+
+/*
+ * returns 0, with id set if drive is detected
+ * -1, if drive detection failed
+ */
+static int pg_probe(struct pg *dev)
+{
+ if (dev->drive == -1) {
+ for (dev->drive = 0; dev->drive <= 1; dev->drive++)
+ if (!pg_reset(dev))
+ return pg_identify(dev, 1);
+ } else {
+ if (!pg_reset(dev))
+ return pg_identify(dev, 1);
+ }
+ return -1;
+}
+
+static int pg_detect(void)
+{
+ struct pg *dev = &devices[0];
+ int k, unit;
+
+ printk("%s: %s version %s, major %d\n", name, name, PG_VERSION, major);
+
+ k = 0;
+ if (pg_drive_count == 0) {
+ if (pi_init(dev->pi, 1, -1, -1, -1, -1, -1, pg_scratch,
+ PI_PG, verbose, dev->name)) {
+ if (!pg_probe(dev)) {
+ dev->present = 1;
+ k++;
+ } else
+ pi_release(dev->pi);
+ }
+
+ } else
+ for (unit = 0; unit < PG_UNITS; unit++, dev++) {
+ int *parm = *drives[unit];
+ if (!parm[D_PRT])
+ continue;
+ if (pi_init(dev->pi, 0, parm[D_PRT], parm[D_MOD],
+ parm[D_UNI], parm[D_PRO], parm[D_DLY],
+ pg_scratch, PI_PG, verbose, dev->name)) {
+ if (!pg_probe(dev)) {
+ dev->present = 1;
+ k++;
+ } else
+ pi_release(dev->pi);
+ }
+ }
+
+ if (k)
+ return 0;
+
+ printk("%s: No ATAPI device detected\n", name);
+ return -1;
+}
+
+static int pg_open(struct inode *inode, struct file *file)
+{
+ int unit = iminor(inode) & 0x7f;
+ struct pg *dev = &devices[unit];
+ int ret = 0;
+
+ mutex_lock(&pg_mutex);
+ if ((unit >= PG_UNITS) || (!dev->present)) {
+ ret = -ENODEV;
+ goto out;
+ }
+
+ if (test_and_set_bit(0, &dev->access)) {
+ ret = -EBUSY;
+ goto out;
+ }
+
+ if (dev->busy) {
+ pg_reset(dev);
+ dev->busy = 0;
+ }
+
+ pg_identify(dev, (verbose > 1));
+
+ dev->bufptr = kmalloc(PG_MAX_DATA, GFP_KERNEL);
+ if (dev->bufptr == NULL) {
+ clear_bit(0, &dev->access);
+ printk("%s: buffer allocation failed\n", dev->name);
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ file->private_data = dev;
+
+out:
+ mutex_unlock(&pg_mutex);
+ return ret;
+}
+
+static int pg_release(struct inode *inode, struct file *file)
+{
+ struct pg *dev = file->private_data;
+
+ kfree(dev->bufptr);
+ dev->bufptr = NULL;
+ clear_bit(0, &dev->access);
+
+ return 0;
+}
+
+static ssize_t pg_write(struct file *filp, const char __user *buf, size_t count, loff_t *ppos)
+{
+ struct pg *dev = filp->private_data;
+ struct pg_write_hdr hdr;
+ int hs = sizeof (hdr);
+
+ if (dev->busy)
+ return -EBUSY;
+ if (count < hs)
+ return -EINVAL;
+
+ if (copy_from_user(&hdr, buf, hs))
+ return -EFAULT;
+
+ if (hdr.magic != PG_MAGIC)
+ return -EINVAL;
+ if (hdr.dlen > PG_MAX_DATA)
+ return -EINVAL;
+ if ((count - hs) > PG_MAX_DATA)
+ return -EINVAL;
+
+ if (hdr.func == PG_RESET) {
+ if (count != hs)
+ return -EINVAL;
+ if (pg_reset(dev))
+ return -EIO;
+ return count;
+ }
+
+ if (hdr.func != PG_COMMAND)
+ return -EINVAL;
+
+ dev->start = jiffies;
+ dev->timeout = hdr.timeout * HZ + HZ / 2 + jiffies;
+
+ if (pg_command(dev, hdr.packet, hdr.dlen, jiffies + PG_TMO)) {
+ if (dev->status & 0x10)
+ return -ETIME;
+ return -EIO;
+ }
+
+ dev->busy = 1;
+
+ if (copy_from_user(dev->bufptr, buf + hs, count - hs))
+ return -EFAULT;
+ return count;
+}
+
+static ssize_t pg_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos)
+{
+ struct pg *dev = filp->private_data;
+ struct pg_read_hdr hdr;
+ int hs = sizeof (hdr);
+ int copy;
+
+ if (!dev->busy)
+ return -EINVAL;
+ if (count < hs)
+ return -EINVAL;
+
+ dev->busy = 0;
+
+ if (pg_completion(dev, dev->bufptr, dev->timeout))
+ if (dev->status & 0x10)
+ return -ETIME;
+
+ hdr.magic = PG_MAGIC;
+ hdr.dlen = dev->dlen;
+ copy = 0;
+
+ if (hdr.dlen < 0) {
+ hdr.dlen = -1 * hdr.dlen;
+ copy = hdr.dlen;
+ if (copy > (count - hs))
+ copy = count - hs;
+ }
+
+ hdr.duration = (jiffies - dev->start + HZ / 2) / HZ;
+ hdr.scsi = dev->status & 0x0f;
+
+ if (copy_to_user(buf, &hdr, hs))
+ return -EFAULT;
+ if (copy > 0)
+ if (copy_to_user(buf + hs, dev->bufptr, copy))
+ return -EFAULT;
+ return copy + hs;
+}
+
+static int __init pg_init(void)
+{
+ int unit;
+ int err;
+
+ if (disable){
+ err = -EINVAL;
+ goto out;
+ }
+
+ pg_init_units();
+
+ if (pg_detect()) {
+ err = -ENODEV;
+ goto out;
+ }
+
+ err = register_chrdev(major, name, &pg_fops);
+ if (err < 0) {
+ printk("pg_init: unable to get major number %d\n", major);
+ for (unit = 0; unit < PG_UNITS; unit++) {
+ struct pg *dev = &devices[unit];
+ if (dev->present)
+ pi_release(dev->pi);
+ }
+ goto out;
+ }
+ major = err; /* In case the user specified `major=0' (dynamic) */
+ pg_class = class_create(THIS_MODULE, "pg");
+ if (IS_ERR(pg_class)) {
+ err = PTR_ERR(pg_class);
+ goto out_chrdev;
+ }
+ for (unit = 0; unit < PG_UNITS; unit++) {
+ struct pg *dev = &devices[unit];
+ if (dev->present)
+ device_create(pg_class, NULL, MKDEV(major, unit), NULL,
+ "pg%u", unit);
+ }
+ err = 0;
+ goto out;
+
+out_chrdev:
+ unregister_chrdev(major, "pg");
+out:
+ return err;
+}
+
+static void __exit pg_exit(void)
+{
+ int unit;
+
+ for (unit = 0; unit < PG_UNITS; unit++) {
+ struct pg *dev = &devices[unit];
+ if (dev->present)
+ device_destroy(pg_class, MKDEV(major, unit));
+ }
+ class_destroy(pg_class);
+ unregister_chrdev(major, name);
+
+ for (unit = 0; unit < PG_UNITS; unit++) {
+ struct pg *dev = &devices[unit];
+ if (dev->present)
+ pi_release(dev->pi);
+ }
+}
+
+MODULE_LICENSE("GPL");
+module_init(pg_init)
+module_exit(pg_exit)
diff --git a/drivers/block/paride/ppc6lnx.c b/drivers/block/paride/ppc6lnx.c
new file mode 100644
index 00000000..5e5521d3
--- /dev/null
+++ b/drivers/block/paride/ppc6lnx.c
@@ -0,0 +1,726 @@
+/*
+ ppc6lnx.c (c) 2001 Micro Solutions Inc.
+ Released under the terms of the GNU General Public license
+
+ ppc6lnx.c is a part of the protocol driver for the Micro Solutions
+ "BACKPACK" parallel port IDE adapter
+ (Works on Series 6 drives)
+
+*/
+
+//***************************************************************************
+
+// PPC 6 Code in C sanitized for LINUX
+// Original x86 ASM by Ron, Converted to C by Clive
+
+//***************************************************************************
+
+
+#define port_stb 1
+#define port_afd 2
+#define cmd_stb port_afd
+#define port_init 4
+#define data_stb port_init
+#define port_sel 8
+#define port_int 16
+#define port_dir 0x20
+
+#define ECR_EPP 0x80
+#define ECR_BI 0x20
+
+//***************************************************************************
+
+// 60772 Commands
+
+#define ACCESS_REG 0x00
+#define ACCESS_PORT 0x40
+
+#define ACCESS_READ 0x00
+#define ACCESS_WRITE 0x20
+
+// 60772 Command Prefix
+
+#define CMD_PREFIX_SET 0xe0 // Special command that modifies the next command's operation
+#define CMD_PREFIX_RESET 0xc0 // Resets current cmd modifier reg bits
+ #define PREFIX_IO16 0x01 // perform 16-bit wide I/O
+ #define PREFIX_FASTWR 0x04 // enable PPC mode fast-write
+ #define PREFIX_BLK 0x08 // enable block transfer mode
+
+// 60772 Registers
+
+#define REG_STATUS 0x00 // status register
+ #define STATUS_IRQA 0x01 // Peripheral IRQA line
+ #define STATUS_EEPROM_DO 0x40 // Serial EEPROM data bit
+#define REG_VERSION 0x01 // PPC version register (read)
+#define REG_HWCFG 0x02 // Hardware Config register
+#define REG_RAMSIZE 0x03 // Size of RAM Buffer
+ #define RAMSIZE_128K 0x02
+#define REG_EEPROM 0x06 // EEPROM control register
+ #define EEPROM_SK 0x01 // eeprom SK bit
+ #define EEPROM_DI 0x02 // eeprom DI bit
+ #define EEPROM_CS 0x04 // eeprom CS bit
+ #define EEPROM_EN 0x08 // eeprom output enable
+#define REG_BLKSIZE 0x08 // Block transfer len (24 bit)
+
+//***************************************************************************
+
+typedef struct ppc_storage {
+ u16 lpt_addr; // LPT base address
+ u8 ppc_id;
+ u8 mode; // operating mode
+ // 0 = PPC Uni SW
+ // 1 = PPC Uni FW
+ // 2 = PPC Bi SW
+ // 3 = PPC Bi FW
+ // 4 = EPP Byte
+ // 5 = EPP Word
+ // 6 = EPP Dword
+ u8 ppc_flags;
+ u8 org_data; // original LPT data port contents
+ u8 org_ctrl; // original LPT control port contents
+ u8 cur_ctrl; // current control port contents
+} Interface;
+
+//***************************************************************************
+
+// ppc_flags
+
+#define fifo_wait 0x10
+
+//***************************************************************************
+
+// DONT CHANGE THESE LEST YOU BREAK EVERYTHING - BIT FIELD DEPENDENCIES
+
+#define PPCMODE_UNI_SW 0
+#define PPCMODE_UNI_FW 1
+#define PPCMODE_BI_SW 2
+#define PPCMODE_BI_FW 3
+#define PPCMODE_EPP_BYTE 4
+#define PPCMODE_EPP_WORD 5
+#define PPCMODE_EPP_DWORD 6
+
+//***************************************************************************
+
+static int ppc6_select(Interface *ppc);
+static void ppc6_deselect(Interface *ppc);
+static void ppc6_send_cmd(Interface *ppc, u8 cmd);
+static void ppc6_wr_data_byte(Interface *ppc, u8 data);
+static u8 ppc6_rd_data_byte(Interface *ppc);
+static u8 ppc6_rd_port(Interface *ppc, u8 port);
+static void ppc6_wr_port(Interface *ppc, u8 port, u8 data);
+static void ppc6_rd_data_blk(Interface *ppc, u8 *data, long count);
+static void ppc6_wait_for_fifo(Interface *ppc);
+static void ppc6_wr_data_blk(Interface *ppc, u8 *data, long count);
+static void ppc6_rd_port16_blk(Interface *ppc, u8 port, u8 *data, long length);
+static void ppc6_wr_port16_blk(Interface *ppc, u8 port, u8 *data, long length);
+static void ppc6_wr_extout(Interface *ppc, u8 regdata);
+static int ppc6_open(Interface *ppc);
+static void ppc6_close(Interface *ppc);
+
+//***************************************************************************
+
+static int ppc6_select(Interface *ppc)
+{
+ u8 i, j, k;
+
+ i = inb(ppc->lpt_addr + 1);
+
+ if (i & 1)
+ outb(i, ppc->lpt_addr + 1);
+
+ ppc->org_data = inb(ppc->lpt_addr);
+
+ ppc->org_ctrl = inb(ppc->lpt_addr + 2) & 0x5F; // readback ctrl
+
+ ppc->cur_ctrl = ppc->org_ctrl;
+
+ ppc->cur_ctrl |= port_sel;
+
+ outb(ppc->cur_ctrl, ppc->lpt_addr + 2);
+
+ if (ppc->org_data == 'b')
+ outb('x', ppc->lpt_addr);
+
+ outb('b', ppc->lpt_addr);
+ outb('p', ppc->lpt_addr);
+ outb(ppc->ppc_id, ppc->lpt_addr);
+ outb(~ppc->ppc_id,ppc->lpt_addr);
+
+ ppc->cur_ctrl &= ~port_sel;
+
+ outb(ppc->cur_ctrl, ppc->lpt_addr + 2);
+
+ ppc->cur_ctrl = (ppc->cur_ctrl & port_int) | port_init;
+
+ outb(ppc->cur_ctrl, ppc->lpt_addr + 2);
+
+ i = ppc->mode & 0x0C;
+
+ if (i == 0)
+ i = (ppc->mode & 2) | 1;
+
+ outb(i, ppc->lpt_addr);
+
+ ppc->cur_ctrl |= port_sel;
+
+ outb(ppc->cur_ctrl, ppc->lpt_addr + 2);
+
+ // DELAY
+
+ ppc->cur_ctrl |= port_afd;
+
+ outb(ppc->cur_ctrl, ppc->lpt_addr + 2);
+
+ j = ((i & 0x08) << 4) | ((i & 0x07) << 3);
+
+ k = inb(ppc->lpt_addr + 1) & 0xB8;
+
+ if (j == k)
+ {
+ ppc->cur_ctrl &= ~port_afd;
+
+ outb(ppc->cur_ctrl, ppc->lpt_addr + 2);
+
+ k = (inb(ppc->lpt_addr + 1) & 0xB8) ^ 0xB8;
+
+ if (j == k)
+ {
+ if (i & 4) // EPP
+ ppc->cur_ctrl &= ~(port_sel | port_init);
+ else // PPC/ECP
+ ppc->cur_ctrl &= ~port_sel;
+
+ outb(ppc->cur_ctrl, ppc->lpt_addr + 2);
+
+ return(1);
+ }
+ }
+
+ outb(ppc->org_ctrl, ppc->lpt_addr + 2);
+
+ outb(ppc->org_data, ppc->lpt_addr);
+
+ return(0); // FAIL
+}
+
+//***************************************************************************
+
+static void ppc6_deselect(Interface *ppc)
+{
+ if (ppc->mode & 4) // EPP
+ ppc->cur_ctrl |= port_init;
+ else // PPC/ECP
+ ppc->cur_ctrl |= port_sel;
+
+ outb(ppc->cur_ctrl, ppc->lpt_addr + 2);
+
+ outb(ppc->org_data, ppc->lpt_addr);
+
+ outb((ppc->org_ctrl | port_sel), ppc->lpt_addr + 2);
+
+ outb(ppc->org_ctrl, ppc->lpt_addr + 2);
+}
+
+//***************************************************************************
+
+static void ppc6_send_cmd(Interface *ppc, u8 cmd)
+{
+ switch(ppc->mode)
+ {
+ case PPCMODE_UNI_SW :
+ case PPCMODE_UNI_FW :
+ case PPCMODE_BI_SW :
+ case PPCMODE_BI_FW :
+ {
+ outb(cmd, ppc->lpt_addr);
+
+ ppc->cur_ctrl ^= cmd_stb;
+
+ outb(ppc->cur_ctrl, ppc->lpt_addr + 2);
+
+ break;
+ }
+
+ case PPCMODE_EPP_BYTE :
+ case PPCMODE_EPP_WORD :
+ case PPCMODE_EPP_DWORD :
+ {
+ outb(cmd, ppc->lpt_addr + 3);
+
+ break;
+ }
+ }
+}
+
+//***************************************************************************
+
+static void ppc6_wr_data_byte(Interface *ppc, u8 data)
+{
+ switch(ppc->mode)
+ {
+ case PPCMODE_UNI_SW :
+ case PPCMODE_UNI_FW :
+ case PPCMODE_BI_SW :
+ case PPCMODE_BI_FW :
+ {
+ outb(data, ppc->lpt_addr);
+
+ ppc->cur_ctrl ^= data_stb;
+
+ outb(ppc->cur_ctrl, ppc->lpt_addr + 2);
+
+ break;
+ }
+
+ case PPCMODE_EPP_BYTE :
+ case PPCMODE_EPP_WORD :
+ case PPCMODE_EPP_DWORD :
+ {
+ outb(data, ppc->lpt_addr + 4);
+
+ break;
+ }
+ }
+}
+
+//***************************************************************************
+
+static u8 ppc6_rd_data_byte(Interface *ppc)
+{
+ u8 data = 0;
+
+ switch(ppc->mode)
+ {
+ case PPCMODE_UNI_SW :
+ case PPCMODE_UNI_FW :
+ {
+ ppc->cur_ctrl = (ppc->cur_ctrl & ~port_stb) ^ data_stb;
+
+ outb(ppc->cur_ctrl, ppc->lpt_addr + 2);
+
+ // DELAY
+
+ data = inb(ppc->lpt_addr + 1);
+
+ data = ((data & 0x80) >> 1) | ((data & 0x38) >> 3);
+
+ ppc->cur_ctrl |= port_stb;
+
+ outb(ppc->cur_ctrl, ppc->lpt_addr + 2);
+
+ // DELAY
+
+ data |= inb(ppc->lpt_addr + 1) & 0xB8;
+
+ break;
+ }
+
+ case PPCMODE_BI_SW :
+ case PPCMODE_BI_FW :
+ {
+ ppc->cur_ctrl |= port_dir;
+
+ outb(ppc->cur_ctrl, ppc->lpt_addr + 2);
+
+ ppc->cur_ctrl = (ppc->cur_ctrl | port_stb) ^ data_stb;
+
+ outb(ppc->cur_ctrl, ppc->lpt_addr + 2);
+
+ data = inb(ppc->lpt_addr);
+
+ ppc->cur_ctrl &= ~port_stb;
+
+ outb(ppc->cur_ctrl,ppc->lpt_addr + 2);
+
+ ppc->cur_ctrl &= ~port_dir;
+
+ outb(ppc->cur_ctrl, ppc->lpt_addr + 2);
+
+ break;
+ }
+
+ case PPCMODE_EPP_BYTE :
+ case PPCMODE_EPP_WORD :
+ case PPCMODE_EPP_DWORD :
+ {
+ outb((ppc->cur_ctrl | port_dir),ppc->lpt_addr + 2);
+
+ data = inb(ppc->lpt_addr + 4);
+
+ outb(ppc->cur_ctrl,ppc->lpt_addr + 2);
+
+ break;
+ }
+ }
+
+ return(data);
+}
+
+//***************************************************************************
+
+static u8 ppc6_rd_port(Interface *ppc, u8 port)
+{
+ ppc6_send_cmd(ppc,(u8)(port | ACCESS_PORT | ACCESS_READ));
+
+ return(ppc6_rd_data_byte(ppc));
+}
+
+//***************************************************************************
+
+static void ppc6_wr_port(Interface *ppc, u8 port, u8 data)
+{
+ ppc6_send_cmd(ppc,(u8)(port | ACCESS_PORT | ACCESS_WRITE));
+
+ ppc6_wr_data_byte(ppc, data);
+}
+
+//***************************************************************************
+
+static void ppc6_rd_data_blk(Interface *ppc, u8 *data, long count)
+{
+ switch(ppc->mode)
+ {
+ case PPCMODE_UNI_SW :
+ case PPCMODE_UNI_FW :
+ {
+ while(count)
+ {
+ u8 d;
+
+ ppc->cur_ctrl = (ppc->cur_ctrl & ~port_stb) ^ data_stb;
+
+ outb(ppc->cur_ctrl, ppc->lpt_addr + 2);
+
+ // DELAY
+
+ d = inb(ppc->lpt_addr + 1);
+
+ d = ((d & 0x80) >> 1) | ((d & 0x38) >> 3);
+
+ ppc->cur_ctrl |= port_stb;
+
+ outb(ppc->cur_ctrl, ppc->lpt_addr + 2);
+
+ // DELAY
+
+ d |= inb(ppc->lpt_addr + 1) & 0xB8;
+
+ *data++ = d;
+ count--;
+ }
+
+ break;
+ }
+
+ case PPCMODE_BI_SW :
+ case PPCMODE_BI_FW :
+ {
+ ppc->cur_ctrl |= port_dir;
+
+ outb(ppc->cur_ctrl, ppc->lpt_addr + 2);
+
+ ppc->cur_ctrl |= port_stb;
+
+ while(count)
+ {
+ ppc->cur_ctrl ^= data_stb;
+
+ outb(ppc->cur_ctrl, ppc->lpt_addr + 2);
+
+ *data++ = inb(ppc->lpt_addr);
+ count--;
+ }
+
+ ppc->cur_ctrl &= ~port_stb;
+
+ outb(ppc->cur_ctrl, ppc->lpt_addr + 2);
+
+ ppc->cur_ctrl &= ~port_dir;
+
+ outb(ppc->cur_ctrl, ppc->lpt_addr + 2);
+
+ break;
+ }
+
+ case PPCMODE_EPP_BYTE :
+ {
+ outb((ppc->cur_ctrl | port_dir), ppc->lpt_addr + 2);
+
+ // DELAY
+
+ while(count)
+ {
+ *data++ = inb(ppc->lpt_addr + 4);
+ count--;
+ }
+
+ outb(ppc->cur_ctrl, ppc->lpt_addr + 2);
+
+ break;
+ }
+
+ case PPCMODE_EPP_WORD :
+ {
+ outb((ppc->cur_ctrl | port_dir), ppc->lpt_addr + 2);
+
+ // DELAY
+
+ while(count > 1)
+ {
+ *((u16 *)data) = inw(ppc->lpt_addr + 4);
+ data += 2;
+ count -= 2;
+ }
+
+ while(count)
+ {
+ *data++ = inb(ppc->lpt_addr + 4);
+ count--;
+ }
+
+ outb(ppc->cur_ctrl, ppc->lpt_addr + 2);
+
+ break;
+ }
+
+ case PPCMODE_EPP_DWORD :
+ {
+ outb((ppc->cur_ctrl | port_dir),ppc->lpt_addr + 2);
+
+ // DELAY
+
+ while(count > 3)
+ {
+ *((u32 *)data) = inl(ppc->lpt_addr + 4);
+ data += 4;
+ count -= 4;
+ }
+
+ while(count)
+ {
+ *data++ = inb(ppc->lpt_addr + 4);
+ count--;
+ }
+
+ outb(ppc->cur_ctrl, ppc->lpt_addr + 2);
+
+ break;
+ }
+ }
+
+}
+
+//***************************************************************************
+
+static void ppc6_wait_for_fifo(Interface *ppc)
+{
+ int i;
+
+ if (ppc->ppc_flags & fifo_wait)
+ {
+ for(i=0; i<20; i++)
+ inb(ppc->lpt_addr + 1);
+ }
+}
+
+//***************************************************************************
+
+static void ppc6_wr_data_blk(Interface *ppc, u8 *data, long count)
+{
+ switch(ppc->mode)
+ {
+ case PPCMODE_UNI_SW :
+ case PPCMODE_BI_SW :
+ {
+ while(count--)
+ {
+ outb(*data++, ppc->lpt_addr);
+
+ ppc->cur_ctrl ^= data_stb;
+
+ outb(ppc->cur_ctrl, ppc->lpt_addr + 2);
+ }
+
+ break;
+ }
+
+ case PPCMODE_UNI_FW :
+ case PPCMODE_BI_FW :
+ {
+ u8 this, last;
+
+ ppc6_send_cmd(ppc,(CMD_PREFIX_SET | PREFIX_FASTWR));
+
+ ppc->cur_ctrl |= port_stb;
+
+ outb(ppc->cur_ctrl, ppc->lpt_addr + 2);
+
+ last = *data;
+
+ outb(last, ppc->lpt_addr);
+
+ while(count)
+ {
+ this = *data++;
+ count--;
+
+ if (this == last)
+ {
+ ppc->cur_ctrl ^= data_stb;
+
+ outb(ppc->cur_ctrl, ppc->lpt_addr + 2);
+ }
+ else
+ {
+ outb(this, ppc->lpt_addr);
+
+ last = this;
+ }
+ }
+
+ ppc->cur_ctrl &= ~port_stb;
+
+ outb(ppc->cur_ctrl, ppc->lpt_addr + 2);
+
+ ppc6_send_cmd(ppc,(CMD_PREFIX_RESET | PREFIX_FASTWR));
+
+ break;
+ }
+
+ case PPCMODE_EPP_BYTE :
+ {
+ while(count)
+ {
+ outb(*data++,ppc->lpt_addr + 4);
+ count--;
+ }
+
+ ppc6_wait_for_fifo(ppc);
+
+ break;
+ }
+
+ case PPCMODE_EPP_WORD :
+ {
+ while(count > 1)
+ {
+ outw(*((u16 *)data),ppc->lpt_addr + 4);
+ data += 2;
+ count -= 2;
+ }
+
+ while(count)
+ {
+ outb(*data++,ppc->lpt_addr + 4);
+ count--;
+ }
+
+ ppc6_wait_for_fifo(ppc);
+
+ break;
+ }
+
+ case PPCMODE_EPP_DWORD :
+ {
+ while(count > 3)
+ {
+ outl(*((u32 *)data),ppc->lpt_addr + 4);
+ data += 4;
+ count -= 4;
+ }
+
+ while(count)
+ {
+ outb(*data++,ppc->lpt_addr + 4);
+ count--;
+ }
+
+ ppc6_wait_for_fifo(ppc);
+
+ break;
+ }
+ }
+}
+
+//***************************************************************************
+
+static void ppc6_rd_port16_blk(Interface *ppc, u8 port, u8 *data, long length)
+{
+ length = length << 1;
+
+ ppc6_send_cmd(ppc, (REG_BLKSIZE | ACCESS_REG | ACCESS_WRITE));
+ ppc6_wr_data_byte(ppc,(u8)length);
+ ppc6_wr_data_byte(ppc,(u8)(length >> 8));
+ ppc6_wr_data_byte(ppc,0);
+
+ ppc6_send_cmd(ppc, (CMD_PREFIX_SET | PREFIX_IO16 | PREFIX_BLK));
+
+ ppc6_send_cmd(ppc, (u8)(port | ACCESS_PORT | ACCESS_READ));
+
+ ppc6_rd_data_blk(ppc, data, length);
+
+ ppc6_send_cmd(ppc, (CMD_PREFIX_RESET | PREFIX_IO16 | PREFIX_BLK));
+}
+
+//***************************************************************************
+
+static void ppc6_wr_port16_blk(Interface *ppc, u8 port, u8 *data, long length)
+{
+ length = length << 1;
+
+ ppc6_send_cmd(ppc, (REG_BLKSIZE | ACCESS_REG | ACCESS_WRITE));
+ ppc6_wr_data_byte(ppc,(u8)length);
+ ppc6_wr_data_byte(ppc,(u8)(length >> 8));
+ ppc6_wr_data_byte(ppc,0);
+
+ ppc6_send_cmd(ppc, (CMD_PREFIX_SET | PREFIX_IO16 | PREFIX_BLK));
+
+ ppc6_send_cmd(ppc, (u8)(port | ACCESS_PORT | ACCESS_WRITE));
+
+ ppc6_wr_data_blk(ppc, data, length);
+
+ ppc6_send_cmd(ppc, (CMD_PREFIX_RESET | PREFIX_IO16 | PREFIX_BLK));
+}
+
+//***************************************************************************
+
+static void ppc6_wr_extout(Interface *ppc, u8 regdata)
+{
+ ppc6_send_cmd(ppc,(REG_VERSION | ACCESS_REG | ACCESS_WRITE));
+
+ ppc6_wr_data_byte(ppc, (u8)((regdata & 0x03) << 6));
+}
+
+//***************************************************************************
+
+static int ppc6_open(Interface *ppc)
+{
+ int ret;
+
+ ret = ppc6_select(ppc);
+
+ if (ret == 0)
+ return(ret);
+
+ ppc->ppc_flags &= ~fifo_wait;
+
+ ppc6_send_cmd(ppc, (ACCESS_REG | ACCESS_WRITE | REG_RAMSIZE));
+ ppc6_wr_data_byte(ppc, RAMSIZE_128K);
+
+ ppc6_send_cmd(ppc, (ACCESS_REG | ACCESS_READ | REG_VERSION));
+
+ if ((ppc6_rd_data_byte(ppc) & 0x3F) == 0x0C)
+ ppc->ppc_flags |= fifo_wait;
+
+ return(ret);
+}
+
+//***************************************************************************
+
+static void ppc6_close(Interface *ppc)
+{
+ ppc6_deselect(ppc);
+}
+
+//***************************************************************************
+
diff --git a/drivers/block/paride/pseudo.h b/drivers/block/paride/pseudo.h
new file mode 100644
index 00000000..bc370329
--- /dev/null
+++ b/drivers/block/paride/pseudo.h
@@ -0,0 +1,102 @@
+/*
+ pseudo.h (c) 1997-8 Grant R. Guenther <grant@torque.net>
+ Under the terms of the GNU General Public License.
+
+ This is the "pseudo-interrupt" logic for parallel port drivers.
+
+ This module is #included into each driver. It makes one
+ function available:
+
+ ps_set_intr( void (*continuation)(void),
+ int (*ready)(void),
+ int timeout,
+ int nice )
+
+ Which will arrange for ready() to be evaluated frequently and
+ when either it returns true, or timeout jiffies have passed,
+ continuation() will be invoked.
+
+ If nice is 1, the test will be done approximately once a
+ jiffy. If nice is 0, the test will also be done whenever
+ the scheduler runs (by adding it to a task queue). If
+ nice is greater than 1, the test will be done once every
+ (nice-1) jiffies.
+
+*/
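+
+/* For example, pf.c (above) uses it to wait for the data-request phase
+   of a read:
+
+	pf_mask = STAT_DRQ;
+	ps_set_intr(do_pf_read_drq, pf_ready, PF_TMO, nice);
+
+   do_pf_read_drq() then runs as soon as pf_ready() returns true, or
+   after PF_TMO jiffies have passed. */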
+
+/* Changes:
+
+ 1.01 1998.05.03 Switched from cli()/sti() to spinlocks
+ 1.02 1998.12.14 Added support for nice > 1
+*/
+
+#define PS_VERSION "1.02"
+
+#include <linux/sched.h>
+#include <linux/workqueue.h>
+
+static void ps_tq_int(struct work_struct *work);
+
+static void (* ps_continuation)(void);
+static int (* ps_ready)(void);
+static unsigned long ps_timeout;
+static int ps_tq_active = 0;
+static int ps_nice = 0;
+
+static DEFINE_SPINLOCK(ps_spinlock __attribute__((unused)));
+
+static DECLARE_DELAYED_WORK(ps_tq, ps_tq_int);
+
+static void ps_set_intr(void (*continuation)(void),
+ int (*ready)(void),
+ int timeout, int nice)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&ps_spinlock,flags);
+
+ ps_continuation = continuation;
+ ps_ready = ready;
+ ps_timeout = jiffies + timeout;
+ ps_nice = nice;
+
+ if (!ps_tq_active) {
+ ps_tq_active = 1;
+ if (!ps_nice)
+ schedule_delayed_work(&ps_tq, 0);
+ else
+ schedule_delayed_work(&ps_tq, ps_nice-1);
+ }
+ spin_unlock_irqrestore(&ps_spinlock,flags);
+}
+
+static void ps_tq_int(struct work_struct *work)
+{
+ void (*con)(void);
+ unsigned long flags;
+
+ spin_lock_irqsave(&ps_spinlock,flags);
+
+ con = ps_continuation;
+ ps_tq_active = 0;
+
+ if (!con) {
+ spin_unlock_irqrestore(&ps_spinlock,flags);
+ return;
+ }
+ if (!ps_ready || ps_ready() || time_after_eq(jiffies, ps_timeout)) {
+ ps_continuation = NULL;
+ spin_unlock_irqrestore(&ps_spinlock,flags);
+ con();
+ return;
+ }
+ ps_tq_active = 1;
+ if (!ps_nice)
+ schedule_delayed_work(&ps_tq, 0);
+ else
+ schedule_delayed_work(&ps_tq, ps_nice-1);
+ spin_unlock_irqrestore(&ps_spinlock,flags);
+}
+
+/* end of pseudo.h */
+
diff --git a/drivers/block/paride/pt.c b/drivers/block/paride/pt.c
new file mode 100644
index 00000000..7179f79d
--- /dev/null
+++ b/drivers/block/paride/pt.c
@@ -0,0 +1,1014 @@
+/*
+ pt.c (c) 1998 Grant R. Guenther <grant@torque.net>
+ Under the terms of the GNU General Public License.
+
+ This is the high-level driver for parallel port ATAPI tape
+ drives based on chips supported by the paride module.
+
+ The driver implements both rewinding and non-rewinding
+ devices, filemarks, and the rewind ioctl. It allocates
+ a small internal "bounce buffer" for each open device, but
+ otherwise expects buffering and blocking to be done at the
+ user level. As with most block-structured tapes, short
+ writes are padded to full tape blocks, so reading back a file
+ may return more data than was actually written.
+
+ By default, the driver will autoprobe for a single parallel
+ port ATAPI tape drive, but if their individual parameters are
+ specified, the driver can handle up to 4 drives.
+
+ The rewinding devices are named /dev/pt0, /dev/pt1, ...
+ while the non-rewinding devices are /dev/npt0, /dev/npt1, etc.
+
+ The behaviour of the pt driver can be altered by setting
+ some parameters from the insmod command line. The following
+ parameters are adjustable:
+
+ drive0 These four arguments can be arrays of
+ drive1 1-6 integers as follows:
+ drive2
+ drive3 <prt>,<pro>,<uni>,<mod>,<slv>,<dly>
+
+ Where,
+
+ <prt> is the base of the parallel port address for
+ the corresponding drive. (required)
+
+ <pro> is the protocol number for the adapter that
+ supports this drive. These numbers are
+ logged by 'paride' when the protocol modules
+ are initialised. (0 if not given)
+
+ <uni> for those adapters that support chained
+ devices, this is the unit selector for the
+ chain of devices on the given port. It should
+ be zero for devices that don't support chaining.
+ (0 if not given)
+
+ <mod> this can be -1 to choose the best mode, or one
+ of the mode numbers supported by the adapter.
+ (-1 if not given)
+
+ <slv> ATAPI devices can be jumpered to master or slave.
+ Set this to 0 to choose the master drive, 1 to
+ choose the slave, -1 (the default) to choose the
+ first drive found.
+
+ <dly> some parallel ports require the driver to
+ go more slowly. -1 sets a default value that
+ should work with the chosen protocol. Otherwise,
+ set this to a small integer, the larger it is
+ the slower the port i/o. In some cases, setting
+ this to zero will speed up the device. (default -1)
+
+ major You may use this parameter to override the
+ default major number (96) that this driver
+ will use. Be sure to change the device
+ name as well.
+
+ name This parameter is a character string that
+ contains the name the kernel will use for this
+ device (in /proc output, for instance).
+ (default "pt").
+
+ verbose This parameter controls the amount of logging
+ that the driver will do. Set it to 0 for
+ normal operation, 1 to see autoprobe progress
+ messages, or 2 to see additional debugging
+ output. (default 0)
+
+ If this driver is built into the kernel, you can use
+ the following command line parameters, with the same values
+ as the corresponding module parameters listed above:
+
+ pt.drive0
+ pt.drive1
+ pt.drive2
+ pt.drive3
+
+ In addition, you can use the parameter pt.disable to disable
+ the driver entirely.
+
+*/
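+
+/* A minimal user-space sketch of driving such a tape device. It is an
+   illustration only, not part of the driver: it reads one block from
+   the non-rewinding node and then issues the rewind ioctl mentioned
+   above via MTIOCTOP/MTREW from <sys/mtio.h>. The device node name and
+   the 512-byte block size are assumptions made for the example.
+
+	#include <stdio.h>
+	#include <fcntl.h>
+	#include <unistd.h>
+	#include <sys/ioctl.h>
+	#include <sys/mtio.h>
+
+	int main(void)
+	{
+		char buf[512];				// one tape block (assumed size)
+		struct mtop op = { MTREW, 1 };		// rewind, count 1
+		int fd = open("/dev/npt0", O_RDONLY);	// non-rewinding unit 0
+
+		if (fd < 0)
+			return 1;
+		if (read(fd, buf, sizeof(buf)) < 0)
+			perror("read");
+		if (ioctl(fd, MTIOCTOP, &op) < 0)	// rewind the tape
+			perror("MTIOCTOP");
+		close(fd);
+		return 0;
+	}
+*/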
+
+/* Changes:
+
+ 1.01 GRG 1998.05.06 Round up transfer size, fix ready_wait,
+ loosened interpretation of the ATAPI standard
+ for clearing error status.
+ Eliminate sti();
+ 1.02 GRG 1998.06.16 Eliminate an Ugh.
+ 1.03 GRG 1998.08.15 Adjusted PT_TMO, use HZ in loop timing,
+ extra debugging
+ 1.04 GRG 1998.09.24 Repair minor coding error, added jumbo support
+
+*/
+
+#define PT_VERSION "1.04"
+#define PT_MAJOR 96
+#define PT_NAME "pt"
+#define PT_UNITS 4
+
+/* Here are things one can override from the insmod command.
+ Most are autoprobed by paride unless set here. Verbose is off
+ by default.
+
+*/
+
+static int verbose = 0;
+static int major = PT_MAJOR;
+static char *name = PT_NAME;
+static int disable = 0;
+
+static int drive0[6] = { 0, 0, 0, -1, -1, -1 };
+static int drive1[6] = { 0, 0, 0, -1, -1, -1 };
+static int drive2[6] = { 0, 0, 0, -1, -1, -1 };
+static int drive3[6] = { 0, 0, 0, -1, -1, -1 };
+
+static int (*drives[4])[6] = {&drive0, &drive1, &drive2, &drive3};
+
+#define D_PRT 0
+#define D_PRO 1
+#define D_UNI 2
+#define D_MOD 3
+#define D_SLV 4
+#define D_DLY 5
+
+#define DU (*drives[unit])
+
+/* end of parameters */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/fs.h>
+#include <linux/delay.h>
+#include <linux/slab.h>
+#include <linux/mtio.h>
+#include <linux/device.h>
+#include <linux/sched.h> /* current, TASK_*, schedule_timeout() */
+#include <linux/mutex.h>
+
+#include <asm/uaccess.h>
+
+module_param(verbose, bool, 0);
+module_param(major, int, 0);
+module_param(name, charp, 0);
+module_param_array(drive0, int, NULL, 0);
+module_param_array(drive1, int, NULL, 0);
+module_param_array(drive2, int, NULL, 0);
+module_param_array(drive3, int, NULL, 0);
+
+#include "paride.h"
+
+#define PT_MAX_RETRIES 5
+#define PT_TMO 3000 /* interrupt timeout in jiffies */
+#define PT_SPIN_DEL 50 /* spin delay in micro-seconds */
+#define PT_RESET_TMO 30 /* 30 seconds */
+#define PT_READY_TMO 60 /* 60 seconds */
+#define PT_REWIND_TMO 1200 /* 20 minutes */
+
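+/* number of PT_SPIN_DEL-microsecond polls needed to cover a PT_TMO-jiffy
+   interval: (1000000 / (HZ * PT_SPIN_DEL)) polls per jiffy, times PT_TMO */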
+#define PT_SPIN ((1000000/(HZ*PT_SPIN_DEL))*PT_TMO)
+
+#define STAT_ERR 0x00001
+#define STAT_INDEX 0x00002
+#define STAT_ECC 0x00004
+#define STAT_DRQ 0x00008
+#define STAT_SEEK 0x00010
+#define STAT_WRERR 0x00020
+#define STAT_READY 0x00040
+#define STAT_BUSY 0x00080
+#define STAT_SENSE 0x1f000
+
+#define ATAPI_TEST_READY 0x00
+#define ATAPI_REWIND 0x01
+#define ATAPI_REQ_SENSE 0x03
+#define ATAPI_READ_6 0x08
+#define ATAPI_WRITE_6 0x0a
+#define ATAPI_WFM 0x10
+#define ATAPI_IDENTIFY 0x12
+#define ATAPI_MODE_SENSE 0x1a
+#define ATAPI_LOG_SENSE 0x4d
+
+static DEFINE_MUTEX(pt_mutex);
+static int pt_open(struct inode *inode, struct file *file);
+static long pt_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
+static int pt_release(struct inode *inode, struct file *file);
+static ssize_t pt_read(struct file *filp, char __user *buf,
+ size_t count, loff_t * ppos);
+static ssize_t pt_write(struct file *filp, const char __user *buf,
+ size_t count, loff_t * ppos);
+static int pt_detect(void);
+
+/* bits in tape->flags */
+
+#define PT_MEDIA 1
+#define PT_WRITE_OK 2
+#define PT_REWIND 4
+#define PT_WRITING 8
+#define PT_READING 16
+#define PT_EOF 32
+
+#define PT_NAMELEN 8
+#define PT_BUFSIZE 16384
+
+struct pt_unit {
+ struct pi_adapter pia; /* interface to paride layer */
+ struct pi_adapter *pi;
+ int flags; /* various state flags */
+ int last_sense; /* result of last request sense */
+ int drive; /* drive */
+ atomic_t available; /* 1 if access is available, 0 otherwise */
+ int bs; /* block size */
+ int capacity; /* Size of tape in KB */
+ int present; /* device present ? */
+ char *bufptr;
+ char name[PT_NAMELEN]; /* pt0, pt1, ... */
+};
+
+static int pt_identify(struct pt_unit *tape);
+
+static struct pt_unit pt[PT_UNITS];
+
+static char pt_scratch[512]; /* scratch block buffer */
+
+/* kernel glue structures */
+
+static const struct file_operations pt_fops = {
+ .owner = THIS_MODULE,
+ .read = pt_read,
+ .write = pt_write,
+ .unlocked_ioctl = pt_ioctl,
+ .open = pt_open,
+ .release = pt_release,
+ .llseek = noop_llseek,
+};
+
+/* sysfs class support */
+static struct class *pt_class;
+
+static inline int status_reg(struct pi_adapter *pi)
+{
+ return pi_read_regr(pi, 1, 6);
+}
+
+static inline int read_reg(struct pi_adapter *pi, int reg)
+{
+ return pi_read_regr(pi, 0, reg);
+}
+
+static inline void write_reg(struct pi_adapter *pi, int reg, int val)
+{
+ pi_write_regr(pi, 0, reg, val);
+}
+
+static inline u8 DRIVE(struct pt_unit *tape)
+{
+ return 0xa0+0x10*tape->drive;
+}
+
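+/* Poll the status register until the 'go' bits clear and, if 'stop' is
+ * non-zero, one of the 'stop' bits is set. Returns 0 on success; on error
+ * or timeout, optionally logs via 'fun'/'msg' and returns
+ * (error_reg << 8) | status, with 0x100 ORed into the error byte on timeout.
+ */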
+static int pt_wait(struct pt_unit *tape, int go, int stop, char *fun, char *msg)
+{
+ int j, r, e, s, p;
+ struct pi_adapter *pi = tape->pi;
+
+ j = 0;
+ while ((((r = status_reg(pi)) & go) || (stop && (!(r & stop))))
+ && (j++ < PT_SPIN))
+ udelay(PT_SPIN_DEL);
+
+ if ((r & (STAT_ERR & stop)) || (j > PT_SPIN)) {
+ s = read_reg(pi, 7);
+ e = read_reg(pi, 1);
+ p = read_reg(pi, 2);
+ if (j > PT_SPIN)
+ e |= 0x100;
+ if (fun)
+ printk("%s: %s %s: alt=0x%x stat=0x%x err=0x%x"
+ " loop=%d phase=%d\n",
+ tape->name, fun, msg, r, s, e, j, p);
+ return (e << 8) + s;
+ }
+ return 0;
+}
+
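+/* Start an ATAPI packet command: select the drive, program the byte-count
+ * registers with the expected transfer length, issue the PACKET opcode
+ * (0xa0) and send the 12-byte command block. Returns 0 on success or -1
+ * on failure (the bus is disconnected on failure).
+ */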
+static int pt_command(struct pt_unit *tape, char *cmd, int dlen, char *fun)
+{
+ struct pi_adapter *pi = tape->pi;
+ pi_connect(pi);
+
+ write_reg(pi, 6, DRIVE(tape));
+
+ if (pt_wait(tape, STAT_BUSY | STAT_DRQ, 0, fun, "before command")) {
+ pi_disconnect(pi);
+ return -1;
+ }
+
+ write_reg(pi, 4, dlen % 256);
+ write_reg(pi, 5, dlen / 256);
+ write_reg(pi, 7, 0xa0); /* ATAPI packet command */
+
+ if (pt_wait(tape, STAT_BUSY, STAT_DRQ, fun, "command DRQ")) {
+ pi_disconnect(pi);
+ return -1;
+ }
+
+ if (read_reg(pi, 2) != 1) {
+ printk("%s: %s: command phase error\n", tape->name, fun);
+ pi_disconnect(pi);
+ return -1;
+ }
+
+ pi_write_block(pi, cmd, 12);
+
+ return 0;
+}
+
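+/* Run the data phase of an ATAPI command: if the drive asserts DRQ,
+ * transfer the requested byte count (rounded up to a multiple of 4) in or
+ * out according to the phase register, then wait for final status and
+ * disconnect. Returns 0 or the pt_wait() error code.
+ */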
+static int pt_completion(struct pt_unit *tape, char *buf, char *fun)
+{
+ struct pi_adapter *pi = tape->pi;
+ int r, s, n, p;
+
+ r = pt_wait(tape, STAT_BUSY, STAT_DRQ | STAT_READY | STAT_ERR,
+ fun, "completion");
+
+ if (read_reg(pi, 7) & STAT_DRQ) {
+ n = (((read_reg(pi, 4) + 256 * read_reg(pi, 5)) +
+ 3) & 0xfffc);
+ p = read_reg(pi, 2) & 3;
+ if (p == 0)
+ pi_write_block(pi, buf, n);
+ if (p == 2)
+ pi_read_block(pi, buf, n);
+ }
+
+ s = pt_wait(tape, STAT_BUSY, STAT_READY | STAT_ERR, fun, "data done");
+
+ pi_disconnect(pi);
+
+ return (r ? r : s);
+}
+
+static void pt_req_sense(struct pt_unit *tape, int quiet)
+{
+ char rs_cmd[12] = { ATAPI_REQ_SENSE, 0, 0, 0, 16, 0, 0, 0, 0, 0, 0, 0 };
+ char buf[16];
+ int r;
+
+ r = pt_command(tape, rs_cmd, 16, "Request sense");
+ mdelay(1);
+ if (!r)
+ pt_completion(tape, buf, "Request sense");
+
+ tape->last_sense = -1;
+ if (!r) {
+ if (!quiet)
+ printk("%s: Sense key: %x, ASC: %x, ASQ: %x\n",
+ tape->name, buf[2] & 0xf, buf[12], buf[13]);
+ tape->last_sense = (buf[2] & 0xf) | ((buf[12] & 0xff) << 8)
+ | ((buf[13] & 0xff) << 16);
+ }
+}
+
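+/* Issue a complete ATAPI command (command plus data phase). On failure the
+ * sense data is fetched; the sense report is quiet when no 'fun' label was
+ * supplied.
+ */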
+static int pt_atapi(struct pt_unit *tape, char *cmd, int dlen, char *buf, char *fun)
+{
+ int r;
+
+ r = pt_command(tape, cmd, dlen, fun);
+ mdelay(1);
+ if (!r)
+ r = pt_completion(tape, buf, fun);
+ if (r)
+ pt_req_sense(tape, !fun);
+
+ return r;
+}
+
+static void pt_sleep(int cs)
+{
+ schedule_timeout_interruptible(cs);
+}
+
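+/* Poll the drive every 'pause' jiffies, up to 'tmo' polls, until it reports
+ * DSC (seek complete) or an error. Returns 1 on success, 0 on error or
+ * timeout (after requesting sense data).
+ */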
+static int pt_poll_dsc(struct pt_unit *tape, int pause, int tmo, char *msg)
+{
+ struct pi_adapter *pi = tape->pi;
+ int k, e, s;
+
+ k = 0;
+ e = 0;
+ s = 0;
+ while (k < tmo) {
+ pt_sleep(pause);
+ k++;
+ pi_connect(pi);
+ write_reg(pi, 6, DRIVE(tape));
+ s = read_reg(pi, 7);
+ e = read_reg(pi, 1);
+ pi_disconnect(pi);
+ if (s & (STAT_ERR | STAT_SEEK))
+ break;
+ }
+ if ((k >= tmo) || (s & STAT_ERR)) {
+ if (k >= tmo)
+ printk("%s: %s DSC timeout\n", tape->name, msg);
+ else
+ printk("%s: %s stat=0x%x err=0x%x\n", tape->name, msg, s,
+ e);
+ pt_req_sense(tape, 0);
+ return 0;
+ }
+ return 1;
+}
+
+static void pt_media_access_cmd(struct pt_unit *tape, int tmo, char *cmd, char *fun)
+{
+ if (pt_command(tape, cmd, 0, fun)) {
+ pt_req_sense(tape, 0);
+ return;
+ }
+ pi_disconnect(tape->pi);
+ pt_poll_dsc(tape, HZ, tmo, fun);
+}
+
+static void pt_rewind(struct pt_unit *tape)
+{
+ char rw_cmd[12] = { ATAPI_REWIND, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+
+ pt_media_access_cmd(tape, PT_REWIND_TMO, rw_cmd, "rewind");
+}
+
+static void pt_write_fm(struct pt_unit *tape)
+{
+ char wm_cmd[12] = { ATAPI_WFM, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0 };
+
+ pt_media_access_cmd(tape, PT_TMO, wm_cmd, "write filemark");
+}
+
+#define DBMSG(msg) ((verbose>1)?(msg):NULL)
+
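+/* Reset the drive by writing the reset command to the command register,
+ * wait for BSY to clear, then verify the expected ATAPI signature bytes.
+ * Returns 0 if the signature matches, -1 otherwise.
+ */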
+static int pt_reset(struct pt_unit *tape)
+{
+ struct pi_adapter *pi = tape->pi;
+ int i, k, flg;
+ int expect[5] = { 1, 1, 1, 0x14, 0xeb };
+
+ pi_connect(pi);
+ write_reg(pi, 6, DRIVE(tape));
+ write_reg(pi, 7, 8);
+
+ pt_sleep(20 * HZ / 1000);
+
+ k = 0;
+ while ((k++ < PT_RESET_TMO) && (status_reg(pi) & STAT_BUSY))
+ pt_sleep(HZ / 10);
+
+ flg = 1;
+ for (i = 0; i < 5; i++)
+ flg &= (read_reg(pi, i + 1) == expect[i]);
+
+ if (verbose) {
+ printk("%s: Reset (%d) signature = ", tape->name, k);
+ for (i = 0; i < 5; i++)
+ printk("%3x", read_reg(pi, i + 1));
+ if (!flg)
+ printk(" (incorrect)");
+ printk("\n");
+ }
+
+ pi_disconnect(pi);
+ return flg - 1;
+}
+
+static int pt_ready_wait(struct pt_unit *tape, int tmo)
+{
+ char tr_cmd[12] = { ATAPI_TEST_READY, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+ int k, p;
+
+ k = 0;
+ while (k < tmo) {
+ tape->last_sense = 0;
+ pt_atapi(tape, tr_cmd, 0, NULL, DBMSG("test unit ready"));
+ p = tape->last_sense;
+ if (!p)
+ return 0;
+ if (!(((p & 0xffff) == 0x0402) || ((p & 0xff) == 6)))
+ return p;
+ k++;
+ pt_sleep(HZ);
+ }
+ return 0x000020; /* timeout */
+}
+
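+/* Copy 'len' bytes starting at 'offs' from an identify buffer into 'targ',
+   collapsing runs of spaces and dropping a trailing space. */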
+static void xs(char *buf, char *targ, int offs, int len)
+{
+ int j, k, l;
+
+ j = 0;
+ l = 0;
+ for (k = 0; k < len; k++)
+ if ((buf[k + offs] != 0x20) || (buf[k + offs] != l))
+ l = targ[j++] = buf[k + offs];
+ if (l == 0x20)
+ j--;
+ targ[j] = 0;
+}
+
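+/* Assemble a 'size'-byte big-endian integer from the buffer at 'offs'. */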
+static int xn(char *buf, int offs, int size)
+{
+ int v, k;
+
+ v = 0;
+ for (k = 0; k < size; k++)
+ v = v * 256 + (buf[k + offs] & 0xff);
+ return v;
+}
+
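+/* Identify the drive (it must report as an ATAPI tape), then use MODE SENSE
+ * to read the write-protect flag and block size and LOG SENSE to read the
+ * capacity, and print a one-line summary. Returns 0 on success, -1 if the
+ * device is absent or not a tape.
+ */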
+static int pt_identify(struct pt_unit *tape)
+{
+ int dt, s;
+ char *ms[2] = { "master", "slave" };
+ char mf[10], id[18];
+ char id_cmd[12] = { ATAPI_IDENTIFY, 0, 0, 0, 36, 0, 0, 0, 0, 0, 0, 0 };
+ char ms_cmd[12] =
+ { ATAPI_MODE_SENSE, 0, 0x2a, 0, 36, 0, 0, 0, 0, 0, 0, 0 };
+ char ls_cmd[12] =
+ { ATAPI_LOG_SENSE, 0, 0x71, 0, 0, 0, 0, 0, 36, 0, 0, 0 };
+ char buf[36];
+
+ s = pt_atapi(tape, id_cmd, 36, buf, "identify");
+ if (s)
+ return -1;
+
+ dt = buf[0] & 0x1f;
+ if (dt != 1) {
+ if (verbose)
+ printk("%s: Drive %d, unsupported type %d\n",
+ tape->name, tape->drive, dt);
+ return -1;
+ }
+
+ xs(buf, mf, 8, 8);
+ xs(buf, id, 16, 16);
+
+ tape->flags = 0;
+ tape->capacity = 0;
+ tape->bs = 0;
+
+ if (!pt_ready_wait(tape, PT_READY_TMO))
+ tape->flags |= PT_MEDIA;
+
+ if (!pt_atapi(tape, ms_cmd, 36, buf, "mode sense")) {
+ if (!(buf[2] & 0x80))
+ tape->flags |= PT_WRITE_OK;
+ tape->bs = xn(buf, 10, 2);
+ }
+
+ if (!pt_atapi(tape, ls_cmd, 36, buf, "log sense"))
+ tape->capacity = xn(buf, 24, 4);
+
+ printk("%s: %s %s, %s", tape->name, mf, id, ms[tape->drive]);
+ if (!(tape->flags & PT_MEDIA))
+ printk(", no media\n");
+ else {
+ if (!(tape->flags & PT_WRITE_OK))
+ printk(", RO");
+ printk(", blocksize %d, %d MB\n", tape->bs, tape->capacity / 1024);
+ }
+
+ return 0;
+}
+
+
+/*
+ * Returns 0 (with tape->drive set) if a drive is detected,
+ * or -1 if drive detection failed.
+ */
+static int pt_probe(struct pt_unit *tape)
+{
+ if (tape->drive == -1) {
+ for (tape->drive = 0; tape->drive <= 1; tape->drive++)
+ if (!pt_reset(tape))
+ return pt_identify(tape);
+ } else {
+ if (!pt_reset(tape))
+ return pt_identify(tape);
+ }
+ return -1;
+}
+
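+/* Initialize and probe each explicitly configured unit; if none were
+ * configured, autoprobe a single unit. Returns 0 if at least one tape
+ * drive was found, -1 otherwise.
+ */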
+static int pt_detect(void)
+{
+ struct pt_unit *tape;
+ int specified = 0, found = 0;
+ int unit;
+
+ printk("%s: %s version %s, major %d\n", name, name, PT_VERSION, major);
+
+ specified = 0;
+ for (unit = 0; unit < PT_UNITS; unit++) {
+ struct pt_unit *tape = &pt[unit];
+ tape->pi = &tape->pia;
+ atomic_set(&tape->available, 1);
+ tape->flags = 0;
+ tape->last_sense = 0;
+ tape->present = 0;
+ tape->bufptr = NULL;
+ tape->drive = DU[D_SLV];
+ snprintf(tape->name, PT_NAMELEN, "%s%d", name, unit);
+ if (!DU[D_PRT])
+ continue;
+ specified++;
+ if (pi_init(tape->pi, 0, DU[D_PRT], DU[D_MOD], DU[D_UNI],
+ DU[D_PRO], DU[D_DLY], pt_scratch, PI_PT,
+ verbose, tape->name)) {
+ if (!pt_probe(tape)) {
+ tape->present = 1;
+ found++;
+ } else
+ pi_release(tape->pi);
+ }
+ }
+ if (specified == 0) {
+ tape = pt;
+ if (pi_init(tape->pi, 1, -1, -1, -1, -1, -1, pt_scratch,
+ PI_PT, verbose, tape->name)) {
+ if (!pt_probe(tape)) {
+ tape->present = 1;
+ found++;
+ } else
+ pi_release(tape->pi);
+ }
+
+ }
+ if (found)
+ return 0;
+
+ printk("%s: No ATAPI tape drive detected\n", name);
+ return -1;
+}
+
+static int pt_open(struct inode *inode, struct file *file)
+{
+ int unit = iminor(inode) & 0x7F;
+ struct pt_unit *tape = pt + unit;
+ int err;
+
+ mutex_lock(&pt_mutex);
+ if (unit >= PT_UNITS || (!tape->present)) {
+ mutex_unlock(&pt_mutex);
+ return -ENODEV;
+ }
+
+ err = -EBUSY;
+ if (!atomic_dec_and_test(&tape->available))
+ goto out;
+
+ pt_identify(tape);
+
+ err = -ENODEV;
+ if (!(tape->flags & PT_MEDIA))
+ goto out;
+
+ err = -EROFS;
+ if ((!(tape->flags & PT_WRITE_OK)) && (file->f_mode & FMODE_WRITE))
+ goto out;
+
+ if (!(iminor(inode) & 128))
+ tape->flags |= PT_REWIND;
+
+ err = -ENOMEM;
+ tape->bufptr = kmalloc(PT_BUFSIZE, GFP_KERNEL);
+ if (tape->bufptr == NULL) {
+ printk("%s: buffer allocation failed\n", tape->name);
+ goto out;
+ }
+
+ file->private_data = tape;
+ mutex_unlock(&pt_mutex);
+ return 0;
+
+out:
+ atomic_inc(&tape->available);
+ mutex_unlock(&pt_mutex);
+ return err;
+}
+
+static long pt_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+ struct pt_unit *tape = file->private_data;
+ struct mtop __user *p = (void __user *)arg;
+ struct mtop mtop;
+
+ switch (cmd) {
+ case MTIOCTOP:
+ if (copy_from_user(&mtop, p, sizeof(struct mtop)))
+ return -EFAULT;
+
+ switch (mtop.mt_op) {
+
+ case MTREW:
+ mutex_lock(&pt_mutex);
+ pt_rewind(tape);
+ mutex_unlock(&pt_mutex);
+ return 0;
+
+ case MTWEOF:
+ mutex_lock(&pt_mutex);
+ pt_write_fm(tape);
+ mutex_unlock(&pt_mutex);
+ return 0;
+
+ default:
+ /* FIXME: rate limit ?? */
+ printk(KERN_DEBUG "%s: Unimplemented mt_op %d\n", tape->name,
+ mtop.mt_op);
+ return -EINVAL;
+ }
+
+ default:
+ return -ENOTTY;
+ }
+}
+
+static int
+pt_release(struct inode *inode, struct file *file)
+{
+ struct pt_unit *tape = file->private_data;
+
+ if (atomic_read(&tape->available) > 1)
+ return -EINVAL;
+
+ if (tape->flags & PT_WRITING)
+ pt_write_fm(tape);
+
+ if (tape->flags & PT_REWIND)
+ pt_rewind(tape);
+
+ kfree(tape->bufptr);
+ tape->bufptr = NULL;
+
+ atomic_inc(&tape->available);
+
+ return 0;
+
+}
+
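+/* Read from the tape: each pass issues a READ(6) for up to 32KB (rounded up
+ * to whole blocks) and drains the resulting DRQ data phases through the
+ * driver's bounce buffer into user space. Returns the number of bytes
+ * copied.
+ */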
+static ssize_t pt_read(struct file *filp, char __user *buf, size_t count, loff_t * ppos)
+{
+ struct pt_unit *tape = filp->private_data;
+ struct pi_adapter *pi = tape->pi;
+ char rd_cmd[12] = { ATAPI_READ_6, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+ int k, n, r, p, s, t, b;
+
+ if (!(tape->flags & (PT_READING | PT_WRITING))) {
+ tape->flags |= PT_READING;
+ if (pt_atapi(tape, rd_cmd, 0, NULL, "start read-ahead"))
+ return -EIO;
+ } else if (tape->flags & PT_WRITING)
+ return -EIO;
+
+ if (tape->flags & PT_EOF)
+ return 0;
+
+ t = 0;
+
+ while (count > 0) {
+
+ if (!pt_poll_dsc(tape, HZ / 100, PT_TMO, "read"))
+ return -EIO;
+
+ n = count;
+ if (n > 32768)
+ n = 32768; /* max per command */
+ b = (n - 1 + tape->bs) / tape->bs;
+ n = b * tape->bs; /* rounded up to even block */
+
+ rd_cmd[4] = b;
+
+ r = pt_command(tape, rd_cmd, n, "read");
+
+ mdelay(1);
+
+ if (r) {
+ pt_req_sense(tape, 0);
+ return -EIO;
+ }
+
+ while (1) {
+
+ r = pt_wait(tape, STAT_BUSY,
+ STAT_DRQ | STAT_ERR | STAT_READY,
+ DBMSG("read DRQ"), "");
+
+ if (r & STAT_SENSE) {
+ pi_disconnect(pi);
+ pt_req_sense(tape, 0);
+ return -EIO;
+ }
+
+ if (r)
+ tape->flags |= PT_EOF;
+
+ s = read_reg(pi, 7);
+
+ if (!(s & STAT_DRQ))
+ break;
+
+ n = (read_reg(pi, 4) + 256 * read_reg(pi, 5));
+ p = (read_reg(pi, 2) & 3);
+ if (p != 2) {
+ pi_disconnect(pi);
+ printk("%s: Phase error on read: %d\n", tape->name,
+ p);
+ return -EIO;
+ }
+
+ while (n > 0) {
+ k = n;
+ if (k > PT_BUFSIZE)
+ k = PT_BUFSIZE;
+ pi_read_block(pi, tape->bufptr, k);
+ n -= k;
+ b = k;
+ if (b > count)
+ b = count;
+ if (copy_to_user(buf + t, tape->bufptr, b)) {
+ pi_disconnect(pi);
+ return -EFAULT;
+ }
+ t += b;
+ count -= b;
+ }
+
+ }
+ pi_disconnect(pi);
+ if (tape->flags & PT_EOF)
+ break;
+ }
+
+ return t;
+
+}
+
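+/* Write to the tape: user data is staged through the bounce buffer and sent
+ * with WRITE(6) commands of up to 32KB (rounded up to whole blocks).
+ * Returns the number of bytes written, or -ENOSPC if end of media had
+ * already been reached.
+ */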
+static ssize_t pt_write(struct file *filp, const char __user *buf, size_t count, loff_t * ppos)
+{
+ struct pt_unit *tape = filp->private_data;
+ struct pi_adapter *pi = tape->pi;
+ char wr_cmd[12] = { ATAPI_WRITE_6, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+ int k, n, r, p, s, t, b;
+
+ if (!(tape->flags & PT_WRITE_OK))
+ return -EROFS;
+
+ if (!(tape->flags & (PT_READING | PT_WRITING))) {
+ tape->flags |= PT_WRITING;
+ if (pt_atapi
+ (tape, wr_cmd, 0, NULL, "start buffer-available mode"))
+ return -EIO;
+ } else if (tape->flags & PT_READING)
+ return -EIO;
+
+ if (tape->flags & PT_EOF)
+ return -ENOSPC;
+
+ t = 0;
+
+ while (count > 0) {
+
+ if (!pt_poll_dsc(tape, HZ / 100, PT_TMO, "write"))
+ return -EIO;
+
+ n = count;
+ if (n > 32768)
+ n = 32768; /* max per command */
+ b = (n - 1 + tape->bs) / tape->bs;
+ n = b * tape->bs; /* rounded up to even block */
+
+ wr_cmd[4] = b;
+
+ r = pt_command(tape, wr_cmd, n, "write");
+
+ mdelay(1);
+
+ if (r) { /* error delivering command only */
+ pt_req_sense(tape, 0);
+ return -EIO;
+ }
+
+ while (1) {
+
+ r = pt_wait(tape, STAT_BUSY,
+ STAT_DRQ | STAT_ERR | STAT_READY,
+ DBMSG("write DRQ"), NULL);
+
+ if (r & STAT_SENSE) {
+ pi_disconnect(pi);
+ pt_req_sense(tape, 0);
+ return -EIO;
+ }
+
+ if (r)
+ tape->flags |= PT_EOF;
+
+ s = read_reg(pi, 7);
+
+ if (!(s & STAT_DRQ))
+ break;
+
+ n = (read_reg(pi, 4) + 256 * read_reg(pi, 5));
+ p = (read_reg(pi, 2) & 3);
+ if (p != 0) {
+ pi_disconnect(pi);
+ printk("%s: Phase error on write: %d \n",
+ tape->name, p);
+ return -EIO;
+ }
+
+ while (n > 0) {
+ k = n;
+ if (k > PT_BUFSIZE)
+ k = PT_BUFSIZE;
+ b = k;
+ if (b > count)
+ b = count;
+ if (copy_from_user(tape->bufptr, buf + t, b)) {
+ pi_disconnect(pi);
+ return -EFAULT;
+ }
+ pi_write_block(pi, tape->bufptr, k);
+ t += b;
+ count -= b;
+ n -= k;
+ }
+
+ }
+ pi_disconnect(pi);
+ if (tape->flags & PT_EOF)
+ break;
+ }
+
+ return t;
+}
+
+static int __init pt_init(void)
+{
+ int unit;
+ int err;
+
+ if (disable) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ if (pt_detect()) {
+ err = -ENODEV;
+ goto out;
+ }
+
+ err = register_chrdev(major, name, &pt_fops);
+ if (err < 0) {
+ printk("pt_init: unable to get major number %d\n", major);
+ for (unit = 0; unit < PT_UNITS; unit++)
+ if (pt[unit].present)
+ pi_release(pt[unit].pi);
+ goto out;
+ }
+ major = err;
+ pt_class = class_create(THIS_MODULE, "pt");
+ if (IS_ERR(pt_class)) {
+ err = PTR_ERR(pt_class);
+ goto out_chrdev;
+ }
+
+ for (unit = 0; unit < PT_UNITS; unit++)
+ if (pt[unit].present) {
+ device_create(pt_class, NULL, MKDEV(major, unit), NULL,
+ "pt%d", unit);
+ device_create(pt_class, NULL, MKDEV(major, unit + 128),
+ NULL, "pt%dn", unit);
+ }
+ goto out;
+
+out_chrdev:
+ unregister_chrdev(major, "pt");
+out:
+ return err;
+}
+
+static void __exit pt_exit(void)
+{
+ int unit;
+ for (unit = 0; unit < PT_UNITS; unit++)
+ if (pt[unit].present) {
+ device_destroy(pt_class, MKDEV(major, unit));
+ device_destroy(pt_class, MKDEV(major, unit + 128));
+ }
+ class_destroy(pt_class);
+ unregister_chrdev(major, name);
+ for (unit = 0; unit < PT_UNITS; unit++)
+ if (pt[unit].present)
+ pi_release(pt[unit].pi);
+}
+
+MODULE_LICENSE("GPL");
+module_init(pt_init)
+module_exit(pt_exit)
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
new file mode 100644
index 00000000..07a382ea
--- /dev/null
+++ b/drivers/block/pktcdvd.c
@@ -0,0 +1,3117 @@
+/*
+ * Copyright (C) 2000 Jens Axboe <axboe@suse.de>
+ * Copyright (C) 2001-2004 Peter Osterlund <petero2@telia.com>
+ * Copyright (C) 2006 Thomas Maier <balagi@justmail.de>
+ *
+ * May be copied or modified under the terms of the GNU General Public
+ * License. See linux/COPYING for more information.
+ *
+ * Packet writing layer for ATAPI and SCSI CD-RW, DVD+RW, DVD-RW and
+ * DVD-RAM devices.
+ *
+ * Theory of operation:
+ *
+ * At the lowest level, there is the standard driver for the CD/DVD device,
+ * typically ide-cd.c or sr.c. This driver can handle read and write requests,
+ * but it doesn't know anything about the special restrictions that apply to
+ * packet writing. One restriction is that write requests must be aligned to
+ * packet boundaries on the physical media, and the size of a write request
+ * must be equal to the packet size. Another restriction is that a
+ * GPCMD_FLUSH_CACHE command has to be issued to the drive before a read
+ * command, if the previous command was a write.
+ *
+ * The purpose of the packet writing driver is to hide these restrictions from
+ * higher layers, such as file systems, and present a block device that can be
+ * randomly read and written using 2kB-sized blocks.
+ *
+ * The lowest layer in the packet writing driver is the packet I/O scheduler.
+ * Its data is defined by the struct packet_iosched and includes two bio
+ * queues with pending read and write requests. These queues are processed
+ * by the pkt_iosched_process_queue() function. The write requests in this
+ * queue are already properly aligned and sized. This layer is responsible for
+ * issuing the flush cache commands and scheduling the I/O in a good order.
+ *
+ * The next layer transforms unaligned write requests to aligned writes. This
+ * transformation requires reading missing pieces of data from the underlying
+ * block device, assembling the pieces to full packets and queuing them to the
+ * packet I/O scheduler.
+ *
+ * At the top layer there is a custom make_request_fn function that forwards
+ * read requests directly to the iosched queue and puts write requests in the
+ * unaligned write queue. A kernel thread performs the necessary read
+ * gathering to convert the unaligned writes to aligned writes and then feeds
+ * them to the packet I/O scheduler.
+ *
+ *************************************************************************/
+
+#include <linux/pktcdvd.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/compat.h>
+#include <linux/kthread.h>
+#include <linux/errno.h>
+#include <linux/spinlock.h>
+#include <linux/file.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/miscdevice.h>
+#include <linux/freezer.h>
+#include <linux/mutex.h>
+#include <linux/slab.h>
+#include <scsi/scsi_cmnd.h>
+#include <scsi/scsi_ioctl.h>
+#include <scsi/scsi.h>
+#include <linux/debugfs.h>
+#include <linux/device.h>
+
+#include <asm/uaccess.h>
+
+#define DRIVER_NAME "pktcdvd"
+
+#if PACKET_DEBUG
+#define DPRINTK(fmt, args...) printk(KERN_NOTICE fmt, ##args)
+#else
+#define DPRINTK(fmt, args...)
+#endif
+
+#if PACKET_DEBUG > 1
+#define VPRINTK(fmt, args...) printk(KERN_NOTICE fmt, ##args)
+#else
+#define VPRINTK(fmt, args...)
+#endif
+
+#define MAX_SPEED 0xffff
+
+#define ZONE(sector, pd) (((sector) + (pd)->offset) & ~((pd)->settings.size - 1))
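+/* e.g. with settings.size == 64 sectors (a 32KB packet) and offset == 0,
+   sectors 0..63 map to zone 0 and sector 100 maps to the zone at 64 */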
+
+static DEFINE_MUTEX(pktcdvd_mutex);
+static struct pktcdvd_device *pkt_devs[MAX_WRITERS];
+static struct proc_dir_entry *pkt_proc;
+static int pktdev_major;
+static int write_congestion_on = PKT_WRITE_CONGESTION_ON;
+static int write_congestion_off = PKT_WRITE_CONGESTION_OFF;
+static struct mutex ctl_mutex; /* Serialize open/close/setup/teardown */
+static mempool_t *psd_pool;
+
+static struct class *class_pktcdvd = NULL; /* /sys/class/pktcdvd */
+static struct dentry *pkt_debugfs_root = NULL; /* /sys/kernel/debug/pktcdvd */
+
+/* forward declaration */
+static int pkt_setup_dev(dev_t dev, dev_t* pkt_dev);
+static int pkt_remove_dev(dev_t pkt_dev);
+static int pkt_seq_show(struct seq_file *m, void *p);
+
+
+
+/*
+ * create and register a pktcdvd kernel object.
+ */
+static struct pktcdvd_kobj* pkt_kobj_create(struct pktcdvd_device *pd,
+ const char* name,
+ struct kobject* parent,
+ struct kobj_type* ktype)
+{
+ struct pktcdvd_kobj *p;
+ int error;
+
+ p = kzalloc(sizeof(*p), GFP_KERNEL);
+ if (!p)
+ return NULL;
+ p->pd = pd;
+ error = kobject_init_and_add(&p->kobj, ktype, parent, "%s", name);
+ if (error) {
+ kobject_put(&p->kobj);
+ return NULL;
+ }
+ kobject_uevent(&p->kobj, KOBJ_ADD);
+ return p;
+}
+/*
+ * remove a pktcdvd kernel object.
+ */
+static void pkt_kobj_remove(struct pktcdvd_kobj *p)
+{
+ if (p)
+ kobject_put(&p->kobj);
+}
+/*
+ * default release function for pktcdvd kernel objects.
+ */
+static void pkt_kobj_release(struct kobject *kobj)
+{
+ kfree(to_pktcdvdkobj(kobj));
+}
+
+
+/**********************************************************
+ *
+ * sysfs interface for pktcdvd
+ * by (C) 2006 Thomas Maier <balagi@justmail.de>
+ *
+ **********************************************************/
+
+#define DEF_ATTR(_obj,_name,_mode) \
+ static struct attribute _obj = { .name = _name, .mode = _mode }
+
+/**********************************************************
+ /sys/class/pktcdvd/pktcdvd[0-7]/
+ stat/reset
+ stat/packets_started
+ stat/packets_finished
+ stat/kb_written
+ stat/kb_read
+ stat/kb_read_gather
+ write_queue/size
+ write_queue/congestion_off
+ write_queue/congestion_on
+ **********************************************************/
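+/* For example, "cat /sys/class/pktcdvd/pktcdvd0/write_queue/size" reports
+   the number of queued write bios, and writing any value to stat/reset
+   clears the statistics counters. */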
+
+DEF_ATTR(kobj_pkt_attr_st1, "reset", 0200);
+DEF_ATTR(kobj_pkt_attr_st2, "packets_started", 0444);
+DEF_ATTR(kobj_pkt_attr_st3, "packets_finished", 0444);
+DEF_ATTR(kobj_pkt_attr_st4, "kb_written", 0444);
+DEF_ATTR(kobj_pkt_attr_st5, "kb_read", 0444);
+DEF_ATTR(kobj_pkt_attr_st6, "kb_read_gather", 0444);
+
+static struct attribute *kobj_pkt_attrs_stat[] = {
+ &kobj_pkt_attr_st1,
+ &kobj_pkt_attr_st2,
+ &kobj_pkt_attr_st3,
+ &kobj_pkt_attr_st4,
+ &kobj_pkt_attr_st5,
+ &kobj_pkt_attr_st6,
+ NULL
+};
+
+DEF_ATTR(kobj_pkt_attr_wq1, "size", 0444);
+DEF_ATTR(kobj_pkt_attr_wq2, "congestion_off", 0644);
+DEF_ATTR(kobj_pkt_attr_wq3, "congestion_on", 0644);
+
+static struct attribute *kobj_pkt_attrs_wqueue[] = {
+ &kobj_pkt_attr_wq1,
+ &kobj_pkt_attr_wq2,
+ &kobj_pkt_attr_wq3,
+ NULL
+};
+
+static ssize_t kobj_pkt_show(struct kobject *kobj,
+ struct attribute *attr, char *data)
+{
+ struct pktcdvd_device *pd = to_pktcdvdkobj(kobj)->pd;
+ int n = 0;
+ int v;
+ if (strcmp(attr->name, "packets_started") == 0) {
+ n = sprintf(data, "%lu\n", pd->stats.pkt_started);
+
+ } else if (strcmp(attr->name, "packets_finished") == 0) {
+ n = sprintf(data, "%lu\n", pd->stats.pkt_ended);
+
+ } else if (strcmp(attr->name, "kb_written") == 0) {
+ n = sprintf(data, "%lu\n", pd->stats.secs_w >> 1);
+
+ } else if (strcmp(attr->name, "kb_read") == 0) {
+ n = sprintf(data, "%lu\n", pd->stats.secs_r >> 1);
+
+ } else if (strcmp(attr->name, "kb_read_gather") == 0) {
+ n = sprintf(data, "%lu\n", pd->stats.secs_rg >> 1);
+
+ } else if (strcmp(attr->name, "size") == 0) {
+ spin_lock(&pd->lock);
+ v = pd->bio_queue_size;
+ spin_unlock(&pd->lock);
+ n = sprintf(data, "%d\n", v);
+
+ } else if (strcmp(attr->name, "congestion_off") == 0) {
+ spin_lock(&pd->lock);
+ v = pd->write_congestion_off;
+ spin_unlock(&pd->lock);
+ n = sprintf(data, "%d\n", v);
+
+ } else if (strcmp(attr->name, "congestion_on") == 0) {
+ spin_lock(&pd->lock);
+ v = pd->write_congestion_on;
+ spin_unlock(&pd->lock);
+ n = sprintf(data, "%d\n", v);
+ }
+ return n;
+}
+
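+/* Sanity-check the write congestion marks: clamp the high-water mark to
+ * [500, 1000000] and keep the low-water mark at least 100 below it (but no
+ * lower than 100); a non-positive high-water mark disables congestion
+ * handling (both marks become -1).
+ */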
+static void init_write_congestion_marks(int* lo, int* hi)
+{
+ if (*hi > 0) {
+ *hi = max(*hi, 500);
+ *hi = min(*hi, 1000000);
+ if (*lo <= 0)
+ *lo = *hi - 100;
+ else {
+ *lo = min(*lo, *hi - 100);
+ *lo = max(*lo, 100);
+ }
+ } else {
+ *hi = -1;
+ *lo = -1;
+ }
+}
+
+static ssize_t kobj_pkt_store(struct kobject *kobj,
+ struct attribute *attr,
+ const char *data, size_t len)
+{
+ struct pktcdvd_device *pd = to_pktcdvdkobj(kobj)->pd;
+ int val;
+
+ if (strcmp(attr->name, "reset") == 0 && len > 0) {
+ pd->stats.pkt_started = 0;
+ pd->stats.pkt_ended = 0;
+ pd->stats.secs_w = 0;
+ pd->stats.secs_rg = 0;
+ pd->stats.secs_r = 0;
+
+ } else if (strcmp(attr->name, "congestion_off") == 0
+ && sscanf(data, "%d", &val) == 1) {
+ spin_lock(&pd->lock);
+ pd->write_congestion_off = val;
+ init_write_congestion_marks(&pd->write_congestion_off,
+ &pd->write_congestion_on);
+ spin_unlock(&pd->lock);
+
+ } else if (strcmp(attr->name, "congestion_on") == 0
+ && sscanf(data, "%d", &val) == 1) {
+ spin_lock(&pd->lock);
+ pd->write_congestion_on = val;
+ init_write_congestion_marks(&pd->write_congestion_off,
+ &pd->write_congestion_on);
+ spin_unlock(&pd->lock);
+ }
+ return len;
+}
+
+static const struct sysfs_ops kobj_pkt_ops = {
+ .show = kobj_pkt_show,
+ .store = kobj_pkt_store
+};
+static struct kobj_type kobj_pkt_type_stat = {
+ .release = pkt_kobj_release,
+ .sysfs_ops = &kobj_pkt_ops,
+ .default_attrs = kobj_pkt_attrs_stat
+};
+static struct kobj_type kobj_pkt_type_wqueue = {
+ .release = pkt_kobj_release,
+ .sysfs_ops = &kobj_pkt_ops,
+ .default_attrs = kobj_pkt_attrs_wqueue
+};
+
+static void pkt_sysfs_dev_new(struct pktcdvd_device *pd)
+{
+ if (class_pktcdvd) {
+ pd->dev = device_create(class_pktcdvd, NULL, MKDEV(0, 0), NULL,
+ "%s", pd->name);
+ if (IS_ERR(pd->dev))
+ pd->dev = NULL;
+ }
+ if (pd->dev) {
+ pd->kobj_stat = pkt_kobj_create(pd, "stat",
+ &pd->dev->kobj,
+ &kobj_pkt_type_stat);
+ pd->kobj_wqueue = pkt_kobj_create(pd, "write_queue",
+ &pd->dev->kobj,
+ &kobj_pkt_type_wqueue);
+ }
+}
+
+static void pkt_sysfs_dev_remove(struct pktcdvd_device *pd)
+{
+ pkt_kobj_remove(pd->kobj_stat);
+ pkt_kobj_remove(pd->kobj_wqueue);
+ if (class_pktcdvd)
+ device_unregister(pd->dev);
+}
+
+
+/********************************************************************
+ /sys/class/pktcdvd/
+ add map block device
+ remove unmap packet dev
+ device_map show mappings
+ *******************************************************************/
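+/* Example (device numbers are illustrative): writing "11:0" to
+   /sys/class/pktcdvd/add maps the CD device with that major:minor, and
+   writing the resulting packet device's major:minor (as listed by
+   device_map) to /sys/class/pktcdvd/remove tears the mapping down. */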
+
+static void class_pktcdvd_release(struct class *cls)
+{
+ kfree(cls);
+}
+static ssize_t class_pktcdvd_show_map(struct class *c,
+ struct class_attribute *attr,
+ char *data)
+{
+ int n = 0;
+ int idx;
+ mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
+ for (idx = 0; idx < MAX_WRITERS; idx++) {
+ struct pktcdvd_device *pd = pkt_devs[idx];
+ if (!pd)
+ continue;
+ n += sprintf(data+n, "%s %u:%u %u:%u\n",
+ pd->name,
+ MAJOR(pd->pkt_dev), MINOR(pd->pkt_dev),
+ MAJOR(pd->bdev->bd_dev),
+ MINOR(pd->bdev->bd_dev));
+ }
+ mutex_unlock(&ctl_mutex);
+ return n;
+}
+
+static ssize_t class_pktcdvd_store_add(struct class *c,
+ struct class_attribute *attr,
+ const char *buf,
+ size_t count)
+{
+ unsigned int major, minor;
+
+ if (sscanf(buf, "%u:%u", &major, &minor) == 2) {
+ /* pkt_setup_dev() expects caller to hold reference to self */
+ if (!try_module_get(THIS_MODULE))
+ return -ENODEV;
+
+ pkt_setup_dev(MKDEV(major, minor), NULL);
+
+ module_put(THIS_MODULE);
+
+ return count;
+ }
+
+ return -EINVAL;
+}
+
+static ssize_t class_pktcdvd_store_remove(struct class *c,
+ struct class_attribute *attr,
+ const char *buf,
+ size_t count)
+{
+ unsigned int major, minor;
+ if (sscanf(buf, "%u:%u", &major, &minor) == 2) {
+ pkt_remove_dev(MKDEV(major, minor));
+ return count;
+ }
+ return -EINVAL;
+}
+
+static struct class_attribute class_pktcdvd_attrs[] = {
+ __ATTR(add, 0200, NULL, class_pktcdvd_store_add),
+ __ATTR(remove, 0200, NULL, class_pktcdvd_store_remove),
+ __ATTR(device_map, 0444, class_pktcdvd_show_map, NULL),
+ __ATTR_NULL
+};
+
+
+static int pkt_sysfs_init(void)
+{
+ int ret = 0;
+
+ /*
+ * create control files in sysfs
+ * /sys/class/pktcdvd/...
+ */
+ class_pktcdvd = kzalloc(sizeof(*class_pktcdvd), GFP_KERNEL);
+ if (!class_pktcdvd)
+ return -ENOMEM;
+ class_pktcdvd->name = DRIVER_NAME;
+ class_pktcdvd->owner = THIS_MODULE;
+ class_pktcdvd->class_release = class_pktcdvd_release;
+ class_pktcdvd->class_attrs = class_pktcdvd_attrs;
+ ret = class_register(class_pktcdvd);
+ if (ret) {
+ kfree(class_pktcdvd);
+ class_pktcdvd = NULL;
+ printk(DRIVER_NAME": failed to create class pktcdvd\n");
+ return ret;
+ }
+ return 0;
+}
+
+static void pkt_sysfs_cleanup(void)
+{
+ if (class_pktcdvd)
+ class_destroy(class_pktcdvd);
+ class_pktcdvd = NULL;
+}
+
+/********************************************************************
+ entries in debugfs
+
+ /sys/kernel/debug/pktcdvd[0-7]/
+ info
+
+ *******************************************************************/
+
+static int pkt_debugfs_seq_show(struct seq_file *m, void *p)
+{
+ return pkt_seq_show(m, p);
+}
+
+static int pkt_debugfs_fops_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, pkt_debugfs_seq_show, inode->i_private);
+}
+
+static const struct file_operations debug_fops = {
+ .open = pkt_debugfs_fops_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+ .owner = THIS_MODULE,
+};
+
+static void pkt_debugfs_dev_new(struct pktcdvd_device *pd)
+{
+ if (!pkt_debugfs_root)
+ return;
+ pd->dfs_f_info = NULL;
+ pd->dfs_d_root = debugfs_create_dir(pd->name, pkt_debugfs_root);
+ if (IS_ERR(pd->dfs_d_root)) {
+ pd->dfs_d_root = NULL;
+ return;
+ }
+ pd->dfs_f_info = debugfs_create_file("info", S_IRUGO,
+ pd->dfs_d_root, pd, &debug_fops);
+ if (IS_ERR(pd->dfs_f_info)) {
+ pd->dfs_f_info = NULL;
+ return;
+ }
+}
+
+static void pkt_debugfs_dev_remove(struct pktcdvd_device *pd)
+{
+ if (!pkt_debugfs_root)
+ return;
+ if (pd->dfs_f_info)
+ debugfs_remove(pd->dfs_f_info);
+ pd->dfs_f_info = NULL;
+ if (pd->dfs_d_root)
+ debugfs_remove(pd->dfs_d_root);
+ pd->dfs_d_root = NULL;
+}
+
+static void pkt_debugfs_init(void)
+{
+ pkt_debugfs_root = debugfs_create_dir(DRIVER_NAME, NULL);
+ if (IS_ERR(pkt_debugfs_root)) {
+ pkt_debugfs_root = NULL;
+ return;
+ }
+}
+
+static void pkt_debugfs_cleanup(void)
+{
+ if (!pkt_debugfs_root)
+ return;
+ debugfs_remove(pkt_debugfs_root);
+ pkt_debugfs_root = NULL;
+}
+
+/* ----------------------------------------------------------*/
+
+
+static void pkt_bio_finished(struct pktcdvd_device *pd)
+{
+ BUG_ON(atomic_read(&pd->cdrw.pending_bios) <= 0);
+ if (atomic_dec_and_test(&pd->cdrw.pending_bios)) {
+ VPRINTK(DRIVER_NAME": queue empty\n");
+ atomic_set(&pd->iosched.attention, 1);
+ wake_up(&pd->wqueue);
+ }
+}
+
+static void pkt_bio_destructor(struct bio *bio)
+{
+ kfree(bio->bi_io_vec);
+ kfree(bio);
+}
+
+static struct bio *pkt_bio_alloc(int nr_iovecs)
+{
+ struct bio_vec *bvl = NULL;
+ struct bio *bio;
+
+ bio = kmalloc(sizeof(struct bio), GFP_KERNEL);
+ if (!bio)
+ goto no_bio;
+ bio_init(bio);
+
+ bvl = kcalloc(nr_iovecs, sizeof(struct bio_vec), GFP_KERNEL);
+ if (!bvl)
+ goto no_bvl;
+
+ bio->bi_max_vecs = nr_iovecs;
+ bio->bi_io_vec = bvl;
+ bio->bi_destructor = pkt_bio_destructor;
+
+ return bio;
+
+ no_bvl:
+ kfree(bio);
+ no_bio:
+ return NULL;
+}
+
+/*
+ * Allocate a packet_data struct
+ */
+static struct packet_data *pkt_alloc_packet_data(int frames)
+{
+ int i;
+ struct packet_data *pkt;
+
+ pkt = kzalloc(sizeof(struct packet_data), GFP_KERNEL);
+ if (!pkt)
+ goto no_pkt;
+
+ pkt->frames = frames;
+ pkt->w_bio = pkt_bio_alloc(frames);
+ if (!pkt->w_bio)
+ goto no_bio;
+
+ for (i = 0; i < frames / FRAMES_PER_PAGE; i++) {
+ pkt->pages[i] = alloc_page(GFP_KERNEL|__GFP_ZERO);
+ if (!pkt->pages[i])
+ goto no_page;
+ }
+
+ spin_lock_init(&pkt->lock);
+ bio_list_init(&pkt->orig_bios);
+
+ for (i = 0; i < frames; i++) {
+ struct bio *bio = pkt_bio_alloc(1);
+ if (!bio)
+ goto no_rd_bio;
+ pkt->r_bios[i] = bio;
+ }
+
+ return pkt;
+
+no_rd_bio:
+ for (i = 0; i < frames; i++) {
+ struct bio *bio = pkt->r_bios[i];
+ if (bio)
+ bio_put(bio);
+ }
+
+no_page:
+ for (i = 0; i < frames / FRAMES_PER_PAGE; i++)
+ if (pkt->pages[i])
+ __free_page(pkt->pages[i]);
+ bio_put(pkt->w_bio);
+no_bio:
+ kfree(pkt);
+no_pkt:
+ return NULL;
+}
+
+/*
+ * Free a packet_data struct
+ */
+static void pkt_free_packet_data(struct packet_data *pkt)
+{
+ int i;
+
+ for (i = 0; i < pkt->frames; i++) {
+ struct bio *bio = pkt->r_bios[i];
+ if (bio)
+ bio_put(bio);
+ }
+ for (i = 0; i < pkt->frames / FRAMES_PER_PAGE; i++)
+ __free_page(pkt->pages[i]);
+ bio_put(pkt->w_bio);
+ kfree(pkt);
+}
+
+static void pkt_shrink_pktlist(struct pktcdvd_device *pd)
+{
+ struct packet_data *pkt, *next;
+
+ BUG_ON(!list_empty(&pd->cdrw.pkt_active_list));
+
+ list_for_each_entry_safe(pkt, next, &pd->cdrw.pkt_free_list, list) {
+ pkt_free_packet_data(pkt);
+ }
+ INIT_LIST_HEAD(&pd->cdrw.pkt_free_list);
+}
+
+static int pkt_grow_pktlist(struct pktcdvd_device *pd, int nr_packets)
+{
+ struct packet_data *pkt;
+
+ BUG_ON(!list_empty(&pd->cdrw.pkt_free_list));
+
+ while (nr_packets > 0) {
+ pkt = pkt_alloc_packet_data(pd->settings.size >> 2);
+ if (!pkt) {
+ pkt_shrink_pktlist(pd);
+ return 0;
+ }
+ pkt->id = nr_packets;
+ pkt->pd = pd;
+ list_add(&pkt->list, &pd->cdrw.pkt_free_list);
+ nr_packets--;
+ }
+ return 1;
+}
+
+static inline struct pkt_rb_node *pkt_rbtree_next(struct pkt_rb_node *node)
+{
+ struct rb_node *n = rb_next(&node->rb_node);
+ if (!n)
+ return NULL;
+ return rb_entry(n, struct pkt_rb_node, rb_node);
+}
+
+static void pkt_rbtree_erase(struct pktcdvd_device *pd, struct pkt_rb_node *node)
+{
+ rb_erase(&node->rb_node, &pd->bio_queue);
+ mempool_free(node, pd->rb_pool);
+ pd->bio_queue_size--;
+ BUG_ON(pd->bio_queue_size < 0);
+}
+
+/*
+ * Find the first node in the pd->bio_queue rb tree with a starting sector >= s.
+ */
+static struct pkt_rb_node *pkt_rbtree_find(struct pktcdvd_device *pd, sector_t s)
+{
+ struct rb_node *n = pd->bio_queue.rb_node;
+ struct rb_node *next;
+ struct pkt_rb_node *tmp;
+
+ if (!n) {
+ BUG_ON(pd->bio_queue_size > 0);
+ return NULL;
+ }
+
+ for (;;) {
+ tmp = rb_entry(n, struct pkt_rb_node, rb_node);
+ if (s <= tmp->bio->bi_sector)
+ next = n->rb_left;
+ else
+ next = n->rb_right;
+ if (!next)
+ break;
+ n = next;
+ }
+
+ if (s > tmp->bio->bi_sector) {
+ tmp = pkt_rbtree_next(tmp);
+ if (!tmp)
+ return NULL;
+ }
+ BUG_ON(s > tmp->bio->bi_sector);
+ return tmp;
+}
+
+/*
+ * Insert a node into the pd->bio_queue rb tree.
+ */
+static void pkt_rbtree_insert(struct pktcdvd_device *pd, struct pkt_rb_node *node)
+{
+ struct rb_node **p = &pd->bio_queue.rb_node;
+ struct rb_node *parent = NULL;
+ sector_t s = node->bio->bi_sector;
+ struct pkt_rb_node *tmp;
+
+ while (*p) {
+ parent = *p;
+ tmp = rb_entry(parent, struct pkt_rb_node, rb_node);
+ if (s < tmp->bio->bi_sector)
+ p = &(*p)->rb_left;
+ else
+ p = &(*p)->rb_right;
+ }
+ rb_link_node(&node->rb_node, parent, p);
+ rb_insert_color(&node->rb_node, &pd->bio_queue);
+ pd->bio_queue_size++;
+}
+
+/*
+ * Send a packet_command to the underlying block device and
+ * wait for completion.
+ */
+static int pkt_generic_packet(struct pktcdvd_device *pd, struct packet_command *cgc)
+{
+ struct request_queue *q = bdev_get_queue(pd->bdev);
+ struct request *rq;
+ int ret = 0;
+
+ rq = blk_get_request(q, (cgc->data_direction == CGC_DATA_WRITE) ?
+ WRITE : READ, __GFP_WAIT);
+
+ if (cgc->buflen) {
+ if (blk_rq_map_kern(q, rq, cgc->buffer, cgc->buflen, __GFP_WAIT))
+ goto out;
+ }
+
+ rq->cmd_len = COMMAND_SIZE(cgc->cmd[0]);
+ memcpy(rq->cmd, cgc->cmd, CDROM_PACKET_SIZE);
+
+ rq->timeout = 60*HZ;
+ rq->cmd_type = REQ_TYPE_BLOCK_PC;
+ if (cgc->quiet)
+ rq->cmd_flags |= REQ_QUIET;
+
+ blk_execute_rq(rq->q, pd->bdev->bd_disk, rq, 0);
+ if (rq->errors)
+ ret = -EIO;
+out:
+ blk_put_request(rq);
+ return ret;
+}
+
+/*
+ * A generic sense dump / resolve mechanism should be implemented across
+ * all ATAPI + SCSI devices.
+ */
+static void pkt_dump_sense(struct packet_command *cgc)
+{
+ static char *info[9] = { "No sense", "Recovered error", "Not ready",
+ "Medium error", "Hardware error", "Illegal request",
+ "Unit attention", "Data protect", "Blank check" };
+ int i;
+ struct request_sense *sense = cgc->sense;
+
+ printk(DRIVER_NAME":");
+ for (i = 0; i < CDROM_PACKET_SIZE; i++)
+ printk(" %02x", cgc->cmd[i]);
+ printk(" - ");
+
+ if (sense == NULL) {
+ printk("no sense\n");
+ return;
+ }
+
+ printk("sense %02x.%02x.%02x", sense->sense_key, sense->asc, sense->ascq);
+
+ if (sense->sense_key > 8) {
+ printk(" (INVALID)\n");
+ return;
+ }
+
+ printk(" (%s)\n", info[sense->sense_key]);
+}
+
+/*
+ * flush the drive cache to media
+ */
+static int pkt_flush_cache(struct pktcdvd_device *pd)
+{
+ struct packet_command cgc;
+
+ init_cdrom_command(&cgc, NULL, 0, CGC_DATA_NONE);
+ cgc.cmd[0] = GPCMD_FLUSH_CACHE;
+ cgc.quiet = 1;
+
+ /*
+ * the IMMED bit -- we default to not setting it; although setting
+ * it would allow a much faster close, leaving it clear is safer
+ */
+#if 0
+ cgc.cmd[1] = 1 << 1;
+#endif
+ return pkt_generic_packet(pd, &cgc);
+}
+
+/*
+ * speed is given as the normal factor, e.g. 4 for 4x
+ */
+static noinline_for_stack int pkt_set_speed(struct pktcdvd_device *pd,
+ unsigned write_speed, unsigned read_speed)
+{
+ struct packet_command cgc;
+ struct request_sense sense;
+ int ret;
+
+ init_cdrom_command(&cgc, NULL, 0, CGC_DATA_NONE);
+ cgc.sense = &sense;
+ cgc.cmd[0] = GPCMD_SET_SPEED;
+ cgc.cmd[2] = (read_speed >> 8) & 0xff;
+ cgc.cmd[3] = read_speed & 0xff;
+ cgc.cmd[4] = (write_speed >> 8) & 0xff;
+ cgc.cmd[5] = write_speed & 0xff;
+
+ if ((ret = pkt_generic_packet(pd, &cgc)))
+ pkt_dump_sense(&cgc);
+
+ return ret;
+}
+
+/*
+ * Queue a bio for processing by the low-level CD device. Must be called
+ * from process context.
+ */
+static void pkt_queue_bio(struct pktcdvd_device *pd, struct bio *bio)
+{
+ spin_lock(&pd->iosched.lock);
+ if (bio_data_dir(bio) == READ)
+ bio_list_add(&pd->iosched.read_queue, bio);
+ else
+ bio_list_add(&pd->iosched.write_queue, bio);
+ spin_unlock(&pd->iosched.lock);
+
+ atomic_set(&pd->iosched.attention, 1);
+ wake_up(&pd->wqueue);
+}
+
+/*
+ * Process the queued read/write requests. This function handles special
+ * requirements for CDRW drives:
+ * - A cache flush command must be inserted before a read request if the
+ * previous request was a write.
+ * - Switching between reading and writing is slow, so don't do it more often
+ * than necessary.
+ * - Optimize for throughput at the expense of latency. This means that streaming
+ * writes will never be interrupted by a read, but if the drive has to seek
+ * before the next write, switch to reading instead if there are any pending
+ * read requests.
+ * - Set the read speed according to current usage pattern. When only reading
+ * from the device, it's best to use the highest possible read speed, but
+ * when switching often between reading and writing, it's better to have the
+ * same read and write speeds.
+ */
+static void pkt_iosched_process_queue(struct pktcdvd_device *pd)
+{
+
+ if (atomic_read(&pd->iosched.attention) == 0)
+ return;
+ atomic_set(&pd->iosched.attention, 0);
+
+ for (;;) {
+ struct bio *bio;
+ int reads_queued, writes_queued;
+
+ spin_lock(&pd->iosched.lock);
+ reads_queued = !bio_list_empty(&pd->iosched.read_queue);
+ writes_queued = !bio_list_empty(&pd->iosched.write_queue);
+ spin_unlock(&pd->iosched.lock);
+
+ if (!reads_queued && !writes_queued)
+ break;
+
+ if (pd->iosched.writing) {
+ int need_write_seek = 1;
+ spin_lock(&pd->iosched.lock);
+ bio = bio_list_peek(&pd->iosched.write_queue);
+ spin_unlock(&pd->iosched.lock);
+ if (bio && (bio->bi_sector == pd->iosched.last_write))
+ need_write_seek = 0;
+ if (need_write_seek && reads_queued) {
+ if (atomic_read(&pd->cdrw.pending_bios) > 0) {
+ VPRINTK(DRIVER_NAME": write, waiting\n");
+ break;
+ }
+ pkt_flush_cache(pd);
+ pd->iosched.writing = 0;
+ }
+ } else {
+ if (!reads_queued && writes_queued) {
+ if (atomic_read(&pd->cdrw.pending_bios) > 0) {
+ VPRINTK(DRIVER_NAME": read, waiting\n");
+ break;
+ }
+ pd->iosched.writing = 1;
+ }
+ }
+
+ spin_lock(&pd->iosched.lock);
+ if (pd->iosched.writing)
+ bio = bio_list_pop(&pd->iosched.write_queue);
+ else
+ bio = bio_list_pop(&pd->iosched.read_queue);
+ spin_unlock(&pd->iosched.lock);
+
+ if (!bio)
+ continue;
+
+ if (bio_data_dir(bio) == READ)
+ pd->iosched.successive_reads += bio->bi_size >> 10;
+ else {
+ pd->iosched.successive_reads = 0;
+ pd->iosched.last_write = bio->bi_sector + bio_sectors(bio);
+ }
+ if (pd->iosched.successive_reads >= HI_SPEED_SWITCH) {
+ if (pd->read_speed == pd->write_speed) {
+ pd->read_speed = MAX_SPEED;
+ pkt_set_speed(pd, pd->write_speed, pd->read_speed);
+ }
+ } else {
+ if (pd->read_speed != pd->write_speed) {
+ pd->read_speed = pd->write_speed;
+ pkt_set_speed(pd, pd->write_speed, pd->read_speed);
+ }
+ }
+
+ atomic_inc(&pd->cdrw.pending_bios);
+ generic_make_request(bio);
+ }
+}
+
+/*
+ * Special care is needed if the underlying block device has a small
+ * max_phys_segments value.
+ */
+static int pkt_set_segment_merging(struct pktcdvd_device *pd, struct request_queue *q)
+{
+ if ((pd->settings.size << 9) / CD_FRAMESIZE
+ <= queue_max_segments(q)) {
+ /*
+ * The cdrom device can handle one segment/frame
+ */
+ clear_bit(PACKET_MERGE_SEGS, &pd->flags);
+ return 0;
+ } else if ((pd->settings.size << 9) / PAGE_SIZE
+ <= queue_max_segments(q)) {
+ /*
+ * We can handle this case at the expense of some extra memory
+ * copies during write operations
+ */
+ set_bit(PACKET_MERGE_SEGS, &pd->flags);
+ return 0;
+ } else {
+ printk(DRIVER_NAME": cdrom max_phys_segments too small\n");
+ return -EIO;
+ }
+}
+
+/*
+ * Copy CD_FRAMESIZE bytes from src_bio into a destination page
+ */
+static void pkt_copy_bio_data(struct bio *src_bio, int seg, int offs, struct page *dst_page, int dst_offs)
+{
+ unsigned int copy_size = CD_FRAMESIZE;
+
+ while (copy_size > 0) {
+ struct bio_vec *src_bvl = bio_iovec_idx(src_bio, seg);
+ void *vfrom = kmap_atomic(src_bvl->bv_page, KM_USER0) +
+ src_bvl->bv_offset + offs;
+ void *vto = page_address(dst_page) + dst_offs;
+ int len = min_t(int, copy_size, src_bvl->bv_len - offs);
+
+ BUG_ON(len < 0);
+ memcpy(vto, vfrom, len);
+ kunmap_atomic(vfrom, KM_USER0);
+
+ seg++;
+ offs = 0;
+ dst_offs += len;
+ copy_size -= len;
+ }
+}
+
+/*
+ * Copy all data for this packet to pkt->pages[], so that
+ * a) The number of required segments for the write bio is minimized, which
+ * is necessary for some scsi controllers.
+ * b) The data can be used as cache to avoid read requests if we receive a
+ * new write request for the same zone.
+ */
+static void pkt_make_local_copy(struct packet_data *pkt, struct bio_vec *bvec)
+{
+ int f, p, offs;
+
+ /* Copy all data to pkt->pages[] */
+ p = 0;
+ offs = 0;
+ for (f = 0; f < pkt->frames; f++) {
+ if (bvec[f].bv_page != pkt->pages[p]) {
+ void *vfrom = kmap_atomic(bvec[f].bv_page, KM_USER0) + bvec[f].bv_offset;
+ void *vto = page_address(pkt->pages[p]) + offs;
+ memcpy(vto, vfrom, CD_FRAMESIZE);
+ kunmap_atomic(vfrom, KM_USER0);
+ bvec[f].bv_page = pkt->pages[p];
+ bvec[f].bv_offset = offs;
+ } else {
+ BUG_ON(bvec[f].bv_offset != offs);
+ }
+ offs += CD_FRAMESIZE;
+ if (offs >= PAGE_SIZE) {
+ offs = 0;
+ p++;
+ }
+ }
+}
+
+static void pkt_end_io_read(struct bio *bio, int err)
+{
+ struct packet_data *pkt = bio->bi_private;
+ struct pktcdvd_device *pd = pkt->pd;
+ BUG_ON(!pd);
+
+ VPRINTK("pkt_end_io_read: bio=%p sec0=%llx sec=%llx err=%d\n", bio,
+ (unsigned long long)pkt->sector, (unsigned long long)bio->bi_sector, err);
+
+ if (err)
+ atomic_inc(&pkt->io_errors);
+ if (atomic_dec_and_test(&pkt->io_wait)) {
+ atomic_inc(&pkt->run_sm);
+ wake_up(&pd->wqueue);
+ }
+ pkt_bio_finished(pd);
+}
+
+static void pkt_end_io_packet_write(struct bio *bio, int err)
+{
+ struct packet_data *pkt = bio->bi_private;
+ struct pktcdvd_device *pd = pkt->pd;
+ BUG_ON(!pd);
+
+ VPRINTK("pkt_end_io_packet_write: id=%d, err=%d\n", pkt->id, err);
+
+ pd->stats.pkt_ended++;
+
+ pkt_bio_finished(pd);
+ atomic_dec(&pkt->io_wait);
+ atomic_inc(&pkt->run_sm);
+ wake_up(&pd->wqueue);
+}
+
+/*
+ * Schedule reads for the holes in a packet
+ */
+static void pkt_gather_data(struct pktcdvd_device *pd, struct packet_data *pkt)
+{
+ int frames_read = 0;
+ struct bio *bio;
+ int f;
+ char written[PACKET_MAX_SIZE];
+
+ BUG_ON(bio_list_empty(&pkt->orig_bios));
+
+ atomic_set(&pkt->io_wait, 0);
+ atomic_set(&pkt->io_errors, 0);
+
+ /*
+ * Figure out which frames we need to read before we can write.
+ */
+ memset(written, 0, sizeof(written));
+ spin_lock(&pkt->lock);
+ bio_list_for_each(bio, &pkt->orig_bios) {
+ int first_frame = (bio->bi_sector - pkt->sector) / (CD_FRAMESIZE >> 9);
+ int num_frames = bio->bi_size / CD_FRAMESIZE;
+ pd->stats.secs_w += num_frames * (CD_FRAMESIZE >> 9);
+ BUG_ON(first_frame < 0);
+ BUG_ON(first_frame + num_frames > pkt->frames);
+ for (f = first_frame; f < first_frame + num_frames; f++)
+ written[f] = 1;
+ }
+ spin_unlock(&pkt->lock);
+
+ if (pkt->cache_valid) {
+ VPRINTK("pkt_gather_data: zone %llx cached\n",
+ (unsigned long long)pkt->sector);
+ goto out_account;
+ }
+
+ /*
+ * Schedule reads for missing parts of the packet.
+ */
+ for (f = 0; f < pkt->frames; f++) {
+ struct bio_vec *vec;
+
+ int p, offset;
+ if (written[f])
+ continue;
+ bio = pkt->r_bios[f];
+ vec = bio->bi_io_vec;
+ bio_init(bio);
+ bio->bi_max_vecs = 1;
+ bio->bi_sector = pkt->sector + f * (CD_FRAMESIZE >> 9);
+ bio->bi_bdev = pd->bdev;
+ bio->bi_end_io = pkt_end_io_read;
+ bio->bi_private = pkt;
+ bio->bi_io_vec = vec;
+ bio->bi_destructor = pkt_bio_destructor;
+
+ p = (f * CD_FRAMESIZE) / PAGE_SIZE;
+ offset = (f * CD_FRAMESIZE) % PAGE_SIZE;
+ VPRINTK("pkt_gather_data: Adding frame %d, page:%p offs:%d\n",
+ f, pkt->pages[p], offset);
+ if (!bio_add_page(bio, pkt->pages[p], CD_FRAMESIZE, offset))
+ BUG();
+
+ atomic_inc(&pkt->io_wait);
+ bio->bi_rw = READ;
+ pkt_queue_bio(pd, bio);
+ frames_read++;
+ }
+
+out_account:
+ VPRINTK("pkt_gather_data: need %d frames for zone %llx\n",
+ frames_read, (unsigned long long)pkt->sector);
+ pd->stats.pkt_started++;
+ pd->stats.secs_rg += frames_read * (CD_FRAMESIZE >> 9);
+}
+
+/*
+ * Find a packet matching zone, or the least recently used packet if
+ * there is no match.
+ */
+static struct packet_data *pkt_get_packet_data(struct pktcdvd_device *pd, int zone)
+{
+ struct packet_data *pkt;
+
+ list_for_each_entry(pkt, &pd->cdrw.pkt_free_list, list) {
+ if (pkt->sector == zone || pkt->list.next == &pd->cdrw.pkt_free_list) {
+ list_del_init(&pkt->list);
+ if (pkt->sector != zone)
+ pkt->cache_valid = 0;
+ return pkt;
+ }
+ }
+ BUG();
+ return NULL;
+}
+
+static void pkt_put_packet_data(struct pktcdvd_device *pd, struct packet_data *pkt)
+{
+ if (pkt->cache_valid) {
+ list_add(&pkt->list, &pd->cdrw.pkt_free_list);
+ } else {
+ list_add_tail(&pkt->list, &pd->cdrw.pkt_free_list);
+ }
+}
+
+/*
+ * recover a failed write, query for relocation if possible
+ *
+ * returns 1 if recovery is possible, or 0 if not
+ *
+ */
+static int pkt_start_recovery(struct packet_data *pkt)
+{
+ /*
+ * FIXME. We need help from the file system to implement
+ * recovery handling.
+ */
+ return 0;
+#if 0
+ struct request *rq = pkt->rq;
+ struct pktcdvd_device *pd = rq->rq_disk->private_data;
+ struct block_device *pkt_bdev;
+ struct super_block *sb = NULL;
+ unsigned long old_block, new_block;
+ sector_t new_sector;
+
+ pkt_bdev = bdget(kdev_t_to_nr(pd->pkt_dev));
+ if (pkt_bdev) {
+ sb = get_super(pkt_bdev);
+ bdput(pkt_bdev);
+ }
+
+ if (!sb)
+ return 0;
+
+ if (!sb->s_op || !sb->s_op->relocate_blocks)
+ goto out;
+
+ old_block = pkt->sector / (CD_FRAMESIZE >> 9);
+ if (sb->s_op->relocate_blocks(sb, old_block, &new_block))
+ goto out;
+
+ new_sector = new_block * (CD_FRAMESIZE >> 9);
+ pkt->sector = new_sector;
+
+ pkt->bio->bi_sector = new_sector;
+ pkt->bio->bi_next = NULL;
+ pkt->bio->bi_flags = 1 << BIO_UPTODATE;
+ pkt->bio->bi_idx = 0;
+
+ BUG_ON(pkt->bio->bi_rw != REQ_WRITE);
+ BUG_ON(pkt->bio->bi_vcnt != pkt->frames);
+ BUG_ON(pkt->bio->bi_size != pkt->frames * CD_FRAMESIZE);
+ BUG_ON(pkt->bio->bi_end_io != pkt_end_io_packet_write);
+ BUG_ON(pkt->bio->bi_private != pkt);
+
+ drop_super(sb);
+ return 1;
+
+out:
+ drop_super(sb);
+ return 0;
+#endif
+}
+
+static inline void pkt_set_state(struct packet_data *pkt, enum packet_data_state state)
+{
+#if PACKET_DEBUG > 1
+ static const char *state_name[] = {
+ "IDLE", "WAITING", "READ_WAIT", "WRITE_WAIT", "RECOVERY", "FINISHED"
+ };
+ enum packet_data_state old_state = pkt->state;
+ VPRINTK("pkt %2d : s=%6llx %s -> %s\n", pkt->id, (unsigned long long)pkt->sector,
+ state_name[old_state], state_name[state]);
+#endif
+ pkt->state = state;
+}
+
+/*
+ * Scan the work queue to see if we can start a new packet.
+ * returns non-zero if any work was done.
+ */
+static int pkt_handle_queue(struct pktcdvd_device *pd)
+{
+ struct packet_data *pkt, *p;
+ struct bio *bio = NULL;
+ sector_t zone = 0; /* Suppress gcc warning */
+ struct pkt_rb_node *node, *first_node;
+ struct rb_node *n;
+ int wakeup;
+
+ VPRINTK("handle_queue\n");
+
+ atomic_set(&pd->scan_queue, 0);
+
+ if (list_empty(&pd->cdrw.pkt_free_list)) {
+ VPRINTK("handle_queue: no pkt\n");
+ return 0;
+ }
+
+ /*
+ * Try to find a zone we are not already working on.
+ */
+ spin_lock(&pd->lock);
+ first_node = pkt_rbtree_find(pd, pd->current_sector);
+ if (!first_node) {
+ n = rb_first(&pd->bio_queue);
+ if (n)
+ first_node = rb_entry(n, struct pkt_rb_node, rb_node);
+ }
+ node = first_node;
+ while (node) {
+ bio = node->bio;
+ zone = ZONE(bio->bi_sector, pd);
+ list_for_each_entry(p, &pd->cdrw.pkt_active_list, list) {
+ if (p->sector == zone) {
+ bio = NULL;
+ goto try_next_bio;
+ }
+ }
+ break;
+try_next_bio:
+ node = pkt_rbtree_next(node);
+ if (!node) {
+ n = rb_first(&pd->bio_queue);
+ if (n)
+ node = rb_entry(n, struct pkt_rb_node, rb_node);
+ }
+ if (node == first_node)
+ node = NULL;
+ }
+ spin_unlock(&pd->lock);
+ if (!bio) {
+ VPRINTK("handle_queue: no bio\n");
+ return 0;
+ }
+
+ pkt = pkt_get_packet_data(pd, zone);
+
+ pd->current_sector = zone + pd->settings.size;
+ pkt->sector = zone;
+ BUG_ON(pkt->frames != pd->settings.size >> 2);
+ pkt->write_size = 0;
+
+ /*
+ * Scan work queue for bios in the same zone and link them
+ * to this packet.
+ */
+ spin_lock(&pd->lock);
+ VPRINTK("pkt_handle_queue: looking for zone %llx\n", (unsigned long long)zone);
+ while ((node = pkt_rbtree_find(pd, zone)) != NULL) {
+ bio = node->bio;
+ VPRINTK("pkt_handle_queue: found zone=%llx\n",
+ (unsigned long long)ZONE(bio->bi_sector, pd));
+ if (ZONE(bio->bi_sector, pd) != zone)
+ break;
+ pkt_rbtree_erase(pd, node);
+ spin_lock(&pkt->lock);
+ bio_list_add(&pkt->orig_bios, bio);
+ pkt->write_size += bio->bi_size / CD_FRAMESIZE;
+ spin_unlock(&pkt->lock);
+ }
+ /* check write congestion marks, and if bio_queue_size has
+ dropped below the congestion-off mark, wake up any waiters */
+ wakeup = (pd->write_congestion_on > 0
+ && pd->bio_queue_size <= pd->write_congestion_off);
+ spin_unlock(&pd->lock);
+ if (wakeup) {
+ clear_bdi_congested(&pd->disk->queue->backing_dev_info,
+ BLK_RW_ASYNC);
+ }
+
+ pkt->sleep_time = max(PACKET_WAIT_TIME, 1);
+ pkt_set_state(pkt, PACKET_WAITING_STATE);
+ atomic_set(&pkt->run_sm, 1);
+
+ spin_lock(&pd->cdrw.active_list_lock);
+ list_add(&pkt->list, &pd->cdrw.pkt_active_list);
+ spin_unlock(&pd->cdrw.active_list_lock);
+
+ return 1;
+}
+
+/*
+ * Assemble a bio to write one packet and queue the bio for processing
+ * by the underlying block device.
+ */
+static void pkt_start_write(struct pktcdvd_device *pd, struct packet_data *pkt)
+{
+ struct bio *bio;
+ int f;
+ int frames_write;
+ struct bio_vec *bvec = pkt->w_bio->bi_io_vec;
+
+ for (f = 0; f < pkt->frames; f++) {
+ bvec[f].bv_page = pkt->pages[(f * CD_FRAMESIZE) / PAGE_SIZE];
+ bvec[f].bv_offset = (f * CD_FRAMESIZE) % PAGE_SIZE;
+ }
+
+ /*
+ * Fill-in bvec with data from orig_bios.
+ */
+ frames_write = 0;
+ spin_lock(&pkt->lock);
+ bio_list_for_each(bio, &pkt->orig_bios) {
+ int segment = bio->bi_idx;
+ int src_offs = 0;
+ int first_frame = (bio->bi_sector - pkt->sector) / (CD_FRAMESIZE >> 9);
+ int num_frames = bio->bi_size / CD_FRAMESIZE;
+ BUG_ON(first_frame < 0);
+ BUG_ON(first_frame + num_frames > pkt->frames);
+ for (f = first_frame; f < first_frame + num_frames; f++) {
+ struct bio_vec *src_bvl = bio_iovec_idx(bio, segment);
+
+ while (src_offs >= src_bvl->bv_len) {
+ src_offs -= src_bvl->bv_len;
+ segment++;
+ BUG_ON(segment >= bio->bi_vcnt);
+ src_bvl = bio_iovec_idx(bio, segment);
+ }
+
+ if (src_bvl->bv_len - src_offs >= CD_FRAMESIZE) {
+ bvec[f].bv_page = src_bvl->bv_page;
+ bvec[f].bv_offset = src_bvl->bv_offset + src_offs;
+ } else {
+ pkt_copy_bio_data(bio, segment, src_offs,
+ bvec[f].bv_page, bvec[f].bv_offset);
+ }
+ src_offs += CD_FRAMESIZE;
+ frames_write++;
+ }
+ }
+ pkt_set_state(pkt, PACKET_WRITE_WAIT_STATE);
+ spin_unlock(&pkt->lock);
+
+ VPRINTK("pkt_start_write: Writing %d frames for zone %llx\n",
+ frames_write, (unsigned long long)pkt->sector);
+ BUG_ON(frames_write != pkt->write_size);
+
+ if (test_bit(PACKET_MERGE_SEGS, &pd->flags) || (pkt->write_size < pkt->frames)) {
+ pkt_make_local_copy(pkt, bvec);
+ pkt->cache_valid = 1;
+ } else {
+ pkt->cache_valid = 0;
+ }
+
+ /* Start the write request */
+ bio_init(pkt->w_bio);
+ pkt->w_bio->bi_max_vecs = PACKET_MAX_SIZE;
+ pkt->w_bio->bi_sector = pkt->sector;
+ pkt->w_bio->bi_bdev = pd->bdev;
+ pkt->w_bio->bi_end_io = pkt_end_io_packet_write;
+ pkt->w_bio->bi_private = pkt;
+ pkt->w_bio->bi_io_vec = bvec;
+ pkt->w_bio->bi_destructor = pkt_bio_destructor;
+ for (f = 0; f < pkt->frames; f++)
+ if (!bio_add_page(pkt->w_bio, bvec[f].bv_page, CD_FRAMESIZE, bvec[f].bv_offset))
+ BUG();
+ VPRINTK(DRIVER_NAME": vcnt=%d\n", pkt->w_bio->bi_vcnt);
+
+ atomic_set(&pkt->io_wait, 1);
+ pkt->w_bio->bi_rw = WRITE;
+ pkt_queue_bio(pd, pkt->w_bio);
+}
+
+static void pkt_finish_packet(struct packet_data *pkt, int uptodate)
+{
+ struct bio *bio;
+
+ if (!uptodate)
+ pkt->cache_valid = 0;
+
+ /* Finish all bios corresponding to this packet */
+ while ((bio = bio_list_pop(&pkt->orig_bios)))
+ bio_endio(bio, uptodate ? 0 : -EIO);
+}
+
+static void pkt_run_state_machine(struct pktcdvd_device *pd, struct packet_data *pkt)
+{
+ int uptodate;
+
+ VPRINTK("run_state_machine: pkt %d\n", pkt->id);
+
+ for (;;) {
+ switch (pkt->state) {
+ case PACKET_WAITING_STATE:
+ if ((pkt->write_size < pkt->frames) && (pkt->sleep_time > 0))
+ return;
+
+ pkt->sleep_time = 0;
+ pkt_gather_data(pd, pkt);
+ pkt_set_state(pkt, PACKET_READ_WAIT_STATE);
+ break;
+
+ case PACKET_READ_WAIT_STATE:
+ if (atomic_read(&pkt->io_wait) > 0)
+ return;
+
+ if (atomic_read(&pkt->io_errors) > 0) {
+ pkt_set_state(pkt, PACKET_RECOVERY_STATE);
+ } else {
+ pkt_start_write(pd, pkt);
+ }
+ break;
+
+ case PACKET_WRITE_WAIT_STATE:
+ if (atomic_read(&pkt->io_wait) > 0)
+ return;
+
+ if (test_bit(BIO_UPTODATE, &pkt->w_bio->bi_flags)) {
+ pkt_set_state(pkt, PACKET_FINISHED_STATE);
+ } else {
+ pkt_set_state(pkt, PACKET_RECOVERY_STATE);
+ }
+ break;
+
+ case PACKET_RECOVERY_STATE:
+ if (pkt_start_recovery(pkt)) {
+ pkt_start_write(pd, pkt);
+ } else {
+ VPRINTK("No recovery possible\n");
+ pkt_set_state(pkt, PACKET_FINISHED_STATE);
+ }
+ break;
+
+ case PACKET_FINISHED_STATE:
+ uptodate = test_bit(BIO_UPTODATE, &pkt->w_bio->bi_flags);
+ pkt_finish_packet(pkt, uptodate);
+ return;
+
+ default:
+ BUG();
+ break;
+ }
+ }
+}
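
For orientation, the transitions driven by the loop above can be read directly off the switch cases (this is only a summary, not part of the driver):

    WAITING    -> READ_WAIT    pkt_gather_data() issued (packet full or wait timed out)
    READ_WAIT  -> WRITE_WAIT   all reads finished, pkt_start_write() issued
    READ_WAIT  -> RECOVERY     read errors were seen
    WRITE_WAIT -> FINISHED     w_bio completed uptodate
    WRITE_WAIT -> RECOVERY     the packet write failed
    RECOVERY   -> WRITE_WAIT   pkt_start_recovery() succeeded, packet is rewritten
    RECOVERY   -> FINISHED     no recovery possible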
+
+static void pkt_handle_packets(struct pktcdvd_device *pd)
+{
+ struct packet_data *pkt, *next;
+
+ VPRINTK("pkt_handle_packets\n");
+
+ /*
+ * Run state machine for active packets
+ */
+ list_for_each_entry(pkt, &pd->cdrw.pkt_active_list, list) {
+ if (atomic_read(&pkt->run_sm) > 0) {
+ atomic_set(&pkt->run_sm, 0);
+ pkt_run_state_machine(pd, pkt);
+ }
+ }
+
+ /*
+ * Move no longer active packets to the free list
+ */
+ spin_lock(&pd->cdrw.active_list_lock);
+ list_for_each_entry_safe(pkt, next, &pd->cdrw.pkt_active_list, list) {
+ if (pkt->state == PACKET_FINISHED_STATE) {
+ list_del(&pkt->list);
+ pkt_put_packet_data(pd, pkt);
+ pkt_set_state(pkt, PACKET_IDLE_STATE);
+ atomic_set(&pd->scan_queue, 1);
+ }
+ }
+ spin_unlock(&pd->cdrw.active_list_lock);
+}
+
+static void pkt_count_states(struct pktcdvd_device *pd, int *states)
+{
+ struct packet_data *pkt;
+ int i;
+
+ for (i = 0; i < PACKET_NUM_STATES; i++)
+ states[i] = 0;
+
+ spin_lock(&pd->cdrw.active_list_lock);
+ list_for_each_entry(pkt, &pd->cdrw.pkt_active_list, list) {
+ states[pkt->state]++;
+ }
+ spin_unlock(&pd->cdrw.active_list_lock);
+}
+
+/*
+ * kcdrwd is woken up when writes have been queued for one of our
+ * registered devices
+ */
+static int kcdrwd(void *foobar)
+{
+ struct pktcdvd_device *pd = foobar;
+ struct packet_data *pkt;
+ long min_sleep_time, residue;
+
+ set_user_nice(current, -20);
+ set_freezable();
+
+ for (;;) {
+ DECLARE_WAITQUEUE(wait, current);
+
+ /*
+ * Wait until there is something to do
+ */
+ add_wait_queue(&pd->wqueue, &wait);
+ for (;;) {
+ set_current_state(TASK_INTERRUPTIBLE);
+
+ /* Check if we need to run pkt_handle_queue */
+ if (atomic_read(&pd->scan_queue) > 0)
+ goto work_to_do;
+
+ /* Check if we need to run the state machine for some packet */
+ list_for_each_entry(pkt, &pd->cdrw.pkt_active_list, list) {
+ if (atomic_read(&pkt->run_sm) > 0)
+ goto work_to_do;
+ }
+
+ /* Check if we need to process the iosched queues */
+ if (atomic_read(&pd->iosched.attention) != 0)
+ goto work_to_do;
+
+ /* Otherwise, go to sleep */
+ if (PACKET_DEBUG > 1) {
+ int states[PACKET_NUM_STATES];
+ pkt_count_states(pd, states);
+ VPRINTK("kcdrwd: i:%d ow:%d rw:%d ww:%d rec:%d fin:%d\n",
+ states[0], states[1], states[2], states[3],
+ states[4], states[5]);
+ }
+
+ min_sleep_time = MAX_SCHEDULE_TIMEOUT;
+ list_for_each_entry(pkt, &pd->cdrw.pkt_active_list, list) {
+ if (pkt->sleep_time && pkt->sleep_time < min_sleep_time)
+ min_sleep_time = pkt->sleep_time;
+ }
+
+ VPRINTK("kcdrwd: sleeping\n");
+ residue = schedule_timeout(min_sleep_time);
+ VPRINTK("kcdrwd: wake up\n");
+
+ /* make swsusp happy with our thread */
+ try_to_freeze();
+
+ list_for_each_entry(pkt, &pd->cdrw.pkt_active_list, list) {
+ if (!pkt->sleep_time)
+ continue;
+ pkt->sleep_time -= min_sleep_time - residue;
+ if (pkt->sleep_time <= 0) {
+ pkt->sleep_time = 0;
+ atomic_inc(&pkt->run_sm);
+ }
+ }
+
+ if (kthread_should_stop())
+ break;
+ }
+work_to_do:
+ set_current_state(TASK_RUNNING);
+ remove_wait_queue(&pd->wqueue, &wait);
+
+ if (kthread_should_stop())
+ break;
+
+ /*
+ * if pkt_handle_queue returns true, we can queue
+ * another request.
+ */
+ while (pkt_handle_queue(pd))
+ ;
+
+ /*
+ * Handle packet state machine
+ */
+ pkt_handle_packets(pd);
+
+ /*
+ * Handle iosched queues
+ */
+ pkt_iosched_process_queue(pd);
+ }
+
+ return 0;
+}
+
+static void pkt_print_settings(struct pktcdvd_device *pd)
+{
+ printk(DRIVER_NAME": %s packets, ", pd->settings.fp ? "Fixed" : "Variable");
+ printk("%u blocks, ", pd->settings.size >> 2);
+ printk("Mode-%c disc\n", pd->settings.block_mode == 8 ? '1' : '2');
+}
+
+static int pkt_mode_sense(struct pktcdvd_device *pd, struct packet_command *cgc, int page_code, int page_control)
+{
+ memset(cgc->cmd, 0, sizeof(cgc->cmd));
+
+ cgc->cmd[0] = GPCMD_MODE_SENSE_10;
+ cgc->cmd[2] = page_code | (page_control << 6);
+ cgc->cmd[7] = cgc->buflen >> 8;
+ cgc->cmd[8] = cgc->buflen & 0xff;
+ cgc->data_direction = CGC_DATA_READ;
+ return pkt_generic_packet(pd, cgc);
+}
+
+static int pkt_mode_select(struct pktcdvd_device *pd, struct packet_command *cgc)
+{
+ memset(cgc->cmd, 0, sizeof(cgc->cmd));
+ memset(cgc->buffer, 0, 2);
+ cgc->cmd[0] = GPCMD_MODE_SELECT_10;
+ cgc->cmd[1] = 0x10; /* PF */
+ cgc->cmd[7] = cgc->buflen >> 8;
+ cgc->cmd[8] = cgc->buflen & 0xff;
+ cgc->data_direction = CGC_DATA_WRITE;
+ return pkt_generic_packet(pd, cgc);
+}
+
+static int pkt_get_disc_info(struct pktcdvd_device *pd, disc_information *di)
+{
+ struct packet_command cgc;
+ int ret;
+
+ /* set up command and get the disc info */
+ init_cdrom_command(&cgc, di, sizeof(*di), CGC_DATA_READ);
+ cgc.cmd[0] = GPCMD_READ_DISC_INFO;
+ cgc.cmd[8] = cgc.buflen = 2;
+ cgc.quiet = 1;
+
+ if ((ret = pkt_generic_packet(pd, &cgc)))
+ return ret;
+
+ /* not all drives have the same disc_info length, so re-issue the
+ * command with the length the drive tells us it can supply
+ */
+ cgc.buflen = be16_to_cpu(di->disc_information_length) +
+ sizeof(di->disc_information_length);
+
+ if (cgc.buflen > sizeof(disc_information))
+ cgc.buflen = sizeof(disc_information);
+
+ cgc.cmd[8] = cgc.buflen;
+ return pkt_generic_packet(pd, &cgc);
+}
+
+static int pkt_get_track_info(struct pktcdvd_device *pd, __u16 track, __u8 type, track_information *ti)
+{
+ struct packet_command cgc;
+ int ret;
+
+ init_cdrom_command(&cgc, ti, 8, CGC_DATA_READ);
+ cgc.cmd[0] = GPCMD_READ_TRACK_RZONE_INFO;
+ cgc.cmd[1] = type & 3;
+ cgc.cmd[4] = (track & 0xff00) >> 8;
+ cgc.cmd[5] = track & 0xff;
+ cgc.cmd[8] = 8;
+ cgc.quiet = 1;
+
+ if ((ret = pkt_generic_packet(pd, &cgc)))
+ return ret;
+
+ cgc.buflen = be16_to_cpu(ti->track_information_length) +
+ sizeof(ti->track_information_length);
+
+ if (cgc.buflen > sizeof(track_information))
+ cgc.buflen = sizeof(track_information);
+
+ cgc.cmd[8] = cgc.buflen;
+ return pkt_generic_packet(pd, &cgc);
+}
+
+static noinline_for_stack int pkt_get_last_written(struct pktcdvd_device *pd,
+ long *last_written)
+{
+ disc_information di;
+ track_information ti;
+ __u32 last_track;
+ int ret = -1;
+
+ if ((ret = pkt_get_disc_info(pd, &di)))
+ return ret;
+
+ last_track = (di.last_track_msb << 8) | di.last_track_lsb;
+ if ((ret = pkt_get_track_info(pd, last_track, 1, &ti)))
+ return ret;
+
+ /* if this track is blank, try the previous. */
+ if (ti.blank) {
+ last_track--;
+ if ((ret = pkt_get_track_info(pd, last_track, 1, &ti)))
+ return ret;
+ }
+
+ /* if last recorded field is valid, return it. */
+ if (ti.lra_v) {
+ *last_written = be32_to_cpu(ti.last_rec_address);
+ } else {
+ /* make it up instead */
+ *last_written = be32_to_cpu(ti.track_start) +
+ be32_to_cpu(ti.track_size);
+ if (ti.free_blocks)
+ *last_written -= (be32_to_cpu(ti.free_blocks) + 7);
+ }
+ return 0;
+}
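
When the last-recorded-address field is not valid, the fallback above estimates the high-water mark as track_start + track_size minus the free blocks plus 7; the extra 7 blocks presumably account for the packet link area (the driver sets settings.link_loss = 7 further down). A rough worked example with made-up numbers:

    /* illustrative values only; the real ones come from READ TRACK INFORMATION */
    long track_start = 0, track_size = 300000, free_blocks = 120000;
    long last_written = track_start + track_size;

    if (free_blocks)
            last_written -= free_blocks + 7;   /* -> 179993 */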
+
+/*
+ * send a mode select with the write parameters page, built from pd->settings
+ */
+static noinline_for_stack int pkt_set_write_settings(struct pktcdvd_device *pd)
+{
+ struct packet_command cgc;
+ struct request_sense sense;
+ write_param_page *wp;
+ char buffer[128];
+ int ret, size;
+
+ /* doesn't apply to DVD+RW or DVD-RAM */
+ if ((pd->mmc3_profile == 0x1a) || (pd->mmc3_profile == 0x12))
+ return 0;
+
+ memset(buffer, 0, sizeof(buffer));
+ init_cdrom_command(&cgc, buffer, sizeof(*wp), CGC_DATA_READ);
+ cgc.sense = &sense;
+ if ((ret = pkt_mode_sense(pd, &cgc, GPMODE_WRITE_PARMS_PAGE, 0))) {
+ pkt_dump_sense(&cgc);
+ return ret;
+ }
+
+ size = 2 + ((buffer[0] << 8) | (buffer[1] & 0xff));
+ pd->mode_offset = (buffer[6] << 8) | (buffer[7] & 0xff);
+ if (size > sizeof(buffer))
+ size = sizeof(buffer);
+
+ /*
+ * now get it all
+ */
+ init_cdrom_command(&cgc, buffer, size, CGC_DATA_READ);
+ cgc.sense = &sense;
+ if ((ret = pkt_mode_sense(pd, &cgc, GPMODE_WRITE_PARMS_PAGE, 0))) {
+ pkt_dump_sense(&cgc);
+ return ret;
+ }
+
+ /*
+ * write page is offset header + block descriptor length
+ */
+ wp = (write_param_page *) &buffer[sizeof(struct mode_page_header) + pd->mode_offset];
+
+ wp->fp = pd->settings.fp;
+ wp->track_mode = pd->settings.track_mode;
+ wp->write_type = pd->settings.write_type;
+ wp->data_block_type = pd->settings.block_mode;
+
+ wp->multi_session = 0;
+
+#ifdef PACKET_USE_LS
+ wp->link_size = 7;
+ wp->ls_v = 1;
+#endif
+
+ if (wp->data_block_type == PACKET_BLOCK_MODE1) {
+ wp->session_format = 0;
+ wp->subhdr2 = 0x20;
+ } else if (wp->data_block_type == PACKET_BLOCK_MODE2) {
+ wp->session_format = 0x20;
+ wp->subhdr2 = 8;
+#if 0
+ wp->mcn[0] = 0x80;
+ memcpy(&wp->mcn[1], PACKET_MCN, sizeof(wp->mcn) - 1);
+#endif
+ } else {
+ /*
+ * paranoia
+ */
+ printk(DRIVER_NAME": write mode wrong %d\n", wp->data_block_type);
+ return 1;
+ }
+ wp->packet_size = cpu_to_be32(pd->settings.size >> 2);
+
+ cgc.buflen = cgc.cmd[8] = size;
+ if ((ret = pkt_mode_select(pd, &cgc))) {
+ pkt_dump_sense(&cgc);
+ return ret;
+ }
+
+ pkt_print_settings(pd);
+ return 0;
+}
+
+/*
+ * 1 -- we can write to this track, 0 -- we can't
+ */
+static int pkt_writable_track(struct pktcdvd_device *pd, track_information *ti)
+{
+ switch (pd->mmc3_profile) {
+ case 0x1a: /* DVD+RW */
+ case 0x12: /* DVD-RAM */
+ /* The track is always writable on DVD+RW/DVD-RAM */
+ return 1;
+ default:
+ break;
+ }
+
+ if (!ti->packet || !ti->fp)
+ return 0;
+
+ /*
+ * "good" settings as per Mt Fuji.
+ */
+ if (ti->rt == 0 && ti->blank == 0)
+ return 1;
+
+ if (ti->rt == 0 && ti->blank == 1)
+ return 1;
+
+ if (ti->rt == 1 && ti->blank == 0)
+ return 1;
+
+ printk(DRIVER_NAME": bad state %d-%d-%d\n", ti->rt, ti->blank, ti->packet);
+ return 0;
+}
+
+/*
+ * 1 -- we can write to this disc, 0 -- we can't
+ */
+static int pkt_writable_disc(struct pktcdvd_device *pd, disc_information *di)
+{
+ switch (pd->mmc3_profile) {
+ case 0x0a: /* CD-RW */
+ case 0xffff: /* MMC3 not supported */
+ break;
+ case 0x1a: /* DVD+RW */
+ case 0x13: /* DVD-RW */
+ case 0x12: /* DVD-RAM */
+ return 1;
+ default:
+ VPRINTK(DRIVER_NAME": Wrong disc profile (%x)\n", pd->mmc3_profile);
+ return 0;
+ }
+
+ /*
+ * For disc type 0xff we should probably reserve a new track,
+ * but I'm not sure; should we leave this to user apps? Probably.
+ */
+ if (di->disc_type == 0xff) {
+ printk(DRIVER_NAME": Unknown disc. No track?\n");
+ return 0;
+ }
+
+ if (di->disc_type != 0x20 && di->disc_type != 0) {
+ printk(DRIVER_NAME": Wrong disc type (%x)\n", di->disc_type);
+ return 0;
+ }
+
+ if (di->erasable == 0) {
+ printk(DRIVER_NAME": Disc not erasable\n");
+ return 0;
+ }
+
+ if (di->border_status == PACKET_SESSION_RESERVED) {
+ printk(DRIVER_NAME": Can't write to last track (reserved)\n");
+ return 0;
+ }
+
+ return 1;
+}
+
+static noinline_for_stack int pkt_probe_settings(struct pktcdvd_device *pd)
+{
+ struct packet_command cgc;
+ unsigned char buf[12];
+ disc_information di;
+ track_information ti;
+ int ret, track;
+
+ init_cdrom_command(&cgc, buf, sizeof(buf), CGC_DATA_READ);
+ cgc.cmd[0] = GPCMD_GET_CONFIGURATION;
+ cgc.cmd[8] = 8;
+ ret = pkt_generic_packet(pd, &cgc);
+ pd->mmc3_profile = ret ? 0xffff : buf[6] << 8 | buf[7];
+
+ memset(&di, 0, sizeof(disc_information));
+ memset(&ti, 0, sizeof(track_information));
+
+ if ((ret = pkt_get_disc_info(pd, &di))) {
+ printk("failed get_disc\n");
+ return ret;
+ }
+
+ if (!pkt_writable_disc(pd, &di))
+ return -EROFS;
+
+ pd->type = di.erasable ? PACKET_CDRW : PACKET_CDR;
+
+ track = 1; /* (di.last_track_msb << 8) | di.last_track_lsb; */
+ if ((ret = pkt_get_track_info(pd, track, 1, &ti))) {
+ printk(DRIVER_NAME": failed get_track\n");
+ return ret;
+ }
+
+ if (!pkt_writable_track(pd, &ti)) {
+ printk(DRIVER_NAME": can't write to this track\n");
+ return -EROFS;
+ }
+
+ /*
+ * we keep the packet size in 512-byte units, which makes request
+ * calculations easier to deal with.
+ */
+ pd->settings.size = be32_to_cpu(ti.fixed_packet_size) << 2;
+ if (pd->settings.size == 0) {
+ printk(DRIVER_NAME": detected zero packet size!\n");
+ return -ENXIO;
+ }
+ if (pd->settings.size > PACKET_MAX_SECTORS) {
+ printk(DRIVER_NAME": packet size is too big\n");
+ return -EROFS;
+ }
+ pd->settings.fp = ti.fp;
+ pd->offset = (be32_to_cpu(ti.track_start) << 2) & (pd->settings.size - 1);
+
+ if (ti.nwa_v) {
+ pd->nwa = be32_to_cpu(ti.next_writable);
+ set_bit(PACKET_NWA_VALID, &pd->flags);
+ }
+
+ /*
+ * In theory we could use the last recorded address (lra) on -RW media
+ * as well and just zero blocks that haven't been written yet, but in
+ * practice that is just a no-go. We'll use that for -R, naturally.
+ */
+ if (ti.lra_v) {
+ pd->lra = be32_to_cpu(ti.last_rec_address);
+ set_bit(PACKET_LRA_VALID, &pd->flags);
+ } else {
+ pd->lra = 0xffffffff;
+ set_bit(PACKET_LRA_VALID, &pd->flags);
+ }
+
+ /*
+ * fine for now
+ */
+ pd->settings.link_loss = 7;
+ pd->settings.write_type = 0; /* packet */
+ pd->settings.track_mode = ti.track_mode;
+
+ /*
+ * mode1 or mode2 disc
+ */
+ switch (ti.data_mode) {
+ case PACKET_MODE1:
+ pd->settings.block_mode = PACKET_BLOCK_MODE1;
+ break;
+ case PACKET_MODE2:
+ pd->settings.block_mode = PACKET_BLOCK_MODE2;
+ break;
+ default:
+ printk(DRIVER_NAME": unknown data mode\n");
+ return -EROFS;
+ }
+ return 0;
+}
+
+/*
+ * enable/disable write caching on drive
+ */
+static noinline_for_stack int pkt_write_caching(struct pktcdvd_device *pd,
+ int set)
+{
+ struct packet_command cgc;
+ struct request_sense sense;
+ unsigned char buf[64];
+ int ret;
+
+ init_cdrom_command(&cgc, buf, sizeof(buf), CGC_DATA_READ);
+ cgc.sense = &sense;
+ cgc.buflen = pd->mode_offset + 12;
+
+ /*
+ * caching mode page might not be there, so quiet this command
+ */
+ cgc.quiet = 1;
+
+ if ((ret = pkt_mode_sense(pd, &cgc, GPMODE_WCACHING_PAGE, 0)))
+ return ret;
+
+ buf[pd->mode_offset + 10] |= (!!set << 2);
+
+ cgc.buflen = cgc.cmd[8] = 2 + ((buf[0] << 8) | (buf[1] & 0xff));
+ ret = pkt_mode_select(pd, &cgc);
+ if (ret) {
+ printk(DRIVER_NAME": write caching control failed\n");
+ pkt_dump_sense(&cgc);
+ } else if (!ret && set)
+ printk(DRIVER_NAME": enabled write caching on %s\n", pd->name);
+ return ret;
+}
+
+static int pkt_lock_door(struct pktcdvd_device *pd, int lockflag)
+{
+ struct packet_command cgc;
+
+ init_cdrom_command(&cgc, NULL, 0, CGC_DATA_NONE);
+ cgc.cmd[0] = GPCMD_PREVENT_ALLOW_MEDIUM_REMOVAL;
+ cgc.cmd[4] = lockflag ? 1 : 0;
+ return pkt_generic_packet(pd, &cgc);
+}
+
+/*
+ * Returns drive maximum write speed
+ */
+static noinline_for_stack int pkt_get_max_speed(struct pktcdvd_device *pd,
+ unsigned *write_speed)
+{
+ struct packet_command cgc;
+ struct request_sense sense;
+ unsigned char buf[256+18];
+ unsigned char *cap_buf;
+ int ret, offset;
+
+ cap_buf = &buf[sizeof(struct mode_page_header) + pd->mode_offset];
+ init_cdrom_command(&cgc, buf, sizeof(buf), CGC_DATA_UNKNOWN);
+ cgc.sense = &sense;
+
+ ret = pkt_mode_sense(pd, &cgc, GPMODE_CAPABILITIES_PAGE, 0);
+ if (ret) {
+ cgc.buflen = pd->mode_offset + cap_buf[1] + 2 +
+ sizeof(struct mode_page_header);
+ ret = pkt_mode_sense(pd, &cgc, GPMODE_CAPABILITIES_PAGE, 0);
+ if (ret) {
+ pkt_dump_sense(&cgc);
+ return ret;
+ }
+ }
+
+ offset = 20; /* Obsoleted field, used by older drives */
+ if (cap_buf[1] >= 28)
+ offset = 28; /* Current write speed selected */
+ if (cap_buf[1] >= 30) {
+ /* If the drive reports at least one "Logical Unit Write
+ * Speed Performance Descriptor Block", use the information
+ * in the first block, which contains the highest speed.
+ */
+ int num_spdb = (cap_buf[30] << 8) + cap_buf[31];
+ if (num_spdb > 0)
+ offset = 34;
+ }
+
+ *write_speed = (cap_buf[offset] << 8) | cap_buf[offset + 1];
+ return 0;
+}
+
+/* These tables are from cdrecord - I don't have the Orange Book */
+/* standard speed CD-RW (1-4x) */
+static char clv_to_speed[16] = {
+ /* 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 */
+ 0, 2, 4, 6, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+};
+/* high speed CD-RW (up to 10x) */
+static char hs_clv_to_speed[16] = {
+ /* 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 */
+ 0, 2, 4, 6, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+};
+/* ultra high speed CD-RW */
+static char us_clv_to_speed[16] = {
+ /* 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 */
+ 0, 2, 4, 8, 0, 0,16, 0,24,32,40,48, 0, 0, 0, 0
+};
+
+/*
+ * reads the maximum media speed from ATIP
+ */
+static noinline_for_stack int pkt_media_speed(struct pktcdvd_device *pd,
+ unsigned *speed)
+{
+ struct packet_command cgc;
+ struct request_sense sense;
+ unsigned char buf[64];
+ unsigned int size, st, sp;
+ int ret;
+
+ init_cdrom_command(&cgc, buf, 2, CGC_DATA_READ);
+ cgc.sense = &sense;
+ cgc.cmd[0] = GPCMD_READ_TOC_PMA_ATIP;
+ cgc.cmd[1] = 2;
+ cgc.cmd[2] = 4; /* READ ATIP */
+ cgc.cmd[8] = 2;
+ ret = pkt_generic_packet(pd, &cgc);
+ if (ret) {
+ pkt_dump_sense(&cgc);
+ return ret;
+ }
+ size = ((unsigned int) buf[0]<<8) + buf[1] + 2;
+ if (size > sizeof(buf))
+ size = sizeof(buf);
+
+ init_cdrom_command(&cgc, buf, size, CGC_DATA_READ);
+ cgc.sense = &sense;
+ cgc.cmd[0] = GPCMD_READ_TOC_PMA_ATIP;
+ cgc.cmd[1] = 2;
+ cgc.cmd[2] = 4;
+ cgc.cmd[8] = size;
+ ret = pkt_generic_packet(pd, &cgc);
+ if (ret) {
+ pkt_dump_sense(&cgc);
+ return ret;
+ }
+
+ if (!(buf[6] & 0x40)) {
+ printk(DRIVER_NAME": Disc type is not CD-RW\n");
+ return 1;
+ }
+ if (!(buf[6] & 0x4)) {
+ printk(DRIVER_NAME": A1 values on media are not valid, maybe not CDRW?\n");
+ return 1;
+ }
+
+ st = (buf[6] >> 3) & 0x7; /* disc sub-type */
+
+ sp = buf[16] & 0xf; /* max speed from ATIP A1 field */
+
+ /* Info from cdrecord */
+ switch (st) {
+ case 0: /* standard speed */
+ *speed = clv_to_speed[sp];
+ break;
+ case 1: /* high speed */
+ *speed = hs_clv_to_speed[sp];
+ break;
+ case 2: /* ultra high speed */
+ *speed = us_clv_to_speed[sp];
+ break;
+ default:
+ printk(DRIVER_NAME": Unknown disc sub-type %d\n",st);
+ return 1;
+ }
+ if (*speed) {
+ printk(DRIVER_NAME": Max. media speed: %d\n",*speed);
+ return 0;
+ } else {
+ printk(DRIVER_NAME": Unknown speed %d for sub-type %d\n",sp,st);
+ return 1;
+ }
+}
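
The three tables above map the 4-bit speed code from the ATIP A1 field to an x-rating, selected by the disc sub-type. A self-contained user-space sketch of the same lookup (illustrative only; the table contents are copied from the driver):

    #include <stdio.h>

    static const char std_tab[16]   = { 0, 2, 4, 6, 8 };
    static const char hs_tab[16]    = { 0, 2, 4, 6, 10 };
    static const char ultra_tab[16] = { 0, 2, 4, 8, 0, 0, 16, 0, 24, 32, 40, 48 };

    static int atip_speed(unsigned int sub_type, unsigned int code)
    {
            const char *tab;

            switch (sub_type) {
            case 0: tab = std_tab; break;     /* standard speed CD-RW */
            case 1: tab = hs_tab; break;      /* high speed CD-RW */
            case 2: tab = ultra_tab; break;   /* ultra high speed CD-RW */
            default: return -1;               /* unknown sub-type */
            }
            return tab[code & 0xf];
    }

    int main(void)
    {
            printf("sub-type 1, code 4 -> %dx\n", atip_speed(1, 4));  /* 10x */
            return 0;
    }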
+
+static noinline_for_stack int pkt_perform_opc(struct pktcdvd_device *pd)
+{
+ struct packet_command cgc;
+ struct request_sense sense;
+ int ret;
+
+ VPRINTK(DRIVER_NAME": Performing OPC\n");
+
+ init_cdrom_command(&cgc, NULL, 0, CGC_DATA_NONE);
+ cgc.sense = &sense;
+ cgc.timeout = 60*HZ;
+ cgc.cmd[0] = GPCMD_SEND_OPC;
+ cgc.cmd[1] = 1;
+ if ((ret = pkt_generic_packet(pd, &cgc)))
+ pkt_dump_sense(&cgc);
+ return ret;
+}
+
+static int pkt_open_write(struct pktcdvd_device *pd)
+{
+ int ret;
+ unsigned int write_speed, media_write_speed, read_speed;
+
+ if ((ret = pkt_probe_settings(pd))) {
+ VPRINTK(DRIVER_NAME": %s failed probe\n", pd->name);
+ return ret;
+ }
+
+ if ((ret = pkt_set_write_settings(pd))) {
+ DPRINTK(DRIVER_NAME": %s failed saving write settings\n", pd->name);
+ return -EIO;
+ }
+
+ pkt_write_caching(pd, USE_WCACHING);
+
+ if ((ret = pkt_get_max_speed(pd, &write_speed)))
+ write_speed = 16 * 177;
+ switch (pd->mmc3_profile) {
+ case 0x13: /* DVD-RW */
+ case 0x1a: /* DVD+RW */
+ case 0x12: /* DVD-RAM */
+ DPRINTK(DRIVER_NAME": write speed %ukB/s\n", write_speed);
+ break;
+ default:
+ if ((ret = pkt_media_speed(pd, &media_write_speed)))
+ media_write_speed = 16;
+ write_speed = min(write_speed, media_write_speed * 177);
+ DPRINTK(DRIVER_NAME": write speed %ux\n", write_speed / 176);
+ break;
+ }
+ read_speed = write_speed;
+
+ if ((ret = pkt_set_speed(pd, write_speed, read_speed))) {
+ DPRINTK(DRIVER_NAME": %s couldn't set write speed\n", pd->name);
+ return -EIO;
+ }
+ pd->write_speed = write_speed;
+ pd->read_speed = read_speed;
+
+ if ((ret = pkt_perform_opc(pd))) {
+ DPRINTK(DRIVER_NAME": %s Optimum Power Calibration failed\n", pd->name);
+ }
+
+ return 0;
+}
+
+/*
+ * called at open time.
+ */
+static int pkt_open_dev(struct pktcdvd_device *pd, fmode_t write)
+{
+ int ret;
+ long lba;
+ struct request_queue *q;
+
+ /*
+ * We need to re-open the cdrom device without O_NONBLOCK to be able
+ * to read/write from/to it. It is already opened in O_NONBLOCK mode
+ * so bdget() can't fail.
+ */
+ bdget(pd->bdev->bd_dev);
+ if ((ret = blkdev_get(pd->bdev, FMODE_READ | FMODE_EXCL, pd)))
+ goto out;
+
+ if ((ret = pkt_get_last_written(pd, &lba))) {
+ printk(DRIVER_NAME": pkt_get_last_written failed\n");
+ goto out_putdev;
+ }
+
+ set_capacity(pd->disk, lba << 2);
+ set_capacity(pd->bdev->bd_disk, lba << 2);
+ bd_set_size(pd->bdev, (loff_t)lba << 11);
+
+ q = bdev_get_queue(pd->bdev);
+ if (write) {
+ if ((ret = pkt_open_write(pd)))
+ goto out_putdev;
+ /*
+ * Some CD-RW drives cannot handle writes larger than one packet,
+ * even if the size is a multiple of the packet size.
+ */
+ spin_lock_irq(q->queue_lock);
+ blk_queue_max_hw_sectors(q, pd->settings.size);
+ spin_unlock_irq(q->queue_lock);
+ set_bit(PACKET_WRITABLE, &pd->flags);
+ } else {
+ pkt_set_speed(pd, MAX_SPEED, MAX_SPEED);
+ clear_bit(PACKET_WRITABLE, &pd->flags);
+ }
+
+ if ((ret = pkt_set_segment_merging(pd, q)))
+ goto out_putdev;
+
+ if (write) {
+ if (!pkt_grow_pktlist(pd, CONFIG_CDROM_PKTCDVD_BUFFERS)) {
+ printk(DRIVER_NAME": not enough memory for buffers\n");
+ ret = -ENOMEM;
+ goto out_putdev;
+ }
+ printk(DRIVER_NAME": %lukB available on disc\n", lba << 1);
+ }
+
+ return 0;
+
+out_putdev:
+ blkdev_put(pd->bdev, FMODE_READ | FMODE_EXCL);
+out:
+ return ret;
+}
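
The capacity calls above are only unit conversions: pkt_get_last_written() returns lba counted in 2048-byte CD frames, so lba << 2 gives 512-byte sectors for set_capacity(), lba << 11 gives bytes for bd_set_size(), and lba << 1 (in the printk) gives kilobytes. A quick check with an illustrative value:

    long lba = 100000;                       /* 2048-byte frames           */
    long sectors = lba << 2;                 /* 400000 512-byte sectors    */
    long long bytes = (long long)lba << 11;  /* 204800000 bytes            */
    long kilobytes = lba << 1;               /* 200000 kB, same total size */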
+
+/*
+ * called when the device is closed. makes sure that the device flushes
+ * the internal cache before we close.
+ */
+static void pkt_release_dev(struct pktcdvd_device *pd, int flush)
+{
+ if (flush && pkt_flush_cache(pd))
+ DPRINTK(DRIVER_NAME": %s not flushing cache\n", pd->name);
+
+ pkt_lock_door(pd, 0);
+
+ pkt_set_speed(pd, MAX_SPEED, MAX_SPEED);
+ blkdev_put(pd->bdev, FMODE_READ | FMODE_EXCL);
+
+ pkt_shrink_pktlist(pd);
+}
+
+static struct pktcdvd_device *pkt_find_dev_from_minor(unsigned int dev_minor)
+{
+ if (dev_minor >= MAX_WRITERS)
+ return NULL;
+ return pkt_devs[dev_minor];
+}
+
+static int pkt_open(struct block_device *bdev, fmode_t mode)
+{
+ struct pktcdvd_device *pd = NULL;
+ int ret;
+
+ VPRINTK(DRIVER_NAME": entering open\n");
+
+ mutex_lock(&pktcdvd_mutex);
+ mutex_lock(&ctl_mutex);
+ pd = pkt_find_dev_from_minor(MINOR(bdev->bd_dev));
+ if (!pd) {
+ ret = -ENODEV;
+ goto out;
+ }
+ BUG_ON(pd->refcnt < 0);
+
+ pd->refcnt++;
+ if (pd->refcnt > 1) {
+ if ((mode & FMODE_WRITE) &&
+ !test_bit(PACKET_WRITABLE, &pd->flags)) {
+ ret = -EBUSY;
+ goto out_dec;
+ }
+ } else {
+ ret = pkt_open_dev(pd, mode & FMODE_WRITE);
+ if (ret)
+ goto out_dec;
+ /*
+ * needed here as well, since ext2 (among others) may change
+ * the blocksize at mount time
+ */
+ set_blocksize(bdev, CD_FRAMESIZE);
+ }
+
+ mutex_unlock(&ctl_mutex);
+ mutex_unlock(&pktcdvd_mutex);
+ return 0;
+
+out_dec:
+ pd->refcnt--;
+out:
+ VPRINTK(DRIVER_NAME": failed open (%d)\n", ret);
+ mutex_unlock(&ctl_mutex);
+ mutex_unlock(&pktcdvd_mutex);
+ return ret;
+}
+
+static int pkt_close(struct gendisk *disk, fmode_t mode)
+{
+ struct pktcdvd_device *pd = disk->private_data;
+ int ret = 0;
+
+ mutex_lock(&pktcdvd_mutex);
+ mutex_lock(&ctl_mutex);
+ pd->refcnt--;
+ BUG_ON(pd->refcnt < 0);
+ if (pd->refcnt == 0) {
+ int flush = test_bit(PACKET_WRITABLE, &pd->flags);
+ pkt_release_dev(pd, flush);
+ }
+ mutex_unlock(&ctl_mutex);
+ mutex_unlock(&pktcdvd_mutex);
+ return ret;
+}
+
+
+static void pkt_end_io_read_cloned(struct bio *bio, int err)
+{
+ struct packet_stacked_data *psd = bio->bi_private;
+ struct pktcdvd_device *pd = psd->pd;
+
+ bio_put(bio);
+ bio_endio(psd->bio, err);
+ mempool_free(psd, psd_pool);
+ pkt_bio_finished(pd);
+}
+
+static int pkt_make_request(struct request_queue *q, struct bio *bio)
+{
+ struct pktcdvd_device *pd;
+ char b[BDEVNAME_SIZE];
+ sector_t zone;
+ struct packet_data *pkt;
+ int was_empty, blocked_bio;
+ struct pkt_rb_node *node;
+
+ pd = q->queuedata;
+ if (!pd) {
+ printk(DRIVER_NAME": %s incorrect request queue\n", bdevname(bio->bi_bdev, b));
+ goto end_io;
+ }
+
+ /*
+ * Clone READ bios so we can have our own bi_end_io callback.
+ */
+ if (bio_data_dir(bio) == READ) {
+ struct bio *cloned_bio = bio_clone(bio, GFP_NOIO);
+ struct packet_stacked_data *psd = mempool_alloc(psd_pool, GFP_NOIO);
+
+ psd->pd = pd;
+ psd->bio = bio;
+ cloned_bio->bi_bdev = pd->bdev;
+ cloned_bio->bi_private = psd;
+ cloned_bio->bi_end_io = pkt_end_io_read_cloned;
+ pd->stats.secs_r += bio->bi_size >> 9;
+ pkt_queue_bio(pd, cloned_bio);
+ return 0;
+ }
+
+ if (!test_bit(PACKET_WRITABLE, &pd->flags)) {
+ printk(DRIVER_NAME": WRITE for ro device %s (%llu)\n",
+ pd->name, (unsigned long long)bio->bi_sector);
+ goto end_io;
+ }
+
+ if (!bio->bi_size || (bio->bi_size % CD_FRAMESIZE)) {
+ printk(DRIVER_NAME": wrong bio size\n");
+ goto end_io;
+ }
+
+ blk_queue_bounce(q, &bio);
+
+ zone = ZONE(bio->bi_sector, pd);
+ VPRINTK("pkt_make_request: start = %6llx stop = %6llx\n",
+ (unsigned long long)bio->bi_sector,
+ (unsigned long long)(bio->bi_sector + bio_sectors(bio)));
+
+ /* Check if we have to split the bio */
+ {
+ struct bio_pair *bp;
+ sector_t last_zone;
+ int first_sectors;
+
+ last_zone = ZONE(bio->bi_sector + bio_sectors(bio) - 1, pd);
+ if (last_zone != zone) {
+ BUG_ON(last_zone != zone + pd->settings.size);
+ first_sectors = last_zone - bio->bi_sector;
+ bp = bio_split(bio, first_sectors);
+ BUG_ON(!bp);
+ pkt_make_request(q, &bp->bio1);
+ pkt_make_request(q, &bp->bio2);
+ bio_pair_release(bp);
+ return 0;
+ }
+ }
+
+ /*
+ * If we find a matching packet in state WAITING or READ_WAIT, we can
+ * just append this bio to that packet.
+ */
+ spin_lock(&pd->cdrw.active_list_lock);
+ blocked_bio = 0;
+ list_for_each_entry(pkt, &pd->cdrw.pkt_active_list, list) {
+ if (pkt->sector == zone) {
+ spin_lock(&pkt->lock);
+ if ((pkt->state == PACKET_WAITING_STATE) ||
+ (pkt->state == PACKET_READ_WAIT_STATE)) {
+ bio_list_add(&pkt->orig_bios, bio);
+ pkt->write_size += bio->bi_size / CD_FRAMESIZE;
+ if ((pkt->write_size >= pkt->frames) &&
+ (pkt->state == PACKET_WAITING_STATE)) {
+ atomic_inc(&pkt->run_sm);
+ wake_up(&pd->wqueue);
+ }
+ spin_unlock(&pkt->lock);
+ spin_unlock(&pd->cdrw.active_list_lock);
+ return 0;
+ } else {
+ blocked_bio = 1;
+ }
+ spin_unlock(&pkt->lock);
+ }
+ }
+ spin_unlock(&pd->cdrw.active_list_lock);
+
+ /*
+ * If the bio work queue has already reached the congestion-on mark,
+ * wait until it has drained below the congestion-off mark before
+ * queueing more work.
+ */
+ spin_lock(&pd->lock);
+ if (pd->write_congestion_on > 0
+ && pd->bio_queue_size >= pd->write_congestion_on) {
+ set_bdi_congested(&q->backing_dev_info, BLK_RW_ASYNC);
+ do {
+ spin_unlock(&pd->lock);
+ congestion_wait(BLK_RW_ASYNC, HZ);
+ spin_lock(&pd->lock);
+ } while(pd->bio_queue_size > pd->write_congestion_off);
+ }
+ spin_unlock(&pd->lock);
+
+ /*
+ * No matching packet found. Store the bio in the work queue.
+ */
+ node = mempool_alloc(pd->rb_pool, GFP_NOIO);
+ node->bio = bio;
+ spin_lock(&pd->lock);
+ BUG_ON(pd->bio_queue_size < 0);
+ was_empty = (pd->bio_queue_size == 0);
+ pkt_rbtree_insert(pd, node);
+ spin_unlock(&pd->lock);
+
+ /*
+ * Wake up the worker thread.
+ */
+ atomic_set(&pd->scan_queue, 1);
+ if (was_empty) {
+ /* This wake_up is required for correct operation */
+ wake_up(&pd->wqueue);
+ } else if (!list_empty(&pd->cdrw.pkt_free_list) && !blocked_bio) {
+ /*
+ * This wake up is not required for correct operation,
+ * but improves performance in some cases.
+ */
+ wake_up(&pd->wqueue);
+ }
+ return 0;
+end_io:
+ bio_io_error(bio);
+ return 0;
+}
+
+
+
+static int pkt_merge_bvec(struct request_queue *q, struct bvec_merge_data *bmd,
+ struct bio_vec *bvec)
+{
+ struct pktcdvd_device *pd = q->queuedata;
+ sector_t zone = ZONE(bmd->bi_sector, pd);
+ int used = ((bmd->bi_sector - zone) << 9) + bmd->bi_size;
+ int remaining = (pd->settings.size << 9) - used;
+ int remaining2;
+
+ /*
+ * A bio <= PAGE_SIZE must be allowed. If it crosses a packet
+ * boundary, pkt_make_request() will split the bio.
+ */
+ remaining2 = PAGE_SIZE - bmd->bi_size;
+ remaining = max(remaining, remaining2);
+
+ BUG_ON(remaining < 0);
+ return remaining;
+}
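
pkt_merge_bvec() works entirely in bytes: the ZONE() helper (defined outside this hunk) effectively rounds a sector down to the start of its fixed-size packet, 'used' is how much of that packet the bio already covers, and the return value is how many more bytes may be merged. A minimal sketch of the arithmetic, assuming a 64kB packet (128 sectors of 512 bytes) and a power-of-two packet size, which the masking in pkt_probe_settings() above also relies on; the sketch ignores the track-start offset (pd->offset) that the real macro may additionally fold in:

    /* illustrative only; mirrors the byte math in pkt_merge_bvec() */
    unsigned int packet_sectors = 128;      /* 32 frames * 4 sectors        */
    unsigned long long sector = 1000;
    unsigned long long zone = sector & ~(unsigned long long)(packet_sectors - 1);  /* 896 */
    unsigned int bi_size = 4096;            /* bytes already in the bio     */

    int used = (int)((sector - zone) << 9) + bi_size;   /* 57344            */
    int remaining = (packet_sectors << 9) - used;        /* 8192 bytes left  */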
+
+static void pkt_init_queue(struct pktcdvd_device *pd)
+{
+ struct request_queue *q = pd->disk->queue;
+
+ blk_queue_make_request(q, pkt_make_request);
+ blk_queue_logical_block_size(q, CD_FRAMESIZE);
+ blk_queue_max_hw_sectors(q, PACKET_MAX_SECTORS);
+ blk_queue_merge_bvec(q, pkt_merge_bvec);
+ q->queuedata = pd;
+}
+
+static int pkt_seq_show(struct seq_file *m, void *p)
+{
+ struct pktcdvd_device *pd = m->private;
+ char *msg;
+ char bdev_buf[BDEVNAME_SIZE];
+ int states[PACKET_NUM_STATES];
+
+ seq_printf(m, "Writer %s mapped to %s:\n", pd->name,
+ bdevname(pd->bdev, bdev_buf));
+
+ seq_printf(m, "\nSettings:\n");
+ seq_printf(m, "\tpacket size:\t\t%dkB\n", pd->settings.size / 2);
+
+ if (pd->settings.write_type == 0)
+ msg = "Packet";
+ else
+ msg = "Unknown";
+ seq_printf(m, "\twrite type:\t\t%s\n", msg);
+
+ seq_printf(m, "\tpacket type:\t\t%s\n", pd->settings.fp ? "Fixed" : "Variable");
+ seq_printf(m, "\tlink loss:\t\t%d\n", pd->settings.link_loss);
+
+ seq_printf(m, "\ttrack mode:\t\t%d\n", pd->settings.track_mode);
+
+ if (pd->settings.block_mode == PACKET_BLOCK_MODE1)
+ msg = "Mode 1";
+ else if (pd->settings.block_mode == PACKET_BLOCK_MODE2)
+ msg = "Mode 2";
+ else
+ msg = "Unknown";
+ seq_printf(m, "\tblock mode:\t\t%s\n", msg);
+
+ seq_printf(m, "\nStatistics:\n");
+ seq_printf(m, "\tpackets started:\t%lu\n", pd->stats.pkt_started);
+ seq_printf(m, "\tpackets ended:\t\t%lu\n", pd->stats.pkt_ended);
+ seq_printf(m, "\twritten:\t\t%lukB\n", pd->stats.secs_w >> 1);
+ seq_printf(m, "\tread gather:\t\t%lukB\n", pd->stats.secs_rg >> 1);
+ seq_printf(m, "\tread:\t\t\t%lukB\n", pd->stats.secs_r >> 1);
+
+ seq_printf(m, "\nMisc:\n");
+ seq_printf(m, "\treference count:\t%d\n", pd->refcnt);
+ seq_printf(m, "\tflags:\t\t\t0x%lx\n", pd->flags);
+ seq_printf(m, "\tread speed:\t\t%ukB/s\n", pd->read_speed);
+ seq_printf(m, "\twrite speed:\t\t%ukB/s\n", pd->write_speed);
+ seq_printf(m, "\tstart offset:\t\t%lu\n", pd->offset);
+ seq_printf(m, "\tmode page offset:\t%u\n", pd->mode_offset);
+
+ seq_printf(m, "\nQueue state:\n");
+ seq_printf(m, "\tbios queued:\t\t%d\n", pd->bio_queue_size);
+ seq_printf(m, "\tbios pending:\t\t%d\n", atomic_read(&pd->cdrw.pending_bios));
+ seq_printf(m, "\tcurrent sector:\t\t0x%llx\n", (unsigned long long)pd->current_sector);
+
+ pkt_count_states(pd, states);
+ seq_printf(m, "\tstate:\t\t\ti:%d ow:%d rw:%d ww:%d rec:%d fin:%d\n",
+ states[0], states[1], states[2], states[3], states[4], states[5]);
+
+ seq_printf(m, "\twrite congestion marks:\toff=%d on=%d\n",
+ pd->write_congestion_off,
+ pd->write_congestion_on);
+ return 0;
+}
+
+static int pkt_seq_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, pkt_seq_show, PDE(inode)->data);
+}
+
+static const struct file_operations pkt_proc_fops = {
+ .open = pkt_seq_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release
+};
+
+static int pkt_new_dev(struct pktcdvd_device *pd, dev_t dev)
+{
+ int i;
+ int ret = 0;
+ char b[BDEVNAME_SIZE];
+ struct block_device *bdev;
+
+ if (pd->pkt_dev == dev) {
+ printk(DRIVER_NAME": Recursive setup not allowed\n");
+ return -EBUSY;
+ }
+ for (i = 0; i < MAX_WRITERS; i++) {
+ struct pktcdvd_device *pd2 = pkt_devs[i];
+ if (!pd2)
+ continue;
+ if (pd2->bdev->bd_dev == dev) {
+ printk(DRIVER_NAME": %s already setup\n", bdevname(pd2->bdev, b));
+ return -EBUSY;
+ }
+ if (pd2->pkt_dev == dev) {
+ printk(DRIVER_NAME": Can't chain pktcdvd devices\n");
+ return -EBUSY;
+ }
+ }
+
+ bdev = bdget(dev);
+ if (!bdev)
+ return -ENOMEM;
+ ret = blkdev_get(bdev, FMODE_READ | FMODE_NDELAY, NULL);
+ if (ret)
+ return ret;
+
+ /* This is safe, since we have a reference from open(). */
+ __module_get(THIS_MODULE);
+
+ pd->bdev = bdev;
+ set_blocksize(bdev, CD_FRAMESIZE);
+
+ pkt_init_queue(pd);
+
+ atomic_set(&pd->cdrw.pending_bios, 0);
+ pd->cdrw.thread = kthread_run(kcdrwd, pd, "%s", pd->name);
+ if (IS_ERR(pd->cdrw.thread)) {
+ printk(DRIVER_NAME": can't start kernel thread\n");
+ ret = -ENOMEM;
+ goto out_mem;
+ }
+
+ proc_create_data(pd->name, 0, pkt_proc, &pkt_proc_fops, pd);
+ DPRINTK(DRIVER_NAME": writer %s mapped to %s\n", pd->name, bdevname(bdev, b));
+ return 0;
+
+out_mem:
+ blkdev_put(bdev, FMODE_READ | FMODE_NDELAY);
+ /* This is safe: open() is still holding a reference. */
+ module_put(THIS_MODULE);
+ return ret;
+}
+
+static int pkt_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, unsigned long arg)
+{
+ struct pktcdvd_device *pd = bdev->bd_disk->private_data;
+ int ret;
+
+ VPRINTK("pkt_ioctl: cmd %x, dev %d:%d\n", cmd,
+ MAJOR(bdev->bd_dev), MINOR(bdev->bd_dev));
+
+ mutex_lock(&pktcdvd_mutex);
+ switch (cmd) {
+ case CDROMEJECT:
+ /*
+ * The door gets locked when the device is opened, so we
+ * have to unlock it or else the eject command fails.
+ */
+ if (pd->refcnt == 1)
+ pkt_lock_door(pd, 0);
+ /* fallthru */
+ /*
+ * forward selected CDROM ioctls to CD-ROM, for UDF
+ */
+ case CDROMMULTISESSION:
+ case CDROMREADTOCENTRY:
+ case CDROM_LAST_WRITTEN:
+ case CDROM_SEND_PACKET:
+ case SCSI_IOCTL_SEND_COMMAND:
+ ret = __blkdev_driver_ioctl(pd->bdev, mode, cmd, arg);
+ break;
+
+ default:
+ VPRINTK(DRIVER_NAME": Unknown ioctl for %s (%x)\n", pd->name, cmd);
+ ret = -ENOTTY;
+ }
+ mutex_unlock(&pktcdvd_mutex);
+
+ return ret;
+}
+
+static unsigned int pkt_check_events(struct gendisk *disk,
+ unsigned int clearing)
+{
+ struct pktcdvd_device *pd = disk->private_data;
+ struct gendisk *attached_disk;
+
+ if (!pd)
+ return 0;
+ if (!pd->bdev)
+ return 0;
+ attached_disk = pd->bdev->bd_disk;
+ if (!attached_disk || !attached_disk->fops->check_events)
+ return 0;
+ return attached_disk->fops->check_events(attached_disk, clearing);
+}
+
+static const struct block_device_operations pktcdvd_ops = {
+ .owner = THIS_MODULE,
+ .open = pkt_open,
+ .release = pkt_close,
+ .ioctl = pkt_ioctl,
+ .check_events = pkt_check_events,
+};
+
+static char *pktcdvd_devnode(struct gendisk *gd, mode_t *mode)
+{
+ return kasprintf(GFP_KERNEL, "pktcdvd/%s", gd->disk_name);
+}
+
+/*
+ * Set up mapping from pktcdvd device to CD-ROM device.
+ */
+static int pkt_setup_dev(dev_t dev, dev_t* pkt_dev)
+{
+ int idx;
+ int ret = -ENOMEM;
+ struct pktcdvd_device *pd;
+ struct gendisk *disk;
+
+ mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
+
+ for (idx = 0; idx < MAX_WRITERS; idx++)
+ if (!pkt_devs[idx])
+ break;
+ if (idx == MAX_WRITERS) {
+ printk(DRIVER_NAME": max %d writers supported\n", MAX_WRITERS);
+ ret = -EBUSY;
+ goto out_mutex;
+ }
+
+ pd = kzalloc(sizeof(struct pktcdvd_device), GFP_KERNEL);
+ if (!pd)
+ goto out_mutex;
+
+ pd->rb_pool = mempool_create_kmalloc_pool(PKT_RB_POOL_SIZE,
+ sizeof(struct pkt_rb_node));
+ if (!pd->rb_pool)
+ goto out_mem;
+
+ INIT_LIST_HEAD(&pd->cdrw.pkt_free_list);
+ INIT_LIST_HEAD(&pd->cdrw.pkt_active_list);
+ spin_lock_init(&pd->cdrw.active_list_lock);
+
+ spin_lock_init(&pd->lock);
+ spin_lock_init(&pd->iosched.lock);
+ bio_list_init(&pd->iosched.read_queue);
+ bio_list_init(&pd->iosched.write_queue);
+ sprintf(pd->name, DRIVER_NAME"%d", idx);
+ init_waitqueue_head(&pd->wqueue);
+ pd->bio_queue = RB_ROOT;
+
+ pd->write_congestion_on = write_congestion_on;
+ pd->write_congestion_off = write_congestion_off;
+
+ disk = alloc_disk(1);
+ if (!disk)
+ goto out_mem;
+ pd->disk = disk;
+ disk->major = pktdev_major;
+ disk->first_minor = idx;
+ disk->fops = &pktcdvd_ops;
+ disk->flags = GENHD_FL_REMOVABLE;
+ strcpy(disk->disk_name, pd->name);
+ disk->devnode = pktcdvd_devnode;
+ disk->private_data = pd;
+ disk->queue = blk_alloc_queue(GFP_KERNEL);
+ if (!disk->queue)
+ goto out_mem2;
+
+ pd->pkt_dev = MKDEV(pktdev_major, idx);
+ ret = pkt_new_dev(pd, dev);
+ if (ret)
+ goto out_new_dev;
+
+ /* inherit events of the host device */
+ disk->events = pd->bdev->bd_disk->events;
+ disk->async_events = pd->bdev->bd_disk->async_events;
+
+ add_disk(disk);
+
+ pkt_sysfs_dev_new(pd);
+ pkt_debugfs_dev_new(pd);
+
+ pkt_devs[idx] = pd;
+ if (pkt_dev)
+ *pkt_dev = pd->pkt_dev;
+
+ mutex_unlock(&ctl_mutex);
+ return 0;
+
+out_new_dev:
+ blk_cleanup_queue(disk->queue);
+out_mem2:
+ put_disk(disk);
+out_mem:
+ if (pd->rb_pool)
+ mempool_destroy(pd->rb_pool);
+ kfree(pd);
+out_mutex:
+ mutex_unlock(&ctl_mutex);
+ printk(DRIVER_NAME": setup of pktcdvd device failed\n");
+ return ret;
+}
+
+/*
+ * Tear down mapping from pktcdvd device to CD-ROM device.
+ */
+static int pkt_remove_dev(dev_t pkt_dev)
+{
+ struct pktcdvd_device *pd;
+ int idx;
+ int ret = 0;
+
+ mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
+
+ for (idx = 0; idx < MAX_WRITERS; idx++) {
+ pd = pkt_devs[idx];
+ if (pd && (pd->pkt_dev == pkt_dev))
+ break;
+ }
+ if (idx == MAX_WRITERS) {
+ DPRINTK(DRIVER_NAME": dev not setup\n");
+ ret = -ENXIO;
+ goto out;
+ }
+
+ if (pd->refcnt > 0) {
+ ret = -EBUSY;
+ goto out;
+ }
+ if (!IS_ERR(pd->cdrw.thread))
+ kthread_stop(pd->cdrw.thread);
+
+ pkt_devs[idx] = NULL;
+
+ pkt_debugfs_dev_remove(pd);
+ pkt_sysfs_dev_remove(pd);
+
+ blkdev_put(pd->bdev, FMODE_READ | FMODE_NDELAY);
+
+ remove_proc_entry(pd->name, pkt_proc);
+ DPRINTK(DRIVER_NAME": writer %s unmapped\n", pd->name);
+
+ del_gendisk(pd->disk);
+ blk_cleanup_queue(pd->disk->queue);
+ put_disk(pd->disk);
+
+ mempool_destroy(pd->rb_pool);
+ kfree(pd);
+
+ /* This is safe: open() is still holding a reference. */
+ module_put(THIS_MODULE);
+
+out:
+ mutex_unlock(&ctl_mutex);
+ return ret;
+}
+
+static void pkt_get_status(struct pkt_ctrl_command *ctrl_cmd)
+{
+ struct pktcdvd_device *pd;
+
+ mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
+
+ pd = pkt_find_dev_from_minor(ctrl_cmd->dev_index);
+ if (pd) {
+ ctrl_cmd->dev = new_encode_dev(pd->bdev->bd_dev);
+ ctrl_cmd->pkt_dev = new_encode_dev(pd->pkt_dev);
+ } else {
+ ctrl_cmd->dev = 0;
+ ctrl_cmd->pkt_dev = 0;
+ }
+ ctrl_cmd->num_devices = MAX_WRITERS;
+
+ mutex_unlock(&ctl_mutex);
+}
+
+static long pkt_ctl_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+ void __user *argp = (void __user *)arg;
+ struct pkt_ctrl_command ctrl_cmd;
+ int ret = 0;
+ dev_t pkt_dev = 0;
+
+ if (cmd != PACKET_CTRL_CMD)
+ return -ENOTTY;
+
+ if (copy_from_user(&ctrl_cmd, argp, sizeof(struct pkt_ctrl_command)))
+ return -EFAULT;
+
+ switch (ctrl_cmd.command) {
+ case PKT_CTRL_CMD_SETUP:
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+ ret = pkt_setup_dev(new_decode_dev(ctrl_cmd.dev), &pkt_dev);
+ ctrl_cmd.pkt_dev = new_encode_dev(pkt_dev);
+ break;
+ case PKT_CTRL_CMD_TEARDOWN:
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+ ret = pkt_remove_dev(new_decode_dev(ctrl_cmd.pkt_dev));
+ break;
+ case PKT_CTRL_CMD_STATUS:
+ pkt_get_status(&ctrl_cmd);
+ break;
+ default:
+ return -ENOTTY;
+ }
+
+ if (copy_to_user(argp, &ctrl_cmd, sizeof(struct pkt_ctrl_command)))
+ return -EFAULT;
+ return ret;
+}
+
+#ifdef CONFIG_COMPAT
+static long pkt_ctl_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+ return pkt_ctl_ioctl(file, cmd, (unsigned long)compat_ptr(arg));
+}
+#endif
+
+static const struct file_operations pkt_ctl_fops = {
+ .open = nonseekable_open,
+ .unlocked_ioctl = pkt_ctl_ioctl,
+#ifdef CONFIG_COMPAT
+ .compat_ioctl = pkt_ctl_compat_ioctl,
+#endif
+ .owner = THIS_MODULE,
+ .llseek = no_llseek,
+};
+
+static struct miscdevice pkt_misc = {
+ .minor = MISC_DYNAMIC_MINOR,
+ .name = DRIVER_NAME,
+ .nodename = "pktcdvd/control",
+ .fops = &pkt_ctl_fops
+};
+
+static int __init pkt_init(void)
+{
+ int ret;
+
+ mutex_init(&ctl_mutex);
+
+ psd_pool = mempool_create_kmalloc_pool(PSD_POOL_SIZE,
+ sizeof(struct packet_stacked_data));
+ if (!psd_pool)
+ return -ENOMEM;
+
+ ret = register_blkdev(pktdev_major, DRIVER_NAME);
+ if (ret < 0) {
+ printk(DRIVER_NAME": Unable to register block device\n");
+ goto out2;
+ }
+ if (!pktdev_major)
+ pktdev_major = ret;
+
+ ret = pkt_sysfs_init();
+ if (ret)
+ goto out;
+
+ pkt_debugfs_init();
+
+ ret = misc_register(&pkt_misc);
+ if (ret) {
+ printk(DRIVER_NAME": Unable to register misc device\n");
+ goto out_misc;
+ }
+
+ pkt_proc = proc_mkdir("driver/"DRIVER_NAME, NULL);
+
+ return 0;
+
+out_misc:
+ pkt_debugfs_cleanup();
+ pkt_sysfs_cleanup();
+out:
+ unregister_blkdev(pktdev_major, DRIVER_NAME);
+out2:
+ mempool_destroy(psd_pool);
+ return ret;
+}
+
+static void __exit pkt_exit(void)
+{
+ remove_proc_entry("driver/"DRIVER_NAME, NULL);
+ misc_deregister(&pkt_misc);
+
+ pkt_debugfs_cleanup();
+ pkt_sysfs_cleanup();
+
+ unregister_blkdev(pktdev_major, DRIVER_NAME);
+ mempool_destroy(psd_pool);
+}
+
+MODULE_DESCRIPTION("Packet writing layer for CD/DVD drives");
+MODULE_AUTHOR("Jens Axboe <axboe@suse.de>");
+MODULE_LICENSE("GPL");
+
+module_init(pkt_init);
+module_exit(pkt_exit);
diff --git a/drivers/block/ps3disk.c b/drivers/block/ps3disk.c
new file mode 100644
index 00000000..8e1ce2e2
--- /dev/null
+++ b/drivers/block/ps3disk.c
@@ -0,0 +1,589 @@
+/*
+ * PS3 Disk Storage Driver
+ *
+ * Copyright (C) 2007 Sony Computer Entertainment Inc.
+ * Copyright 2007 Sony Corp.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published
+ * by the Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <linux/ata.h>
+#include <linux/blkdev.h>
+#include <linux/slab.h>
+
+#include <asm/lv1call.h>
+#include <asm/ps3stor.h>
+#include <asm/firmware.h>
+
+
+#define DEVICE_NAME "ps3disk"
+
+#define BOUNCE_SIZE (64*1024)
+
+#define PS3DISK_MAX_DISKS 16
+#define PS3DISK_MINORS 16
+
+
+#define PS3DISK_NAME "ps3d%c"
+
+
+struct ps3disk_private {
+ spinlock_t lock; /* Request queue spinlock */
+ struct request_queue *queue;
+ struct gendisk *gendisk;
+ unsigned int blocking_factor;
+ struct request *req;
+ u64 raw_capacity;
+ unsigned char model[ATA_ID_PROD_LEN+1];
+};
+
+
+#define LV1_STORAGE_SEND_ATA_COMMAND (2)
+#define LV1_STORAGE_ATA_HDDOUT (0x23)
+
+struct lv1_ata_cmnd_block {
+ u16 features;
+ u16 sector_count;
+ u16 LBA_low;
+ u16 LBA_mid;
+ u16 LBA_high;
+ u8 device;
+ u8 command;
+ u32 is_ext;
+ u32 proto;
+ u32 in_out;
+ u32 size;
+ u64 buffer;
+ u32 arglen;
+};
+
+enum lv1_ata_proto {
+ NON_DATA_PROTO = 0,
+ PIO_DATA_IN_PROTO = 1,
+ PIO_DATA_OUT_PROTO = 2,
+ DMA_PROTO = 3
+};
+
+enum lv1_ata_in_out {
+ DIR_WRITE = 0, /* memory -> device */
+ DIR_READ = 1 /* device -> memory */
+};
+
+static int ps3disk_major;
+
+
+static const struct block_device_operations ps3disk_fops = {
+ .owner = THIS_MODULE,
+};
+
+
+static void ps3disk_scatter_gather(struct ps3_storage_device *dev,
+ struct request *req, int gather)
+{
+ unsigned int offset = 0;
+ struct req_iterator iter;
+ struct bio_vec *bvec;
+ unsigned int i = 0;
+ size_t size;
+ void *buf;
+
+ rq_for_each_segment(bvec, req, iter) {
+ unsigned long flags;
+ dev_dbg(&dev->sbd.core,
+ "%s:%u: bio %u: %u segs %u sectors from %lu\n",
+ __func__, __LINE__, i, bio_segments(iter.bio),
+ bio_sectors(iter.bio), iter.bio->bi_sector);
+
+ size = bvec->bv_len;
+ buf = bvec_kmap_irq(bvec, &flags);
+ if (gather)
+ memcpy(dev->bounce_buf+offset, buf, size);
+ else
+ memcpy(buf, dev->bounce_buf+offset, size);
+ offset += size;
+ flush_kernel_dcache_page(bvec->bv_page);
+ bvec_kunmap_irq(buf, &flags);
+ i++;
+ }
+}
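
ps3disk_scatter_gather() flattens the request's bio segments into the one contiguous bounce buffer the hypervisor transfers to or from (gather), or spreads the bounce buffer back over the segments after a read (scatter). A stripped-down user-space sketch of the same idea, with made-up segment descriptors:

    #include <string.h>

    struct seg { void *buf; size_t len; };

    /* gather != 0: segments -> bounce buffer; gather == 0: bounce buffer -> segments */
    static void scatter_gather(unsigned char *bounce, struct seg *segs, int nsegs, int gather)
    {
            size_t offset = 0;
            int i;

            for (i = 0; i < nsegs; i++) {
                    if (gather)
                            memcpy(bounce + offset, segs[i].buf, segs[i].len);
                    else
                            memcpy(segs[i].buf, bounce + offset, segs[i].len);
                    offset += segs[i].len;
            }
    }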
+
+static int ps3disk_submit_request_sg(struct ps3_storage_device *dev,
+ struct request *req)
+{
+ struct ps3disk_private *priv = ps3_system_bus_get_drvdata(&dev->sbd);
+ int write = rq_data_dir(req), res;
+ const char *op = write ? "write" : "read";
+ u64 start_sector, sectors;
+ unsigned int region_id = dev->regions[dev->region_idx].id;
+
+#ifdef DEBUG
+ unsigned int n = 0;
+ struct bio_vec *bv;
+ struct req_iterator iter;
+
+ rq_for_each_segment(bv, req, iter)
+ n++;
+ dev_dbg(&dev->sbd.core,
+ "%s:%u: %s req has %u bvecs for %u sectors\n",
+ __func__, __LINE__, op, n, blk_rq_sectors(req));
+#endif
+
+ start_sector = blk_rq_pos(req) * priv->blocking_factor;
+ sectors = blk_rq_sectors(req) * priv->blocking_factor;
+ dev_dbg(&dev->sbd.core, "%s:%u: %s %llu sectors starting at %llu\n",
+ __func__, __LINE__, op, sectors, start_sector);
+
+ if (write) {
+ ps3disk_scatter_gather(dev, req, 1);
+
+ res = lv1_storage_write(dev->sbd.dev_id, region_id,
+ start_sector, sectors, 0,
+ dev->bounce_lpar, &dev->tag);
+ } else {
+ res = lv1_storage_read(dev->sbd.dev_id, region_id,
+ start_sector, sectors, 0,
+ dev->bounce_lpar, &dev->tag);
+ }
+ if (res) {
+ dev_err(&dev->sbd.core, "%s:%u: %s failed %d\n", __func__,
+ __LINE__, op, res);
+ __blk_end_request_all(req, -EIO);
+ return 0;
+ }
+
+ priv->req = req;
+ return 1;
+}
+
+static int ps3disk_submit_flush_request(struct ps3_storage_device *dev,
+ struct request *req)
+{
+ struct ps3disk_private *priv = ps3_system_bus_get_drvdata(&dev->sbd);
+ u64 res;
+
+ dev_dbg(&dev->sbd.core, "%s:%u: flush request\n", __func__, __LINE__);
+
+ res = lv1_storage_send_device_command(dev->sbd.dev_id,
+ LV1_STORAGE_ATA_HDDOUT, 0, 0, 0,
+ 0, &dev->tag);
+ if (res) {
+ dev_err(&dev->sbd.core, "%s:%u: sync cache failed 0x%llx\n",
+ __func__, __LINE__, res);
+ __blk_end_request_all(req, -EIO);
+ return 0;
+ }
+
+ priv->req = req;
+ return 1;
+}
+
+static void ps3disk_do_request(struct ps3_storage_device *dev,
+ struct request_queue *q)
+{
+ struct request *req;
+
+ dev_dbg(&dev->sbd.core, "%s:%u\n", __func__, __LINE__);
+
+ while ((req = blk_fetch_request(q))) {
+ if (req->cmd_flags & REQ_FLUSH) {
+ if (ps3disk_submit_flush_request(dev, req))
+ break;
+ } else if (req->cmd_type == REQ_TYPE_FS) {
+ if (ps3disk_submit_request_sg(dev, req))
+ break;
+ } else {
+ blk_dump_rq_flags(req, DEVICE_NAME " bad request");
+ __blk_end_request_all(req, -EIO);
+ continue;
+ }
+ }
+}
+
+static void ps3disk_request(struct request_queue *q)
+{
+ struct ps3_storage_device *dev = q->queuedata;
+ struct ps3disk_private *priv = ps3_system_bus_get_drvdata(&dev->sbd);
+
+ if (priv->req) {
+ dev_dbg(&dev->sbd.core, "%s:%u busy\n", __func__, __LINE__);
+ return;
+ }
+
+ ps3disk_do_request(dev, q);
+}
+
+static irqreturn_t ps3disk_interrupt(int irq, void *data)
+{
+ struct ps3_storage_device *dev = data;
+ struct ps3disk_private *priv;
+ struct request *req;
+ int res, read, error;
+ u64 tag, status;
+ const char *op;
+
+ res = lv1_storage_get_async_status(dev->sbd.dev_id, &tag, &status);
+
+ if (tag != dev->tag)
+ dev_err(&dev->sbd.core,
+ "%s:%u: tag mismatch, got %llx, expected %llx\n",
+ __func__, __LINE__, tag, dev->tag);
+
+ if (res) {
+ dev_err(&dev->sbd.core, "%s:%u: res=%d status=0x%llx\n",
+ __func__, __LINE__, res, status);
+ return IRQ_HANDLED;
+ }
+
+ priv = ps3_system_bus_get_drvdata(&dev->sbd);
+ req = priv->req;
+ if (!req) {
+ dev_dbg(&dev->sbd.core,
+ "%s:%u non-block layer request completed\n", __func__,
+ __LINE__);
+ dev->lv1_status = status;
+ complete(&dev->done);
+ return IRQ_HANDLED;
+ }
+
+ if (req->cmd_flags & REQ_FLUSH) {
+ read = 0;
+ op = "flush";
+ } else {
+ read = !rq_data_dir(req);
+ op = read ? "read" : "write";
+ }
+ if (status) {
+ dev_dbg(&dev->sbd.core, "%s:%u: %s failed 0x%llx\n", __func__,
+ __LINE__, op, status);
+ error = -EIO;
+ } else {
+ dev_dbg(&dev->sbd.core, "%s:%u: %s completed\n", __func__,
+ __LINE__, op);
+ error = 0;
+ if (read)
+ ps3disk_scatter_gather(dev, req, 0);
+ }
+
+ spin_lock(&priv->lock);
+ __blk_end_request_all(req, error);
+ priv->req = NULL;
+ ps3disk_do_request(dev, priv->queue);
+ spin_unlock(&priv->lock);
+
+ return IRQ_HANDLED;
+}
+
+static int ps3disk_sync_cache(struct ps3_storage_device *dev)
+{
+ u64 res;
+
+ dev_dbg(&dev->sbd.core, "%s:%u: sync cache\n", __func__, __LINE__);
+
+ res = ps3stor_send_command(dev, LV1_STORAGE_ATA_HDDOUT, 0, 0, 0, 0);
+ if (res) {
+ dev_err(&dev->sbd.core, "%s:%u: sync cache failed 0x%llx\n",
+ __func__, __LINE__, res);
+ return -EIO;
+ }
+ return 0;
+}
+
+
+/* ATA helpers copied from drivers/ata/libata-core.c */
+
+static void swap_buf_le16(u16 *buf, unsigned int buf_words)
+{
+#ifdef __BIG_ENDIAN
+ unsigned int i;
+
+ for (i = 0; i < buf_words; i++)
+ buf[i] = le16_to_cpu(buf[i]);
+#endif /* __BIG_ENDIAN */
+}
+
+static u64 ata_id_n_sectors(const u16 *id)
+{
+ if (ata_id_has_lba(id)) {
+ if (ata_id_has_lba48(id))
+ return ata_id_u64(id, 100);
+ else
+ return ata_id_u32(id, 60);
+ } else {
+ if (ata_id_current_chs_valid(id))
+ return ata_id_u32(id, 57);
+ else
+ return id[1] * id[3] * id[6];
+ }
+}
+
+static void ata_id_string(const u16 *id, unsigned char *s, unsigned int ofs,
+ unsigned int len)
+{
+ unsigned int c;
+
+ while (len > 0) {
+ c = id[ofs] >> 8;
+ *s = c;
+ s++;
+
+ c = id[ofs] & 0xff;
+ *s = c;
+ s++;
+
+ ofs++;
+ len -= 2;
+ }
+}
+
+static void ata_id_c_string(const u16 *id, unsigned char *s, unsigned int ofs,
+ unsigned int len)
+{
+ unsigned char *p;
+
+ WARN_ON(!(len & 1));
+
+ ata_id_string(id, s, ofs, len - 1);
+
+ p = s + strnlen(s, len - 1);
+ while (p > s && p[-1] == ' ')
+ p--;
+ *p = '\0';
+}
+
+static int ps3disk_identify(struct ps3_storage_device *dev)
+{
+ struct ps3disk_private *priv = ps3_system_bus_get_drvdata(&dev->sbd);
+ struct lv1_ata_cmnd_block ata_cmnd;
+ u16 *id = dev->bounce_buf;
+ u64 res;
+
+ dev_dbg(&dev->sbd.core, "%s:%u: identify disk\n", __func__, __LINE__);
+
+ memset(&ata_cmnd, 0, sizeof(struct lv1_ata_cmnd_block));
+ ata_cmnd.command = ATA_CMD_ID_ATA;
+ ata_cmnd.sector_count = 1;
+ ata_cmnd.size = ata_cmnd.arglen = ATA_ID_WORDS * 2;
+ ata_cmnd.buffer = dev->bounce_lpar;
+ ata_cmnd.proto = PIO_DATA_IN_PROTO;
+ ata_cmnd.in_out = DIR_READ;
+
+ res = ps3stor_send_command(dev, LV1_STORAGE_SEND_ATA_COMMAND,
+ ps3_mm_phys_to_lpar(__pa(&ata_cmnd)),
+ sizeof(ata_cmnd), ata_cmnd.buffer,
+ ata_cmnd.arglen);
+ if (res) {
+ dev_err(&dev->sbd.core, "%s:%u: identify disk failed 0x%llx\n",
+ __func__, __LINE__, res);
+ return -EIO;
+ }
+
+ swap_buf_le16(id, ATA_ID_WORDS);
+
+ /* All we're interested in are raw capacity and model name */
+ priv->raw_capacity = ata_id_n_sectors(id);
+ ata_id_c_string(id, priv->model, ATA_ID_PROD, sizeof(priv->model));
+ return 0;
+}
+
+static unsigned long ps3disk_mask;
+
+static DEFINE_MUTEX(ps3disk_mask_mutex);
+
+static int __devinit ps3disk_probe(struct ps3_system_bus_device *_dev)
+{
+ struct ps3_storage_device *dev = to_ps3_storage_device(&_dev->core);
+ struct ps3disk_private *priv;
+ int error;
+ unsigned int devidx;
+ struct request_queue *queue;
+ struct gendisk *gendisk;
+
+ if (dev->blk_size < 512) {
+ dev_err(&dev->sbd.core,
+ "%s:%u: cannot handle block size %llu\n", __func__,
+ __LINE__, dev->blk_size);
+ return -EINVAL;
+ }
+
+ BUILD_BUG_ON(PS3DISK_MAX_DISKS > BITS_PER_LONG);
+ mutex_lock(&ps3disk_mask_mutex);
+ devidx = find_first_zero_bit(&ps3disk_mask, PS3DISK_MAX_DISKS);
+ if (devidx >= PS3DISK_MAX_DISKS) {
+ dev_err(&dev->sbd.core, "%s:%u: Too many disks\n", __func__,
+ __LINE__);
+ mutex_unlock(&ps3disk_mask_mutex);
+ return -ENOSPC;
+ }
+ __set_bit(devidx, &ps3disk_mask);
+ mutex_unlock(&ps3disk_mask_mutex);
+
+ priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+ if (!priv) {
+ error = -ENOMEM;
+ goto fail;
+ }
+
+ ps3_system_bus_set_drvdata(_dev, priv);
+ spin_lock_init(&priv->lock);
+
+ dev->bounce_size = BOUNCE_SIZE;
+ dev->bounce_buf = kmalloc(BOUNCE_SIZE, GFP_DMA);
+ if (!dev->bounce_buf) {
+ error = -ENOMEM;
+ goto fail_free_priv;
+ }
+
+ error = ps3stor_setup(dev, ps3disk_interrupt);
+ if (error)
+ goto fail_free_bounce;
+
+ ps3disk_identify(dev);
+
+ queue = blk_init_queue(ps3disk_request, &priv->lock);
+ if (!queue) {
+ dev_err(&dev->sbd.core, "%s:%u: blk_init_queue failed\n",
+ __func__, __LINE__);
+ error = -ENOMEM;
+ goto fail_teardown;
+ }
+
+ priv->queue = queue;
+ queue->queuedata = dev;
+
+ blk_queue_bounce_limit(queue, BLK_BOUNCE_HIGH);
+
+ blk_queue_max_hw_sectors(queue, dev->bounce_size >> 9);
+ blk_queue_segment_boundary(queue, -1UL);
+ blk_queue_dma_alignment(queue, dev->blk_size-1);
+ blk_queue_logical_block_size(queue, dev->blk_size);
+
+ blk_queue_flush(queue, REQ_FLUSH);
+
+ blk_queue_max_segments(queue, -1);
+ blk_queue_max_segment_size(queue, dev->bounce_size);
+
+ gendisk = alloc_disk(PS3DISK_MINORS);
+ if (!gendisk) {
+ dev_err(&dev->sbd.core, "%s:%u: alloc_disk failed\n", __func__,
+ __LINE__);
+ error = -ENOMEM;
+ goto fail_cleanup_queue;
+ }
+
+ priv->gendisk = gendisk;
+ gendisk->major = ps3disk_major;
+ gendisk->first_minor = devidx * PS3DISK_MINORS;
+ gendisk->fops = &ps3disk_fops;
+ gendisk->queue = queue;
+ gendisk->private_data = dev;
+ gendisk->driverfs_dev = &dev->sbd.core;
+ snprintf(gendisk->disk_name, sizeof(gendisk->disk_name), PS3DISK_NAME,
+ devidx+'a');
+ priv->blocking_factor = dev->blk_size >> 9;
+ set_capacity(gendisk,
+ dev->regions[dev->region_idx].size*priv->blocking_factor);
+
+ dev_info(&dev->sbd.core,
+ "%s is a %s (%llu MiB total, %lu MiB for OtherOS)\n",
+ gendisk->disk_name, priv->model, priv->raw_capacity >> 11,
+ get_capacity(gendisk) >> 11);
+
+ add_disk(gendisk);
+ return 0;
+
+fail_cleanup_queue:
+ blk_cleanup_queue(queue);
+fail_teardown:
+ ps3stor_teardown(dev);
+fail_free_bounce:
+ kfree(dev->bounce_buf);
+fail_free_priv:
+ kfree(priv);
+ ps3_system_bus_set_drvdata(_dev, NULL);
+fail:
+ mutex_lock(&ps3disk_mask_mutex);
+ __clear_bit(devidx, &ps3disk_mask);
+ mutex_unlock(&ps3disk_mask_mutex);
+ return error;
+}
+
+static int ps3disk_remove(struct ps3_system_bus_device *_dev)
+{
+ struct ps3_storage_device *dev = to_ps3_storage_device(&_dev->core);
+ struct ps3disk_private *priv = ps3_system_bus_get_drvdata(&dev->sbd);
+
+ mutex_lock(&ps3disk_mask_mutex);
+ __clear_bit(MINOR(disk_devt(priv->gendisk)) / PS3DISK_MINORS,
+ &ps3disk_mask);
+ mutex_unlock(&ps3disk_mask_mutex);
+ del_gendisk(priv->gendisk);
+ blk_cleanup_queue(priv->queue);
+ put_disk(priv->gendisk);
+ dev_notice(&dev->sbd.core, "Synchronizing disk cache\n");
+ ps3disk_sync_cache(dev);
+ ps3stor_teardown(dev);
+ kfree(dev->bounce_buf);
+ kfree(priv);
+ ps3_system_bus_set_drvdata(_dev, NULL);
+ return 0;
+}
+
+static struct ps3_system_bus_driver ps3disk = {
+ .match_id = PS3_MATCH_ID_STOR_DISK,
+ .core.name = DEVICE_NAME,
+ .core.owner = THIS_MODULE,
+ .probe = ps3disk_probe,
+ .remove = ps3disk_remove,
+ .shutdown = ps3disk_remove,
+};
+
+
+static int __init ps3disk_init(void)
+{
+ int error;
+
+ if (!firmware_has_feature(FW_FEATURE_PS3_LV1))
+ return -ENODEV;
+
+ error = register_blkdev(0, DEVICE_NAME);
+ if (error <= 0) {
+ printk(KERN_ERR "%s:%u: register_blkdev failed %d\n", __func__,
+ __LINE__, error);
+ return error;
+ }
+ ps3disk_major = error;
+
+ pr_info("%s:%u: registered block device major %d\n", __func__,
+ __LINE__, ps3disk_major);
+
+ error = ps3_system_bus_driver_register(&ps3disk);
+ if (error)
+ unregister_blkdev(ps3disk_major, DEVICE_NAME);
+
+ return error;
+}
+
+static void __exit ps3disk_exit(void)
+{
+ ps3_system_bus_driver_unregister(&ps3disk);
+ unregister_blkdev(ps3disk_major, DEVICE_NAME);
+}
+
+module_init(ps3disk_init);
+module_exit(ps3disk_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("PS3 Disk Storage Driver");
+MODULE_AUTHOR("Sony Corporation");
+MODULE_ALIAS(PS3_MODULE_ALIAS_STOR_DISK);
diff --git a/drivers/block/ps3vram.c b/drivers/block/ps3vram.c
new file mode 100644
index 00000000..b3bdb8af
--- /dev/null
+++ b/drivers/block/ps3vram.c
@@ -0,0 +1,879 @@
+/*
+ * ps3vram - Use extra PS3 video RAM as a block device.
+ *
+ * Copyright 2009 Sony Corporation
+ *
+ * Based on the MTD ps3vram driver, which is
+ * Copyright (c) 2007-2008 Jim Paris <jim@jtan.com>
+ * RSX DMA support added by Vivien Chappelier <vivien.chappelier@free.fr>
+ */
+
+#include <linux/blkdev.h>
+#include <linux/delay.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+
+#include <asm/cell-regs.h>
+#include <asm/firmware.h>
+#include <asm/lv1call.h>
+#include <asm/ps3.h>
+#include <asm/ps3gpu.h>
+
+
+#define DEVICE_NAME "ps3vram"
+
+
+#define XDR_BUF_SIZE (2 * 1024 * 1024) /* XDR buffer (must be 1MiB aligned) */
+#define XDR_IOIF 0x0c000000
+
+#define FIFO_BASE XDR_IOIF
+#define FIFO_SIZE (64 * 1024)
+
+#define DMA_PAGE_SIZE (4 * 1024)
+
+#define CACHE_PAGE_SIZE (256 * 1024)
+#define CACHE_PAGE_COUNT ((XDR_BUF_SIZE - FIFO_SIZE) / CACHE_PAGE_SIZE)
+
+#define CACHE_OFFSET CACHE_PAGE_SIZE
+#define FIFO_OFFSET 0
+
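
Plugging in the constants above as a quick check: CACHE_PAGE_COUNT = (2 MiB - 64 KiB) / 256 KiB truncates to 7 cache pages; with CACHE_OFFSET = 256 KiB those pages end exactly at the 2 MiB XDR buffer boundary (256 KiB + 7 * 256 KiB = 2 MiB), while the 64 KiB FIFO sits at offset 0 of the buffer.
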
+#define CTRL_PUT 0x10
+#define CTRL_GET 0x11
+#define CTRL_TOP 0x15
+
+#define UPLOAD_SUBCH 1
+#define DOWNLOAD_SUBCH 2
+
+#define NV_MEMORY_TO_MEMORY_FORMAT_OFFSET_IN 0x0000030c
+#define NV_MEMORY_TO_MEMORY_FORMAT_NOTIFY 0x00000104
+
+#define CACHE_PAGE_PRESENT 1
+#define CACHE_PAGE_DIRTY 2
+
+struct ps3vram_tag {
+ unsigned int address;
+ unsigned int flags;
+};
+
+struct ps3vram_cache {
+ unsigned int page_count;
+ unsigned int page_size;
+ struct ps3vram_tag *tags;
+ unsigned int hit;
+ unsigned int miss;
+};
+
+struct ps3vram_priv {
+ struct request_queue *queue;
+ struct gendisk *gendisk;
+
+ u64 size;
+
+ u64 memory_handle;
+ u64 context_handle;
+ u32 *ctrl;
+ void *reports;
+ u8 *xdr_buf;
+
+ u32 *fifo_base;
+ u32 *fifo_ptr;
+
+ struct ps3vram_cache cache;
+
+ spinlock_t lock; /* protecting list of bios */
+ struct bio_list list;
+};
+
+
+static int ps3vram_major;
+
+
+static const struct block_device_operations ps3vram_fops = {
+ .owner = THIS_MODULE,
+};
+
+
+#define DMA_NOTIFIER_HANDLE_BASE 0x66604200 /* first DMA notifier handle */
+#define DMA_NOTIFIER_OFFSET_BASE 0x1000 /* first DMA notifier offset */
+#define DMA_NOTIFIER_SIZE 0x40
+#define NOTIFIER 7 /* notifier used for completion report */
+
+static char *size = "256M";
+module_param(size, charp, 0);
+MODULE_PARM_DESC(size, "memory size");
+
+static u32 *ps3vram_get_notifier(void *reports, int notifier)
+{
+ return reports + DMA_NOTIFIER_OFFSET_BASE +
+ DMA_NOTIFIER_SIZE * notifier;
+}
+
+static void ps3vram_notifier_reset(struct ps3_system_bus_device *dev)
+{
+ struct ps3vram_priv *priv = ps3_system_bus_get_drvdata(dev);
+ u32 *notify = ps3vram_get_notifier(priv->reports, NOTIFIER);
+ int i;
+
+ for (i = 0; i < 4; i++)
+ notify[i] = 0xffffffff;
+}
+
+static int ps3vram_notifier_wait(struct ps3_system_bus_device *dev,
+ unsigned int timeout_ms)
+{
+ struct ps3vram_priv *priv = ps3_system_bus_get_drvdata(dev);
+ u32 *notify = ps3vram_get_notifier(priv->reports, NOTIFIER);
+ unsigned long timeout;
+
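+	/* busy-wait up to ~200us first, then fall back to a sleeping poll */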
+ for (timeout = 20; timeout; timeout--) {
+ if (!notify[3])
+ return 0;
+ udelay(10);
+ }
+
+ timeout = jiffies + msecs_to_jiffies(timeout_ms);
+
+ do {
+ if (!notify[3])
+ return 0;
+ msleep(1);
+ } while (time_before(jiffies, timeout));
+
+ return -ETIMEDOUT;
+}
+
+static void ps3vram_init_ring(struct ps3_system_bus_device *dev)
+{
+ struct ps3vram_priv *priv = ps3_system_bus_get_drvdata(dev);
+
+ priv->ctrl[CTRL_PUT] = FIFO_BASE + FIFO_OFFSET;
+ priv->ctrl[CTRL_GET] = FIFO_BASE + FIFO_OFFSET;
+}
+
+static int ps3vram_wait_ring(struct ps3_system_bus_device *dev,
+ unsigned int timeout_ms)
+{
+ struct ps3vram_priv *priv = ps3_system_bus_get_drvdata(dev);
+ unsigned long timeout = jiffies + msecs_to_jiffies(timeout_ms);
+
+ do {
+ if (priv->ctrl[CTRL_PUT] == priv->ctrl[CTRL_GET])
+ return 0;
+ msleep(1);
+ } while (time_before(jiffies, timeout));
+
+ dev_warn(&dev->core, "FIFO timeout (%08x/%08x/%08x)\n",
+ priv->ctrl[CTRL_PUT], priv->ctrl[CTRL_GET],
+ priv->ctrl[CTRL_TOP]);
+
+ return -ETIMEDOUT;
+}
+
+static void ps3vram_out_ring(struct ps3vram_priv *priv, u32 data)
+{
+ *(priv->fifo_ptr)++ = data;
+}
+
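+/* FIFO command header: word count in bits 31:18, subchannel in 17:13, method offset in 12:0 */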
+static void ps3vram_begin_ring(struct ps3vram_priv *priv, u32 chan, u32 tag,
+ u32 size)
+{
+ ps3vram_out_ring(priv, (size << 18) | (chan << 13) | tag);
+}
+
+static void ps3vram_rewind_ring(struct ps3_system_bus_device *dev)
+{
+ struct ps3vram_priv *priv = ps3_system_bus_get_drvdata(dev);
+ int status;
+
+ ps3vram_out_ring(priv, 0x20000000 | (FIFO_BASE + FIFO_OFFSET));
+
+ priv->ctrl[CTRL_PUT] = FIFO_BASE + FIFO_OFFSET;
+
+ /* asking the HV for a blit will kick the FIFO */
+ status = lv1_gpu_fb_blit(priv->context_handle, 0, 0, 0, 0);
+ if (status)
+ dev_err(&dev->core, "%s: lv1_gpu_fb_blit failed %d\n",
+ __func__, status);
+
+ priv->fifo_ptr = priv->fifo_base;
+}
+
+static void ps3vram_fire_ring(struct ps3_system_bus_device *dev)
+{
+ struct ps3vram_priv *priv = ps3_system_bus_get_drvdata(dev);
+ int status;
+
+ mutex_lock(&ps3_gpu_mutex);
+
+ priv->ctrl[CTRL_PUT] = FIFO_BASE + FIFO_OFFSET +
+ (priv->fifo_ptr - priv->fifo_base) * sizeof(u32);
+
+ /* asking the HV for a blit will kick the FIFO */
+ status = lv1_gpu_fb_blit(priv->context_handle, 0, 0, 0, 0);
+ if (status)
+ dev_err(&dev->core, "%s: lv1_gpu_fb_blit failed %d\n",
+ __func__, status);
+
+ if ((priv->fifo_ptr - priv->fifo_base) * sizeof(u32) >
+ FIFO_SIZE - 1024) {
+ dev_dbg(&dev->core, "FIFO full, rewinding\n");
+ ps3vram_wait_ring(dev, 200);
+ ps3vram_rewind_ring(dev);
+ }
+
+ mutex_unlock(&ps3_gpu_mutex);
+}
+
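+/* Bind the transfer objects and DMA instances to the upload and download subchannels */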
+static void ps3vram_bind(struct ps3_system_bus_device *dev)
+{
+ struct ps3vram_priv *priv = ps3_system_bus_get_drvdata(dev);
+
+ ps3vram_begin_ring(priv, UPLOAD_SUBCH, 0, 1);
+ ps3vram_out_ring(priv, 0x31337303);
+ ps3vram_begin_ring(priv, UPLOAD_SUBCH, 0x180, 3);
+ ps3vram_out_ring(priv, DMA_NOTIFIER_HANDLE_BASE + NOTIFIER);
+ ps3vram_out_ring(priv, 0xfeed0001); /* DMA system RAM instance */
+ ps3vram_out_ring(priv, 0xfeed0000); /* DMA video RAM instance */
+
+ ps3vram_begin_ring(priv, DOWNLOAD_SUBCH, 0, 1);
+ ps3vram_out_ring(priv, 0x3137c0de);
+ ps3vram_begin_ring(priv, DOWNLOAD_SUBCH, 0x180, 3);
+ ps3vram_out_ring(priv, DMA_NOTIFIER_HANDLE_BASE + NOTIFIER);
+ ps3vram_out_ring(priv, 0xfeed0000); /* DMA video RAM instance */
+ ps3vram_out_ring(priv, 0xfeed0001); /* DMA system RAM instance */
+
+ ps3vram_fire_ring(dev);
+}
+
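+/* DMA a region from the XDR buffer into video RAM and wait for the completion notifier */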
+static int ps3vram_upload(struct ps3_system_bus_device *dev,
+ unsigned int src_offset, unsigned int dst_offset,
+ int len, int count)
+{
+ struct ps3vram_priv *priv = ps3_system_bus_get_drvdata(dev);
+
+ ps3vram_begin_ring(priv, UPLOAD_SUBCH,
+ NV_MEMORY_TO_MEMORY_FORMAT_OFFSET_IN, 8);
+ ps3vram_out_ring(priv, XDR_IOIF + src_offset);
+ ps3vram_out_ring(priv, dst_offset);
+ ps3vram_out_ring(priv, len);
+ ps3vram_out_ring(priv, len);
+ ps3vram_out_ring(priv, len);
+ ps3vram_out_ring(priv, count);
+ ps3vram_out_ring(priv, (1 << 8) | 1);
+ ps3vram_out_ring(priv, 0);
+
+ ps3vram_notifier_reset(dev);
+ ps3vram_begin_ring(priv, UPLOAD_SUBCH,
+ NV_MEMORY_TO_MEMORY_FORMAT_NOTIFY, 1);
+ ps3vram_out_ring(priv, 0);
+ ps3vram_begin_ring(priv, UPLOAD_SUBCH, 0x100, 1);
+ ps3vram_out_ring(priv, 0);
+ ps3vram_fire_ring(dev);
+ if (ps3vram_notifier_wait(dev, 200) < 0) {
+ dev_warn(&dev->core, "%s: Notifier timeout\n", __func__);
+ return -1;
+ }
+
+ return 0;
+}
+
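+/* DMA a region from video RAM into the XDR buffer and wait for the completion notifier */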
+static int ps3vram_download(struct ps3_system_bus_device *dev,
+ unsigned int src_offset, unsigned int dst_offset,
+ int len, int count)
+{
+ struct ps3vram_priv *priv = ps3_system_bus_get_drvdata(dev);
+
+ ps3vram_begin_ring(priv, DOWNLOAD_SUBCH,
+ NV_MEMORY_TO_MEMORY_FORMAT_OFFSET_IN, 8);
+ ps3vram_out_ring(priv, src_offset);
+ ps3vram_out_ring(priv, XDR_IOIF + dst_offset);
+ ps3vram_out_ring(priv, len);
+ ps3vram_out_ring(priv, len);
+ ps3vram_out_ring(priv, len);
+ ps3vram_out_ring(priv, count);
+ ps3vram_out_ring(priv, (1 << 8) | 1);
+ ps3vram_out_ring(priv, 0);
+
+ ps3vram_notifier_reset(dev);
+ ps3vram_begin_ring(priv, DOWNLOAD_SUBCH,
+ NV_MEMORY_TO_MEMORY_FORMAT_NOTIFY, 1);
+ ps3vram_out_ring(priv, 0);
+ ps3vram_begin_ring(priv, DOWNLOAD_SUBCH, 0x100, 1);
+ ps3vram_out_ring(priv, 0);
+ ps3vram_fire_ring(dev);
+ if (ps3vram_notifier_wait(dev, 200) < 0) {
+ dev_warn(&dev->core, "%s: Notifier timeout\n", __func__);
+ return -1;
+ }
+
+ return 0;
+}
+
+static void ps3vram_cache_evict(struct ps3_system_bus_device *dev, int entry)
+{
+ struct ps3vram_priv *priv = ps3_system_bus_get_drvdata(dev);
+ struct ps3vram_cache *cache = &priv->cache;
+
+ if (!(cache->tags[entry].flags & CACHE_PAGE_DIRTY))
+ return;
+
+ dev_dbg(&dev->core, "Flushing %d: 0x%08x\n", entry,
+ cache->tags[entry].address);
+ if (ps3vram_upload(dev, CACHE_OFFSET + entry * cache->page_size,
+ cache->tags[entry].address, DMA_PAGE_SIZE,
+ cache->page_size / DMA_PAGE_SIZE) < 0) {
+ dev_err(&dev->core,
+			"Failed to upload from 0x%x to 0x%x size 0x%x\n",
+ entry * cache->page_size, cache->tags[entry].address,
+ cache->page_size);
+ }
+ cache->tags[entry].flags &= ~CACHE_PAGE_DIRTY;
+}
+
+static void ps3vram_cache_load(struct ps3_system_bus_device *dev, int entry,
+ unsigned int address)
+{
+ struct ps3vram_priv *priv = ps3_system_bus_get_drvdata(dev);
+ struct ps3vram_cache *cache = &priv->cache;
+
+ dev_dbg(&dev->core, "Fetching %d: 0x%08x\n", entry, address);
+ if (ps3vram_download(dev, address,
+ CACHE_OFFSET + entry * cache->page_size,
+ DMA_PAGE_SIZE,
+ cache->page_size / DMA_PAGE_SIZE) < 0) {
+ dev_err(&dev->core,
+ "Failed to download from 0x%x to 0x%x size 0x%x\n",
+ address, entry * cache->page_size, cache->page_size);
+ }
+
+ cache->tags[entry].address = address;
+ cache->tags[entry].flags |= CACHE_PAGE_PRESENT;
+}
+
+
+static void ps3vram_cache_flush(struct ps3_system_bus_device *dev)
+{
+ struct ps3vram_priv *priv = ps3_system_bus_get_drvdata(dev);
+ struct ps3vram_cache *cache = &priv->cache;
+ int i;
+
+ dev_dbg(&dev->core, "FLUSH\n");
+ for (i = 0; i < cache->page_count; i++) {
+ ps3vram_cache_evict(dev, i);
+ cache->tags[i].flags = 0;
+ }
+}
+
+static unsigned int ps3vram_cache_match(struct ps3_system_bus_device *dev,
+ loff_t address)
+{
+ struct ps3vram_priv *priv = ps3_system_bus_get_drvdata(dev);
+ struct ps3vram_cache *cache = &priv->cache;
+ unsigned int base;
+ unsigned int offset;
+ int i;
+ static int counter;
+
+ offset = (unsigned int) (address & (cache->page_size - 1));
+ base = (unsigned int) (address - offset);
+
+ /* fully associative check */
+ for (i = 0; i < cache->page_count; i++) {
+ if ((cache->tags[i].flags & CACHE_PAGE_PRESENT) &&
+ cache->tags[i].address == base) {
+ cache->hit++;
+ dev_dbg(&dev->core, "Found entry %d: 0x%08x\n", i,
+ cache->tags[i].address);
+ return i;
+ }
+ }
+
+ /* choose a random entry */
+ i = (jiffies + (counter++)) % cache->page_count;
+ dev_dbg(&dev->core, "Using entry %d\n", i);
+
+ ps3vram_cache_evict(dev, i);
+ ps3vram_cache_load(dev, i, base);
+
+ cache->miss++;
+ return i;
+}
+
+static int ps3vram_cache_init(struct ps3_system_bus_device *dev)
+{
+ struct ps3vram_priv *priv = ps3_system_bus_get_drvdata(dev);
+
+ priv->cache.page_count = CACHE_PAGE_COUNT;
+ priv->cache.page_size = CACHE_PAGE_SIZE;
+ priv->cache.tags = kzalloc(sizeof(struct ps3vram_tag) *
+ CACHE_PAGE_COUNT, GFP_KERNEL);
+ if (priv->cache.tags == NULL) {
+ dev_err(&dev->core, "Could not allocate cache tags\n");
+ return -ENOMEM;
+ }
+
+ dev_info(&dev->core, "Created ram cache: %d entries, %d KiB each\n",
+ CACHE_PAGE_COUNT, CACHE_PAGE_SIZE / 1024);
+
+ return 0;
+}
+
+static void ps3vram_cache_cleanup(struct ps3_system_bus_device *dev)
+{
+ struct ps3vram_priv *priv = ps3_system_bus_get_drvdata(dev);
+
+ ps3vram_cache_flush(dev);
+ kfree(priv->cache.tags);
+}
+
+static int ps3vram_read(struct ps3_system_bus_device *dev, loff_t from,
+ size_t len, size_t *retlen, u_char *buf)
+{
+ struct ps3vram_priv *priv = ps3_system_bus_get_drvdata(dev);
+ unsigned int cached, count;
+
+ dev_dbg(&dev->core, "%s: from=0x%08x len=0x%zx\n", __func__,
+ (unsigned int)from, len);
+
+ if (from >= priv->size)
+ return -EIO;
+
+ if (len > priv->size - from)
+ len = priv->size - from;
+
+ /* Copy from vram to buf */
+ count = len;
+ while (count) {
+ unsigned int offset, avail;
+ unsigned int entry;
+
+ offset = (unsigned int) (from & (priv->cache.page_size - 1));
+ avail = priv->cache.page_size - offset;
+
+ entry = ps3vram_cache_match(dev, from);
+ cached = CACHE_OFFSET + entry * priv->cache.page_size + offset;
+
+ dev_dbg(&dev->core, "%s: from=%08x cached=%08x offset=%08x "
+ "avail=%08x count=%08x\n", __func__,
+ (unsigned int)from, cached, offset, avail, count);
+
+ if (avail > count)
+ avail = count;
+ memcpy(buf, priv->xdr_buf + cached, avail);
+
+ buf += avail;
+ count -= avail;
+ from += avail;
+ }
+
+ *retlen = len;
+ return 0;
+}
+
+static int ps3vram_write(struct ps3_system_bus_device *dev, loff_t to,
+ size_t len, size_t *retlen, const u_char *buf)
+{
+ struct ps3vram_priv *priv = ps3_system_bus_get_drvdata(dev);
+ unsigned int cached, count;
+
+ if (to >= priv->size)
+ return -EIO;
+
+ if (len > priv->size - to)
+ len = priv->size - to;
+
+ /* Copy from buf to vram */
+ count = len;
+ while (count) {
+ unsigned int offset, avail;
+ unsigned int entry;
+
+ offset = (unsigned int) (to & (priv->cache.page_size - 1));
+ avail = priv->cache.page_size - offset;
+
+ entry = ps3vram_cache_match(dev, to);
+ cached = CACHE_OFFSET + entry * priv->cache.page_size + offset;
+
+ dev_dbg(&dev->core, "%s: to=%08x cached=%08x offset=%08x "
+ "avail=%08x count=%08x\n", __func__, (unsigned int)to,
+ cached, offset, avail, count);
+
+ if (avail > count)
+ avail = count;
+ memcpy(priv->xdr_buf + cached, buf, avail);
+
+ priv->cache.tags[entry].flags |= CACHE_PAGE_DIRTY;
+
+ buf += avail;
+ count -= avail;
+ to += avail;
+ }
+
+ *retlen = len;
+ return 0;
+}
+
+static int ps3vram_proc_show(struct seq_file *m, void *v)
+{
+ struct ps3vram_priv *priv = m->private;
+
+ seq_printf(m, "hit:%u\nmiss:%u\n", priv->cache.hit, priv->cache.miss);
+ return 0;
+}
+
+static int ps3vram_proc_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, ps3vram_proc_show, PDE(inode)->data);
+}
+
+static const struct file_operations ps3vram_proc_fops = {
+ .owner = THIS_MODULE,
+ .open = ps3vram_proc_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+static void __devinit ps3vram_proc_init(struct ps3_system_bus_device *dev)
+{
+ struct ps3vram_priv *priv = ps3_system_bus_get_drvdata(dev);
+ struct proc_dir_entry *pde;
+
+ pde = proc_create_data(DEVICE_NAME, 0444, NULL, &ps3vram_proc_fops,
+ priv);
+ if (!pde)
+ dev_warn(&dev->core, "failed to create /proc entry\n");
+}
+
+static struct bio *ps3vram_do_bio(struct ps3_system_bus_device *dev,
+ struct bio *bio)
+{
+ struct ps3vram_priv *priv = ps3_system_bus_get_drvdata(dev);
+ int write = bio_data_dir(bio) == WRITE;
+ const char *op = write ? "write" : "read";
+ loff_t offset = bio->bi_sector << 9;
+ int error = 0;
+ struct bio_vec *bvec;
+ unsigned int i;
+ struct bio *next;
+
+ bio_for_each_segment(bvec, bio, i) {
+ /* PS3 is ppc64, so we don't handle highmem */
+ char *ptr = page_address(bvec->bv_page) + bvec->bv_offset;
+ size_t len = bvec->bv_len, retlen;
+
+ dev_dbg(&dev->core, " %s %zu bytes at offset %llu\n", op,
+ len, offset);
+ if (write)
+ error = ps3vram_write(dev, offset, len, &retlen, ptr);
+ else
+ error = ps3vram_read(dev, offset, len, &retlen, ptr);
+
+ if (error) {
+ dev_err(&dev->core, "%s failed\n", op);
+ goto out;
+ }
+
+ if (retlen != len) {
+ dev_err(&dev->core, "Short %s\n", op);
+ error = -EIO;
+ goto out;
+ }
+
+ offset += len;
+ }
+
+ dev_dbg(&dev->core, "%s completed\n", op);
+
+out:
+ spin_lock_irq(&priv->lock);
+ bio_list_pop(&priv->list);
+ next = bio_list_peek(&priv->list);
+ spin_unlock_irq(&priv->lock);
+
+ bio_endio(bio, error);
+ return next;
+}
+
+static int ps3vram_make_request(struct request_queue *q, struct bio *bio)
+{
+ struct ps3_system_bus_device *dev = q->queuedata;
+ struct ps3vram_priv *priv = ps3_system_bus_get_drvdata(dev);
+ int busy;
+
+ dev_dbg(&dev->core, "%s\n", __func__);
+
+ spin_lock_irq(&priv->lock);
+ busy = !bio_list_empty(&priv->list);
+ bio_list_add(&priv->list, bio);
+ spin_unlock_irq(&priv->lock);
+
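+	/* if the list was already non-empty, another caller is draining it and will pick up our bio */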
+ if (busy)
+ return 0;
+
+ do {
+ bio = ps3vram_do_bio(dev, bio);
+ } while (bio);
+
+ return 0;
+}
+
+static int __devinit ps3vram_probe(struct ps3_system_bus_device *dev)
+{
+ struct ps3vram_priv *priv;
+ int error, status;
+ struct request_queue *queue;
+ struct gendisk *gendisk;
+ u64 ddr_size, ddr_lpar, ctrl_lpar, info_lpar, reports_lpar,
+ reports_size, xdr_lpar;
+ char *rest;
+
+ priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+ if (!priv) {
+ error = -ENOMEM;
+ goto fail;
+ }
+
+ spin_lock_init(&priv->lock);
+ bio_list_init(&priv->list);
+ ps3_system_bus_set_drvdata(dev, priv);
+
+ /* Allocate XDR buffer (1MiB aligned) */
+ priv->xdr_buf = (void *)__get_free_pages(GFP_KERNEL,
+ get_order(XDR_BUF_SIZE));
+ if (priv->xdr_buf == NULL) {
+ dev_err(&dev->core, "Could not allocate XDR buffer\n");
+ error = -ENOMEM;
+ goto fail_free_priv;
+ }
+
+	/* Put FIFO at beginning of XDR buffer */
+ priv->fifo_base = (u32 *) (priv->xdr_buf + FIFO_OFFSET);
+ priv->fifo_ptr = priv->fifo_base;
+
+ /* XXX: Need to open GPU, in case ps3fb or snd_ps3 aren't loaded */
+ if (ps3_open_hv_device(dev)) {
+ dev_err(&dev->core, "ps3_open_hv_device failed\n");
+ error = -EAGAIN;
+ goto out_free_xdr_buf;
+ }
+
+ /* Request memory */
+ status = -1;
+ ddr_size = ALIGN(memparse(size, &rest), 1024*1024);
+ if (!ddr_size) {
+ dev_err(&dev->core, "Specified size is too small\n");
+ error = -EINVAL;
+ goto out_close_gpu;
+ }
+
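+	/* retry in 1 MiB steps until the hypervisor accepts the allocation */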
+ while (ddr_size > 0) {
+ status = lv1_gpu_memory_allocate(ddr_size, 0, 0, 0, 0,
+ &priv->memory_handle,
+ &ddr_lpar);
+ if (!status)
+ break;
+ ddr_size -= 1024*1024;
+ }
+ if (status) {
+ dev_err(&dev->core, "lv1_gpu_memory_allocate failed %d\n",
+ status);
+ error = -ENOMEM;
+ goto out_close_gpu;
+ }
+
+ /* Request context */
+ status = lv1_gpu_context_allocate(priv->memory_handle, 0,
+ &priv->context_handle, &ctrl_lpar,
+ &info_lpar, &reports_lpar,
+ &reports_size);
+ if (status) {
+ dev_err(&dev->core, "lv1_gpu_context_allocate failed %d\n",
+ status);
+ error = -ENOMEM;
+ goto out_free_memory;
+ }
+
+ /* Map XDR buffer to RSX */
+ xdr_lpar = ps3_mm_phys_to_lpar(__pa(priv->xdr_buf));
+ status = lv1_gpu_context_iomap(priv->context_handle, XDR_IOIF,
+ xdr_lpar, XDR_BUF_SIZE,
+ CBE_IOPTE_PP_W | CBE_IOPTE_PP_R |
+ CBE_IOPTE_M);
+ if (status) {
+ dev_err(&dev->core, "lv1_gpu_context_iomap failed %d\n",
+ status);
+ error = -ENOMEM;
+ goto out_free_context;
+ }
+
+ priv->ctrl = ioremap(ctrl_lpar, 64 * 1024);
+ if (!priv->ctrl) {
+ dev_err(&dev->core, "ioremap CTRL failed\n");
+ error = -ENOMEM;
+ goto out_unmap_context;
+ }
+
+ priv->reports = ioremap(reports_lpar, reports_size);
+ if (!priv->reports) {
+ dev_err(&dev->core, "ioremap REPORTS failed\n");
+ error = -ENOMEM;
+ goto out_unmap_ctrl;
+ }
+
+ mutex_lock(&ps3_gpu_mutex);
+ ps3vram_init_ring(dev);
+ mutex_unlock(&ps3_gpu_mutex);
+
+ priv->size = ddr_size;
+
+ ps3vram_bind(dev);
+
+ mutex_lock(&ps3_gpu_mutex);
+ error = ps3vram_wait_ring(dev, 100);
+ mutex_unlock(&ps3_gpu_mutex);
+ if (error < 0) {
+ dev_err(&dev->core, "Failed to initialize channels\n");
+ error = -ETIMEDOUT;
+ goto out_unmap_reports;
+ }
+
+ ps3vram_cache_init(dev);
+ ps3vram_proc_init(dev);
+
+ queue = blk_alloc_queue(GFP_KERNEL);
+ if (!queue) {
+ dev_err(&dev->core, "blk_alloc_queue failed\n");
+ error = -ENOMEM;
+ goto out_cache_cleanup;
+ }
+
+ priv->queue = queue;
+ queue->queuedata = dev;
+ blk_queue_make_request(queue, ps3vram_make_request);
+ blk_queue_max_segments(queue, BLK_MAX_SEGMENTS);
+ blk_queue_max_segment_size(queue, BLK_MAX_SEGMENT_SIZE);
+ blk_queue_max_hw_sectors(queue, BLK_SAFE_MAX_SECTORS);
+
+ gendisk = alloc_disk(1);
+ if (!gendisk) {
+ dev_err(&dev->core, "alloc_disk failed\n");
+ error = -ENOMEM;
+ goto fail_cleanup_queue;
+ }
+
+ priv->gendisk = gendisk;
+ gendisk->major = ps3vram_major;
+ gendisk->first_minor = 0;
+ gendisk->fops = &ps3vram_fops;
+ gendisk->queue = queue;
+ gendisk->private_data = dev;
+ gendisk->driverfs_dev = &dev->core;
+ strlcpy(gendisk->disk_name, DEVICE_NAME, sizeof(gendisk->disk_name));
+ set_capacity(gendisk, priv->size >> 9);
+
+ dev_info(&dev->core, "%s: Using %lu MiB of GPU memory\n",
+ gendisk->disk_name, get_capacity(gendisk) >> 11);
+
+ add_disk(gendisk);
+ return 0;
+
+fail_cleanup_queue:
+ blk_cleanup_queue(queue);
+out_cache_cleanup:
+ remove_proc_entry(DEVICE_NAME, NULL);
+ ps3vram_cache_cleanup(dev);
+out_unmap_reports:
+ iounmap(priv->reports);
+out_unmap_ctrl:
+ iounmap(priv->ctrl);
+out_unmap_context:
+ lv1_gpu_context_iomap(priv->context_handle, XDR_IOIF, xdr_lpar,
+ XDR_BUF_SIZE, CBE_IOPTE_M);
+out_free_context:
+ lv1_gpu_context_free(priv->context_handle);
+out_free_memory:
+ lv1_gpu_memory_free(priv->memory_handle);
+out_close_gpu:
+ ps3_close_hv_device(dev);
+out_free_xdr_buf:
+ free_pages((unsigned long) priv->xdr_buf, get_order(XDR_BUF_SIZE));
+fail_free_priv:
+ kfree(priv);
+ ps3_system_bus_set_drvdata(dev, NULL);
+fail:
+ return error;
+}
+
+static int ps3vram_remove(struct ps3_system_bus_device *dev)
+{
+ struct ps3vram_priv *priv = ps3_system_bus_get_drvdata(dev);
+
+ del_gendisk(priv->gendisk);
+ put_disk(priv->gendisk);
+ blk_cleanup_queue(priv->queue);
+ remove_proc_entry(DEVICE_NAME, NULL);
+ ps3vram_cache_cleanup(dev);
+ iounmap(priv->reports);
+ iounmap(priv->ctrl);
+ lv1_gpu_context_iomap(priv->context_handle, XDR_IOIF,
+ ps3_mm_phys_to_lpar(__pa(priv->xdr_buf)),
+ XDR_BUF_SIZE, CBE_IOPTE_M);
+ lv1_gpu_context_free(priv->context_handle);
+ lv1_gpu_memory_free(priv->memory_handle);
+ ps3_close_hv_device(dev);
+ free_pages((unsigned long) priv->xdr_buf, get_order(XDR_BUF_SIZE));
+ kfree(priv);
+ ps3_system_bus_set_drvdata(dev, NULL);
+ return 0;
+}
+
+static struct ps3_system_bus_driver ps3vram = {
+ .match_id = PS3_MATCH_ID_GPU,
+ .match_sub_id = PS3_MATCH_SUB_ID_GPU_RAMDISK,
+ .core.name = DEVICE_NAME,
+ .core.owner = THIS_MODULE,
+ .probe = ps3vram_probe,
+ .remove = ps3vram_remove,
+ .shutdown = ps3vram_remove,
+};
+
+
+static int __init ps3vram_init(void)
+{
+ int error;
+
+ if (!firmware_has_feature(FW_FEATURE_PS3_LV1))
+ return -ENODEV;
+
+ error = register_blkdev(0, DEVICE_NAME);
+ if (error <= 0) {
+ pr_err("%s: register_blkdev failed %d\n", DEVICE_NAME, error);
+ return error;
+ }
+ ps3vram_major = error;
+
+ pr_info("%s: registered block device major %d\n", DEVICE_NAME,
+ ps3vram_major);
+
+ error = ps3_system_bus_driver_register(&ps3vram);
+ if (error)
+ unregister_blkdev(ps3vram_major, DEVICE_NAME);
+
+ return error;
+}
+
+static void __exit ps3vram_exit(void)
+{
+ ps3_system_bus_driver_unregister(&ps3vram);
+ unregister_blkdev(ps3vram_major, DEVICE_NAME);
+}
+
+module_init(ps3vram_init);
+module_exit(ps3vram_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("PS3 Video RAM Storage Driver");
+MODULE_AUTHOR("Sony Corporation");
+MODULE_ALIAS(PS3_MODULE_ALIAS_GPU_RAMDISK);
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
new file mode 100644
index 00000000..12780986
--- /dev/null
+++ b/drivers/block/rbd.c
@@ -0,0 +1,2499 @@
+/*
+ rbd.c -- Export ceph rados objects as a Linux block device
+
+
+ based on drivers/block/osdblk.c:
+
+ Copyright 2009 Red Hat, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; see the file COPYING. If not, write to
+ the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+
+
+
+ For usage instructions, please refer to:
+
+ Documentation/ABI/testing/sysfs-bus-rbd
+
+ */
+
+#include <linux/ceph/libceph.h>
+#include <linux/ceph/osd_client.h>
+#include <linux/ceph/mon_client.h>
+#include <linux/ceph/decode.h>
+#include <linux/parser.h>
+
+#include <linux/kernel.h>
+#include <linux/device.h>
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/blkdev.h>
+
+#include "rbd_types.h"
+
+#define DRV_NAME "rbd"
+#define DRV_NAME_LONG "rbd (rados block device)"
+
+#define RBD_MINORS_PER_MAJOR 256 /* max minors per blkdev */
+
+#define RBD_MAX_MD_NAME_LEN (96 + sizeof(RBD_SUFFIX))
+#define RBD_MAX_POOL_NAME_LEN 64
+#define RBD_MAX_SNAP_NAME_LEN 32
+#define RBD_MAX_OPT_LEN 1024
+
+#define RBD_SNAP_HEAD_NAME "-"
+
+#define DEV_NAME_LEN 32
+
+#define RBD_NOTIFY_TIMEOUT_DEFAULT 10
+
+/*
+ * block device image metadata (in-memory version)
+ */
+struct rbd_image_header {
+ u64 image_size;
+ char block_name[32];
+ __u8 obj_order;
+ __u8 crypt_type;
+ __u8 comp_type;
+ struct rw_semaphore snap_rwsem;
+ struct ceph_snap_context *snapc;
+ size_t snap_names_len;
+ u64 snap_seq;
+ u32 total_snaps;
+
+ char *snap_names;
+ u64 *snap_sizes;
+
+ u64 obj_version;
+};
+
+struct rbd_options {
+ int notify_timeout;
+};
+
+/*
+ * an instance of the client. multiple devices may share a client.
+ */
+struct rbd_client {
+ struct ceph_client *client;
+ struct rbd_options *rbd_opts;
+ struct kref kref;
+ struct list_head node;
+};
+
+struct rbd_req_coll;
+
+/*
+ * a single io request
+ */
+struct rbd_request {
+ struct request *rq; /* blk layer request */
+ struct bio *bio; /* cloned bio */
+ struct page **pages; /* list of used pages */
+ u64 len;
+ int coll_index;
+ struct rbd_req_coll *coll;
+};
+
+struct rbd_req_status {
+ int done;
+ int rc;
+ u64 bytes;
+};
+
+/*
+ * a collection of requests
+ */
+struct rbd_req_coll {
+ int total;
+ int num_done;
+ struct kref kref;
+ struct rbd_req_status status[0];
+};
+
+struct rbd_snap {
+ struct device dev;
+ const char *name;
+ size_t size;
+ struct list_head node;
+ u64 id;
+};
+
+/*
+ * a single device
+ */
+struct rbd_device {
+ int id; /* blkdev unique id */
+
+ int major; /* blkdev assigned major */
+ struct gendisk *disk; /* blkdev's gendisk and rq */
+ struct request_queue *q;
+
+ struct ceph_client *client;
+ struct rbd_client *rbd_client;
+
+ char name[DEV_NAME_LEN]; /* blkdev name, e.g. rbd3 */
+
+ spinlock_t lock; /* queue lock */
+
+ struct rbd_image_header header;
+ char obj[RBD_MAX_OBJ_NAME_LEN]; /* rbd image name */
+ int obj_len;
+	char obj_md_name[RBD_MAX_MD_NAME_LEN]; /* header object name */
+ char pool_name[RBD_MAX_POOL_NAME_LEN];
+ int poolid;
+
+ struct ceph_osd_event *watch_event;
+ struct ceph_osd_request *watch_request;
+
+ char snap_name[RBD_MAX_SNAP_NAME_LEN];
+ u32 cur_snap; /* index+1 of current snapshot within snap context
+ 0 - for the head */
+ int read_only;
+
+ struct list_head node;
+
+ /* list of snapshots */
+ struct list_head snaps;
+
+ /* sysfs related */
+ struct device dev;
+};
+
+static struct bus_type rbd_bus_type = {
+ .name = "rbd",
+};
+
+static spinlock_t node_lock; /* protects client get/put */
+
+static DEFINE_MUTEX(ctl_mutex); /* Serialize open/close/setup/teardown */
+static LIST_HEAD(rbd_dev_list); /* devices */
+static LIST_HEAD(rbd_client_list); /* clients */
+
+static int __rbd_init_snaps_header(struct rbd_device *rbd_dev);
+static void rbd_dev_release(struct device *dev);
+static ssize_t rbd_snap_rollback(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf,
+ size_t size);
+static ssize_t rbd_snap_add(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf,
+ size_t count);
+static void __rbd_remove_snap_dev(struct rbd_device *rbd_dev,
+				  struct rbd_snap *snap);
+
+
+static struct rbd_device *dev_to_rbd(struct device *dev)
+{
+ return container_of(dev, struct rbd_device, dev);
+}
+
+static struct device *rbd_get_dev(struct rbd_device *rbd_dev)
+{
+ return get_device(&rbd_dev->dev);
+}
+
+static void rbd_put_dev(struct rbd_device *rbd_dev)
+{
+ put_device(&rbd_dev->dev);
+}
+
+static int __rbd_update_snaps(struct rbd_device *rbd_dev);
+
+static int rbd_open(struct block_device *bdev, fmode_t mode)
+{
+ struct gendisk *disk = bdev->bd_disk;
+ struct rbd_device *rbd_dev = disk->private_data;
+
+ rbd_get_dev(rbd_dev);
+
+ set_device_ro(bdev, rbd_dev->read_only);
+
+ if ((mode & FMODE_WRITE) && rbd_dev->read_only)
+ return -EROFS;
+
+ return 0;
+}
+
+static int rbd_release(struct gendisk *disk, fmode_t mode)
+{
+ struct rbd_device *rbd_dev = disk->private_data;
+
+ rbd_put_dev(rbd_dev);
+
+ return 0;
+}
+
+static const struct block_device_operations rbd_bd_ops = {
+ .owner = THIS_MODULE,
+ .open = rbd_open,
+ .release = rbd_release,
+};
+
+/*
+ * Initialize an rbd client instance.
+ * We own *opt.
+ */
+static struct rbd_client *rbd_client_create(struct ceph_options *opt,
+ struct rbd_options *rbd_opts)
+{
+ struct rbd_client *rbdc;
+ int ret = -ENOMEM;
+
+ dout("rbd_client_create\n");
+ rbdc = kmalloc(sizeof(struct rbd_client), GFP_KERNEL);
+ if (!rbdc)
+ goto out_opt;
+
+ kref_init(&rbdc->kref);
+ INIT_LIST_HEAD(&rbdc->node);
+
+ rbdc->client = ceph_create_client(opt, rbdc);
+ if (IS_ERR(rbdc->client))
+ goto out_rbdc;
+ opt = NULL; /* Now rbdc->client is responsible for opt */
+
+ ret = ceph_open_session(rbdc->client);
+ if (ret < 0)
+ goto out_err;
+
+ rbdc->rbd_opts = rbd_opts;
+
+ spin_lock(&node_lock);
+ list_add_tail(&rbdc->node, &rbd_client_list);
+ spin_unlock(&node_lock);
+
+ dout("rbd_client_create created %p\n", rbdc);
+ return rbdc;
+
+out_err:
+ ceph_destroy_client(rbdc->client);
+out_rbdc:
+ kfree(rbdc);
+out_opt:
+ if (opt)
+ ceph_destroy_options(opt);
+ return ERR_PTR(ret);
+}
+
+/*
+ * Find a ceph client with specific addr and configuration.
+ */
+static struct rbd_client *__rbd_client_find(struct ceph_options *opt)
+{
+ struct rbd_client *client_node;
+
+ if (opt->flags & CEPH_OPT_NOSHARE)
+ return NULL;
+
+ list_for_each_entry(client_node, &rbd_client_list, node)
+ if (ceph_compare_options(opt, client_node->client) == 0)
+ return client_node;
+ return NULL;
+}
+
+/*
+ * mount options
+ */
+enum {
+ Opt_notify_timeout,
+ Opt_last_int,
+ /* int args above */
+ Opt_last_string,
+ /* string args above */
+};
+
+static match_table_t rbdopt_tokens = {
+ {Opt_notify_timeout, "notify_timeout=%d"},
+ /* int args above */
+ /* string args above */
+ {-1, NULL}
+};
+
+static int parse_rbd_opts_token(char *c, void *private)
+{
+ struct rbd_options *rbdopt = private;
+ substring_t argstr[MAX_OPT_ARGS];
+ int token, intval, ret;
+
+ token = match_token((char *)c, rbdopt_tokens, argstr);
+ if (token < 0)
+ return -EINVAL;
+
+ if (token < Opt_last_int) {
+ ret = match_int(&argstr[0], &intval);
+ if (ret < 0) {
+ pr_err("bad mount option arg (not int) "
+ "at '%s'\n", c);
+ return ret;
+ }
+ dout("got int token %d val %d\n", token, intval);
+ } else if (token > Opt_last_int && token < Opt_last_string) {
+ dout("got string token %d val %s\n", token,
+ argstr[0].from);
+ } else {
+ dout("got token %d\n", token);
+ }
+
+ switch (token) {
+ case Opt_notify_timeout:
+ rbdopt->notify_timeout = intval;
+ break;
+ default:
+ BUG_ON(token);
+ }
+ return 0;
+}
+
+/*
+ * Get a ceph client with a specific addr and configuration; create one
+ * if it does not exist.
+ */
+static int rbd_get_client(struct rbd_device *rbd_dev, const char *mon_addr,
+ char *options)
+{
+ struct rbd_client *rbdc;
+ struct ceph_options *opt;
+ int ret;
+ struct rbd_options *rbd_opts;
+
+ rbd_opts = kzalloc(sizeof(*rbd_opts), GFP_KERNEL);
+ if (!rbd_opts)
+ return -ENOMEM;
+
+ rbd_opts->notify_timeout = RBD_NOTIFY_TIMEOUT_DEFAULT;
+
+ ret = ceph_parse_options(&opt, options, mon_addr,
+ mon_addr + strlen(mon_addr), parse_rbd_opts_token, rbd_opts);
+ if (ret < 0)
+ goto done_err;
+
+ spin_lock(&node_lock);
+ rbdc = __rbd_client_find(opt);
+ if (rbdc) {
+ ceph_destroy_options(opt);
+
+ /* using an existing client */
+ kref_get(&rbdc->kref);
+ rbd_dev->rbd_client = rbdc;
+ rbd_dev->client = rbdc->client;
+ spin_unlock(&node_lock);
+ return 0;
+ }
+ spin_unlock(&node_lock);
+
+ rbdc = rbd_client_create(opt, rbd_opts);
+ if (IS_ERR(rbdc)) {
+ ret = PTR_ERR(rbdc);
+ goto done_err;
+ }
+
+ rbd_dev->rbd_client = rbdc;
+ rbd_dev->client = rbdc->client;
+ return 0;
+done_err:
+ kfree(rbd_opts);
+ return ret;
+}
+
+/*
+ * Destroy ceph client
+ */
+static void rbd_client_release(struct kref *kref)
+{
+ struct rbd_client *rbdc = container_of(kref, struct rbd_client, kref);
+
+ dout("rbd_release_client %p\n", rbdc);
+ spin_lock(&node_lock);
+ list_del(&rbdc->node);
+ spin_unlock(&node_lock);
+
+ ceph_destroy_client(rbdc->client);
+ kfree(rbdc->rbd_opts);
+ kfree(rbdc);
+}
+
+/*
+ * Drop reference to ceph client node. If it's not referenced anymore, release
+ * it.
+ */
+static void rbd_put_client(struct rbd_device *rbd_dev)
+{
+ kref_put(&rbd_dev->rbd_client->kref, rbd_client_release);
+ rbd_dev->rbd_client = NULL;
+ rbd_dev->client = NULL;
+}
+
+/*
+ * Destroy requests collection
+ */
+static void rbd_coll_release(struct kref *kref)
+{
+ struct rbd_req_coll *coll =
+ container_of(kref, struct rbd_req_coll, kref);
+
+ dout("rbd_coll_release %p\n", coll);
+ kfree(coll);
+}
+
+/*
+ * Create a new header structure, translate header format from the on-disk
+ * header.
+ */
+static int rbd_header_from_disk(struct rbd_image_header *header,
+ struct rbd_image_header_ondisk *ondisk,
+ int allocated_snaps,
+ gfp_t gfp_flags)
+{
+ int i;
+ u32 snap_count = le32_to_cpu(ondisk->snap_count);
+ int ret = -ENOMEM;
+
+ init_rwsem(&header->snap_rwsem);
+ header->snap_names_len = le64_to_cpu(ondisk->snap_names_len);
+ header->snapc = kmalloc(sizeof(struct ceph_snap_context) +
+ snap_count *
+ sizeof(struct rbd_image_snap_ondisk),
+ gfp_flags);
+ if (!header->snapc)
+ return -ENOMEM;
+ if (snap_count) {
+ header->snap_names = kmalloc(header->snap_names_len,
+ GFP_KERNEL);
+ if (!header->snap_names)
+ goto err_snapc;
+ header->snap_sizes = kmalloc(snap_count * sizeof(u64),
+ GFP_KERNEL);
+ if (!header->snap_sizes)
+ goto err_names;
+ } else {
+ header->snap_names = NULL;
+ header->snap_sizes = NULL;
+ }
+ memcpy(header->block_name, ondisk->block_name,
+ sizeof(ondisk->block_name));
+
+ header->image_size = le64_to_cpu(ondisk->image_size);
+ header->obj_order = ondisk->options.order;
+ header->crypt_type = ondisk->options.crypt_type;
+ header->comp_type = ondisk->options.comp_type;
+
+ atomic_set(&header->snapc->nref, 1);
+ header->snap_seq = le64_to_cpu(ondisk->snap_seq);
+ header->snapc->num_snaps = snap_count;
+ header->total_snaps = snap_count;
+
+ if (snap_count &&
+ allocated_snaps == snap_count) {
+ for (i = 0; i < snap_count; i++) {
+ header->snapc->snaps[i] =
+ le64_to_cpu(ondisk->snaps[i].id);
+ header->snap_sizes[i] =
+ le64_to_cpu(ondisk->snaps[i].image_size);
+ }
+
+ /* copy snapshot names */
+ memcpy(header->snap_names, &ondisk->snaps[i],
+ header->snap_names_len);
+ }
+
+ return 0;
+
+err_names:
+ kfree(header->snap_names);
+err_snapc:
+ kfree(header->snapc);
+ return ret;
+}
+
+static int snap_index(struct rbd_image_header *header, int snap_num)
+{
+ return header->total_snaps - snap_num;
+}
+
+static u64 cur_snap_id(struct rbd_device *rbd_dev)
+{
+ struct rbd_image_header *header = &rbd_dev->header;
+
+ if (!rbd_dev->cur_snap)
+ return 0;
+
+ return header->snapc->snaps[snap_index(header, rbd_dev->cur_snap)];
+}
+
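+/* look up a snapshot by walking the NUL-separated name list; returns its index */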
+static int snap_by_name(struct rbd_image_header *header, const char *snap_name,
+ u64 *seq, u64 *size)
+{
+ int i;
+ char *p = header->snap_names;
+
+ for (i = 0; i < header->total_snaps; i++, p += strlen(p) + 1) {
+ if (strcmp(snap_name, p) == 0)
+ break;
+ }
+ if (i == header->total_snaps)
+ return -ENOENT;
+ if (seq)
+ *seq = header->snapc->snaps[i];
+
+ if (size)
+ *size = header->snap_sizes[i];
+
+ return i;
+}
+
+static int rbd_header_set_snap(struct rbd_device *dev,
+ const char *snap_name,
+ u64 *size)
+{
+ struct rbd_image_header *header = &dev->header;
+ struct ceph_snap_context *snapc = header->snapc;
+ int ret = -ENOENT;
+
+ down_write(&header->snap_rwsem);
+
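+	/* an empty name or the reserved head name maps the writable head of the image */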
+ if (!snap_name ||
+ !*snap_name ||
+ strcmp(snap_name, "-") == 0 ||
+ strcmp(snap_name, RBD_SNAP_HEAD_NAME) == 0) {
+ if (header->total_snaps)
+ snapc->seq = header->snap_seq;
+ else
+ snapc->seq = 0;
+ dev->cur_snap = 0;
+ dev->read_only = 0;
+ if (size)
+ *size = header->image_size;
+ } else {
+ ret = snap_by_name(header, snap_name, &snapc->seq, size);
+ if (ret < 0)
+ goto done;
+
+ dev->cur_snap = header->total_snaps - ret;
+ dev->read_only = 1;
+ }
+
+ ret = 0;
+done:
+ up_write(&header->snap_rwsem);
+ return ret;
+}
+
+static void rbd_header_free(struct rbd_image_header *header)
+{
+ kfree(header->snapc);
+ kfree(header->snap_names);
+ kfree(header->snap_sizes);
+}
+
+/*
+ * get the actual striped segment name, offset and length
+ */
+static u64 rbd_get_segment(struct rbd_image_header *header,
+ const char *block_name,
+ u64 ofs, u64 len,
+ char *seg_name, u64 *segofs)
+{
+ u64 seg = ofs >> header->obj_order;
+
+ if (seg_name)
+ snprintf(seg_name, RBD_MAX_SEG_NAME_LEN,
+ "%s.%012llx", block_name, seg);
+
+ ofs = ofs & ((1 << header->obj_order) - 1);
+ len = min_t(u64, len, (1 << header->obj_order) - ofs);
+
+ if (segofs)
+ *segofs = ofs;
+
+ return len;
+}
+
+static int rbd_get_num_segments(struct rbd_image_header *header,
+ u64 ofs, u64 len)
+{
+ u64 start_seg = ofs >> header->obj_order;
+ u64 end_seg = (ofs + len - 1) >> header->obj_order;
+ return end_seg - start_seg + 1;
+}
+
+/*
+ * bio helpers
+ */
+
+static void bio_chain_put(struct bio *chain)
+{
+ struct bio *tmp;
+
+ while (chain) {
+ tmp = chain;
+ chain = chain->bi_next;
+ bio_put(tmp);
+ }
+}
+
+/*
+ * zeros a bio chain, starting at specific offset
+ */
+static void zero_bio_chain(struct bio *chain, int start_ofs)
+{
+ struct bio_vec *bv;
+ unsigned long flags;
+ void *buf;
+ int i;
+ int pos = 0;
+
+ while (chain) {
+ bio_for_each_segment(bv, chain, i) {
+ if (pos + bv->bv_len > start_ofs) {
+ int remainder = max(start_ofs - pos, 0);
+ buf = bvec_kmap_irq(bv, &flags);
+ memset(buf + remainder, 0,
+ bv->bv_len - remainder);
+ bvec_kunmap_irq(buf, &flags);
+ }
+ pos += bv->bv_len;
+ }
+
+ chain = chain->bi_next;
+ }
+}
+
+/*
+ * bio_chain_clone - clone a chain of bios up to a certain length.
+ * May return a bio_pair that will need to be released.
+ */
+static struct bio *bio_chain_clone(struct bio **old, struct bio **next,
+ struct bio_pair **bp,
+ int len, gfp_t gfpmask)
+{
+ struct bio *tmp, *old_chain = *old, *new_chain = NULL, *tail = NULL;
+ int total = 0;
+
+ if (*bp) {
+ bio_pair_release(*bp);
+ *bp = NULL;
+ }
+
+ while (old_chain && (total < len)) {
+ tmp = bio_kmalloc(gfpmask, old_chain->bi_max_vecs);
+ if (!tmp)
+ goto err_out;
+
+ if (total + old_chain->bi_size > len) {
+ struct bio_pair *bp;
+
+ /*
+			 * this split can only happen with a single-paged bio,
+			 * bio_split will BUG_ON if this is not the case
+ */
+ dout("bio_chain_clone split! total=%d remaining=%d"
+ "bi_size=%d\n",
+ (int)total, (int)len-total,
+ (int)old_chain->bi_size);
+
+ /* split the bio. We'll release it either in the next
+ call, or it will have to be released outside */
+ bp = bio_split(old_chain, (len - total) / 512ULL);
+ if (!bp)
+ goto err_out;
+
+ __bio_clone(tmp, &bp->bio1);
+
+ *next = &bp->bio2;
+ } else {
+ __bio_clone(tmp, old_chain);
+ *next = old_chain->bi_next;
+ }
+
+ tmp->bi_bdev = NULL;
+ gfpmask &= ~__GFP_WAIT;
+ tmp->bi_next = NULL;
+
+ if (!new_chain) {
+ new_chain = tail = tmp;
+ } else {
+ tail->bi_next = tmp;
+ tail = tmp;
+ }
+ old_chain = old_chain->bi_next;
+
+ total += tmp->bi_size;
+ }
+
+ BUG_ON(total < len);
+
+ if (tail)
+ tail->bi_next = NULL;
+
+ *old = old_chain;
+
+ return new_chain;
+
+err_out:
+ dout("bio_chain_clone with err\n");
+ bio_chain_put(new_chain);
+ return NULL;
+}
+
+/*
+ * helpers for osd request op vectors.
+ */
+static int rbd_create_rw_ops(struct ceph_osd_req_op **ops,
+ int num_ops,
+ int opcode,
+ u32 payload_len)
+{
+ *ops = kzalloc(sizeof(struct ceph_osd_req_op) * (num_ops + 1),
+ GFP_NOIO);
+ if (!*ops)
+ return -ENOMEM;
+ (*ops)[0].op = opcode;
+ /*
+ * op extent offset and length will be set later on
+ * in calc_raw_layout()
+ */
+ (*ops)[0].payload_len = payload_len;
+ return 0;
+}
+
+static void rbd_destroy_ops(struct ceph_osd_req_op *ops)
+{
+ kfree(ops);
+}
+
+static void rbd_coll_end_req_index(struct request *rq,
+ struct rbd_req_coll *coll,
+ int index,
+ int ret, u64 len)
+{
+ struct request_queue *q;
+ int min, max, i;
+
+ dout("rbd_coll_end_req_index %p index %d ret %d len %lld\n",
+ coll, index, ret, len);
+
+ if (!rq)
+ return;
+
+ if (!coll) {
+ blk_end_request(rq, ret, len);
+ return;
+ }
+
+ q = rq->q;
+
+ spin_lock_irq(q->queue_lock);
+ coll->status[index].done = 1;
+ coll->status[index].rc = ret;
+ coll->status[index].bytes = len;
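+	/* end requests in submission order: only a contiguous run of finished entries is completed */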
+ max = min = coll->num_done;
+ while (max < coll->total && coll->status[max].done)
+ max++;
+
+	for (i = min; i < max; i++) {
+ __blk_end_request(rq, coll->status[i].rc,
+ coll->status[i].bytes);
+ coll->num_done++;
+ kref_put(&coll->kref, rbd_coll_release);
+ }
+ spin_unlock_irq(q->queue_lock);
+}
+
+static void rbd_coll_end_req(struct rbd_request *req,
+ int ret, u64 len)
+{
+ rbd_coll_end_req_index(req->rq, req->coll, req->coll_index, ret, len);
+}
+
+/*
+ * Send ceph osd request
+ */
+static int rbd_do_request(struct request *rq,
+ struct rbd_device *dev,
+ struct ceph_snap_context *snapc,
+ u64 snapid,
+ const char *obj, u64 ofs, u64 len,
+ struct bio *bio,
+ struct page **pages,
+ int num_pages,
+ int flags,
+ struct ceph_osd_req_op *ops,
+ int num_reply,
+ struct rbd_req_coll *coll,
+ int coll_index,
+ void (*rbd_cb)(struct ceph_osd_request *req,
+ struct ceph_msg *msg),
+ struct ceph_osd_request **linger_req,
+ u64 *ver)
+{
+ struct ceph_osd_request *req;
+ struct ceph_file_layout *layout;
+ int ret;
+ u64 bno;
+ struct timespec mtime = CURRENT_TIME;
+ struct rbd_request *req_data;
+ struct ceph_osd_request_head *reqhead;
+ struct rbd_image_header *header = &dev->header;
+
+ req_data = kzalloc(sizeof(*req_data), GFP_NOIO);
+ if (!req_data) {
+ if (coll)
+ rbd_coll_end_req_index(rq, coll, coll_index,
+ -ENOMEM, len);
+ return -ENOMEM;
+ }
+
+ if (coll) {
+ req_data->coll = coll;
+ req_data->coll_index = coll_index;
+ }
+
+	dout("rbd_do_request obj=%s ofs=%lld len=%lld\n", obj, ofs, len);
+
+ down_read(&header->snap_rwsem);
+
+ req = ceph_osdc_alloc_request(&dev->client->osdc, flags,
+ snapc,
+ ops,
+ false,
+ GFP_NOIO, pages, bio);
+ if (!req) {
+ up_read(&header->snap_rwsem);
+ ret = -ENOMEM;
+ goto done_pages;
+ }
+
+ req->r_callback = rbd_cb;
+
+ req_data->rq = rq;
+ req_data->bio = bio;
+ req_data->pages = pages;
+ req_data->len = len;
+
+ req->r_priv = req_data;
+
+ reqhead = req->r_request->front.iov_base;
+ reqhead->snapid = cpu_to_le64(CEPH_NOSNAP);
+
+ strncpy(req->r_oid, obj, sizeof(req->r_oid));
+ req->r_oid_len = strlen(req->r_oid);
+
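+	/* each rbd object is a single stripe of 1 << RBD_MAX_OBJ_ORDER bytes in the device's pool */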
+ layout = &req->r_file_layout;
+ memset(layout, 0, sizeof(*layout));
+ layout->fl_stripe_unit = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER);
+ layout->fl_stripe_count = cpu_to_le32(1);
+ layout->fl_object_size = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER);
+ layout->fl_pg_preferred = cpu_to_le32(-1);
+ layout->fl_pg_pool = cpu_to_le32(dev->poolid);
+ ceph_calc_raw_layout(&dev->client->osdc, layout, snapid,
+ ofs, &len, &bno, req, ops);
+
+ ceph_osdc_build_request(req, ofs, &len,
+ ops,
+ snapc,
+ &mtime,
+ req->r_oid, req->r_oid_len);
+ up_read(&header->snap_rwsem);
+
+ if (linger_req) {
+ ceph_osdc_set_request_linger(&dev->client->osdc, req);
+ *linger_req = req;
+ }
+
+ ret = ceph_osdc_start_request(&dev->client->osdc, req, false);
+ if (ret < 0)
+ goto done_err;
+
+ if (!rbd_cb) {
+ ret = ceph_osdc_wait_request(&dev->client->osdc, req);
+ if (ver)
+ *ver = le64_to_cpu(req->r_reassert_version.version);
+ dout("reassert_ver=%lld\n",
+ le64_to_cpu(req->r_reassert_version.version));
+ ceph_osdc_put_request(req);
+ }
+ return ret;
+
+done_err:
+ bio_chain_put(req_data->bio);
+ ceph_osdc_put_request(req);
+done_pages:
+ rbd_coll_end_req(req_data, ret, len);
+ kfree(req_data);
+ return ret;
+}
+
+/*
+ * Ceph osd op callback
+ */
+static void rbd_req_cb(struct ceph_osd_request *req, struct ceph_msg *msg)
+{
+ struct rbd_request *req_data = req->r_priv;
+ struct ceph_osd_reply_head *replyhead;
+ struct ceph_osd_op *op;
+ __s32 rc;
+ u64 bytes;
+ int read_op;
+
+ /* parse reply */
+ replyhead = msg->front.iov_base;
+ WARN_ON(le32_to_cpu(replyhead->num_ops) == 0);
+ op = (void *)(replyhead + 1);
+ rc = le32_to_cpu(replyhead->result);
+ bytes = le64_to_cpu(op->extent.length);
+ read_op = (le32_to_cpu(op->op) == CEPH_OSD_OP_READ);
+
+ dout("rbd_req_cb bytes=%lld readop=%d rc=%d\n", bytes, read_op, rc);
+
+ if (rc == -ENOENT && read_op) {
+ zero_bio_chain(req_data->bio, 0);
+ rc = 0;
+ } else if (rc == 0 && read_op && bytes < req_data->len) {
+ zero_bio_chain(req_data->bio, bytes);
+ bytes = req_data->len;
+ }
+
+ rbd_coll_end_req(req_data, rc, bytes);
+
+ if (req_data->bio)
+ bio_chain_put(req_data->bio);
+
+ ceph_osdc_put_request(req);
+ kfree(req_data);
+}
+
+static void rbd_simple_req_cb(struct ceph_osd_request *req, struct ceph_msg *msg)
+{
+ ceph_osdc_put_request(req);
+}
+
+/*
+ * Do a synchronous ceph osd operation
+ */
+static int rbd_req_sync_op(struct rbd_device *dev,
+ struct ceph_snap_context *snapc,
+ u64 snapid,
+ int opcode,
+ int flags,
+ struct ceph_osd_req_op *orig_ops,
+ int num_reply,
+ const char *obj,
+ u64 ofs, u64 len,
+ char *buf,
+ struct ceph_osd_request **linger_req,
+ u64 *ver)
+{
+ int ret;
+ struct page **pages;
+ int num_pages;
+ struct ceph_osd_req_op *ops = orig_ops;
+ u32 payload_len;
+
+	num_pages = calc_pages_for(ofs, len);
+ pages = ceph_alloc_page_vector(num_pages, GFP_KERNEL);
+ if (IS_ERR(pages))
+ return PTR_ERR(pages);
+
+ if (!orig_ops) {
+ payload_len = (flags & CEPH_OSD_FLAG_WRITE ? len : 0);
+ ret = rbd_create_rw_ops(&ops, 1, opcode, payload_len);
+ if (ret < 0)
+ goto done;
+
+ if ((flags & CEPH_OSD_FLAG_WRITE) && buf) {
+ ret = ceph_copy_to_page_vector(pages, buf, ofs, len);
+ if (ret < 0)
+ goto done_ops;
+ }
+ }
+
+ ret = rbd_do_request(NULL, dev, snapc, snapid,
+ obj, ofs, len, NULL,
+ pages, num_pages,
+ flags,
+ ops,
+ 2,
+ NULL, 0,
+ NULL,
+ linger_req, ver);
+ if (ret < 0)
+ goto done_ops;
+
+ if ((flags & CEPH_OSD_FLAG_READ) && buf)
+ ret = ceph_copy_from_page_vector(pages, buf, ofs, ret);
+
+done_ops:
+ if (!orig_ops)
+ rbd_destroy_ops(ops);
+done:
+ ceph_release_page_vector(pages, num_pages);
+ return ret;
+}
+
+/*
+ * Do an asynchronous ceph osd operation
+ */
+static int rbd_do_op(struct request *rq,
+			 struct rbd_device *rbd_dev,
+ struct ceph_snap_context *snapc,
+ u64 snapid,
+ int opcode, int flags, int num_reply,
+ u64 ofs, u64 len,
+ struct bio *bio,
+ struct rbd_req_coll *coll,
+ int coll_index)
+{
+ char *seg_name;
+ u64 seg_ofs;
+ u64 seg_len;
+ int ret;
+ struct ceph_osd_req_op *ops;
+ u32 payload_len;
+
+ seg_name = kmalloc(RBD_MAX_SEG_NAME_LEN + 1, GFP_NOIO);
+ if (!seg_name)
+ return -ENOMEM;
+
+ seg_len = rbd_get_segment(&rbd_dev->header,
+ rbd_dev->header.block_name,
+ ofs, len,
+ seg_name, &seg_ofs);
+
+ payload_len = (flags & CEPH_OSD_FLAG_WRITE ? seg_len : 0);
+
+ ret = rbd_create_rw_ops(&ops, 1, opcode, payload_len);
+ if (ret < 0)
+ goto done;
+
+ /* we've taken care of segment sizes earlier when we
+ cloned the bios. We should never have a segment
+ truncated at this point */
+ BUG_ON(seg_len < len);
+
+ ret = rbd_do_request(rq, rbd_dev, snapc, snapid,
+ seg_name, seg_ofs, seg_len,
+ bio,
+ NULL, 0,
+ flags,
+ ops,
+ num_reply,
+ coll, coll_index,
+ rbd_req_cb, 0, NULL);
+
+ rbd_destroy_ops(ops);
+done:
+ kfree(seg_name);
+ return ret;
+}
+
+/*
+ * Request async osd write
+ */
+static int rbd_req_write(struct request *rq,
+ struct rbd_device *rbd_dev,
+ struct ceph_snap_context *snapc,
+ u64 ofs, u64 len,
+ struct bio *bio,
+ struct rbd_req_coll *coll,
+ int coll_index)
+{
+ return rbd_do_op(rq, rbd_dev, snapc, CEPH_NOSNAP,
+ CEPH_OSD_OP_WRITE,
+ CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK,
+ 2,
+ ofs, len, bio, coll, coll_index);
+}
+
+/*
+ * Request async osd read
+ */
+static int rbd_req_read(struct request *rq,
+ struct rbd_device *rbd_dev,
+ u64 snapid,
+ u64 ofs, u64 len,
+ struct bio *bio,
+ struct rbd_req_coll *coll,
+ int coll_index)
+{
+ return rbd_do_op(rq, rbd_dev, NULL,
+ (snapid ? snapid : CEPH_NOSNAP),
+ CEPH_OSD_OP_READ,
+ CEPH_OSD_FLAG_READ,
+ 2,
+ ofs, len, bio, coll, coll_index);
+}
+
+/*
+ * Request sync osd read
+ */
+static int rbd_req_sync_read(struct rbd_device *dev,
+ struct ceph_snap_context *snapc,
+ u64 snapid,
+ const char *obj,
+ u64 ofs, u64 len,
+ char *buf,
+ u64 *ver)
+{
+ return rbd_req_sync_op(dev, NULL,
+ (snapid ? snapid : CEPH_NOSNAP),
+ CEPH_OSD_OP_READ,
+ CEPH_OSD_FLAG_READ,
+ NULL,
+ 1, obj, ofs, len, buf, NULL, ver);
+}
+
+/*
+ * Request sync osd notify ack
+ */
+static int rbd_req_sync_notify_ack(struct rbd_device *dev,
+ u64 ver,
+ u64 notify_id,
+ const char *obj)
+{
+ struct ceph_osd_req_op *ops;
+ struct page **pages = NULL;
+ int ret;
+
+ ret = rbd_create_rw_ops(&ops, 1, CEPH_OSD_OP_NOTIFY_ACK, 0);
+ if (ret < 0)
+ return ret;
+
+ ops[0].watch.ver = cpu_to_le64(dev->header.obj_version);
+ ops[0].watch.cookie = notify_id;
+ ops[0].watch.flag = 0;
+
+ ret = rbd_do_request(NULL, dev, NULL, CEPH_NOSNAP,
+ obj, 0, 0, NULL,
+ pages, 0,
+ CEPH_OSD_FLAG_READ,
+ ops,
+ 1,
+ NULL, 0,
+ rbd_simple_req_cb, 0, NULL);
+
+ rbd_destroy_ops(ops);
+ return ret;
+}
+
+static void rbd_watch_cb(u64 ver, u64 notify_id, u8 opcode, void *data)
+{
+ struct rbd_device *dev = (struct rbd_device *)data;
+ int rc;
+
+ if (!dev)
+ return;
+
+ dout("rbd_watch_cb %s notify_id=%lld opcode=%d\n", dev->obj_md_name,
+ notify_id, (int)opcode);
+ mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
+ rc = __rbd_update_snaps(dev);
+ mutex_unlock(&ctl_mutex);
+ if (rc)
+ pr_warning(DRV_NAME "%d got notification but failed to update"
+ " snaps: %d\n", dev->major, rc);
+
+ rbd_req_sync_notify_ack(dev, ver, notify_id, dev->obj_md_name);
+}
+
+/*
+ * Request sync osd watch
+ */
+static int rbd_req_sync_watch(struct rbd_device *dev,
+ const char *obj,
+ u64 ver)
+{
+ struct ceph_osd_req_op *ops;
+ struct ceph_osd_client *osdc = &dev->client->osdc;
+
+ int ret = rbd_create_rw_ops(&ops, 1, CEPH_OSD_OP_WATCH, 0);
+ if (ret < 0)
+ return ret;
+
+ ret = ceph_osdc_create_event(osdc, rbd_watch_cb, 0,
+ (void *)dev, &dev->watch_event);
+ if (ret < 0)
+ goto fail;
+
+ ops[0].watch.ver = cpu_to_le64(ver);
+ ops[0].watch.cookie = cpu_to_le64(dev->watch_event->cookie);
+ ops[0].watch.flag = 1;
+
+ ret = rbd_req_sync_op(dev, NULL,
+ CEPH_NOSNAP,
+ 0,
+ CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK,
+ ops,
+ 1, obj, 0, 0, NULL,
+ &dev->watch_request, NULL);
+
+ if (ret < 0)
+ goto fail_event;
+
+ rbd_destroy_ops(ops);
+ return 0;
+
+fail_event:
+ ceph_osdc_cancel_event(dev->watch_event);
+ dev->watch_event = NULL;
+fail:
+ rbd_destroy_ops(ops);
+ return ret;
+}
+
+struct rbd_notify_info {
+ struct rbd_device *dev;
+};
+
+static void rbd_notify_cb(u64 ver, u64 notify_id, u8 opcode, void *data)
+{
+ struct rbd_device *dev = (struct rbd_device *)data;
+ if (!dev)
+ return;
+
+ dout("rbd_notify_cb %s notify_id=%lld opcode=%d\n", dev->obj_md_name,
+ notify_id, (int)opcode);
+}
+
+/*
+ * Request sync osd notify
+ */
+static int rbd_req_sync_notify(struct rbd_device *dev,
+ const char *obj)
+{
+ struct ceph_osd_req_op *ops;
+ struct ceph_osd_client *osdc = &dev->client->osdc;
+ struct ceph_osd_event *event;
+ struct rbd_notify_info info;
+ int payload_len = sizeof(u32) + sizeof(u32);
+ int ret;
+
+ ret = rbd_create_rw_ops(&ops, 1, CEPH_OSD_OP_NOTIFY, payload_len);
+ if (ret < 0)
+ return ret;
+
+ info.dev = dev;
+
+ ret = ceph_osdc_create_event(osdc, rbd_notify_cb, 1,
+ (void *)&info, &event);
+ if (ret < 0)
+ goto fail;
+
+ ops[0].watch.ver = 1;
+ ops[0].watch.flag = 1;
+ ops[0].watch.cookie = event->cookie;
+ ops[0].watch.prot_ver = RADOS_NOTIFY_VER;
+ ops[0].watch.timeout = 12;
+
+ ret = rbd_req_sync_op(dev, NULL,
+ CEPH_NOSNAP,
+ 0,
+ CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK,
+ ops,
+ 1, obj, 0, 0, NULL, NULL, NULL);
+ if (ret < 0)
+ goto fail_event;
+
+ ret = ceph_osdc_wait_event(event, CEPH_OSD_TIMEOUT_DEFAULT);
+ dout("ceph_osdc_wait_event returned %d\n", ret);
+ rbd_destroy_ops(ops);
+ return 0;
+
+fail_event:
+ ceph_osdc_cancel_event(event);
+fail:
+ rbd_destroy_ops(ops);
+ return ret;
+}
+
+/*
+ * Request sync osd rollback
+ */
+static int rbd_req_sync_rollback_obj(struct rbd_device *dev,
+ u64 snapid,
+ const char *obj)
+{
+ struct ceph_osd_req_op *ops;
+ int ret = rbd_create_rw_ops(&ops, 1, CEPH_OSD_OP_ROLLBACK, 0);
+ if (ret < 0)
+ return ret;
+
+ ops[0].snap.snapid = snapid;
+
+ ret = rbd_req_sync_op(dev, NULL,
+ CEPH_NOSNAP,
+ 0,
+ CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK,
+ ops,
+ 1, obj, 0, 0, NULL, NULL, NULL);
+
+ rbd_destroy_ops(ops);
+
+ return ret;
+}
+
+/*
+ * Request sync osd class method call
+ */
+static int rbd_req_sync_exec(struct rbd_device *dev,
+ const char *obj,
+ const char *cls,
+ const char *method,
+ const char *data,
+ int len,
+ u64 *ver)
+{
+ struct ceph_osd_req_op *ops;
+ int cls_len = strlen(cls);
+ int method_len = strlen(method);
+ int ret = rbd_create_rw_ops(&ops, 1, CEPH_OSD_OP_CALL,
+ cls_len + method_len + len);
+ if (ret < 0)
+ return ret;
+
+ ops[0].cls.class_name = cls;
+ ops[0].cls.class_len = (__u8)cls_len;
+ ops[0].cls.method_name = method;
+ ops[0].cls.method_len = (__u8)method_len;
+ ops[0].cls.argc = 0;
+ ops[0].cls.indata = data;
+ ops[0].cls.indata_len = len;
+
+ ret = rbd_req_sync_op(dev, NULL,
+ CEPH_NOSNAP,
+ 0,
+ CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK,
+ ops,
+ 1, obj, 0, 0, NULL, NULL, ver);
+
+ rbd_destroy_ops(ops);
+
+ dout("cls_exec returned %d\n", ret);
+ return ret;
+}
+
+static struct rbd_req_coll *rbd_alloc_coll(int num_reqs)
+{
+ struct rbd_req_coll *coll =
+ kzalloc(sizeof(struct rbd_req_coll) +
+ sizeof(struct rbd_req_status) * num_reqs,
+ GFP_ATOMIC);
+
+ if (!coll)
+ return NULL;
+ coll->total = num_reqs;
+ kref_init(&coll->kref);
+ return coll;
+}
+
+/*
+ * block device queue callback
+ */
+static void rbd_rq_fn(struct request_queue *q)
+{
+ struct rbd_device *rbd_dev = q->queuedata;
+ struct request *rq;
+ struct bio_pair *bp = NULL;
+
+ rq = blk_fetch_request(q);
+
+ while (1) {
+ struct bio *bio;
+ struct bio *rq_bio, *next_bio = NULL;
+ bool do_write;
+ int size, op_size = 0;
+ u64 ofs;
+ int num_segs, cur_seg = 0;
+ struct rbd_req_coll *coll;
+
+ /* peek at request from block layer */
+ if (!rq)
+ break;
+
+ dout("fetched request\n");
+
+ /* filter out block requests we don't understand */
+		if (rq->cmd_type != REQ_TYPE_FS) {
+ __blk_end_request_all(rq, 0);
+ goto next;
+ }
+
+ /* deduce our operation (read, write) */
+ do_write = (rq_data_dir(rq) == WRITE);
+
+ size = blk_rq_bytes(rq);
+ ofs = blk_rq_pos(rq) * 512ULL;
+ rq_bio = rq->bio;
+ if (do_write && rbd_dev->read_only) {
+ __blk_end_request_all(rq, -EROFS);
+ goto next;
+ }
+
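+		/* drop the queue lock while issuing (possibly blocking) OSD requests */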
+ spin_unlock_irq(q->queue_lock);
+
+ dout("%s 0x%x bytes at 0x%llx\n",
+ do_write ? "write" : "read",
+ size, blk_rq_pos(rq) * 512ULL);
+
+ num_segs = rbd_get_num_segments(&rbd_dev->header, ofs, size);
+ coll = rbd_alloc_coll(num_segs);
+ if (!coll) {
+ spin_lock_irq(q->queue_lock);
+ __blk_end_request_all(rq, -ENOMEM);
+ goto next;
+ }
+
+ do {
+ /* a bio clone to be passed down to OSD req */
+ dout("rq->bio->bi_vcnt=%d\n", rq->bio->bi_vcnt);
+ op_size = rbd_get_segment(&rbd_dev->header,
+ rbd_dev->header.block_name,
+ ofs, size,
+ NULL, NULL);
+ kref_get(&coll->kref);
+ bio = bio_chain_clone(&rq_bio, &next_bio, &bp,
+ op_size, GFP_ATOMIC);
+ if (!bio) {
+ rbd_coll_end_req_index(rq, coll, cur_seg,
+ -ENOMEM, op_size);
+ goto next_seg;
+ }
+
+
+ /* init OSD command: write or read */
+ if (do_write)
+ rbd_req_write(rq, rbd_dev,
+ rbd_dev->header.snapc,
+ ofs,
+ op_size, bio,
+ coll, cur_seg);
+ else
+ rbd_req_read(rq, rbd_dev,
+ cur_snap_id(rbd_dev),
+ ofs,
+ op_size, bio,
+ coll, cur_seg);
+
+next_seg:
+ size -= op_size;
+ ofs += op_size;
+
+ cur_seg++;
+ rq_bio = next_bio;
+ } while (size > 0);
+ kref_put(&coll->kref, rbd_coll_release);
+
+ if (bp)
+ bio_pair_release(bp);
+ spin_lock_irq(q->queue_lock);
+next:
+ rq = blk_fetch_request(q);
+ }
+}
+
+/*
+ * a queue callback. Makes sure that we don't create a bio that spans across
+ * multiple osd objects. One exception would be single-page bios,
+ * which we handle later in bio_chain_clone
+ */
+static int rbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bmd,
+ struct bio_vec *bvec)
+{
+ struct rbd_device *rbd_dev = q->queuedata;
+ unsigned int chunk_sectors = 1 << (rbd_dev->header.obj_order - 9);
+ sector_t sector = bmd->bi_sector + get_start_sect(bmd->bi_bdev);
+ unsigned int bio_sectors = bmd->bi_size >> 9;
+ int max;
+
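+	/* bytes left in the current object after the sectors already in this bio */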
+ max = (chunk_sectors - ((sector & (chunk_sectors - 1))
+ + bio_sectors)) << 9;
+ if (max < 0)
+ max = 0; /* bio_add cannot handle a negative return */
+ if (max <= bvec->bv_len && bio_sectors == 0)
+ return bvec->bv_len;
+ return max;
+}
+
+static void rbd_free_disk(struct rbd_device *rbd_dev)
+{
+ struct gendisk *disk = rbd_dev->disk;
+
+ if (!disk)
+ return;
+
+ rbd_header_free(&rbd_dev->header);
+
+ if (disk->flags & GENHD_FL_UP)
+ del_gendisk(disk);
+ if (disk->queue)
+ blk_cleanup_queue(disk->queue);
+ put_disk(disk);
+}
+
+/*
+ * reload the ondisk header
+ */
+static int rbd_read_header(struct rbd_device *rbd_dev,
+ struct rbd_image_header *header)
+{
+ ssize_t rc;
+ struct rbd_image_header_ondisk *dh;
+ int snap_count = 0;
+ u64 snap_names_len = 0;
+ u64 ver;
+
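+	/* re-read with a larger buffer if the snapshot count changed while we were reading */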
+ while (1) {
+ int len = sizeof(*dh) +
+ snap_count * sizeof(struct rbd_image_snap_ondisk) +
+ snap_names_len;
+
+ rc = -ENOMEM;
+ dh = kmalloc(len, GFP_KERNEL);
+ if (!dh)
+ return -ENOMEM;
+
+ rc = rbd_req_sync_read(rbd_dev,
+ NULL, CEPH_NOSNAP,
+ rbd_dev->obj_md_name,
+ 0, len,
+ (char *)dh, &ver);
+ if (rc < 0)
+ goto out_dh;
+
+ rc = rbd_header_from_disk(header, dh, snap_count, GFP_KERNEL);
+ if (rc < 0)
+ goto out_dh;
+
+ if (snap_count != header->total_snaps) {
+ snap_count = header->total_snaps;
+ snap_names_len = header->snap_names_len;
+ rbd_header_free(header);
+ kfree(dh);
+ continue;
+ }
+ break;
+ }
+ header->obj_version = ver;
+
+out_dh:
+ kfree(dh);
+ return rc;
+}
+
+/*
+ * create a snapshot
+ */
+static int rbd_header_add_snap(struct rbd_device *dev,
+ const char *snap_name,
+ gfp_t gfp_flags)
+{
+ int name_len = strlen(snap_name);
+ u64 new_snapid;
+ int ret;
+ void *data, *p, *e;
+ u64 ver;
+
+ /* we should create a snapshot only if we're pointing at the head */
+ if (dev->cur_snap)
+ return -EINVAL;
+
+ ret = ceph_monc_create_snapid(&dev->client->monc, dev->poolid,
+ &new_snapid);
+ dout("created snapid=%lld\n", new_snapid);
+ if (ret < 0)
+ return ret;
+
+ data = kmalloc(name_len + 16, gfp_flags);
+ if (!data)
+ return -ENOMEM;
+
+ p = data;
+ e = data + name_len + 16;
+
+ ceph_encode_string_safe(&p, e, snap_name, name_len, bad);
+ ceph_encode_64_safe(&p, e, new_snapid, bad);
+
+ ret = rbd_req_sync_exec(dev, dev->obj_md_name, "rbd", "snap_add",
+ data, p - data, &ver);
+
+ kfree(data);
+
+ if (ret < 0)
+ return ret;
+
+ dev->header.snapc->seq = new_snapid;
+
+ return 0;
+bad:
+ return -ERANGE;
+}
+
+static void __rbd_remove_all_snaps(struct rbd_device *rbd_dev)
+{
+ struct rbd_snap *snap;
+
+ while (!list_empty(&rbd_dev->snaps)) {
+ snap = list_first_entry(&rbd_dev->snaps, struct rbd_snap, node);
+ __rbd_remove_snap_dev(rbd_dev, snap);
+ }
+}
+
+/*
+ * only read the first part of the ondisk header, without the snaps info
+ */
+static int __rbd_update_snaps(struct rbd_device *rbd_dev)
+{
+ int ret;
+ struct rbd_image_header h;
+ u64 snap_seq;
+ int follow_seq = 0;
+
+ ret = rbd_read_header(rbd_dev, &h);
+ if (ret < 0)
+ return ret;
+
+ /* resized? */
+ set_capacity(rbd_dev->disk, h.image_size / 512ULL);
+
+ down_write(&rbd_dev->header.snap_rwsem);
+
+ snap_seq = rbd_dev->header.snapc->seq;
+ if (rbd_dev->header.total_snaps &&
+ rbd_dev->header.snapc->snaps[0] == snap_seq)
+ /* we are pointing at the head; we will need to
+ follow it if the head moves */
+ follow_seq = 1;
+
+ kfree(rbd_dev->header.snapc);
+ kfree(rbd_dev->header.snap_names);
+ kfree(rbd_dev->header.snap_sizes);
+
+ rbd_dev->header.total_snaps = h.total_snaps;
+ rbd_dev->header.snapc = h.snapc;
+ rbd_dev->header.snap_names = h.snap_names;
+ rbd_dev->header.snap_names_len = h.snap_names_len;
+ rbd_dev->header.snap_sizes = h.snap_sizes;
+ if (follow_seq)
+ rbd_dev->header.snapc->seq = rbd_dev->header.snapc->snaps[0];
+ else
+ rbd_dev->header.snapc->seq = snap_seq;
+
+ ret = __rbd_init_snaps_header(rbd_dev);
+
+ up_write(&rbd_dev->header.snap_rwsem);
+
+ return ret;
+}
+
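+/*
+ * read the image header from the OSDs, set up the in-core snapshot
+ * list and snapshot context, then allocate and announce the gendisk
+ * and its request queue for this mapping
+ */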
+static int rbd_init_disk(struct rbd_device *rbd_dev)
+{
+ struct gendisk *disk;
+ struct request_queue *q;
+ int rc;
+ u64 total_size = 0;
+
+ /* contact OSD, request size info about the object being mapped */
+ rc = rbd_read_header(rbd_dev, &rbd_dev->header);
+ if (rc)
+ return rc;
+
+ /* no need to lock here, as rbd_dev is not registered yet */
+ rc = __rbd_init_snaps_header(rbd_dev);
+ if (rc)
+ return rc;
+
+ rc = rbd_header_set_snap(rbd_dev, rbd_dev->snap_name, &total_size);
+ if (rc)
+ return rc;
+
+ /* create gendisk info */
+ rc = -ENOMEM;
+ disk = alloc_disk(RBD_MINORS_PER_MAJOR);
+ if (!disk)
+ goto out;
+
+ snprintf(disk->disk_name, sizeof(disk->disk_name), DRV_NAME "%d",
+ rbd_dev->id);
+ disk->major = rbd_dev->major;
+ disk->first_minor = 0;
+ disk->fops = &rbd_bd_ops;
+ disk->private_data = rbd_dev;
+
+ /* init rq */
+ rc = -ENOMEM;
+ q = blk_init_queue(rbd_rq_fn, &rbd_dev->lock);
+ if (!q)
+ goto out_disk;
+ blk_queue_merge_bvec(q, rbd_merge_bvec);
+ disk->queue = q;
+
+ q->queuedata = rbd_dev;
+
+ rbd_dev->disk = disk;
+ rbd_dev->q = q;
+
+ /* finally, announce the disk to the world */
+ set_capacity(disk, total_size / 512ULL);
+ add_disk(disk);
+
+ pr_info("%s: added with size 0x%llx\n",
+ disk->disk_name, (unsigned long long)total_size);
+ return 0;
+
+out_disk:
+ put_disk(disk);
+out:
+ return rc;
+}
+
+/*
+ sysfs
+*/
+
+static ssize_t rbd_size_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct rbd_device *rbd_dev = dev_to_rbd(dev);
+
+ return sprintf(buf, "%llu\n", (unsigned long long)rbd_dev->header.image_size);
+}
+
+static ssize_t rbd_major_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct rbd_device *rbd_dev = dev_to_rbd(dev);
+
+ return sprintf(buf, "%d\n", rbd_dev->major);
+}
+
+static ssize_t rbd_client_id_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct rbd_device *rbd_dev = dev_to_rbd(dev);
+
+ return sprintf(buf, "client%lld\n", ceph_client_id(rbd_dev->client));
+}
+
+static ssize_t rbd_pool_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct rbd_device *rbd_dev = dev_to_rbd(dev);
+
+ return sprintf(buf, "%s\n", rbd_dev->pool_name);
+}
+
+static ssize_t rbd_name_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct rbd_device *rbd_dev = dev_to_rbd(dev);
+
+ return sprintf(buf, "%s\n", rbd_dev->obj);
+}
+
+static ssize_t rbd_snap_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct rbd_device *rbd_dev = dev_to_rbd(dev);
+
+ return sprintf(buf, "%s\n", rbd_dev->snap_name);
+}
+
+static ssize_t rbd_image_refresh(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf,
+ size_t size)
+{
+ struct rbd_device *rbd_dev = dev_to_rbd(dev);
+ int rc;
+ int ret = size;
+
+ mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
+
+ rc = __rbd_update_snaps(rbd_dev);
+ if (rc < 0)
+ ret = rc;
+
+ mutex_unlock(&ctl_mutex);
+ return ret;
+}
+
+static DEVICE_ATTR(size, S_IRUGO, rbd_size_show, NULL);
+static DEVICE_ATTR(major, S_IRUGO, rbd_major_show, NULL);
+static DEVICE_ATTR(client_id, S_IRUGO, rbd_client_id_show, NULL);
+static DEVICE_ATTR(pool, S_IRUGO, rbd_pool_show, NULL);
+static DEVICE_ATTR(name, S_IRUGO, rbd_name_show, NULL);
+static DEVICE_ATTR(refresh, S_IWUSR, NULL, rbd_image_refresh);
+static DEVICE_ATTR(current_snap, S_IRUGO, rbd_snap_show, NULL);
+static DEVICE_ATTR(create_snap, S_IWUSR, NULL, rbd_snap_add);
+static DEVICE_ATTR(rollback_snap, S_IWUSR, NULL, rbd_snap_rollback);
+
+static struct attribute *rbd_attrs[] = {
+ &dev_attr_size.attr,
+ &dev_attr_major.attr,
+ &dev_attr_client_id.attr,
+ &dev_attr_pool.attr,
+ &dev_attr_name.attr,
+ &dev_attr_current_snap.attr,
+ &dev_attr_refresh.attr,
+ &dev_attr_create_snap.attr,
+ &dev_attr_rollback_snap.attr,
+ NULL
+};
+
+static struct attribute_group rbd_attr_group = {
+ .attrs = rbd_attrs,
+};
+
+static const struct attribute_group *rbd_attr_groups[] = {
+ &rbd_attr_group,
+ NULL
+};
+
+static void rbd_sysfs_dev_release(struct device *dev)
+{
+}
+
+static struct device_type rbd_device_type = {
+ .name = "rbd",
+ .groups = rbd_attr_groups,
+ .release = rbd_sysfs_dev_release,
+};
+
+
+/*
+ sysfs - snapshots
+*/
+
+static ssize_t rbd_snap_size_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct rbd_snap *snap = container_of(dev, struct rbd_snap, dev);
+
+ return sprintf(buf, "%lld\n", (long long)snap->size);
+}
+
+static ssize_t rbd_snap_id_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct rbd_snap *snap = container_of(dev, struct rbd_snap, dev);
+
+ return sprintf(buf, "%lld\n", (long long)snap->id);
+}
+
+static DEVICE_ATTR(snap_size, S_IRUGO, rbd_snap_size_show, NULL);
+static DEVICE_ATTR(snap_id, S_IRUGO, rbd_snap_id_show, NULL);
+
+static struct attribute *rbd_snap_attrs[] = {
+ &dev_attr_snap_size.attr,
+ &dev_attr_snap_id.attr,
+ NULL,
+};
+
+static struct attribute_group rbd_snap_attr_group = {
+ .attrs = rbd_snap_attrs,
+};
+
+static void rbd_snap_dev_release(struct device *dev)
+{
+ struct rbd_snap *snap = container_of(dev, struct rbd_snap, dev);
+ kfree(snap->name);
+ kfree(snap);
+}
+
+static const struct attribute_group *rbd_snap_attr_groups[] = {
+ &rbd_snap_attr_group,
+ NULL
+};
+
+static struct device_type rbd_snap_device_type = {
+ .groups = rbd_snap_attr_groups,
+ .release = rbd_snap_dev_release,
+};
+
+static void __rbd_remove_snap_dev(struct rbd_device *rbd_dev,
+ struct rbd_snap *snap)
+{
+ list_del(&snap->node);
+ device_unregister(&snap->dev);
+}
+
+static int rbd_register_snap_dev(struct rbd_device *rbd_dev,
+ struct rbd_snap *snap,
+ struct device *parent)
+{
+ struct device *dev = &snap->dev;
+ int ret;
+
+ dev->type = &rbd_snap_device_type;
+ dev->parent = parent;
+ dev->release = rbd_snap_dev_release;
+ dev_set_name(dev, "snap_%s", snap->name);
+ ret = device_register(dev);
+
+ return ret;
+}
+
+static int __rbd_add_snap_dev(struct rbd_device *rbd_dev,
+ int i, const char *name,
+ struct rbd_snap **snapp)
+{
+ int ret;
+ struct rbd_snap *snap = kzalloc(sizeof(*snap), GFP_KERNEL);
+ if (!snap)
+ return -ENOMEM;
+ snap->name = kstrdup(name, GFP_KERNEL);
+ snap->size = rbd_dev->header.snap_sizes[i];
+ snap->id = rbd_dev->header.snapc->snaps[i];
+ if (device_is_registered(&rbd_dev->dev)) {
+ ret = rbd_register_snap_dev(rbd_dev, snap,
+ &rbd_dev->dev);
+ if (ret < 0)
+ goto err;
+ }
+ *snapp = snap;
+ return 0;
+err:
+ kfree(snap->name);
+ kfree(snap);
+ return ret;
+}
+
+/*
+ * search for the previous snap in a null delimited string list
+ */
+const char *rbd_prev_snap_name(const char *name, const char *start)
+{
+ if (name < start + 2)
+ return NULL;
+
+ name -= 2;
+ while (*name) {
+ if (name == start)
+ return start;
+ name--;
+ }
+ return name + 1;
+}
+
+/*
+ * compare the old list of snapshots that we have with what's in the header
+ * and update it accordingly. Note that the header holds the snapshots
+ * in reverse order (from newest to oldest) while we need to go from
+ * oldest to newest so that we don't end up with a duplicate snap name
+ * while doing so (e.g., when a snapshot was removed and a new one was
+ * recreated with the same name).
+ */
+static int __rbd_init_snaps_header(struct rbd_device *rbd_dev)
+{
+ const char *name, *first_name;
+ int i = rbd_dev->header.total_snaps;
+ struct rbd_snap *snap, *old_snap = NULL;
+ int ret;
+ struct list_head *p, *n;
+
+ first_name = rbd_dev->header.snap_names;
+ name = first_name + rbd_dev->header.snap_names_len;
+
+ list_for_each_prev_safe(p, n, &rbd_dev->snaps) {
+ u64 cur_id;
+
+ old_snap = list_entry(p, struct rbd_snap, node);
+
+ if (i)
+ cur_id = rbd_dev->header.snapc->snaps[i - 1];
+
+ if (!i || old_snap->id < cur_id) {
+ /* old_snap->id was skipped, thus was removed */
+ __rbd_remove_snap_dev(rbd_dev, old_snap);
+ continue;
+ }
+ if (old_snap->id == cur_id) {
+ /* we have this snapshot already */
+ i--;
+ name = rbd_prev_snap_name(name, first_name);
+ continue;
+ }
+ for (; i > 0;
+ i--, name = rbd_prev_snap_name(name, first_name)) {
+ if (!name) {
+ WARN_ON(1);
+ return -EINVAL;
+ }
+ cur_id = rbd_dev->header.snapc->snaps[i];
+ /* snapshot removal? handle it above */
+ if (cur_id >= old_snap->id)
+ break;
+ /* a new snapshot */
+ ret = __rbd_add_snap_dev(rbd_dev, i - 1, name, &snap);
+ if (ret < 0)
+ return ret;
+
+ /* note that we add it backwards, so we use n and not p */
+ list_add(&snap->node, n);
+ p = &snap->node;
+ }
+ }
+ /* we're done going over the old snap list, just add what's left */
+ for (; i > 0; i--) {
+ name = rbd_prev_snap_name(name, first_name);
+ if (!name) {
+ WARN_ON(1);
+ return -EINVAL;
+ }
+ ret = __rbd_add_snap_dev(rbd_dev, i - 1, name, &snap);
+ if (ret < 0)
+ return ret;
+ list_add(&snap->node, &rbd_dev->snaps);
+ }
+
+ return 0;
+}
+
+
+static void rbd_root_dev_release(struct device *dev)
+{
+}
+
+static struct device rbd_root_dev = {
+ .init_name = "rbd",
+ .release = rbd_root_dev_release,
+};
+
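+/*
+ * register the rbd device (and any snapshots already known) with the
+ * driver model, so that it shows up under the rbd bus in sysfs
+ */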
+static int rbd_bus_add_dev(struct rbd_device *rbd_dev)
+{
+ int ret = -ENOMEM;
+ struct device *dev;
+ struct rbd_snap *snap;
+
+ mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
+ dev = &rbd_dev->dev;
+
+ dev->bus = &rbd_bus_type;
+ dev->type = &rbd_device_type;
+ dev->parent = &rbd_root_dev;
+ dev->release = rbd_dev_release;
+ dev_set_name(dev, "%d", rbd_dev->id);
+ ret = device_register(dev);
+ if (ret < 0)
+ goto done_free;
+
+ list_for_each_entry(snap, &rbd_dev->snaps, node) {
+ ret = rbd_register_snap_dev(rbd_dev, snap,
+ &rbd_dev->dev);
+ if (ret < 0)
+ break;
+ }
+
+ mutex_unlock(&ctl_mutex);
+ return 0;
+done_free:
+ mutex_unlock(&ctl_mutex);
+ return ret;
+}
+
+static void rbd_bus_del_dev(struct rbd_device *rbd_dev)
+{
+ device_unregister(&rbd_dev->dev);
+}
+
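+/*
+ * set up a watch on the header object; if the watch call returns
+ * -ERANGE (our header version is apparently out of date), refresh the
+ * snapshot list and retry
+ */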
+static int rbd_init_watch_dev(struct rbd_device *rbd_dev)
+{
+ int ret, rc;
+
+ do {
+ ret = rbd_req_sync_watch(rbd_dev, rbd_dev->obj_md_name,
+ rbd_dev->header.obj_version);
+ if (ret == -ERANGE) {
+ mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
+ rc = __rbd_update_snaps(rbd_dev);
+ mutex_unlock(&ctl_mutex);
+ if (rc < 0)
+ return rc;
+ }
+ } while (ret == -ERANGE);
+
+ return ret;
+}
+
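+/*
+ * sysfs 'add' handler: parse the user-supplied mapping description,
+ * allocate and register a new rbd_device, connect to the cluster and
+ * bring up the block device.  Going by the sscanf() below, the input
+ * is roughly "<mon_addr> <options> <pool> <image> [<snap>]", e.g.
+ * (hypothetical): echo "1.2.3.4:6789 name=admin rbd foo" > /sys/bus/rbd/add
+ */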
+static ssize_t rbd_add(struct bus_type *bus,
+ const char *buf,
+ size_t count)
+{
+ struct ceph_osd_client *osdc;
+ struct rbd_device *rbd_dev;
+ ssize_t rc = -ENOMEM;
+ int irc, new_id = 0;
+ struct list_head *tmp;
+ char *mon_dev_name;
+ char *options;
+
+ if (!try_module_get(THIS_MODULE))
+ return -ENODEV;
+
+ mon_dev_name = kmalloc(RBD_MAX_OPT_LEN, GFP_KERNEL);
+ if (!mon_dev_name)
+ goto err_out_mod;
+
+ options = kmalloc(RBD_MAX_OPT_LEN, GFP_KERNEL);
+ if (!options)
+ goto err_mon_dev;
+
+ /* new rbd_device object */
+ rbd_dev = kzalloc(sizeof(*rbd_dev), GFP_KERNEL);
+ if (!rbd_dev)
+ goto err_out_opt;
+
+ /* static rbd_device initialization */
+ spin_lock_init(&rbd_dev->lock);
+ INIT_LIST_HEAD(&rbd_dev->node);
+ INIT_LIST_HEAD(&rbd_dev->snaps);
+
+ /* generate unique id: find highest unique id, add one */
+ mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
+
+ list_for_each(tmp, &rbd_dev_list) {
+ struct rbd_device *rbd_dev;
+
+ rbd_dev = list_entry(tmp, struct rbd_device, node);
+ if (rbd_dev->id >= new_id)
+ new_id = rbd_dev->id + 1;
+ }
+
+ rbd_dev->id = new_id;
+
+ /* add to global list */
+ list_add_tail(&rbd_dev->node, &rbd_dev_list);
+
+ /* parse add command */
+ if (sscanf(buf, "%" __stringify(RBD_MAX_OPT_LEN) "s "
+ "%" __stringify(RBD_MAX_OPT_LEN) "s "
+ "%" __stringify(RBD_MAX_POOL_NAME_LEN) "s "
+ "%" __stringify(RBD_MAX_OBJ_NAME_LEN) "s"
+ "%" __stringify(RBD_MAX_SNAP_NAME_LEN) "s",
+ mon_dev_name, options, rbd_dev->pool_name,
+ rbd_dev->obj, rbd_dev->snap_name) < 4) {
+ rc = -EINVAL;
+ goto err_out_slot;
+ }
+
+ if (rbd_dev->snap_name[0] == 0)
+ rbd_dev->snap_name[0] = '-';
+
+ rbd_dev->obj_len = strlen(rbd_dev->obj);
+ snprintf(rbd_dev->obj_md_name, sizeof(rbd_dev->obj_md_name), "%s%s",
+ rbd_dev->obj, RBD_SUFFIX);
+
+ /* initialize rest of new object */
+ snprintf(rbd_dev->name, DEV_NAME_LEN, DRV_NAME "%d", rbd_dev->id);
+ rc = rbd_get_client(rbd_dev, mon_dev_name, options);
+ if (rc < 0)
+ goto err_out_slot;
+
+ mutex_unlock(&ctl_mutex);
+
+ /* pick the pool */
+ osdc = &rbd_dev->client->osdc;
+ rc = ceph_pg_poolid_by_name(osdc->osdmap, rbd_dev->pool_name);
+ if (rc < 0)
+ goto err_out_client;
+ rbd_dev->poolid = rc;
+
+ /* register our block device */
+ irc = register_blkdev(0, rbd_dev->name);
+ if (irc < 0) {
+ rc = irc;
+ goto err_out_client;
+ }
+ rbd_dev->major = irc;
+
+ rc = rbd_bus_add_dev(rbd_dev);
+ if (rc)
+ goto err_out_blkdev;
+
+ /* set up and announce blkdev mapping */
+ rc = rbd_init_disk(rbd_dev);
+ if (rc)
+ goto err_out_bus;
+
+ rc = rbd_init_watch_dev(rbd_dev);
+ if (rc)
+ goto err_out_bus;
+
+ return count;
+
+err_out_bus:
+ mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
+ list_del_init(&rbd_dev->node);
+ mutex_unlock(&ctl_mutex);
+
+ /* this will also clean up rest of rbd_dev stuff */
+
+ rbd_bus_del_dev(rbd_dev);
+ kfree(options);
+ kfree(mon_dev_name);
+ return rc;
+
+err_out_blkdev:
+ unregister_blkdev(rbd_dev->major, rbd_dev->name);
+err_out_client:
+ rbd_put_client(rbd_dev);
+ mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
+err_out_slot:
+ list_del_init(&rbd_dev->node);
+ mutex_unlock(&ctl_mutex);
+
+ kfree(rbd_dev);
+err_out_opt:
+ kfree(options);
+err_mon_dev:
+ kfree(mon_dev_name);
+err_out_mod:
+ dout("Error adding device %s\n", buf);
+ module_put(THIS_MODULE);
+ return rc;
+}
+
+static struct rbd_device *__rbd_get_dev(unsigned long id)
+{
+ struct list_head *tmp;
+ struct rbd_device *rbd_dev;
+
+ list_for_each(tmp, &rbd_dev_list) {
+ rbd_dev = list_entry(tmp, struct rbd_device, node);
+ if (rbd_dev->id == id)
+ return rbd_dev;
+ }
+ return NULL;
+}
+
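+/*
+ * device release callback: tear down the header watch, drop the ceph
+ * client, free the disk and block device, and release the module
+ * reference taken in rbd_add()
+ */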
+static void rbd_dev_release(struct device *dev)
+{
+ struct rbd_device *rbd_dev =
+ container_of(dev, struct rbd_device, dev);
+
+ if (rbd_dev->watch_request)
+ ceph_osdc_unregister_linger_request(&rbd_dev->client->osdc,
+ rbd_dev->watch_request);
+ if (rbd_dev->watch_event)
+ ceph_osdc_cancel_event(rbd_dev->watch_event);
+
+ rbd_put_client(rbd_dev);
+
+ /* clean up and free blkdev */
+ rbd_free_disk(rbd_dev);
+ unregister_blkdev(rbd_dev->major, rbd_dev->name);
+ kfree(rbd_dev);
+
+ /* release module ref */
+ module_put(THIS_MODULE);
+}
+
+static ssize_t rbd_remove(struct bus_type *bus,
+ const char *buf,
+ size_t count)
+{
+ struct rbd_device *rbd_dev = NULL;
+ int target_id, rc;
+ unsigned long ul;
+ int ret = count;
+
+ rc = strict_strtoul(buf, 10, &ul);
+ if (rc)
+ return rc;
+
+ /* convert to int; abort if we lost anything in the conversion */
+ target_id = (int) ul;
+ if (target_id != ul)
+ return -EINVAL;
+
+ mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
+
+ rbd_dev = __rbd_get_dev(target_id);
+ if (!rbd_dev) {
+ ret = -ENOENT;
+ goto done;
+ }
+
+ list_del_init(&rbd_dev->node);
+
+ __rbd_remove_all_snaps(rbd_dev);
+ rbd_bus_del_dev(rbd_dev);
+
+done:
+ mutex_unlock(&ctl_mutex);
+ return ret;
+}
+
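+/*
+ * sysfs 'create_snap' handler: create a new snapshot on the server,
+ * refresh our snapshot list and (best effort) notify other watchers
+ */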
+static ssize_t rbd_snap_add(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf,
+ size_t count)
+{
+ struct rbd_device *rbd_dev = dev_to_rbd(dev);
+ int ret;
+ char *name = kmalloc(count + 1, GFP_KERNEL);
+ if (!name)
+ return -ENOMEM;
+
+ snprintf(name, count, "%s", buf);
+
+ mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
+
+ ret = rbd_header_add_snap(rbd_dev,
+ name, GFP_KERNEL);
+ if (ret < 0)
+ goto err_unlock;
+
+ ret = __rbd_update_snaps(rbd_dev);
+ if (ret < 0)
+ goto err_unlock;
+
+ /* we shouldn't hold ctl_mutex when notifying: a notify might
+ trigger a watch callback that would need to take that mutex */
+ mutex_unlock(&ctl_mutex);
+
+ /* make a best effort, don't error if failed */
+ rbd_req_sync_notify(rbd_dev, rbd_dev->obj_md_name);
+
+ ret = count;
+ kfree(name);
+ return ret;
+
+err_unlock:
+ mutex_unlock(&ctl_mutex);
+ kfree(name);
+ return ret;
+}
+
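+/*
+ * sysfs 'rollback_snap' handler: roll every object segment of the
+ * image back to the named snapshot, then refresh the snapshot list
+ */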
+static ssize_t rbd_snap_rollback(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf,
+ size_t count)
+{
+ struct rbd_device *rbd_dev = dev_to_rbd(dev);
+ int ret;
+ u64 snapid;
+ u64 cur_ofs;
+ char *seg_name = NULL;
+ char *snap_name = kmalloc(count + 1, GFP_KERNEL);
+ ret = -ENOMEM;
+ if (!snap_name)
+ return ret;
+
+ /* parse snap rollback command */
+ snprintf(snap_name, count, "%s", buf);
+ seg_name = kmalloc(RBD_MAX_SEG_NAME_LEN + 1, GFP_NOIO);
+ if (!seg_name)
+ goto done;
+
+ mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
+
+ ret = snap_by_name(&rbd_dev->header, snap_name, &snapid, NULL);
+ if (ret < 0)
+ goto done_unlock;
+
+ dout("snapid=%lld\n", snapid);
+
+ cur_ofs = 0;
+ while (cur_ofs < rbd_dev->header.image_size) {
+ cur_ofs += rbd_get_segment(&rbd_dev->header,
+ rbd_dev->obj,
+ cur_ofs, (u64)-1,
+ seg_name, NULL);
+ dout("seg_name=%s\n", seg_name);
+
+ ret = rbd_req_sync_rollback_obj(rbd_dev, snapid, seg_name);
+ if (ret < 0)
+ pr_warning("could not roll back obj %s err=%d\n",
+ seg_name, ret);
+ }
+
+ ret = __rbd_update_snaps(rbd_dev);
+ if (ret < 0)
+ goto done_unlock;
+
+ ret = count;
+
+done_unlock:
+ mutex_unlock(&ctl_mutex);
+done:
+ kfree(seg_name);
+ kfree(snap_name);
+
+ return ret;
+}
+
+static struct bus_attribute rbd_bus_attrs[] = {
+ __ATTR(add, S_IWUSR, NULL, rbd_add),
+ __ATTR(remove, S_IWUSR, NULL, rbd_remove),
+ __ATTR_NULL
+};
+
+/*
+ * create control files in sysfs
+ * /sys/bus/rbd/...
+ */
+static int rbd_sysfs_init(void)
+{
+ int ret;
+
+ rbd_bus_type.bus_attrs = rbd_bus_attrs;
+
+ ret = bus_register(&rbd_bus_type);
+ if (ret < 0)
+ return ret;
+
+ ret = device_register(&rbd_root_dev);
+
+ return ret;
+}
+
+static void rbd_sysfs_cleanup(void)
+{
+ device_unregister(&rbd_root_dev);
+ bus_unregister(&rbd_bus_type);
+}
+
+int __init rbd_init(void)
+{
+ int rc;
+
+ rc = rbd_sysfs_init();
+ if (rc)
+ return rc;
+ spin_lock_init(&node_lock);
+ pr_info("loaded " DRV_NAME_LONG "\n");
+ return 0;
+}
+
+void __exit rbd_exit(void)
+{
+ rbd_sysfs_cleanup();
+}
+
+module_init(rbd_init);
+module_exit(rbd_exit);
+
+MODULE_AUTHOR("Sage Weil <sage@newdream.net>");
+MODULE_AUTHOR("Yehuda Sadeh <yehuda@hq.newdream.net>");
+MODULE_DESCRIPTION("rados block device");
+
+/* following authorship retained from original osdblk.c */
+MODULE_AUTHOR("Jeff Garzik <jeff@garzik.org>");
+
+MODULE_LICENSE("GPL");
diff --git a/drivers/block/rbd_types.h b/drivers/block/rbd_types.h
new file mode 100644
index 00000000..fc6c678a
--- /dev/null
+++ b/drivers/block/rbd_types.h
@@ -0,0 +1,73 @@
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2004-2010 Sage Weil <sage@newdream.net>
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#ifndef CEPH_RBD_TYPES_H
+#define CEPH_RBD_TYPES_H
+
+#include <linux/types.h>
+
+/*
+ * rbd image 'foo' consists of objects
+ * foo.rbd - image metadata
+ * foo.00000000
+ * foo.00000001
+ * ... - data
+ */
+
+#define RBD_SUFFIX ".rbd"
+#define RBD_DIRECTORY "rbd_directory"
+#define RBD_INFO "rbd_info"
+
+#define RBD_DEFAULT_OBJ_ORDER 22 /* 4MB */
+#define RBD_MIN_OBJ_ORDER 16
+#define RBD_MAX_OBJ_ORDER 30
+
+#define RBD_MAX_OBJ_NAME_LEN 96
+#define RBD_MAX_SEG_NAME_LEN 128
+
+#define RBD_COMP_NONE 0
+#define RBD_CRYPT_NONE 0
+
+#define RBD_HEADER_TEXT "<<< Rados Block Device Image >>>\n"
+#define RBD_HEADER_SIGNATURE "RBD"
+#define RBD_HEADER_VERSION "001.005"
+
+struct rbd_info {
+ __le64 max_id;
+} __attribute__ ((packed));
+
+struct rbd_image_snap_ondisk {
+ __le64 id;
+ __le64 image_size;
+} __attribute__((packed));
+
+struct rbd_image_header_ondisk {
+ char text[40];
+ char block_name[24];
+ char signature[4];
+ char version[8];
+ struct {
+ __u8 order;
+ __u8 crypt_type;
+ __u8 comp_type;
+ __u8 unused;
+ } __attribute__((packed)) options;
+ __le64 image_size;
+ __le64 snap_seq;
+ __le32 snap_count;
+ __le32 reserved;
+ __le64 snap_names_len;
+ struct rbd_image_snap_ondisk snaps[0];
+} __attribute__((packed));
+
+
+#endif
diff --git a/drivers/block/smart1,2.h b/drivers/block/smart1,2.h
new file mode 100644
index 00000000..e5565fba
--- /dev/null
+++ b/drivers/block/smart1,2.h
@@ -0,0 +1,278 @@
+/*
+ * Disk Array driver for Compaq SMART2 Controllers
+ * Copyright 1998 Compaq Computer Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Questions/Comments/Bugfixes to iss_storagedev@hp.com
+ *
+ * If you want to make changes, improve or add functionality to this
+ * driver, you'll probably need the Compaq Array Controller Interface
+ * Specification (Document number ECG086/1198)
+ */
+
+/*
+ * This file contains the controller communication implementation for
+ * Compaq SMART-1 and SMART-2 controllers. To the best of my knowledge,
+ * this should support:
+ *
+ * PCI:
+ * SMART-2/P, SMART-2DH, SMART-2SL, SMART-221, SMART-3100ES, SMART-3200
+ * Integrated SMART Array Controller, SMART-4200, SMART-4250ES
+ *
+ * EISA:
+ * SMART-2/E, SMART, IAES, IDA-2, IDA
+ */
+
+/*
+ * Memory mapped FIFO interface (SMART 42xx cards)
+ */
+static void smart4_submit_command(ctlr_info_t *h, cmdlist_t *c)
+{
+ writel(c->busaddr, h->vaddr + S42XX_REQUEST_PORT_OFFSET);
+}
+
+/*
+ * This card is the opposite of the other cards.
+ * 0 turns interrupts on...
+ * 0x08 turns them off...
+ */
+static void smart4_intr_mask(ctlr_info_t *h, unsigned long val)
+{
+ if (val)
+ { /* Turn interrupts on */
+ writel(0, h->vaddr + S42XX_REPLY_INTR_MASK_OFFSET);
+ } else /* Turn them off */
+ {
+ writel( S42XX_INTR_OFF,
+ h->vaddr + S42XX_REPLY_INTR_MASK_OFFSET);
+ }
+}
+
+/*
+ * For older cards FIFO Full = 0.
+ * On this card 0 means there is room; anything else means FIFO Full.
+ */
+static unsigned long smart4_fifo_full(ctlr_info_t *h)
+{
+
+ return (!readl(h->vaddr + S42XX_REQUEST_PORT_OFFSET));
+}
+
+/* This type of controller returns -1 if the fifo is empty,
+ * not 0 like the others, and we need to let it know that
+ * we read a value out.
+ */
+static unsigned long smart4_completed(ctlr_info_t *h)
+{
+ long register_value
+ = readl(h->vaddr + S42XX_REPLY_PORT_OFFSET);
+
+ /* Fifo is empty */
+ if( register_value == 0xffffffff)
+ return 0;
+
+ /* Need to let it know we got the reply */
+ /* We do this by writing a 0 to the port we just read from */
+ writel(0, h->vaddr + S42XX_REPLY_PORT_OFFSET);
+
+ return ((unsigned long) register_value);
+}
+
+ /*
+ * This hardware returns interrupt pending at a different place and
+ * it does not tell us if the fifo is empty; we will have to check
+ * that by getting a 0 back from the command_completed call.
+ */
+static unsigned long smart4_intr_pending(ctlr_info_t *h)
+{
+ unsigned long register_value =
+ readl(h->vaddr + S42XX_INTR_STATUS);
+
+ if( register_value & S42XX_INTR_PENDING)
+ return FIFO_NOT_EMPTY;
+ return 0 ;
+}
+
+static struct access_method smart4_access = {
+ smart4_submit_command,
+ smart4_intr_mask,
+ smart4_fifo_full,
+ smart4_intr_pending,
+ smart4_completed,
+};
+
+/*
+ * Memory mapped FIFO interface (PCI SMART2 and SMART 3xxx cards)
+ */
+static void smart2_submit_command(ctlr_info_t *h, cmdlist_t *c)
+{
+ writel(c->busaddr, h->vaddr + COMMAND_FIFO);
+}
+
+static void smart2_intr_mask(ctlr_info_t *h, unsigned long val)
+{
+ writel(val, h->vaddr + INTR_MASK);
+}
+
+static unsigned long smart2_fifo_full(ctlr_info_t *h)
+{
+ return readl(h->vaddr + COMMAND_FIFO);
+}
+
+static unsigned long smart2_completed(ctlr_info_t *h)
+{
+ return readl(h->vaddr + COMMAND_COMPLETE_FIFO);
+}
+
+static unsigned long smart2_intr_pending(ctlr_info_t *h)
+{
+ return readl(h->vaddr + INTR_PENDING);
+}
+
+static struct access_method smart2_access = {
+ smart2_submit_command,
+ smart2_intr_mask,
+ smart2_fifo_full,
+ smart2_intr_pending,
+ smart2_completed,
+};
+
+/*
+ * IO access for SMART-2/E cards
+ */
+static void smart2e_submit_command(ctlr_info_t *h, cmdlist_t *c)
+{
+ outl(c->busaddr, h->io_mem_addr + COMMAND_FIFO);
+}
+
+static void smart2e_intr_mask(ctlr_info_t *h, unsigned long val)
+{
+ outl(val, h->io_mem_addr + INTR_MASK);
+}
+
+static unsigned long smart2e_fifo_full(ctlr_info_t *h)
+{
+ return inl(h->io_mem_addr + COMMAND_FIFO);
+}
+
+static unsigned long smart2e_completed(ctlr_info_t *h)
+{
+ return inl(h->io_mem_addr + COMMAND_COMPLETE_FIFO);
+}
+
+static unsigned long smart2e_intr_pending(ctlr_info_t *h)
+{
+ return inl(h->io_mem_addr + INTR_PENDING);
+}
+
+static struct access_method smart2e_access = {
+ smart2e_submit_command,
+ smart2e_intr_mask,
+ smart2e_fifo_full,
+ smart2e_intr_pending,
+ smart2e_completed,
+};
+
+/*
+ * IO access for older SMART-1 type cards
+ */
+#define SMART1_SYSTEM_MASK 0xC8E
+#define SMART1_SYSTEM_DOORBELL 0xC8F
+#define SMART1_LOCAL_MASK 0xC8C
+#define SMART1_LOCAL_DOORBELL 0xC8D
+#define SMART1_INTR_MASK 0xC89
+#define SMART1_LISTADDR 0xC90
+#define SMART1_LISTLEN 0xC94
+#define SMART1_TAG 0xC97
+#define SMART1_COMPLETE_ADDR 0xC98
+#define SMART1_LISTSTATUS 0xC9E
+
+#define CHANNEL_BUSY 0x01
+#define CHANNEL_CLEAR 0x02
+
+static void smart1_submit_command(ctlr_info_t *h, cmdlist_t *c)
+{
+ /*
+ * This __u16 is actually a bunch of control flags on SMART
+ * and below. We want them all to be zero.
+ */
+ c->hdr.size = 0;
+
+ outb(CHANNEL_CLEAR, h->io_mem_addr + SMART1_SYSTEM_DOORBELL);
+
+ outl(c->busaddr, h->io_mem_addr + SMART1_LISTADDR);
+ outw(c->size, h->io_mem_addr + SMART1_LISTLEN);
+
+ outb(CHANNEL_BUSY, h->io_mem_addr + SMART1_LOCAL_DOORBELL);
+}
+
+static void smart1_intr_mask(ctlr_info_t *h, unsigned long val)
+{
+ if (val == 1) {
+ outb(0xFD, h->io_mem_addr + SMART1_SYSTEM_DOORBELL);
+ outb(CHANNEL_BUSY, h->io_mem_addr + SMART1_LOCAL_DOORBELL);
+ outb(0x01, h->io_mem_addr + SMART1_INTR_MASK);
+ outb(0x01, h->io_mem_addr + SMART1_SYSTEM_MASK);
+ } else {
+ outb(0, h->io_mem_addr + SMART1_SYSTEM_MASK);
+ }
+}
+
+static unsigned long smart1_fifo_full(ctlr_info_t *h)
+{
+ unsigned char chan;
+ chan = inb(h->io_mem_addr + SMART1_SYSTEM_DOORBELL) & CHANNEL_CLEAR;
+ return chan;
+}
+
+static unsigned long smart1_completed(ctlr_info_t *h)
+{
+ unsigned char status;
+ unsigned long cmd;
+
+ if (inb(h->io_mem_addr + SMART1_SYSTEM_DOORBELL) & CHANNEL_BUSY) {
+ outb(CHANNEL_BUSY, h->io_mem_addr + SMART1_SYSTEM_DOORBELL);
+
+ cmd = inl(h->io_mem_addr + SMART1_COMPLETE_ADDR);
+ status = inb(h->io_mem_addr + SMART1_LISTSTATUS);
+
+ outb(CHANNEL_CLEAR, h->io_mem_addr + SMART1_LOCAL_DOORBELL);
+
+ /*
+ * this is x86 (actually compaq x86) only, so it's ok
+ */
+ if (cmd) ((cmdlist_t*)bus_to_virt(cmd))->req.hdr.rcode = status;
+ } else {
+ cmd = 0;
+ }
+ return cmd;
+}
+
+static unsigned long smart1_intr_pending(ctlr_info_t *h)
+{
+ unsigned char chan;
+ chan = inb(h->io_mem_addr + SMART1_SYSTEM_DOORBELL) & CHANNEL_BUSY;
+ return chan;
+}
+
+static struct access_method smart1_access = {
+ smart1_submit_command,
+ smart1_intr_mask,
+ smart1_fifo_full,
+ smart1_intr_pending,
+ smart1_completed,
+};
diff --git a/drivers/block/sunvdc.c b/drivers/block/sunvdc.c
new file mode 100644
index 00000000..48e8fee9
--- /dev/null
+++ b/drivers/block/sunvdc.c
@@ -0,0 +1,879 @@
+/* sunvdc.c: Sun LDOM Virtual Disk Client.
+ *
+ * Copyright (C) 2007, 2008 David S. Miller <davem@davemloft.net>
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/blkdev.h>
+#include <linux/hdreg.h>
+#include <linux/genhd.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/completion.h>
+#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/scatterlist.h>
+
+#include <asm/vio.h>
+#include <asm/ldc.h>
+
+#define DRV_MODULE_NAME "sunvdc"
+#define PFX DRV_MODULE_NAME ": "
+#define DRV_MODULE_VERSION "1.0"
+#define DRV_MODULE_RELDATE "June 25, 2007"
+
+static char version[] __devinitdata =
+ DRV_MODULE_NAME ".c:v" DRV_MODULE_VERSION " (" DRV_MODULE_RELDATE ")\n";
+MODULE_AUTHOR("David S. Miller (davem@davemloft.net)");
+MODULE_DESCRIPTION("Sun LDOM virtual disk client driver");
+MODULE_LICENSE("GPL");
+MODULE_VERSION(DRV_MODULE_VERSION);
+
+#define VDC_TX_RING_SIZE 256
+
+#define WAITING_FOR_LINK_UP 0x01
+#define WAITING_FOR_TX_SPACE 0x02
+#define WAITING_FOR_GEN_CMD 0x04
+#define WAITING_FOR_ANY -1
+
+struct vdc_req_entry {
+ struct request *req;
+};
+
+struct vdc_port {
+ struct vio_driver_state vio;
+
+ struct gendisk *disk;
+
+ struct vdc_completion *cmp;
+
+ u64 req_id;
+ u64 seq;
+ struct vdc_req_entry rq_arr[VDC_TX_RING_SIZE];
+
+ unsigned long ring_cookies;
+
+ u64 max_xfer_size;
+ u32 vdisk_block_size;
+
+ /* The server fills these in for us in the disk attribute
+ * ACK packet.
+ */
+ u64 operations;
+ u32 vdisk_size;
+ u8 vdisk_type;
+
+ char disk_name[32];
+
+ struct vio_disk_geom geom;
+ struct vio_disk_vtoc label;
+};
+
+static inline struct vdc_port *to_vdc_port(struct vio_driver_state *vio)
+{
+ return container_of(vio, struct vdc_port, vio);
+}
+
+/* Ordered from largest major to lowest */
+static struct vio_version vdc_versions[] = {
+ { .major = 1, .minor = 0 },
+};
+
+#define VDCBLK_NAME "vdisk"
+static int vdc_major;
+#define PARTITION_SHIFT 3
+
+static inline u32 vdc_tx_dring_avail(struct vio_dring_state *dr)
+{
+ return vio_dring_avail(dr, VDC_TX_RING_SIZE);
+}
+
+static int vdc_getgeo(struct block_device *bdev, struct hd_geometry *geo)
+{
+ struct gendisk *disk = bdev->bd_disk;
+ struct vdc_port *port = disk->private_data;
+
+ geo->heads = (u8) port->geom.num_hd;
+ geo->sectors = (u8) port->geom.num_sec;
+ geo->cylinders = port->geom.num_cyl;
+
+ return 0;
+}
+
+static const struct block_device_operations vdc_fops = {
+ .owner = THIS_MODULE,
+ .getgeo = vdc_getgeo,
+};
+
+static void vdc_finish(struct vio_driver_state *vio, int err, int waiting_for)
+{
+ if (vio->cmp &&
+ (waiting_for == -1 ||
+ vio->cmp->waiting_for == waiting_for)) {
+ vio->cmp->err = err;
+ complete(&vio->cmp->com);
+ vio->cmp = NULL;
+ }
+}
+
+static void vdc_handshake_complete(struct vio_driver_state *vio)
+{
+ vdc_finish(vio, 0, WAITING_FOR_LINK_UP);
+}
+
+static int vdc_handle_unknown(struct vdc_port *port, void *arg)
+{
+ struct vio_msg_tag *pkt = arg;
+
+ printk(KERN_ERR PFX "Received unknown msg [%02x:%02x:%04x:%08x]\n",
+ pkt->type, pkt->stype, pkt->stype_env, pkt->sid);
+ printk(KERN_ERR PFX "Resetting connection.\n");
+
+ ldc_disconnect(port->vio.lp);
+
+ return -ECONNRESET;
+}
+
+static int vdc_send_attr(struct vio_driver_state *vio)
+{
+ struct vdc_port *port = to_vdc_port(vio);
+ struct vio_disk_attr_info pkt;
+
+ memset(&pkt, 0, sizeof(pkt));
+
+ pkt.tag.type = VIO_TYPE_CTRL;
+ pkt.tag.stype = VIO_SUBTYPE_INFO;
+ pkt.tag.stype_env = VIO_ATTR_INFO;
+ pkt.tag.sid = vio_send_sid(vio);
+
+ pkt.xfer_mode = VIO_DRING_MODE;
+ pkt.vdisk_block_size = port->vdisk_block_size;
+ pkt.max_xfer_size = port->max_xfer_size;
+
+ viodbg(HS, "SEND ATTR xfer_mode[0x%x] blksz[%u] max_xfer[%llu]\n",
+ pkt.xfer_mode, pkt.vdisk_block_size, pkt.max_xfer_size);
+
+ return vio_ldc_send(&port->vio, &pkt, sizeof(pkt));
+}
+
+static int vdc_handle_attr(struct vio_driver_state *vio, void *arg)
+{
+ struct vdc_port *port = to_vdc_port(vio);
+ struct vio_disk_attr_info *pkt = arg;
+
+ viodbg(HS, "GOT ATTR stype[0x%x] ops[%llx] disk_size[%llu] disk_type[%x] "
+ "xfer_mode[0x%x] blksz[%u] max_xfer[%llu]\n",
+ pkt->tag.stype, pkt->operations,
+ pkt->vdisk_size, pkt->vdisk_type,
+ pkt->xfer_mode, pkt->vdisk_block_size,
+ pkt->max_xfer_size);
+
+ if (pkt->tag.stype == VIO_SUBTYPE_ACK) {
+ switch (pkt->vdisk_type) {
+ case VD_DISK_TYPE_DISK:
+ case VD_DISK_TYPE_SLICE:
+ break;
+
+ default:
+ printk(KERN_ERR PFX "%s: Bogus vdisk_type 0x%x\n",
+ vio->name, pkt->vdisk_type);
+ return -ECONNRESET;
+ }
+
+ if (pkt->vdisk_block_size > port->vdisk_block_size) {
+ printk(KERN_ERR PFX "%s: BLOCK size increased "
+ "%u --> %u\n",
+ vio->name,
+ port->vdisk_block_size, pkt->vdisk_block_size);
+ return -ECONNRESET;
+ }
+
+ port->operations = pkt->operations;
+ port->vdisk_size = pkt->vdisk_size;
+ port->vdisk_type = pkt->vdisk_type;
+ if (pkt->max_xfer_size < port->max_xfer_size)
+ port->max_xfer_size = pkt->max_xfer_size;
+ port->vdisk_block_size = pkt->vdisk_block_size;
+ return 0;
+ } else {
+ printk(KERN_ERR PFX "%s: Attribute NACK\n", vio->name);
+
+ return -ECONNRESET;
+ }
+}
+
+static void vdc_end_special(struct vdc_port *port, struct vio_disk_desc *desc)
+{
+ int err = desc->status;
+
+ vdc_finish(&port->vio, -err, WAITING_FOR_GEN_CMD);
+}
+
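+/*
+ * complete one finished descriptor: unmap its cookies, advance the
+ * consumer index, end the block request (or finish a "special"
+ * non-request command) and restart the queue if it was stopped
+ */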
+static void vdc_end_one(struct vdc_port *port, struct vio_dring_state *dr,
+ unsigned int index)
+{
+ struct vio_disk_desc *desc = vio_dring_entry(dr, index);
+ struct vdc_req_entry *rqe = &port->rq_arr[index];
+ struct request *req;
+
+ if (unlikely(desc->hdr.state != VIO_DESC_DONE))
+ return;
+
+ ldc_unmap(port->vio.lp, desc->cookies, desc->ncookies);
+ desc->hdr.state = VIO_DESC_FREE;
+ dr->cons = (index + 1) & (VDC_TX_RING_SIZE - 1);
+
+ req = rqe->req;
+ if (req == NULL) {
+ vdc_end_special(port, desc);
+ return;
+ }
+
+ rqe->req = NULL;
+
+ __blk_end_request(req, (desc->status ? -EIO : 0), desc->size);
+
+ if (blk_queue_stopped(port->disk->queue))
+ blk_start_queue(port->disk->queue);
+}
+
+static int vdc_ack(struct vdc_port *port, void *msgbuf)
+{
+ struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING];
+ struct vio_dring_data *pkt = msgbuf;
+
+ if (unlikely(pkt->dring_ident != dr->ident ||
+ pkt->start_idx != pkt->end_idx ||
+ pkt->start_idx >= VDC_TX_RING_SIZE))
+ return 0;
+
+ vdc_end_one(port, dr, pkt->start_idx);
+
+ return 0;
+}
+
+static int vdc_nack(struct vdc_port *port, void *msgbuf)
+{
+ /* XXX Implement me XXX */
+ return 0;
+}
+
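+/*
+ * LDC event callback: handle link state changes, then drain and
+ * dispatch any incoming data/control messages under vio->lock
+ */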
+static void vdc_event(void *arg, int event)
+{
+ struct vdc_port *port = arg;
+ struct vio_driver_state *vio = &port->vio;
+ unsigned long flags;
+ int err;
+
+ spin_lock_irqsave(&vio->lock, flags);
+
+ if (unlikely(event == LDC_EVENT_RESET ||
+ event == LDC_EVENT_UP)) {
+ vio_link_state_change(vio, event);
+ spin_unlock_irqrestore(&vio->lock, flags);
+ return;
+ }
+
+ if (unlikely(event != LDC_EVENT_DATA_READY)) {
+ printk(KERN_WARNING PFX "Unexpected LDC event %d\n", event);
+ spin_unlock_irqrestore(&vio->lock, flags);
+ return;
+ }
+
+ err = 0;
+ while (1) {
+ union {
+ struct vio_msg_tag tag;
+ u64 raw[8];
+ } msgbuf;
+
+ err = ldc_read(vio->lp, &msgbuf, sizeof(msgbuf));
+ if (unlikely(err < 0)) {
+ if (err == -ECONNRESET)
+ vio_conn_reset(vio);
+ break;
+ }
+ if (err == 0)
+ break;
+ viodbg(DATA, "TAG [%02x:%02x:%04x:%08x]\n",
+ msgbuf.tag.type,
+ msgbuf.tag.stype,
+ msgbuf.tag.stype_env,
+ msgbuf.tag.sid);
+ err = vio_validate_sid(vio, &msgbuf.tag);
+ if (err < 0)
+ break;
+
+ if (likely(msgbuf.tag.type == VIO_TYPE_DATA)) {
+ if (msgbuf.tag.stype == VIO_SUBTYPE_ACK)
+ err = vdc_ack(port, &msgbuf);
+ else if (msgbuf.tag.stype == VIO_SUBTYPE_NACK)
+ err = vdc_nack(port, &msgbuf);
+ else
+ err = vdc_handle_unknown(port, &msgbuf);
+ } else if (msgbuf.tag.type == VIO_TYPE_CTRL) {
+ err = vio_control_pkt_engine(vio, &msgbuf);
+ } else {
+ err = vdc_handle_unknown(port, &msgbuf);
+ }
+ if (err < 0)
+ break;
+ }
+ if (err < 0)
+ vdc_finish(&port->vio, err, WAITING_FOR_ANY);
+ spin_unlock_irqrestore(&vio->lock, flags);
+}
+
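+/*
+ * tell the server that a new descriptor is ready, retrying with
+ * exponential backoff (capped at 128us) while the LDC send path
+ * returns -EAGAIN
+ */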
+static int __vdc_tx_trigger(struct vdc_port *port)
+{
+ struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING];
+ struct vio_dring_data hdr = {
+ .tag = {
+ .type = VIO_TYPE_DATA,
+ .stype = VIO_SUBTYPE_INFO,
+ .stype_env = VIO_DRING_DATA,
+ .sid = vio_send_sid(&port->vio),
+ },
+ .dring_ident = dr->ident,
+ .start_idx = dr->prod,
+ .end_idx = dr->prod,
+ };
+ int err, delay;
+
+ hdr.seq = dr->snd_nxt;
+ delay = 1;
+ do {
+ err = vio_ldc_send(&port->vio, &hdr, sizeof(hdr));
+ if (err > 0) {
+ dr->snd_nxt++;
+ break;
+ }
+ udelay(delay);
+ if ((delay <<= 1) > 128)
+ delay = 128;
+ } while (err == -EAGAIN);
+
+ return err;
+}
+
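+/*
+ * map a block request's scatterlist into the next TX ring descriptor
+ * and trigger transmission; called from do_vdc_request() with the
+ * queue lock (port->vio.lock) held
+ */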
+static int __send_request(struct request *req)
+{
+ struct vdc_port *port = req->rq_disk->private_data;
+ struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING];
+ struct scatterlist sg[port->ring_cookies];
+ struct vdc_req_entry *rqe;
+ struct vio_disk_desc *desc;
+ unsigned int map_perm;
+ int nsg, err, i;
+ u64 len;
+ u8 op;
+
+ map_perm = LDC_MAP_SHADOW | LDC_MAP_DIRECT | LDC_MAP_IO;
+
+ if (rq_data_dir(req) == READ) {
+ map_perm |= LDC_MAP_W;
+ op = VD_OP_BREAD;
+ } else {
+ map_perm |= LDC_MAP_R;
+ op = VD_OP_BWRITE;
+ }
+
+ sg_init_table(sg, port->ring_cookies);
+ nsg = blk_rq_map_sg(req->q, req, sg);
+
+ len = 0;
+ for (i = 0; i < nsg; i++)
+ len += sg[i].length;
+
+ if (unlikely(vdc_tx_dring_avail(dr) < 1)) {
+ blk_stop_queue(port->disk->queue);
+ err = -ENOMEM;
+ goto out;
+ }
+
+ desc = vio_dring_cur(dr);
+
+ err = ldc_map_sg(port->vio.lp, sg, nsg,
+ desc->cookies, port->ring_cookies,
+ map_perm);
+ if (err < 0) {
+ printk(KERN_ERR PFX "ldc_map_sg() failure, err=%d.\n", err);
+ return err;
+ }
+
+ rqe = &port->rq_arr[dr->prod];
+ rqe->req = req;
+
+ desc->hdr.ack = VIO_ACK_ENABLE;
+ desc->req_id = port->req_id;
+ desc->operation = op;
+ if (port->vdisk_type == VD_DISK_TYPE_DISK) {
+ desc->slice = 0xff;
+ } else {
+ desc->slice = 0;
+ }
+ desc->status = ~0;
+ desc->offset = (blk_rq_pos(req) << 9) / port->vdisk_block_size;
+ desc->size = len;
+ desc->ncookies = err;
+
+ /* This has to be a non-SMP write barrier because we are writing
+ * to memory which is shared with the peer LDOM.
+ */
+ wmb();
+ desc->hdr.state = VIO_DESC_READY;
+
+ err = __vdc_tx_trigger(port);
+ if (err < 0) {
+ printk(KERN_ERR PFX "vdc_tx_trigger() failure, err=%d\n", err);
+ } else {
+ port->req_id++;
+ dr->prod = (dr->prod + 1) & (VDC_TX_RING_SIZE - 1);
+ }
+out:
+
+ return err;
+}
+
+static void do_vdc_request(struct request_queue *q)
+{
+ while (1) {
+ struct request *req = blk_fetch_request(q);
+
+ if (!req)
+ break;
+
+ if (__send_request(req) < 0)
+ __blk_end_request_all(req, -EIO);
+ }
+}
+
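+/*
+ * issue a synchronous, non-block-I/O disk operation (VTOC, geometry,
+ * write-cache, ...) through the TX ring and sleep until the server
+ * acknowledges it
+ */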
+static int generic_request(struct vdc_port *port, u8 op, void *buf, int len)
+{
+ struct vio_dring_state *dr;
+ struct vio_completion comp;
+ struct vio_disk_desc *desc;
+ unsigned int map_perm;
+ unsigned long flags;
+ int op_len, err;
+ void *req_buf;
+
+ if (!(((u64)1 << ((u64)op - 1)) & port->operations))
+ return -EOPNOTSUPP;
+
+ switch (op) {
+ case VD_OP_BREAD:
+ case VD_OP_BWRITE:
+ default:
+ return -EINVAL;
+
+ case VD_OP_FLUSH:
+ op_len = 0;
+ map_perm = 0;
+ break;
+
+ case VD_OP_GET_WCE:
+ op_len = sizeof(u32);
+ map_perm = LDC_MAP_W;
+ break;
+
+ case VD_OP_SET_WCE:
+ op_len = sizeof(u32);
+ map_perm = LDC_MAP_R;
+ break;
+
+ case VD_OP_GET_VTOC:
+ op_len = sizeof(struct vio_disk_vtoc);
+ map_perm = LDC_MAP_W;
+ break;
+
+ case VD_OP_SET_VTOC:
+ op_len = sizeof(struct vio_disk_vtoc);
+ map_perm = LDC_MAP_R;
+ break;
+
+ case VD_OP_GET_DISKGEOM:
+ op_len = sizeof(struct vio_disk_geom);
+ map_perm = LDC_MAP_W;
+ break;
+
+ case VD_OP_SET_DISKGEOM:
+ op_len = sizeof(struct vio_disk_geom);
+ map_perm = LDC_MAP_R;
+ break;
+
+ case VD_OP_SCSICMD:
+ op_len = 16;
+ map_perm = LDC_MAP_RW;
+ break;
+
+ case VD_OP_GET_DEVID:
+ op_len = sizeof(struct vio_disk_devid);
+ map_perm = LDC_MAP_W;
+ break;
+
+ case VD_OP_GET_EFI:
+ case VD_OP_SET_EFI:
+ return -EOPNOTSUPP;
+ }
+
+ map_perm |= LDC_MAP_SHADOW | LDC_MAP_DIRECT | LDC_MAP_IO;
+
+ op_len = (op_len + 7) & ~7;
+ req_buf = kzalloc(op_len, GFP_KERNEL);
+ if (!req_buf)
+ return -ENOMEM;
+
+ if (len > op_len)
+ len = op_len;
+
+ if (map_perm & LDC_MAP_R)
+ memcpy(req_buf, buf, len);
+
+ spin_lock_irqsave(&port->vio.lock, flags);
+
+ dr = &port->vio.drings[VIO_DRIVER_TX_RING];
+
+ /* XXX If we want to use this code generically we have to
+ * XXX handle TX ring exhaustion etc.
+ */
+ desc = vio_dring_cur(dr);
+
+ err = ldc_map_single(port->vio.lp, req_buf, op_len,
+ desc->cookies, port->ring_cookies,
+ map_perm);
+ if (err < 0) {
+ spin_unlock_irqrestore(&port->vio.lock, flags);
+ kfree(req_buf);
+ return err;
+ }
+
+ init_completion(&comp.com);
+ comp.waiting_for = WAITING_FOR_GEN_CMD;
+ port->vio.cmp = &comp;
+
+ desc->hdr.ack = VIO_ACK_ENABLE;
+ desc->req_id = port->req_id;
+ desc->operation = op;
+ desc->slice = 0;
+ desc->status = ~0;
+ desc->offset = 0;
+ desc->size = op_len;
+ desc->ncookies = err;
+
+ /* This has to be a non-SMP write barrier because we are writing
+ * to memory which is shared with the peer LDOM.
+ */
+ wmb();
+ desc->hdr.state = VIO_DESC_READY;
+
+ err = __vdc_tx_trigger(port);
+ if (err >= 0) {
+ port->req_id++;
+ dr->prod = (dr->prod + 1) & (VDC_TX_RING_SIZE - 1);
+ spin_unlock_irqrestore(&port->vio.lock, flags);
+
+ wait_for_completion(&comp.com);
+ err = comp.err;
+ } else {
+ port->vio.cmp = NULL;
+ spin_unlock_irqrestore(&port->vio.lock, flags);
+ }
+
+ if (map_perm & LDC_MAP_W)
+ memcpy(buf, req_buf, len);
+
+ kfree(req_buf);
+
+ return err;
+}
+
+static int __devinit vdc_alloc_tx_ring(struct vdc_port *port)
+{
+ struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING];
+ unsigned long len, entry_size;
+ int ncookies;
+ void *dring;
+
+ entry_size = sizeof(struct vio_disk_desc) +
+ (sizeof(struct ldc_trans_cookie) * port->ring_cookies);
+ len = (VDC_TX_RING_SIZE * entry_size);
+
+ ncookies = VIO_MAX_RING_COOKIES;
+ dring = ldc_alloc_exp_dring(port->vio.lp, len,
+ dr->cookies, &ncookies,
+ (LDC_MAP_SHADOW |
+ LDC_MAP_DIRECT |
+ LDC_MAP_RW));
+ if (IS_ERR(dring))
+ return PTR_ERR(dring);
+
+ dr->base = dring;
+ dr->entry_size = entry_size;
+ dr->num_entries = VDC_TX_RING_SIZE;
+ dr->prod = dr->cons = 0;
+ dr->pending = VDC_TX_RING_SIZE;
+ dr->ncookies = ncookies;
+
+ return 0;
+}
+
+static void vdc_free_tx_ring(struct vdc_port *port)
+{
+ struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING];
+
+ if (dr->base) {
+ ldc_free_exp_dring(port->vio.lp, dr->base,
+ (dr->entry_size * dr->num_entries),
+ dr->cookies, dr->ncookies);
+ dr->base = NULL;
+ dr->entry_size = 0;
+ dr->num_entries = 0;
+ dr->pending = 0;
+ dr->ncookies = 0;
+ }
+}
+
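+/*
+ * bring the vio channel up, query the label and geometry from the
+ * server, then allocate and register the request queue and gendisk
+ */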
+static int probe_disk(struct vdc_port *port)
+{
+ struct vio_completion comp;
+ struct request_queue *q;
+ struct gendisk *g;
+ int err;
+
+ init_completion(&comp.com);
+ comp.err = 0;
+ comp.waiting_for = WAITING_FOR_LINK_UP;
+ port->vio.cmp = &comp;
+
+ vio_port_up(&port->vio);
+
+ wait_for_completion(&comp.com);
+ if (comp.err)
+ return comp.err;
+
+ err = generic_request(port, VD_OP_GET_VTOC,
+ &port->label, sizeof(port->label));
+ if (err < 0) {
+ printk(KERN_ERR PFX "VD_OP_GET_VTOC returns error %d\n", err);
+ return err;
+ }
+
+ err = generic_request(port, VD_OP_GET_DISKGEOM,
+ &port->geom, sizeof(port->geom));
+ if (err < 0) {
+ printk(KERN_ERR PFX "VD_OP_GET_DISKGEOM returns "
+ "error %d\n", err);
+ return err;
+ }
+
+ port->vdisk_size = ((u64)port->geom.num_cyl *
+ (u64)port->geom.num_hd *
+ (u64)port->geom.num_sec);
+
+ q = blk_init_queue(do_vdc_request, &port->vio.lock);
+ if (!q) {
+ printk(KERN_ERR PFX "%s: Could not allocate queue.\n",
+ port->vio.name);
+ return -ENOMEM;
+ }
+ g = alloc_disk(1 << PARTITION_SHIFT);
+ if (!g) {
+ printk(KERN_ERR PFX "%s: Could not allocate gendisk.\n",
+ port->vio.name);
+ blk_cleanup_queue(q);
+ return -ENOMEM;
+ }
+
+ port->disk = g;
+
+ blk_queue_max_segments(q, port->ring_cookies);
+ blk_queue_max_hw_sectors(q, port->max_xfer_size);
+ g->major = vdc_major;
+ g->first_minor = port->vio.vdev->dev_no << PARTITION_SHIFT;
+ strcpy(g->disk_name, port->disk_name);
+
+ g->fops = &vdc_fops;
+ g->queue = q;
+ g->private_data = port;
+ g->driverfs_dev = &port->vio.vdev->dev;
+
+ set_capacity(g, port->vdisk_size);
+
+ printk(KERN_INFO PFX "%s: %u sectors (%u MB)\n",
+ g->disk_name,
+ port->vdisk_size, (port->vdisk_size >> (20 - 9)));
+
+ add_disk(g);
+
+ return 0;
+}
+
+static struct ldc_channel_config vdc_ldc_cfg = {
+ .event = vdc_event,
+ .mtu = 64,
+ .mode = LDC_MODE_UNRELIABLE,
+};
+
+static struct vio_driver_ops vdc_vio_ops = {
+ .send_attr = vdc_send_attr,
+ .handle_attr = vdc_handle_attr,
+ .handshake_complete = vdc_handshake_complete,
+};
+
+static void __devinit print_version(void)
+{
+ static int version_printed;
+
+ if (version_printed++ == 0)
+ printk(KERN_INFO "%s", version);
+}
+
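+/*
+ * probe one vdc-port device: build the disk name from the device
+ * number, initialise the VIO/LDC state and the TX ring, then probe
+ * and register the virtual disk
+ */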
+static int __devinit vdc_port_probe(struct vio_dev *vdev,
+ const struct vio_device_id *id)
+{
+ struct mdesc_handle *hp;
+ struct vdc_port *port;
+ int err;
+
+ print_version();
+
+ hp = mdesc_grab();
+
+ err = -ENODEV;
+ if ((vdev->dev_no << PARTITION_SHIFT) & ~(u64)MINORMASK) {
+ printk(KERN_ERR PFX "Port id [%llu] too large.\n",
+ vdev->dev_no);
+ goto err_out_release_mdesc;
+ }
+
+ port = kzalloc(sizeof(*port), GFP_KERNEL);
+ err = -ENOMEM;
+ if (!port) {
+ printk(KERN_ERR PFX "Cannot allocate vdc_port.\n");
+ goto err_out_release_mdesc;
+ }
+
+ if (vdev->dev_no >= 26)
+ snprintf(port->disk_name, sizeof(port->disk_name),
+ VDCBLK_NAME "%c%c",
+ 'a' + ((int)vdev->dev_no / 26) - 1,
+ 'a' + ((int)vdev->dev_no % 26));
+ else
+ snprintf(port->disk_name, sizeof(port->disk_name),
+ VDCBLK_NAME "%c", 'a' + ((int)vdev->dev_no % 26));
+
+ err = vio_driver_init(&port->vio, vdev, VDEV_DISK,
+ vdc_versions, ARRAY_SIZE(vdc_versions),
+ &vdc_vio_ops, port->disk_name);
+ if (err)
+ goto err_out_free_port;
+
+ port->vdisk_block_size = 512;
+ port->max_xfer_size = ((128 * 1024) / port->vdisk_block_size);
+ port->ring_cookies = ((port->max_xfer_size *
+ port->vdisk_block_size) / PAGE_SIZE) + 2;
+
+ err = vio_ldc_alloc(&port->vio, &vdc_ldc_cfg, port);
+ if (err)
+ goto err_out_free_port;
+
+ err = vdc_alloc_tx_ring(port);
+ if (err)
+ goto err_out_free_ldc;
+
+ err = probe_disk(port);
+ if (err)
+ goto err_out_free_tx_ring;
+
+ dev_set_drvdata(&vdev->dev, port);
+
+ mdesc_release(hp);
+
+ return 0;
+
+err_out_free_tx_ring:
+ vdc_free_tx_ring(port);
+
+err_out_free_ldc:
+ vio_ldc_free(&port->vio);
+
+err_out_free_port:
+ kfree(port);
+
+err_out_release_mdesc:
+ mdesc_release(hp);
+ return err;
+}
+
+static int vdc_port_remove(struct vio_dev *vdev)
+{
+ struct vdc_port *port = dev_get_drvdata(&vdev->dev);
+
+ if (port) {
+ del_timer_sync(&port->vio.timer);
+
+ vdc_free_tx_ring(port);
+ vio_ldc_free(&port->vio);
+
+ dev_set_drvdata(&vdev->dev, NULL);
+
+ kfree(port);
+ }
+ return 0;
+}
+
+static const struct vio_device_id vdc_port_match[] = {
+ {
+ .type = "vdc-port",
+ },
+ {},
+};
+MODULE_DEVICE_TABLE(vio, vdc_port_match);
+
+static struct vio_driver vdc_port_driver = {
+ .id_table = vdc_port_match,
+ .probe = vdc_port_probe,
+ .remove = vdc_port_remove,
+ .driver = {
+ .name = "vdc_port",
+ .owner = THIS_MODULE,
+ }
+};
+
+static int __init vdc_init(void)
+{
+ int err;
+
+ err = register_blkdev(0, VDCBLK_NAME);
+ if (err < 0)
+ goto out_err;
+
+ vdc_major = err;
+
+ err = vio_register_driver(&vdc_port_driver);
+ if (err)
+ goto out_unregister_blkdev;
+
+ return 0;
+
+out_unregister_blkdev:
+ unregister_blkdev(vdc_major, VDCBLK_NAME);
+ vdc_major = 0;
+
+out_err:
+ return err;
+}
+
+static void __exit vdc_exit(void)
+{
+ vio_unregister_driver(&vdc_port_driver);
+ unregister_blkdev(vdc_major, VDCBLK_NAME);
+}
+
+module_init(vdc_init);
+module_exit(vdc_exit);
diff --git a/drivers/block/swim.c b/drivers/block/swim.c
new file mode 100644
index 00000000..fd5adcd5
--- /dev/null
+++ b/drivers/block/swim.c
@@ -0,0 +1,1000 @@
+/*
+ * Driver for SWIM (Sander Woz Integrated Machine) floppy controller
+ *
+ * Copyright (C) 2004,2008 Laurent Vivier <Laurent@lvivier.info>
+ *
+ * based on Alastair Bridgewater SWIM analysis, 2001
+ * based on SWIM3 driver (c) Paul Mackerras, 1996
+ * based on netBSD IWM driver (c) 1997, 1998 Hauke Fath.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * 2004-08-21 (lv) - Initial implementation
+ * 2008-10-30 (lv) - Port to 2.6
+ */
+
+#include <linux/module.h>
+#include <linux/fd.h>
+#include <linux/slab.h>
+#include <linux/blkdev.h>
+#include <linux/mutex.h>
+#include <linux/hdreg.h>
+#include <linux/kernel.h>
+#include <linux/delay.h>
+#include <linux/platform_device.h>
+
+#include <asm/macintosh.h>
+#include <asm/mac_via.h>
+
+#define CARDNAME "swim"
+
+struct sector_header {
+ unsigned char side;
+ unsigned char track;
+ unsigned char sector;
+ unsigned char size;
+ unsigned char crc0;
+ unsigned char crc1;
+} __attribute__((packed));
+
+#define DRIVER_VERSION "Version 0.2 (2008-10-30)"
+
+#define REG(x) unsigned char x, x ## _pad[0x200 - 1];
+
+struct swim {
+ REG(write_data)
+ REG(write_mark)
+ REG(write_CRC)
+ REG(write_parameter)
+ REG(write_phase)
+ REG(write_setup)
+ REG(write_mode0)
+ REG(write_mode1)
+
+ REG(read_data)
+ REG(read_mark)
+ REG(read_error)
+ REG(read_parameter)
+ REG(read_phase)
+ REG(read_setup)
+ REG(read_status)
+ REG(read_handshake)
+} __attribute__((packed));
+
+#define swim_write(base, reg, v) out_8(&(base)->write_##reg, (v))
+#define swim_read(base, reg) in_8(&(base)->read_##reg)
+
+/* IWM registers */
+
+struct iwm {
+ REG(ph0L)
+ REG(ph0H)
+ REG(ph1L)
+ REG(ph1H)
+ REG(ph2L)
+ REG(ph2H)
+ REG(ph3L)
+ REG(ph3H)
+ REG(mtrOff)
+ REG(mtrOn)
+ REG(intDrive)
+ REG(extDrive)
+ REG(q6L)
+ REG(q6H)
+ REG(q7L)
+ REG(q7H)
+} __attribute__((packed));
+
+#define iwm_write(base, reg, v) out_8(&(base)->reg, (v))
+#define iwm_read(base, reg) in_8(&(base)->reg)
+
+/* bits in phase register */
+
+#define SEEK_POSITIVE 0x070
+#define SEEK_NEGATIVE 0x074
+#define STEP 0x071
+#define MOTOR_ON 0x072
+#define MOTOR_OFF 0x076
+#define INDEX 0x073
+#define EJECT 0x077
+#define SETMFM 0x171
+#define SETGCR 0x175
+
+#define RELAX 0x033
+#define LSTRB 0x008
+
+#define CA_MASK 0x077
+
+/* Select values for swim_select and swim_readbit */
+
+#define READ_DATA_0 0x074
+#define TWOMEG_DRIVE 0x075
+#define SINGLE_SIDED 0x076
+#define DRIVE_PRESENT 0x077
+#define DISK_IN 0x170
+#define WRITE_PROT 0x171
+#define TRACK_ZERO 0x172
+#define TACHO 0x173
+#define READ_DATA_1 0x174
+#define MFM_MODE 0x175
+#define SEEK_COMPLETE 0x176
+#define ONEMEG_MEDIA 0x177
+
+/* Bits in handshake register */
+
+#define MARK_BYTE 0x01
+#define CRC_ZERO 0x02
+#define RDDATA 0x04
+#define SENSE 0x08
+#define MOTEN 0x10
+#define ERROR 0x20
+#define DAT2BYTE 0x40
+#define DAT1BYTE 0x80
+
+/* bits in setup register */
+
+#define S_INV_WDATA 0x01
+#define S_3_5_SELECT 0x02
+#define S_GCR 0x04
+#define S_FCLK_DIV2 0x08
+#define S_ERROR_CORR 0x10
+#define S_IBM_DRIVE 0x20
+#define S_GCR_WRITE 0x40
+#define S_TIMEOUT 0x80
+
+/* bits in mode register */
+
+#define CLFIFO 0x01
+#define ENBL1 0x02
+#define ENBL2 0x04
+#define ACTION 0x08
+#define WRITE_MODE 0x10
+#define HEDSEL 0x20
+#define MOTON 0x80
+
+/*----------------------------------------------------------------------------*/
+
+enum drive_location {
+ INTERNAL_DRIVE = 0x02,
+ EXTERNAL_DRIVE = 0x04,
+};
+
+enum media_type {
+ DD_MEDIA,
+ HD_MEDIA,
+};
+
+struct floppy_state {
+
+ /* physical properties */
+
+ enum drive_location location; /* internal or external drive */
+ int head_number; /* single- or double-sided drive */
+
+ /* media */
+
+ int disk_in;
+ int ejected;
+ enum media_type type;
+ int write_protected;
+
+ int total_secs;
+ int secpercyl;
+ int secpertrack;
+
+ /* in-use information */
+
+ int track;
+ int ref_count;
+
+ struct gendisk *disk;
+
+ /* parent controller */
+
+ struct swim_priv *swd;
+};
+
+enum motor_action {
+ OFF,
+ ON,
+};
+
+enum head {
+ LOWER_HEAD = 0,
+ UPPER_HEAD = 1,
+};
+
+#define FD_MAX_UNIT 2
+
+struct swim_priv {
+ struct swim __iomem *base;
+ spinlock_t lock;
+ struct request_queue *queue;
+ int floppy_count;
+ struct floppy_state unit[FD_MAX_UNIT];
+};
+
+extern int swim_read_sector_header(struct swim __iomem *base,
+ struct sector_header *header);
+extern int swim_read_sector_data(struct swim __iomem *base,
+ unsigned char *data);
+
+static DEFINE_MUTEX(swim_mutex);
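+
+/*
+ * switch the controller between IWM-compatible mode and SWIM mode:
+ * enable != 0 runs the IWM register sequence used to enable SWIM
+ * mode, enable == 0 drops back by writing to the SWIM mode0 register
+ */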
+static inline void set_swim_mode(struct swim __iomem *base, int enable)
+{
+ struct iwm __iomem *iwm_base;
+ unsigned long flags;
+
+ if (!enable) {
+ swim_write(base, mode0, 0xf8);
+ return;
+ }
+
+ iwm_base = (struct iwm __iomem *)base;
+ local_irq_save(flags);
+
+ iwm_read(iwm_base, q7L);
+ iwm_read(iwm_base, mtrOff);
+ iwm_read(iwm_base, q6H);
+
+ iwm_write(iwm_base, q7H, 0x57);
+ iwm_write(iwm_base, q7H, 0x17);
+ iwm_write(iwm_base, q7H, 0x57);
+ iwm_write(iwm_base, q7H, 0x57);
+
+ local_irq_restore(flags);
+}
+
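+/*
+ * detect which mode the chip is in: if values written to the phase
+ * register read back unchanged we are talking to a SWIM (return 1),
+ * otherwise to an IWM (return 0)
+ */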
+static inline int get_swim_mode(struct swim __iomem *base)
+{
+ unsigned long flags;
+
+ local_irq_save(flags);
+
+ swim_write(base, phase, 0xf5);
+ if (swim_read(base, phase) != 0xf5)
+ goto is_iwm;
+ swim_write(base, phase, 0xf6);
+ if (swim_read(base, phase) != 0xf6)
+ goto is_iwm;
+ swim_write(base, phase, 0xf7);
+ if (swim_read(base, phase) != 0xf7)
+ goto is_iwm;
+ local_irq_restore(flags);
+ return 1;
+is_iwm:
+ local_irq_restore(flags);
+ return 0;
+}
+
+static inline void swim_select(struct swim __iomem *base, int sel)
+{
+ swim_write(base, phase, RELAX);
+
+ via1_set_head(sel & 0x100);
+
+ swim_write(base, phase, sel & CA_MASK);
+}
+
+static inline void swim_action(struct swim __iomem *base, int action)
+{
+ unsigned long flags;
+
+ local_irq_save(flags);
+
+ swim_select(base, action);
+ udelay(1);
+ swim_write(base, phase, (LSTRB<<4) | LSTRB);
+ udelay(1);
+ swim_write(base, phase, (LSTRB<<4) | ((~LSTRB) & 0x0F));
+ udelay(1);
+
+ local_irq_restore(flags);
+}
+
+static inline int swim_readbit(struct swim __iomem *base, int bit)
+{
+ int stat;
+
+ swim_select(base, bit);
+
+ udelay(10);
+
+ stat = swim_read(base, handshake);
+
+ return (stat & SENSE) == 0;
+}
+
+static inline void swim_drive(struct swim __iomem *base,
+ enum drive_location location)
+{
+ if (location == INTERNAL_DRIVE) {
+ swim_write(base, mode0, EXTERNAL_DRIVE); /* clear drive 1 bit */
+ swim_write(base, mode1, INTERNAL_DRIVE); /* set drive 0 bit */
+ } else if (location == EXTERNAL_DRIVE) {
+ swim_write(base, mode0, INTERNAL_DRIVE); /* clear drive 0 bit */
+ swim_write(base, mode1, EXTERNAL_DRIVE); /* set drive 1 bit */
+ }
+}
+
+static inline void swim_motor(struct swim __iomem *base,
+ enum motor_action action)
+{
+ if (action == ON) {
+ int i;
+
+ swim_action(base, MOTOR_ON);
+
+ for (i = 0; i < 2*HZ; i++) {
+ swim_select(base, RELAX);
+ if (swim_readbit(base, MOTOR_ON))
+ break;
+ current->state = TASK_INTERRUPTIBLE;
+ schedule_timeout(1);
+ }
+ } else if (action == OFF) {
+ swim_action(base, MOTOR_OFF);
+ swim_select(base, RELAX);
+ }
+}
+
+static inline void swim_eject(struct swim __iomem *base)
+{
+ int i;
+
+ swim_action(base, EJECT);
+
+ for (i = 0; i < 2*HZ; i++) {
+ swim_select(base, RELAX);
+ if (!swim_readbit(base, DISK_IN))
+ break;
+ current->state = TASK_INTERRUPTIBLE;
+ schedule_timeout(1);
+ }
+ swim_select(base, RELAX);
+}
+
+static inline void swim_head(struct swim __iomem *base, enum head head)
+{
+	/* wait for the drive to be ready */
+
+ if (head == UPPER_HEAD)
+ swim_select(base, READ_DATA_1);
+ else if (head == LOWER_HEAD)
+ swim_select(base, READ_DATA_0);
+}
+
+static inline int swim_step(struct swim __iomem *base)
+{
+ int wait;
+
+ swim_action(base, STEP);
+
+ for (wait = 0; wait < HZ; wait++) {
+
+ current->state = TASK_INTERRUPTIBLE;
+ schedule_timeout(1);
+
+ swim_select(base, RELAX);
+ if (!swim_readbit(base, STEP))
+ return 0;
+ }
+ return -1;
+}
+
+static inline int swim_track00(struct swim __iomem *base)
+{
+ int try;
+
+ swim_action(base, SEEK_NEGATIVE);
+
+ for (try = 0; try < 100; try++) {
+
+ swim_select(base, RELAX);
+ if (swim_readbit(base, TRACK_ZERO))
+ break;
+
+ if (swim_step(base))
+ return -1;
+ }
+
+ if (swim_readbit(base, TRACK_ZERO))
+ return 0;
+
+ return -1;
+}
+
+static inline int swim_seek(struct swim __iomem *base, int step)
+{
+ if (step == 0)
+ return 0;
+
+ if (step < 0) {
+ swim_action(base, SEEK_NEGATIVE);
+ step = -step;
+ } else
+ swim_action(base, SEEK_POSITIVE);
+
+ for ( ; step > 0; step--) {
+ if (swim_step(base))
+ return -1;
+ }
+
+ return 0;
+}
+
+static inline int swim_track(struct floppy_state *fs, int track)
+{
+ struct swim __iomem *base = fs->swd->base;
+ int ret;
+
+ ret = swim_seek(base, track - fs->track);
+
+ if (ret == 0)
+ fs->track = track;
+ else {
+ swim_track00(base);
+ fs->track = 0;
+ }
+
+ return ret;
+}
+
+static int floppy_eject(struct floppy_state *fs)
+{
+ struct swim __iomem *base = fs->swd->base;
+
+ swim_drive(base, fs->location);
+ swim_motor(base, OFF);
+ swim_eject(base);
+
+ fs->disk_in = 0;
+ fs->ejected = 1;
+
+ return 0;
+}
+
+static inline int swim_read_sector(struct floppy_state *fs,
+ int side, int track,
+ int sector, unsigned char *buffer)
+{
+ struct swim __iomem *base = fs->swd->base;
+ unsigned long flags;
+ struct sector_header header;
+ int ret = -1;
+ short i;
+
+ swim_track(fs, track);
+
+ swim_write(base, mode1, MOTON);
+ swim_head(base, side);
+ swim_write(base, mode0, side);
+
+ local_irq_save(flags);
+ for (i = 0; i < 36; i++) {
+ ret = swim_read_sector_header(base, &header);
+ if (!ret && (header.sector == sector)) {
+ /* found */
+
+ ret = swim_read_sector_data(base, buffer);
+ break;
+ }
+ }
+ local_irq_restore(flags);
+
+ swim_write(base, mode0, MOTON);
+
+ if ((header.side != side) || (header.track != track) ||
+ (header.sector != sector))
+ return 0;
+
+ return ret;
+}
+
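+/*
+ * Read sectors_nb logical sectors starting at req_sector into buffer.
+ * Each logical sector is converted to track/side/sector: e.g. with 1.44MB
+ * media (secpercyl = 36, secpertrack = 18), logical sector 40 is track 1,
+ * side 0, physical sector 5 (physical sectors are 1-based).
+ */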
+static int floppy_read_sectors(struct floppy_state *fs,
+ int req_sector, int sectors_nb,
+ unsigned char *buffer)
+{
+ struct swim __iomem *base = fs->swd->base;
+ int ret;
+ int side, track, sector;
+ int i, try;
+
+
+ swim_drive(base, fs->location);
+ for (i = req_sector; i < req_sector + sectors_nb; i++) {
+ int x;
+ track = i / fs->secpercyl;
+ x = i % fs->secpercyl;
+ side = x / fs->secpertrack;
+ sector = x % fs->secpertrack + 1;
+
+ try = 5;
+ do {
+ ret = swim_read_sector(fs, side, track, sector,
+ buffer);
+ if (try-- == 0)
+ return -EIO;
+ } while (ret != 512);
+
+ buffer += ret;
+ }
+
+ return 0;
+}
+
+static void redo_fd_request(struct request_queue *q)
+{
+ struct request *req;
+ struct floppy_state *fs;
+
+ req = blk_fetch_request(q);
+ while (req) {
+ int err = -EIO;
+
+ fs = req->rq_disk->private_data;
+ if (blk_rq_pos(req) >= fs->total_secs)
+ goto done;
+ if (!fs->disk_in)
+ goto done;
+ if (rq_data_dir(req) == WRITE && fs->write_protected)
+ goto done;
+
+ switch (rq_data_dir(req)) {
+ case WRITE:
+ /* NOT IMPLEMENTED */
+ break;
+ case READ:
+ err = floppy_read_sectors(fs, blk_rq_pos(req),
+ blk_rq_cur_sectors(req),
+ req->buffer);
+ break;
+ }
+ done:
+ if (!__blk_end_request_cur(req, err))
+ req = blk_fetch_request(q);
+ }
+}
+
+static void do_fd_request(struct request_queue *q)
+{
+ redo_fd_request(q);
+}
+
+static struct floppy_struct floppy_type[4] = {
+ { 0, 0, 0, 0, 0, 0x00, 0x00, 0x00, 0x00, NULL }, /* no testing */
+ { 720, 9, 1, 80, 0, 0x2A, 0x02, 0xDF, 0x50, NULL }, /* 360KB SS 3.5"*/
+ { 1440, 9, 2, 80, 0, 0x2A, 0x02, 0xDF, 0x50, NULL }, /* 720KB 3.5" */
+ { 2880, 18, 2, 80, 0, 0x1B, 0x00, 0xCF, 0x6C, NULL }, /* 1.44MB 3.5" */
+};
+
+static int get_floppy_geometry(struct floppy_state *fs, int type,
+ struct floppy_struct **g)
+{
+ if (type >= ARRAY_SIZE(floppy_type))
+ return -EINVAL;
+
+ if (type)
+ *g = &floppy_type[type];
+ else if (fs->type == HD_MEDIA) /* High-Density media */
+ *g = &floppy_type[3];
+ else if (fs->head_number == 2) /* double-sided */
+ *g = &floppy_type[2];
+ else
+ *g = &floppy_type[1];
+
+ return 0;
+}
+
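+/*
+ * Called when media may have changed: sample the write-protect and media
+ * density sense bits, recalibrate the head to track 0 and derive the
+ * geometry of the disk found in the drive (if any).
+ */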
+static void setup_medium(struct floppy_state *fs)
+{
+ struct swim __iomem *base = fs->swd->base;
+
+ if (swim_readbit(base, DISK_IN)) {
+ struct floppy_struct *g;
+ fs->disk_in = 1;
+ fs->write_protected = swim_readbit(base, WRITE_PROT);
+ fs->type = swim_readbit(base, ONEMEG_MEDIA);
+
+ if (swim_track00(base))
+ printk(KERN_ERR
+ "SWIM: cannot move floppy head to track 0\n");
+
+ swim_track00(base);
+
+ get_floppy_geometry(fs, 0, &g);
+ fs->total_secs = g->size;
+ fs->secpercyl = g->head * g->sect;
+ fs->secpertrack = g->sect;
+ fs->track = 0;
+ } else {
+ fs->disk_in = 0;
+ }
+}
+
+static int floppy_open(struct block_device *bdev, fmode_t mode)
+{
+ struct floppy_state *fs = bdev->bd_disk->private_data;
+ struct swim __iomem *base = fs->swd->base;
+ int err;
+
+ if (fs->ref_count == -1 || (fs->ref_count && mode & FMODE_EXCL))
+ return -EBUSY;
+
+ if (mode & FMODE_EXCL)
+ fs->ref_count = -1;
+ else
+ fs->ref_count++;
+
+ swim_write(base, setup, S_IBM_DRIVE | S_FCLK_DIV2);
+ udelay(10);
+ swim_drive(base, INTERNAL_DRIVE);
+ swim_motor(base, ON);
+ swim_action(base, SETMFM);
+ if (fs->ejected)
+ setup_medium(fs);
+ if (!fs->disk_in) {
+ err = -ENXIO;
+ goto out;
+ }
+
+ if (mode & FMODE_NDELAY)
+ return 0;
+
+ if (mode & (FMODE_READ|FMODE_WRITE)) {
+ check_disk_change(bdev);
+ if ((mode & FMODE_WRITE) && fs->write_protected) {
+ err = -EROFS;
+ goto out;
+ }
+ }
+ return 0;
+out:
+ if (fs->ref_count < 0)
+ fs->ref_count = 0;
+ else if (fs->ref_count > 0)
+ --fs->ref_count;
+
+ if (fs->ref_count == 0)
+ swim_motor(base, OFF);
+ return err;
+}
+
+static int floppy_unlocked_open(struct block_device *bdev, fmode_t mode)
+{
+ int ret;
+
+ mutex_lock(&swim_mutex);
+ ret = floppy_open(bdev, mode);
+ mutex_unlock(&swim_mutex);
+
+ return ret;
+}
+
+static int floppy_release(struct gendisk *disk, fmode_t mode)
+{
+ struct floppy_state *fs = disk->private_data;
+ struct swim __iomem *base = fs->swd->base;
+
+ mutex_lock(&swim_mutex);
+ if (fs->ref_count < 0)
+ fs->ref_count = 0;
+ else if (fs->ref_count > 0)
+ --fs->ref_count;
+
+ if (fs->ref_count == 0)
+ swim_motor(base, OFF);
+ mutex_unlock(&swim_mutex);
+
+ return 0;
+}
+
+static int floppy_ioctl(struct block_device *bdev, fmode_t mode,
+ unsigned int cmd, unsigned long param)
+{
+ struct floppy_state *fs = bdev->bd_disk->private_data;
+ int err;
+
+ if ((cmd & 0x80) && !capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ switch (cmd) {
+ case FDEJECT:
+ if (fs->ref_count != 1)
+ return -EBUSY;
+ mutex_lock(&swim_mutex);
+ err = floppy_eject(fs);
+ mutex_unlock(&swim_mutex);
+ return err;
+
+	case FDGETPRM:
+	{
+		/* geometry of the medium in the drive, not floppy_type[0] */
+		struct floppy_struct *g;
+		get_floppy_geometry(fs, 0, &g);
+		if (copy_to_user((void __user *) param, g,
+				 sizeof(struct floppy_struct)))
+			return -EFAULT;
+		break;
+	}
+
+ default:
+ printk(KERN_DEBUG "SWIM floppy_ioctl: unknown cmd %d\n",
+ cmd);
+ return -ENOSYS;
+ }
+ return 0;
+}
+
+static int floppy_getgeo(struct block_device *bdev, struct hd_geometry *geo)
+{
+ struct floppy_state *fs = bdev->bd_disk->private_data;
+ struct floppy_struct *g;
+ int ret;
+
+ ret = get_floppy_geometry(fs, 0, &g);
+ if (ret)
+ return ret;
+
+ geo->heads = g->head;
+ geo->sectors = g->sect;
+ geo->cylinders = g->track;
+
+ return 0;
+}
+
+static unsigned int floppy_check_events(struct gendisk *disk,
+ unsigned int clearing)
+{
+ struct floppy_state *fs = disk->private_data;
+
+ return fs->ejected ? DISK_EVENT_MEDIA_CHANGE : 0;
+}
+
+static int floppy_revalidate(struct gendisk *disk)
+{
+ struct floppy_state *fs = disk->private_data;
+ struct swim __iomem *base = fs->swd->base;
+
+ swim_drive(base, fs->location);
+
+ if (fs->ejected)
+ setup_medium(fs);
+
+ if (!fs->disk_in)
+ swim_motor(base, OFF);
+ else
+ fs->ejected = 0;
+
+ return !fs->disk_in;
+}
+
+static const struct block_device_operations floppy_fops = {
+ .owner = THIS_MODULE,
+ .open = floppy_unlocked_open,
+ .release = floppy_release,
+ .ioctl = floppy_ioctl,
+ .getgeo = floppy_getgeo,
+ .check_events = floppy_check_events,
+ .revalidate_disk = floppy_revalidate,
+};
+
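+/*
+ * blk_register_region() callback: map a minor number to the gendisk of
+ * the corresponding drive, or NULL if that drive was not detected.
+ */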
+static struct kobject *floppy_find(dev_t dev, int *part, void *data)
+{
+ struct swim_priv *swd = data;
+ int drive = (*part & 3);
+
+	if (drive >= swd->floppy_count)
+ return NULL;
+
+ *part = 0;
+ return get_disk(swd->unit[drive].disk);
+}
+
+static int __devinit swim_add_floppy(struct swim_priv *swd,
+ enum drive_location location)
+{
+ struct floppy_state *fs = &swd->unit[swd->floppy_count];
+ struct swim __iomem *base = swd->base;
+
+ fs->location = location;
+
+ swim_drive(base, location);
+
+ swim_motor(base, OFF);
+
+ if (swim_readbit(base, SINGLE_SIDED))
+ fs->head_number = 1;
+ else
+ fs->head_number = 2;
+ fs->ref_count = 0;
+ fs->ejected = 1;
+
+ swd->floppy_count++;
+
+ return 0;
+}
+
+static int __devinit swim_floppy_init(struct swim_priv *swd)
+{
+ int err;
+ int drive;
+ struct swim __iomem *base = swd->base;
+
+ /* scan floppy drives */
+
+ swim_drive(base, INTERNAL_DRIVE);
+ if (swim_readbit(base, DRIVE_PRESENT))
+ swim_add_floppy(swd, INTERNAL_DRIVE);
+ swim_drive(base, EXTERNAL_DRIVE);
+ if (swim_readbit(base, DRIVE_PRESENT))
+ swim_add_floppy(swd, EXTERNAL_DRIVE);
+
+ /* register floppy drives */
+
+ err = register_blkdev(FLOPPY_MAJOR, "fd");
+ if (err) {
+ printk(KERN_ERR "Unable to get major %d for SWIM floppy\n",
+ FLOPPY_MAJOR);
+ return -EBUSY;
+ }
+
+ for (drive = 0; drive < swd->floppy_count; drive++) {
+ swd->unit[drive].disk = alloc_disk(1);
+ if (swd->unit[drive].disk == NULL) {
+ err = -ENOMEM;
+ goto exit_put_disks;
+ }
+ swd->unit[drive].swd = swd;
+ }
+
+ swd->queue = blk_init_queue(do_fd_request, &swd->lock);
+ if (!swd->queue) {
+ err = -ENOMEM;
+ goto exit_put_disks;
+ }
+
+ for (drive = 0; drive < swd->floppy_count; drive++) {
+ swd->unit[drive].disk->flags = GENHD_FL_REMOVABLE;
+ swd->unit[drive].disk->major = FLOPPY_MAJOR;
+ swd->unit[drive].disk->first_minor = drive;
+ sprintf(swd->unit[drive].disk->disk_name, "fd%d", drive);
+ swd->unit[drive].disk->fops = &floppy_fops;
+ swd->unit[drive].disk->private_data = &swd->unit[drive];
+ swd->unit[drive].disk->queue = swd->queue;
+ set_capacity(swd->unit[drive].disk, 2880);
+ add_disk(swd->unit[drive].disk);
+ }
+
+ blk_register_region(MKDEV(FLOPPY_MAJOR, 0), 256, THIS_MODULE,
+ floppy_find, NULL, swd);
+
+ return 0;
+
+exit_put_disks:
+ unregister_blkdev(FLOPPY_MAJOR, "fd");
+ while (drive--)
+ put_disk(swd->unit[drive].disk);
+ return err;
+}
+
+static int __devinit swim_probe(struct platform_device *dev)
+{
+ struct resource *res;
+ struct swim __iomem *swim_base;
+ struct swim_priv *swd;
+ int ret;
+
+ res = platform_get_resource(dev, IORESOURCE_MEM, 0);
+ if (!res) {
+ ret = -ENODEV;
+ goto out;
+ }
+
+ if (!request_mem_region(res->start, resource_size(res), CARDNAME)) {
+ ret = -EBUSY;
+ goto out;
+ }
+
+ swim_base = ioremap(res->start, resource_size(res));
+ if (!swim_base) {
+		ret = -ENOMEM;
+ goto out_release_io;
+ }
+
+ /* probe device */
+
+ set_swim_mode(swim_base, 1);
+ if (!get_swim_mode(swim_base)) {
+		printk(KERN_INFO "SWIM device not found!\n");
+ ret = -ENODEV;
+ goto out_iounmap;
+ }
+
+ /* set platform driver data */
+
+ swd = kzalloc(sizeof(struct swim_priv), GFP_KERNEL);
+ if (!swd) {
+ ret = -ENOMEM;
+ goto out_iounmap;
+ }
+ platform_set_drvdata(dev, swd);
+
+ swd->base = swim_base;
+
+ ret = swim_floppy_init(swd);
+ if (ret)
+ goto out_kfree;
+
+ return 0;
+
+out_kfree:
+ platform_set_drvdata(dev, NULL);
+ kfree(swd);
+out_iounmap:
+ iounmap(swim_base);
+out_release_io:
+ release_mem_region(res->start, resource_size(res));
+out:
+ return ret;
+}
+
+static int __devexit swim_remove(struct platform_device *dev)
+{
+ struct swim_priv *swd = platform_get_drvdata(dev);
+ int drive;
+ struct resource *res;
+
+ blk_unregister_region(MKDEV(FLOPPY_MAJOR, 0), 256);
+
+ for (drive = 0; drive < swd->floppy_count; drive++) {
+ del_gendisk(swd->unit[drive].disk);
+ put_disk(swd->unit[drive].disk);
+ }
+
+ unregister_blkdev(FLOPPY_MAJOR, "fd");
+
+ blk_cleanup_queue(swd->queue);
+
+ /* eject floppies */
+
+ for (drive = 0; drive < swd->floppy_count; drive++)
+ floppy_eject(&swd->unit[drive]);
+
+ iounmap(swd->base);
+
+ res = platform_get_resource(dev, IORESOURCE_MEM, 0);
+ if (res)
+ release_mem_region(res->start, resource_size(res));
+
+ platform_set_drvdata(dev, NULL);
+ kfree(swd);
+
+ return 0;
+}
+
+static struct platform_driver swim_driver = {
+ .probe = swim_probe,
+ .remove = __devexit_p(swim_remove),
+ .driver = {
+ .name = CARDNAME,
+ .owner = THIS_MODULE,
+ },
+};
+
+static int __init swim_init(void)
+{
+ printk(KERN_INFO "SWIM floppy driver %s\n", DRIVER_VERSION);
+
+ return platform_driver_register(&swim_driver);
+}
+module_init(swim_init);
+
+static void __exit swim_exit(void)
+{
+ platform_driver_unregister(&swim_driver);
+}
+module_exit(swim_exit);
+
+MODULE_DESCRIPTION("Driver for SWIM floppy controller");
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Laurent Vivier <laurent@lvivier.info>");
+MODULE_ALIAS_BLOCKDEV_MAJOR(FLOPPY_MAJOR);
diff --git a/drivers/block/swim3.c b/drivers/block/swim3.c
new file mode 100644
index 00000000..773bfa79
--- /dev/null
+++ b/drivers/block/swim3.c
@@ -0,0 +1,1213 @@
+/*
+ * Driver for the SWIM3 (Super Woz Integrated Machine 3)
+ * floppy controller found on Power Macintoshes.
+ *
+ * Copyright (C) 1996 Paul Mackerras.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+/*
+ * TODO:
+ * handle 2 drives
+ * handle GCR disks
+ */
+
+#include <linux/stddef.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/timer.h>
+#include <linux/delay.h>
+#include <linux/fd.h>
+#include <linux/ioctl.h>
+#include <linux/blkdev.h>
+#include <linux/interrupt.h>
+#include <linux/mutex.h>
+#include <linux/module.h>
+#include <linux/spinlock.h>
+#include <asm/io.h>
+#include <asm/dbdma.h>
+#include <asm/prom.h>
+#include <asm/uaccess.h>
+#include <asm/mediabay.h>
+#include <asm/machdep.h>
+#include <asm/pmac_feature.h>
+
+static DEFINE_MUTEX(swim3_mutex);
+static struct request_queue *swim3_queue;
+static struct gendisk *disks[2];
+static struct request *fd_req;
+
+#define MAX_FLOPPIES 2
+
+enum swim_state {
+ idle,
+ locating,
+ seeking,
+ settling,
+ do_transfer,
+ jogging,
+ available,
+ revalidating,
+ ejecting
+};
+
+#define REG(x) unsigned char x; char x ## _pad[15];
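+/* each register is one byte wide, spaced at 16-byte intervals in MMIO */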
+
+/*
+ * The names for these registers mostly represent speculation on my part.
+ * It will be interesting to see how close they are to the names Apple uses.
+ */
+struct swim3 {
+ REG(data);
+ REG(timer); /* counts down at 1MHz */
+ REG(error);
+ REG(mode);
+ REG(select); /* controls CA0, CA1, CA2 and LSTRB signals */
+ REG(setup);
+ REG(control); /* writing bits clears them */
+ REG(status); /* writing bits sets them in control */
+ REG(intr);
+ REG(nseek); /* # tracks to seek */
+ REG(ctrack); /* current track number */
+ REG(csect); /* current sector number */
+ REG(gap3); /* size of gap 3 in track format */
+ REG(sector); /* sector # to read or write */
+ REG(nsect); /* # sectors to read or write */
+ REG(intr_enable);
+};
+
+#define control_bic control
+#define control_bis status
+
+/* Bits in select register */
+#define CA_MASK 7
+#define LSTRB 8
+
+/* Bits in control register */
+#define DO_SEEK 0x80
+#define FORMAT 0x40
+#define SELECT 0x20
+#define WRITE_SECTORS 0x10
+#define DO_ACTION 0x08
+#define DRIVE2_ENABLE 0x04
+#define DRIVE_ENABLE 0x02
+#define INTR_ENABLE 0x01
+
+/* Bits in status register */
+#define FIFO_1BYTE 0x80
+#define FIFO_2BYTE 0x40
+#define ERROR 0x20
+#define DATA 0x08
+#define RDDATA 0x04
+#define INTR_PENDING 0x02
+#define MARK_BYTE 0x01
+
+/* Bits in intr and intr_enable registers */
+#define ERROR_INTR 0x20
+#define DATA_CHANGED 0x10
+#define TRANSFER_DONE 0x08
+#define SEEN_SECTOR 0x04
+#define SEEK_DONE 0x02
+#define TIMER_DONE 0x01
+
+/* Bits in error register */
+#define ERR_DATA_CRC 0x80
+#define ERR_ADDR_CRC 0x40
+#define ERR_OVERRUN 0x04
+#define ERR_UNDERRUN 0x01
+
+/* Bits in setup register */
+#define S_SW_RESET 0x80
+#define S_GCR_WRITE 0x40
+#define S_IBM_DRIVE 0x20
+#define S_TEST_MODE 0x10
+#define S_FCLK_DIV2 0x08
+#define S_GCR 0x04
+#define S_COPY_PROT 0x02
+#define S_INV_WDATA 0x01
+
+/* Select values for swim3_action */
+#define SEEK_POSITIVE 0
+#define SEEK_NEGATIVE 4
+#define STEP 1
+#define MOTOR_ON 2
+#define MOTOR_OFF 6
+#define INDEX 3
+#define EJECT 7
+#define SETMFM 9
+#define SETGCR 13
+
+/* Select values for swim3_select and swim3_readbit */
+#define STEP_DIR 0
+#define STEPPING 1
+#define MOTOR_ON 2
+#define RELAX 3 /* also eject in progress */
+#define READ_DATA_0 4
+#define TWOMEG_DRIVE 5
+#define SINGLE_SIDED 6 /* drive or diskette is 4MB type? */
+#define DRIVE_PRESENT 7
+#define DISK_IN 8
+#define WRITE_PROT 9
+#define TRACK_ZERO 10
+#define TACHO 11
+#define READ_DATA_1 12
+#define MFM_MODE 13
+#define SEEK_COMPLETE 14
+#define ONEMEG_MEDIA 15
+
+/* Definitions of values used in writing and formatting */
+#define DATA_ESCAPE 0x99
+#define GCR_SYNC_EXC 0x3f
+#define GCR_SYNC_CONV 0x80
+#define GCR_FIRST_MARK 0xd5
+#define GCR_SECOND_MARK 0xaa
+#define GCR_ADDR_MARK "\xd5\xaa\x00"
+#define GCR_DATA_MARK "\xd5\xaa\x0b"
+#define GCR_SLIP_BYTE "\x27\xaa"
+#define GCR_SELF_SYNC "\x3f\xbf\x1e\x34\x3c\x3f"
+
+#define DATA_99 "\x99\x99"
+#define MFM_ADDR_MARK "\x99\xa1\x99\xa1\x99\xa1\x99\xfe"
+#define MFM_INDEX_MARK "\x99\xc2\x99\xc2\x99\xc2\x99\xfc"
+#define MFM_GAP_LEN 12
+
+struct floppy_state {
+ enum swim_state state;
+ spinlock_t lock;
+ struct swim3 __iomem *swim3; /* hardware registers */
+ struct dbdma_regs __iomem *dma; /* DMA controller registers */
+ int swim3_intr; /* interrupt number for SWIM3 */
+ int dma_intr; /* interrupt number for DMA channel */
+ int cur_cyl; /* cylinder head is on, or -1 */
+ int cur_sector; /* last sector we saw go past */
+ int req_cyl; /* the cylinder for the current r/w request */
+ int head; /* head number ditto */
+ int req_sector; /* sector number ditto */
+ int scount; /* # sectors we're transferring at present */
+ int retries;
+ int settle_time;
+ int secpercyl; /* disk geometry information */
+ int secpertrack;
+ int total_secs;
+ int write_prot; /* 1 if write-protected, 0 if not, -1 dunno */
+ struct dbdma_cmd *dma_cmd;
+ int ref_count;
+ int expect_cyl;
+ struct timer_list timeout;
+ int timeout_pending;
+ int ejected;
+ wait_queue_head_t wait;
+ int wanted;
+ struct macio_dev *mdev;
+ char dbdma_cmd_space[5 * sizeof(struct dbdma_cmd)];
+};
+
+static struct floppy_state floppy_states[MAX_FLOPPIES];
+static int floppy_count = 0;
+static DEFINE_SPINLOCK(swim3_lock);
+
+static unsigned short write_preamble[] = {
+ 0x4e4e, 0x4e4e, 0x4e4e, 0x4e4e, 0x4e4e, /* gap field */
+ 0, 0, 0, 0, 0, 0, /* sync field */
+ 0x99a1, 0x99a1, 0x99a1, 0x99fb, /* data address mark */
+ 0x990f /* no escape for 512 bytes */
+};
+
+static unsigned short write_postamble[] = {
+ 0x9904, /* insert CRC */
+ 0x4e4e, 0x4e4e,
+ 0x9908, /* stop writing */
+ 0, 0, 0, 0, 0, 0
+};
+
+static void swim3_select(struct floppy_state *fs, int sel);
+static void swim3_action(struct floppy_state *fs, int action);
+static int swim3_readbit(struct floppy_state *fs, int bit);
+static void do_fd_request(struct request_queue * q);
+static void start_request(struct floppy_state *fs);
+static void set_timeout(struct floppy_state *fs, int nticks,
+ void (*proc)(unsigned long));
+static void scan_track(struct floppy_state *fs);
+static void seek_track(struct floppy_state *fs, int n);
+static void init_dma(struct dbdma_cmd *cp, int cmd, void *buf, int count);
+static void setup_transfer(struct floppy_state *fs);
+static void act(struct floppy_state *fs);
+static void scan_timeout(unsigned long data);
+static void seek_timeout(unsigned long data);
+static void settle_timeout(unsigned long data);
+static void xfer_timeout(unsigned long data);
+static irqreturn_t swim3_interrupt(int irq, void *dev_id);
+/*static void fd_dma_interrupt(int irq, void *dev_id);*/
+static int grab_drive(struct floppy_state *fs, enum swim_state state,
+ int interruptible);
+static void release_drive(struct floppy_state *fs);
+static int fd_eject(struct floppy_state *fs);
+static int floppy_ioctl(struct block_device *bdev, fmode_t mode,
+ unsigned int cmd, unsigned long param);
+static int floppy_open(struct block_device *bdev, fmode_t mode);
+static int floppy_release(struct gendisk *disk, fmode_t mode);
+static unsigned int floppy_check_events(struct gendisk *disk,
+ unsigned int clearing);
+static int floppy_revalidate(struct gendisk *disk);
+
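+/*
+ * Complete bytes of the current request; returns true while the request
+ * still has data outstanding, false once it is fully finished (fd_req is
+ * then dropped so the next request can be fetched).
+ */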
+static bool swim3_end_request(int err, unsigned int nr_bytes)
+{
+ if (__blk_end_request(fd_req, err, nr_bytes))
+ return true;
+
+ fd_req = NULL;
+ return false;
+}
+
+static bool swim3_end_request_cur(int err)
+{
+ return swim3_end_request(err, blk_rq_cur_bytes(fd_req));
+}
+
+static void swim3_select(struct floppy_state *fs, int sel)
+{
+ struct swim3 __iomem *sw = fs->swim3;
+
+ out_8(&sw->select, RELAX);
+ if (sel & 8)
+ out_8(&sw->control_bis, SELECT);
+ else
+ out_8(&sw->control_bic, SELECT);
+ out_8(&sw->select, sel & CA_MASK);
+}
+
+static void swim3_action(struct floppy_state *fs, int action)
+{
+ struct swim3 __iomem *sw = fs->swim3;
+
+ swim3_select(fs, action);
+ udelay(1);
+ out_8(&sw->select, sw->select | LSTRB);
+ udelay(2);
+ out_8(&sw->select, sw->select & ~LSTRB);
+ udelay(1);
+}
+
+static int swim3_readbit(struct floppy_state *fs, int bit)
+{
+ struct swim3 __iomem *sw = fs->swim3;
+ int stat;
+
+ swim3_select(fs, bit);
+ udelay(1);
+ stat = in_8(&sw->status);
+ return (stat & DATA) == 0;
+}
+
+static void do_fd_request(struct request_queue * q)
+{
+ int i;
+
+ for(i=0; i<floppy_count; i++) {
+ struct floppy_state *fs = &floppy_states[i];
+ if (fs->mdev->media_bay &&
+ check_media_bay(fs->mdev->media_bay) != MB_FD)
+ continue;
+ start_request(fs);
+ }
+}
+
+static void start_request(struct floppy_state *fs)
+{
+ struct request *req;
+ unsigned long x;
+
+ if (fs->state == idle && fs->wanted) {
+ fs->state = available;
+ wake_up(&fs->wait);
+ return;
+ }
+ while (fs->state == idle) {
+ if (!fd_req) {
+ fd_req = blk_fetch_request(swim3_queue);
+ if (!fd_req)
+ break;
+ }
+ req = fd_req;
+#if 0
+ printk("do_fd_req: dev=%s cmd=%d sec=%ld nr_sec=%u buf=%p\n",
+ req->rq_disk->disk_name, req->cmd,
+ (long)blk_rq_pos(req), blk_rq_sectors(req), req->buffer);
+ printk(" errors=%d current_nr_sectors=%u\n",
+ req->errors, blk_rq_cur_sectors(req));
+#endif
+
+ if (blk_rq_pos(req) >= fs->total_secs) {
+ swim3_end_request_cur(-EIO);
+ continue;
+ }
+ if (fs->ejected) {
+ swim3_end_request_cur(-EIO);
+ continue;
+ }
+
+ if (rq_data_dir(req) == WRITE) {
+ if (fs->write_prot < 0)
+ fs->write_prot = swim3_readbit(fs, WRITE_PROT);
+ if (fs->write_prot) {
+ swim3_end_request_cur(-EIO);
+ continue;
+ }
+ }
+
+ /* Do not remove the cast. blk_rq_pos(req) is now a
+ * sector_t and can be 64 bits, but it will never go
+ * past 32 bits for this driver anyway, so we can
+ * safely cast it down and not have to do a 64/32
+ * division
+ */
+ fs->req_cyl = ((long)blk_rq_pos(req)) / fs->secpercyl;
+ x = ((long)blk_rq_pos(req)) % fs->secpercyl;
+ fs->head = x / fs->secpertrack;
+ fs->req_sector = x % fs->secpertrack + 1;
+ fd_req = req;
+ fs->state = do_transfer;
+ fs->retries = 0;
+
+ act(fs);
+ }
+}
+
+static void set_timeout(struct floppy_state *fs, int nticks,
+ void (*proc)(unsigned long))
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&fs->lock, flags);
+ if (fs->timeout_pending)
+ del_timer(&fs->timeout);
+ fs->timeout.expires = jiffies + nticks;
+ fs->timeout.function = proc;
+ fs->timeout.data = (unsigned long) fs;
+ add_timer(&fs->timeout);
+ fs->timeout_pending = 1;
+ spin_unlock_irqrestore(&fs->lock, flags);
+}
+
+static inline void scan_track(struct floppy_state *fs)
+{
+ struct swim3 __iomem *sw = fs->swim3;
+
+ swim3_select(fs, READ_DATA_0);
+ in_8(&sw->intr); /* clear SEEN_SECTOR bit */
+ in_8(&sw->error);
+ out_8(&sw->intr_enable, SEEN_SECTOR);
+ out_8(&sw->control_bis, DO_ACTION);
+ /* enable intr when track found */
+ set_timeout(fs, HZ, scan_timeout); /* enable timeout */
+}
+
+static inline void seek_track(struct floppy_state *fs, int n)
+{
+ struct swim3 __iomem *sw = fs->swim3;
+
+ if (n >= 0) {
+ swim3_action(fs, SEEK_POSITIVE);
+ sw->nseek = n;
+ } else {
+ swim3_action(fs, SEEK_NEGATIVE);
+ sw->nseek = -n;
+ }
+ fs->expect_cyl = (fs->cur_cyl >= 0)? fs->cur_cyl + n: -1;
+ swim3_select(fs, STEP);
+ in_8(&sw->error);
+ /* enable intr when seek finished */
+ out_8(&sw->intr_enable, SEEK_DONE);
+ out_8(&sw->control_bis, DO_SEEK);
+ set_timeout(fs, 3*HZ, seek_timeout); /* enable timeout */
+ fs->settle_time = 0;
+}
+
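+/*
+ * Fill in one DBDMA command descriptor: little-endian byte count, command
+ * word and physical address of the buffer, with the transfer status
+ * cleared.
+ */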
+static inline void init_dma(struct dbdma_cmd *cp, int cmd,
+ void *buf, int count)
+{
+ st_le16(&cp->req_count, count);
+ st_le16(&cp->command, cmd);
+ st_le32(&cp->phy_addr, virt_to_bus(buf));
+ cp->xfer_status = 0;
+}
+
+static inline void setup_transfer(struct floppy_state *fs)
+{
+ int n;
+ struct swim3 __iomem *sw = fs->swim3;
+ struct dbdma_cmd *cp = fs->dma_cmd;
+ struct dbdma_regs __iomem *dr = fs->dma;
+
+ if (blk_rq_cur_sectors(fd_req) <= 0) {
+ printk(KERN_ERR "swim3: transfer 0 sectors?\n");
+ return;
+ }
+ if (rq_data_dir(fd_req) == WRITE)
+ n = 1;
+ else {
+ n = fs->secpertrack - fs->req_sector + 1;
+ if (n > blk_rq_cur_sectors(fd_req))
+ n = blk_rq_cur_sectors(fd_req);
+ }
+ fs->scount = n;
+ swim3_select(fs, fs->head? READ_DATA_1: READ_DATA_0);
+ out_8(&sw->sector, fs->req_sector);
+ out_8(&sw->nsect, n);
+ out_8(&sw->gap3, 0);
+ out_le32(&dr->cmdptr, virt_to_bus(cp));
+ if (rq_data_dir(fd_req) == WRITE) {
+ /* Set up 3 dma commands: write preamble, data, postamble */
+ init_dma(cp, OUTPUT_MORE, write_preamble, sizeof(write_preamble));
+ ++cp;
+ init_dma(cp, OUTPUT_MORE, fd_req->buffer, 512);
+ ++cp;
+ init_dma(cp, OUTPUT_LAST, write_postamble, sizeof(write_postamble));
+ } else {
+ init_dma(cp, INPUT_LAST, fd_req->buffer, n * 512);
+ }
+ ++cp;
+ out_le16(&cp->command, DBDMA_STOP);
+ out_8(&sw->control_bic, DO_ACTION | WRITE_SECTORS);
+ in_8(&sw->error);
+ out_8(&sw->control_bic, DO_ACTION | WRITE_SECTORS);
+ if (rq_data_dir(fd_req) == WRITE)
+ out_8(&sw->control_bis, WRITE_SECTORS);
+ in_8(&sw->intr);
+ out_le32(&dr->control, (RUN << 16) | RUN);
+ /* enable intr when transfer complete */
+ out_8(&sw->intr_enable, TRANSFER_DONE);
+ out_8(&sw->control_bis, DO_ACTION);
+ set_timeout(fs, 2*HZ, xfer_timeout); /* enable timeout */
+}
+
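+/*
+ * Advance the drive state machine: locate the current cylinder, seek and
+ * settle if needed, then set up the transfer.  States that wait on the
+ * hardware re-enter here from the interrupt handler or a timeout.
+ */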
+static void act(struct floppy_state *fs)
+{
+ for (;;) {
+ switch (fs->state) {
+ case idle:
+ return; /* XXX shouldn't get here */
+
+ case locating:
+ if (swim3_readbit(fs, TRACK_ZERO)) {
+ fs->cur_cyl = 0;
+ if (fs->req_cyl == 0)
+ fs->state = do_transfer;
+ else
+ fs->state = seeking;
+ break;
+ }
+ scan_track(fs);
+ return;
+
+ case seeking:
+ if (fs->cur_cyl < 0) {
+ fs->expect_cyl = -1;
+ fs->state = locating;
+ break;
+ }
+ if (fs->req_cyl == fs->cur_cyl) {
+ printk("whoops, seeking 0\n");
+ fs->state = do_transfer;
+ break;
+ }
+ seek_track(fs, fs->req_cyl - fs->cur_cyl);
+ return;
+
+ case settling:
+ /* check for SEEK_COMPLETE after 30ms */
+ fs->settle_time = (HZ + 32) / 33;
+ set_timeout(fs, fs->settle_time, settle_timeout);
+ return;
+
+ case do_transfer:
+ if (fs->cur_cyl != fs->req_cyl) {
+ if (fs->retries > 5) {
+ swim3_end_request_cur(-EIO);
+ fs->state = idle;
+ return;
+ }
+ fs->state = seeking;
+ break;
+ }
+ setup_transfer(fs);
+ return;
+
+ case jogging:
+ seek_track(fs, -5);
+ return;
+
+ default:
+ printk(KERN_ERR"swim3: unknown state %d\n", fs->state);
+ return;
+ }
+ }
+}
+
+static void scan_timeout(unsigned long data)
+{
+ struct floppy_state *fs = (struct floppy_state *) data;
+ struct swim3 __iomem *sw = fs->swim3;
+
+ fs->timeout_pending = 0;
+ out_8(&sw->control_bic, DO_ACTION | WRITE_SECTORS);
+ out_8(&sw->select, RELAX);
+ out_8(&sw->intr_enable, 0);
+ fs->cur_cyl = -1;
+ if (fs->retries > 5) {
+ swim3_end_request_cur(-EIO);
+ fs->state = idle;
+ start_request(fs);
+ } else {
+ fs->state = jogging;
+ act(fs);
+ }
+}
+
+static void seek_timeout(unsigned long data)
+{
+ struct floppy_state *fs = (struct floppy_state *) data;
+ struct swim3 __iomem *sw = fs->swim3;
+
+ fs->timeout_pending = 0;
+ out_8(&sw->control_bic, DO_SEEK);
+ out_8(&sw->select, RELAX);
+ out_8(&sw->intr_enable, 0);
+ printk(KERN_ERR "swim3: seek timeout\n");
+ swim3_end_request_cur(-EIO);
+ fs->state = idle;
+ start_request(fs);
+}
+
+static void settle_timeout(unsigned long data)
+{
+ struct floppy_state *fs = (struct floppy_state *) data;
+ struct swim3 __iomem *sw = fs->swim3;
+
+ fs->timeout_pending = 0;
+ if (swim3_readbit(fs, SEEK_COMPLETE)) {
+ out_8(&sw->select, RELAX);
+ fs->state = locating;
+ act(fs);
+ return;
+ }
+ out_8(&sw->select, RELAX);
+ if (fs->settle_time < 2*HZ) {
+ ++fs->settle_time;
+ set_timeout(fs, 1, settle_timeout);
+ return;
+ }
+ printk(KERN_ERR "swim3: seek settle timeout\n");
+ swim3_end_request_cur(-EIO);
+ fs->state = idle;
+ start_request(fs);
+}
+
+static void xfer_timeout(unsigned long data)
+{
+ struct floppy_state *fs = (struct floppy_state *) data;
+ struct swim3 __iomem *sw = fs->swim3;
+ struct dbdma_regs __iomem *dr = fs->dma;
+ int n;
+
+ fs->timeout_pending = 0;
+ out_le32(&dr->control, RUN << 16);
+ /* We must wait a bit for dbdma to stop */
+ for (n = 0; (in_le32(&dr->status) & ACTIVE) && n < 1000; n++)
+ udelay(1);
+ out_8(&sw->intr_enable, 0);
+ out_8(&sw->control_bic, WRITE_SECTORS | DO_ACTION);
+ out_8(&sw->select, RELAX);
+ printk(KERN_ERR "swim3: timeout %sing sector %ld\n",
+ (rq_data_dir(fd_req)==WRITE? "writ": "read"),
+ (long)blk_rq_pos(fd_req));
+ swim3_end_request_cur(-EIO);
+ fs->state = idle;
+ start_request(fs);
+}
+
+static irqreturn_t swim3_interrupt(int irq, void *dev_id)
+{
+ struct floppy_state *fs = (struct floppy_state *) dev_id;
+ struct swim3 __iomem *sw = fs->swim3;
+ int intr, err, n;
+ int stat, resid;
+ struct dbdma_regs __iomem *dr;
+ struct dbdma_cmd *cp;
+
+ intr = in_8(&sw->intr);
+ err = (intr & ERROR_INTR)? in_8(&sw->error): 0;
+ if ((intr & ERROR_INTR) && fs->state != do_transfer)
+ printk(KERN_ERR "swim3_interrupt, state=%d, dir=%x, intr=%x, err=%x\n",
+ fs->state, rq_data_dir(fd_req), intr, err);
+ switch (fs->state) {
+ case locating:
+ if (intr & SEEN_SECTOR) {
+ out_8(&sw->control_bic, DO_ACTION | WRITE_SECTORS);
+ out_8(&sw->select, RELAX);
+ out_8(&sw->intr_enable, 0);
+ del_timer(&fs->timeout);
+ fs->timeout_pending = 0;
+ if (sw->ctrack == 0xff) {
+ printk(KERN_ERR "swim3: seen sector but cyl=ff?\n");
+ fs->cur_cyl = -1;
+ if (fs->retries > 5) {
+ swim3_end_request_cur(-EIO);
+ fs->state = idle;
+ start_request(fs);
+ } else {
+ fs->state = jogging;
+ act(fs);
+ }
+ break;
+ }
+ fs->cur_cyl = sw->ctrack;
+ fs->cur_sector = sw->csect;
+ if (fs->expect_cyl != -1 && fs->expect_cyl != fs->cur_cyl)
+ printk(KERN_ERR "swim3: expected cyl %d, got %d\n",
+ fs->expect_cyl, fs->cur_cyl);
+ fs->state = do_transfer;
+ act(fs);
+ }
+ break;
+ case seeking:
+ case jogging:
+ if (sw->nseek == 0) {
+ out_8(&sw->control_bic, DO_SEEK);
+ out_8(&sw->select, RELAX);
+ out_8(&sw->intr_enable, 0);
+ del_timer(&fs->timeout);
+ fs->timeout_pending = 0;
+ if (fs->state == seeking)
+ ++fs->retries;
+ fs->state = settling;
+ act(fs);
+ }
+ break;
+ case settling:
+ out_8(&sw->intr_enable, 0);
+ del_timer(&fs->timeout);
+ fs->timeout_pending = 0;
+ act(fs);
+ break;
+ case do_transfer:
+ if ((intr & (ERROR_INTR | TRANSFER_DONE)) == 0)
+ break;
+ out_8(&sw->intr_enable, 0);
+ out_8(&sw->control_bic, WRITE_SECTORS | DO_ACTION);
+ out_8(&sw->select, RELAX);
+ del_timer(&fs->timeout);
+ fs->timeout_pending = 0;
+ dr = fs->dma;
+ cp = fs->dma_cmd;
+ if (rq_data_dir(fd_req) == WRITE)
+ ++cp;
+ /*
+ * Check that the main data transfer has finished.
+ * On writing, the swim3 sometimes doesn't use
+ * up all the bytes of the postamble, so we can still
+ * see DMA active here. That doesn't matter as long
+ * as all the sector data has been transferred.
+ */
+ if ((intr & ERROR_INTR) == 0 && cp->xfer_status == 0) {
+ /* wait a little while for DMA to complete */
+ for (n = 0; n < 100; ++n) {
+ if (cp->xfer_status != 0)
+ break;
+ udelay(1);
+ barrier();
+ }
+ }
+ /* turn off DMA */
+ out_le32(&dr->control, (RUN | PAUSE) << 16);
+ stat = ld_le16(&cp->xfer_status);
+ resid = ld_le16(&cp->res_count);
+ if (intr & ERROR_INTR) {
+ n = fs->scount - 1 - resid / 512;
+ if (n > 0) {
+ blk_update_request(fd_req, 0, n << 9);
+ fs->req_sector += n;
+ }
+ if (fs->retries < 5) {
+ ++fs->retries;
+ act(fs);
+ } else {
+ printk("swim3: error %sing block %ld (err=%x)\n",
+ rq_data_dir(fd_req) == WRITE? "writ": "read",
+ (long)blk_rq_pos(fd_req), err);
+ swim3_end_request_cur(-EIO);
+ fs->state = idle;
+ }
+ } else {
+ if ((stat & ACTIVE) == 0 || resid != 0) {
+ /* musta been an error */
+ printk(KERN_ERR "swim3: fd dma: stat=%x resid=%d\n", stat, resid);
+ printk(KERN_ERR " state=%d, dir=%x, intr=%x, err=%x\n",
+ fs->state, rq_data_dir(fd_req), intr, err);
+ swim3_end_request_cur(-EIO);
+ fs->state = idle;
+ start_request(fs);
+ break;
+ }
+ if (swim3_end_request(0, fs->scount << 9)) {
+ fs->req_sector += fs->scount;
+ if (fs->req_sector > fs->secpertrack) {
+ fs->req_sector -= fs->secpertrack;
+ if (++fs->head > 1) {
+ fs->head = 0;
+ ++fs->req_cyl;
+ }
+ }
+ act(fs);
+ } else
+ fs->state = idle;
+ }
+ if (fs->state == idle)
+ start_request(fs);
+ break;
+ default:
+ printk(KERN_ERR "swim3: don't know what to do in state %d\n", fs->state);
+ }
+ return IRQ_HANDLED;
+}
+
+/*
+static void fd_dma_interrupt(int irq, void *dev_id)
+{
+}
+*/
+
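+/*
+ * Claim the drive for a caller outside the request state machine: wait
+ * (interruptibly if asked) until it becomes available, then install the
+ * new state.  release_drive() puts it back to idle and restarts any
+ * queued requests.
+ */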
+static int grab_drive(struct floppy_state *fs, enum swim_state state,
+ int interruptible)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&fs->lock, flags);
+ if (fs->state != idle) {
+ ++fs->wanted;
+ while (fs->state != available) {
+ if (interruptible && signal_pending(current)) {
+ --fs->wanted;
+ spin_unlock_irqrestore(&fs->lock, flags);
+ return -EINTR;
+ }
+ interruptible_sleep_on(&fs->wait);
+ }
+ --fs->wanted;
+ }
+ fs->state = state;
+ spin_unlock_irqrestore(&fs->lock, flags);
+ return 0;
+}
+
+static void release_drive(struct floppy_state *fs)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&fs->lock, flags);
+ fs->state = idle;
+ start_request(fs);
+ spin_unlock_irqrestore(&fs->lock, flags);
+}
+
+static int fd_eject(struct floppy_state *fs)
+{
+ int err, n;
+
+ err = grab_drive(fs, ejecting, 1);
+ if (err)
+ return err;
+ swim3_action(fs, EJECT);
+ for (n = 20; n > 0; --n) {
+ if (signal_pending(current)) {
+ err = -EINTR;
+ break;
+ }
+ swim3_select(fs, RELAX);
+ schedule_timeout_interruptible(1);
+ if (swim3_readbit(fs, DISK_IN) == 0)
+ break;
+ }
+ swim3_select(fs, RELAX);
+ udelay(150);
+ fs->ejected = 1;
+ release_drive(fs);
+ return err;
+}
+
+static struct floppy_struct floppy_type =
+ { 2880,18,2,80,0,0x1B,0x00,0xCF,0x6C,NULL }; /* 7 1.44MB 3.5" */
+
+static int floppy_locked_ioctl(struct block_device *bdev, fmode_t mode,
+ unsigned int cmd, unsigned long param)
+{
+ struct floppy_state *fs = bdev->bd_disk->private_data;
+ int err;
+
+ if ((cmd & 0x80) && !capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ if (fs->mdev->media_bay &&
+ check_media_bay(fs->mdev->media_bay) != MB_FD)
+ return -ENXIO;
+
+ switch (cmd) {
+ case FDEJECT:
+ if (fs->ref_count != 1)
+ return -EBUSY;
+ err = fd_eject(fs);
+ return err;
+ case FDGETPRM:
+ if (copy_to_user((void __user *) param, &floppy_type,
+ sizeof(struct floppy_struct)))
+ return -EFAULT;
+ return 0;
+ }
+ return -ENOTTY;
+}
+
+static int floppy_ioctl(struct block_device *bdev, fmode_t mode,
+ unsigned int cmd, unsigned long param)
+{
+ int ret;
+
+ mutex_lock(&swim3_mutex);
+ ret = floppy_locked_ioctl(bdev, mode, cmd, param);
+ mutex_unlock(&swim3_mutex);
+
+ return ret;
+}
+
+static int floppy_open(struct block_device *bdev, fmode_t mode)
+{
+ struct floppy_state *fs = bdev->bd_disk->private_data;
+ struct swim3 __iomem *sw = fs->swim3;
+ int n, err = 0;
+
+ if (fs->ref_count == 0) {
+ if (fs->mdev->media_bay &&
+ check_media_bay(fs->mdev->media_bay) != MB_FD)
+ return -ENXIO;
+ out_8(&sw->setup, S_IBM_DRIVE | S_FCLK_DIV2);
+ out_8(&sw->control_bic, 0xff);
+ out_8(&sw->mode, 0x95);
+ udelay(10);
+ out_8(&sw->intr_enable, 0);
+ out_8(&sw->control_bis, DRIVE_ENABLE | INTR_ENABLE);
+ swim3_action(fs, MOTOR_ON);
+ fs->write_prot = -1;
+ fs->cur_cyl = -1;
+ for (n = 0; n < 2 * HZ; ++n) {
+ if (n >= HZ/30 && swim3_readbit(fs, SEEK_COMPLETE))
+ break;
+ if (signal_pending(current)) {
+ err = -EINTR;
+ break;
+ }
+ swim3_select(fs, RELAX);
+ schedule_timeout_interruptible(1);
+ }
+ if (err == 0 && (swim3_readbit(fs, SEEK_COMPLETE) == 0
+ || swim3_readbit(fs, DISK_IN) == 0))
+ err = -ENXIO;
+ swim3_action(fs, SETMFM);
+ swim3_select(fs, RELAX);
+
+ } else if (fs->ref_count == -1 || mode & FMODE_EXCL)
+ return -EBUSY;
+
+ if (err == 0 && (mode & FMODE_NDELAY) == 0
+ && (mode & (FMODE_READ|FMODE_WRITE))) {
+ check_disk_change(bdev);
+ if (fs->ejected)
+ err = -ENXIO;
+ }
+
+ if (err == 0 && (mode & FMODE_WRITE)) {
+ if (fs->write_prot < 0)
+ fs->write_prot = swim3_readbit(fs, WRITE_PROT);
+ if (fs->write_prot)
+ err = -EROFS;
+ }
+
+ if (err) {
+ if (fs->ref_count == 0) {
+ swim3_action(fs, MOTOR_OFF);
+ out_8(&sw->control_bic, DRIVE_ENABLE | INTR_ENABLE);
+ swim3_select(fs, RELAX);
+ }
+ return err;
+ }
+
+ if (mode & FMODE_EXCL)
+ fs->ref_count = -1;
+ else
+ ++fs->ref_count;
+
+ return 0;
+}
+
+static int floppy_unlocked_open(struct block_device *bdev, fmode_t mode)
+{
+ int ret;
+
+ mutex_lock(&swim3_mutex);
+ ret = floppy_open(bdev, mode);
+ mutex_unlock(&swim3_mutex);
+
+ return ret;
+}
+
+static int floppy_release(struct gendisk *disk, fmode_t mode)
+{
+ struct floppy_state *fs = disk->private_data;
+ struct swim3 __iomem *sw = fs->swim3;
+ mutex_lock(&swim3_mutex);
+ if (fs->ref_count > 0 && --fs->ref_count == 0) {
+ swim3_action(fs, MOTOR_OFF);
+ out_8(&sw->control_bic, 0xff);
+ swim3_select(fs, RELAX);
+ }
+ mutex_unlock(&swim3_mutex);
+ return 0;
+}
+
+static unsigned int floppy_check_events(struct gendisk *disk,
+ unsigned int clearing)
+{
+ struct floppy_state *fs = disk->private_data;
+ return fs->ejected ? DISK_EVENT_MEDIA_CHANGE : 0;
+}
+
+static int floppy_revalidate(struct gendisk *disk)
+{
+ struct floppy_state *fs = disk->private_data;
+ struct swim3 __iomem *sw;
+ int ret, n;
+
+ if (fs->mdev->media_bay &&
+ check_media_bay(fs->mdev->media_bay) != MB_FD)
+ return -ENXIO;
+
+ sw = fs->swim3;
+ grab_drive(fs, revalidating, 0);
+ out_8(&sw->intr_enable, 0);
+ out_8(&sw->control_bis, DRIVE_ENABLE);
+ swim3_action(fs, MOTOR_ON); /* necessary? */
+ fs->write_prot = -1;
+ fs->cur_cyl = -1;
+ mdelay(1);
+ for (n = HZ; n > 0; --n) {
+ if (swim3_readbit(fs, SEEK_COMPLETE))
+ break;
+ if (signal_pending(current))
+ break;
+ swim3_select(fs, RELAX);
+ schedule_timeout_interruptible(1);
+ }
+ ret = swim3_readbit(fs, SEEK_COMPLETE) == 0
+ || swim3_readbit(fs, DISK_IN) == 0;
+ if (ret)
+ swim3_action(fs, MOTOR_OFF);
+ else {
+ fs->ejected = 0;
+ swim3_action(fs, SETMFM);
+ }
+ swim3_select(fs, RELAX);
+
+ release_drive(fs);
+ return ret;
+}
+
+static const struct block_device_operations floppy_fops = {
+ .open = floppy_unlocked_open,
+ .release = floppy_release,
+ .ioctl = floppy_ioctl,
+ .check_events = floppy_check_events,
+ .revalidate_disk= floppy_revalidate,
+};
+
+static int swim3_add_device(struct macio_dev *mdev, int index)
+{
+ struct device_node *swim = mdev->ofdev.dev.of_node;
+ struct floppy_state *fs = &floppy_states[index];
+ int rc = -EBUSY;
+
+ /* Check & Request resources */
+ if (macio_resource_count(mdev) < 2) {
+		printk(KERN_WARNING "fd%d: no address for %s\n",
+ index, swim->full_name);
+ return -ENXIO;
+ }
+ if (macio_irq_count(mdev) < 2) {
+ printk(KERN_WARNING "fd%d: no intrs for device %s\n",
+ index, swim->full_name);
+ }
+ if (macio_request_resource(mdev, 0, "swim3 (mmio)")) {
+ printk(KERN_ERR "fd%d: can't request mmio resource for %s\n",
+ index, swim->full_name);
+ return -EBUSY;
+ }
+ if (macio_request_resource(mdev, 1, "swim3 (dma)")) {
+ printk(KERN_ERR "fd%d: can't request dma resource for %s\n",
+ index, swim->full_name);
+ macio_release_resource(mdev, 0);
+ return -EBUSY;
+ }
+ dev_set_drvdata(&mdev->ofdev.dev, fs);
+
+ if (mdev->media_bay == NULL)
+ pmac_call_feature(PMAC_FTR_SWIM3_ENABLE, swim, 0, 1);
+
+ memset(fs, 0, sizeof(*fs));
+ spin_lock_init(&fs->lock);
+ fs->state = idle;
+ fs->swim3 = (struct swim3 __iomem *)
+ ioremap(macio_resource_start(mdev, 0), 0x200);
+ if (fs->swim3 == NULL) {
+ printk("fd%d: couldn't map registers for %s\n",
+ index, swim->full_name);
+ rc = -ENOMEM;
+ goto out_release;
+ }
+ fs->dma = (struct dbdma_regs __iomem *)
+ ioremap(macio_resource_start(mdev, 1), 0x200);
+ if (fs->dma == NULL) {
+ printk("fd%d: couldn't map DMA for %s\n",
+ index, swim->full_name);
+ iounmap(fs->swim3);
+ rc = -ENOMEM;
+ goto out_release;
+ }
+ fs->swim3_intr = macio_irq(mdev, 0);
+ fs->dma_intr = macio_irq(mdev, 1);
+ fs->cur_cyl = -1;
+ fs->cur_sector = -1;
+ fs->secpercyl = 36;
+ fs->secpertrack = 18;
+ fs->total_secs = 2880;
+ fs->mdev = mdev;
+ init_waitqueue_head(&fs->wait);
+
+ fs->dma_cmd = (struct dbdma_cmd *) DBDMA_ALIGN(fs->dbdma_cmd_space);
+ memset(fs->dma_cmd, 0, 2 * sizeof(struct dbdma_cmd));
+ st_le16(&fs->dma_cmd[1].command, DBDMA_STOP);
+
+ if (request_irq(fs->swim3_intr, swim3_interrupt, 0, "SWIM3", fs)) {
+ printk(KERN_ERR "fd%d: couldn't request irq %d for %s\n",
+ index, fs->swim3_intr, swim->full_name);
+ pmac_call_feature(PMAC_FTR_SWIM3_ENABLE, swim, 0, 0);
+ goto out_unmap;
+ }
+/*
+ if (request_irq(fs->dma_intr, fd_dma_interrupt, 0, "SWIM3-dma", fs)) {
+ printk(KERN_ERR "Couldn't get irq %d for SWIM3 DMA",
+ fs->dma_intr);
+ return -EBUSY;
+ }
+*/
+
+ init_timer(&fs->timeout);
+
+ printk(KERN_INFO "fd%d: SWIM3 floppy controller %s\n", floppy_count,
+ mdev->media_bay ? "in media bay" : "");
+
+ return 0;
+
+ out_unmap:
+ iounmap(fs->dma);
+ iounmap(fs->swim3);
+
+ out_release:
+ macio_release_resource(mdev, 0);
+ macio_release_resource(mdev, 1);
+
+ return rc;
+}
+
+static int __devinit swim3_attach(struct macio_dev *mdev, const struct of_device_id *match)
+{
+ int i, rc;
+ struct gendisk *disk;
+
+ /* Add the drive */
+ rc = swim3_add_device(mdev, floppy_count);
+ if (rc)
+ return rc;
+
+ /* Now create the queue if not there yet */
+ if (swim3_queue == NULL) {
+ /* If we failed, there isn't much we can do as the driver is still
+ * too dumb to remove the device, just bail out
+ */
+ if (register_blkdev(FLOPPY_MAJOR, "fd"))
+ return 0;
+ swim3_queue = blk_init_queue(do_fd_request, &swim3_lock);
+ if (swim3_queue == NULL) {
+ unregister_blkdev(FLOPPY_MAJOR, "fd");
+ return 0;
+ }
+ }
+
+ /* Now register that disk. Same comment about failure handling */
+ i = floppy_count++;
+ disk = disks[i] = alloc_disk(1);
+ if (disk == NULL)
+ return 0;
+
+ disk->major = FLOPPY_MAJOR;
+ disk->first_minor = i;
+ disk->fops = &floppy_fops;
+ disk->private_data = &floppy_states[i];
+ disk->queue = swim3_queue;
+ disk->flags |= GENHD_FL_REMOVABLE;
+ sprintf(disk->disk_name, "fd%d", i);
+ set_capacity(disk, 2880);
+ add_disk(disk);
+
+ return 0;
+}
+
+static struct of_device_id swim3_match[] =
+{
+ {
+ .name = "swim3",
+ },
+ {
+ .compatible = "ohare-swim3"
+ },
+ {
+ .compatible = "swim3"
+ },
+	{ /* end of list */ },
+};
+
+static struct macio_driver swim3_driver =
+{
+ .driver = {
+ .name = "swim3",
+ .of_match_table = swim3_match,
+ },
+ .probe = swim3_attach,
+#if 0
+ .suspend = swim3_suspend,
+ .resume = swim3_resume,
+#endif
+};
+
+
+int swim3_init(void)
+{
+ macio_register_driver(&swim3_driver);
+ return 0;
+}
+
+module_init(swim3_init)
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Paul Mackerras");
+MODULE_ALIAS_BLOCKDEV_MAJOR(FLOPPY_MAJOR);
diff --git a/drivers/block/swim_asm.S b/drivers/block/swim_asm.S
new file mode 100644
index 00000000..c9668206
--- /dev/null
+++ b/drivers/block/swim_asm.S
@@ -0,0 +1,247 @@
+/*
+ * low-level functions for the SWIM floppy controller
+ *
+ * needs assembly language because it is very timing dependent;
+ * this controller exists only on 680x0-based Macintoshes
+ *
+ * Copyright (C) 2004,2008 Laurent Vivier <Laurent@lvivier.info>
+ *
+ * based on Alastair Bridgewater SWIM analysis, 2001
+ * based on netBSD IWM driver (c) 1997, 1998 Hauke Fath.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * 2004-08-21 (lv) - Initial implementation
+ * 2008-11-05 (lv) - add get_swim_mode
+ */
+
+ .equ write_data, 0x0000
+ .equ write_mark, 0x0200
+ .equ write_CRC, 0x0400
+ .equ write_parameter,0x0600
+ .equ write_phase, 0x0800
+ .equ write_setup, 0x0a00
+ .equ write_mode0, 0x0c00
+ .equ write_mode1, 0x0e00
+ .equ read_data, 0x1000
+ .equ read_mark, 0x1200
+ .equ read_error, 0x1400
+ .equ read_parameter, 0x1600
+ .equ read_phase, 0x1800
+ .equ read_setup, 0x1a00
+ .equ read_status, 0x1c00
+ .equ read_handshake, 0x1e00
+
+ .equ o_side, 0
+ .equ o_track, 1
+ .equ o_sector, 2
+ .equ o_size, 3
+ .equ o_crc0, 4
+ .equ o_crc1, 5
+
+ .equ seek_time, 30000
+ .equ max_retry, 40
+ .equ sector_size, 512
+
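+/*
+ * Both entry points use the C calling convention: 8(%a6) holds the SWIM
+ * base address and 12(%a6) the destination buffer.  swim_read_sector_header
+ * returns 0 on success or -1 on error in %d0; swim_read_sector_data returns
+ * the number of data bytes read, or -1 on error.
+ */
+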
+ .global swim_read_sector_header
+swim_read_sector_header:
+ link %a6, #0
+ moveml %d1-%d5/%a0-%a4,%sp@-
+ movel %a6@(0x0c), %a4
+ bsr mfm_read_addrmark
+ moveml %sp@+, %d1-%d5/%a0-%a4
+ unlk %a6
+ rts
+
+sector_address_mark:
+ .byte 0xa1, 0xa1, 0xa1, 0xfe
+sector_data_mark:
+ .byte 0xa1, 0xa1, 0xa1, 0xfb
+
+mfm_read_addrmark:
+ movel %a6@(0x08), %a3
+ lea %a3@(read_handshake), %a2
+ lea %a3@(read_mark), %a3
+ moveq #-1, %d0
+ movew #seek_time, %d2
+
+wait_header_init:
+ tstb %a3@(read_error - read_mark)
+ moveb #0x18, %a3@(write_mode0 - read_mark)
+ moveb #0x01, %a3@(write_mode1 - read_mark)
+ moveb #0x01, %a3@(write_mode0 - read_mark)
+ tstb %a3@(read_error - read_mark)
+ moveb #0x08, %a3@(write_mode1 - read_mark)
+
+ lea sector_address_mark, %a0
+ moveq #3, %d1
+
+wait_addr_mark_byte:
+
+ tstb %a2@
+ dbmi %d2, wait_addr_mark_byte
+ bpl header_exit
+
+ moveb %a3@, %d3
+ cmpb %a0@+, %d3
+ dbne %d1, wait_addr_mark_byte
+ bne wait_header_init
+
+ moveq #max_retry, %d2
+
+amark0: tstb %a2@
+ dbmi %d2, amark0
+ bpl signal_nonyb
+
+ moveb %a3@, %a4@(o_track)
+
+ moveq #max_retry, %d2
+
+amark1: tstb %a2@
+ dbmi %d2, amark1
+ bpl signal_nonyb
+
+ moveb %a3@, %a4@(o_side)
+
+ moveq #max_retry, %d2
+
+amark2: tstb %a2@
+ dbmi %d2, amark2
+ bpl signal_nonyb
+
+ moveb %a3@, %a4@(o_sector)
+
+ moveq #max_retry, %d2
+
+amark3: tstb %a2@
+ dbmi %d2, amark3
+ bpl signal_nonyb
+
+ moveb %a3@, %a4@(o_size)
+
+ moveq #max_retry, %d2
+
+crc0: tstb %a2@
+ dbmi %d2, crc0
+ bpl signal_nonyb
+
+ moveb %a3@, %a4@(o_crc0)
+
+ moveq #max_retry, %d2
+
+crc1: tstb %a2@
+ dbmi %d2, crc1
+ bpl signal_nonyb
+
+ moveb %a3@, %a4@(o_crc1)
+
+ tstb %a3@(read_error - read_mark)
+
+header_exit:
+ moveq #0, %d0
+ moveb #0x18, %a3@(write_mode0 - read_mark)
+ rts
+signal_nonyb:
+ moveq #-1, %d0
+ moveb #0x18, %a3@(write_mode0 - read_mark)
+ rts
+
+ .global swim_read_sector_data
+swim_read_sector_data:
+ link %a6, #0
+ moveml %d1-%d5/%a0-%a5,%sp@-
+ movel %a6@(0x0c), %a4
+ bsr mfm_read_data
+ moveml %sp@+, %d1-%d5/%a0-%a5
+ unlk %a6
+ rts
+
+mfm_read_data:
+ movel %a6@(0x08), %a3
+ lea %a3@(read_handshake), %a2
+ lea %a3@(read_data), %a5
+ lea %a3@(read_mark), %a3
+ movew #seek_time, %d2
+
+wait_data_init:
+ tstb %a3@(read_error - read_mark)
+ moveb #0x18, %a3@(write_mode0 - read_mark)
+ moveb #0x01, %a3@(write_mode1 - read_mark)
+ moveb #0x01, %a3@(write_mode0 - read_mark)
+ tstb %a3@(read_error - read_mark)
+ moveb #0x08, %a3@(write_mode1 - read_mark)
+
+ lea sector_data_mark, %a0
+ moveq #3, %d1
+
+ /* wait data address mark */
+
+wait_data_mark_byte:
+
+ tstb %a2@
+ dbmi %d2, wait_data_mark_byte
+ bpl data_exit
+
+ moveb %a3@, %d3
+ cmpb %a0@+, %d3
+ dbne %d1, wait_data_mark_byte
+ bne wait_data_init
+
+ /* read data */
+
+ tstb %a3@(read_error - read_mark)
+
+ movel #sector_size-1, %d4 /* sector size */
+read_new_data:
+ movew #max_retry, %d2
+read_data_loop:
+ moveb %a2@, %d5
+ andb #0xc0, %d5
+ dbne %d2, read_data_loop
+ beq data_exit
+ moveb %a5@, %a4@+
+ andb #0x40, %d5
+ dbne %d4, read_new_data
+ beq exit_loop
+ moveb %a5@, %a4@+
+ dbra %d4, read_new_data
+exit_loop:
+
+ /* read CRC */
+
+ movew #max_retry, %d2
+data_crc0:
+
+ tstb %a2@
+ dbmi %d2, data_crc0
+ bpl data_exit
+
+ moveb %a3@, %d5
+
+ moveq #max_retry, %d2
+
+data_crc1:
+
+ tstb %a2@
+ dbmi %d2, data_crc1
+ bpl data_exit
+
+ moveb %a3@, %d5
+
+ tstb %a3@(read_error - read_mark)
+
+ moveb #0x18, %a3@(write_mode0 - read_mark)
+
+ /* return number of bytes read */
+
+ movel #sector_size, %d0
+ addw #1, %d4
+ subl %d4, %d0
+ rts
+data_exit:
+ moveb #0x18, %a3@(write_mode0 - read_mark)
+ moveq #-1, %d0
+ rts
diff --git a/drivers/block/sx8.c b/drivers/block/sx8.c
new file mode 100644
index 00000000..eec7b7a4
--- /dev/null
+++ b/drivers/block/sx8.c
@@ -0,0 +1,1759 @@
+/*
+ * sx8.c: Driver for Promise SATA SX8 looks-like-I2O hardware
+ *
+ * Copyright 2004-2005 Red Hat, Inc.
+ *
+ * Author/maintainer: Jeff Garzik <jgarzik@pobox.com>
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/pci.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/blkdev.h>
+#include <linux/sched.h>
+#include <linux/interrupt.h>
+#include <linux/compiler.h>
+#include <linux/workqueue.h>
+#include <linux/bitops.h>
+#include <linux/delay.h>
+#include <linux/time.h>
+#include <linux/hdreg.h>
+#include <linux/dma-mapping.h>
+#include <linux/completion.h>
+#include <linux/scatterlist.h>
+#include <asm/io.h>
+#include <asm/uaccess.h>
+
+#if 0
+#define CARM_DEBUG
+#define CARM_VERBOSE_DEBUG
+#else
+#undef CARM_DEBUG
+#undef CARM_VERBOSE_DEBUG
+#endif
+#undef CARM_NDEBUG
+
+#define DRV_NAME "sx8"
+#define DRV_VERSION "1.0"
+#define PFX DRV_NAME ": "
+
+MODULE_AUTHOR("Jeff Garzik");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Promise SATA SX8 block driver");
+MODULE_VERSION(DRV_VERSION);
+
+/*
+ * SX8 hardware has a single message queue for all ATA ports.
+ * When this driver was written, the hardware (firmware?) would
+ * corrupt data eventually, if more than one request was outstanding.
+ * As one can imagine, having 8 ports bottlenecking on a single
+ * command hurts performance.
+ *
+ * Based on user reports, later versions of the hardware (firmware?)
+ * seem to be able to survive with more than one command queued.
+ *
+ * Therefore, we default to the safe option -- 1 command -- but
+ * allow the user to increase this.
+ *
+ * SX8 should be able to support up to ~60 queued commands (CARM_MAX_REQ),
+ * but problems seem to occur when you exceed ~30, even on newer hardware.
+ */
+static int max_queue = 1;
+module_param(max_queue, int, 0444);
+MODULE_PARM_DESC(max_queue, "Maximum number of queued commands. (min==1, max==30, safe==1)");
+
+
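+/* advance an index around the response message ring of RMSG_Q_LEN entries */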
+#define NEXT_RESP(idx) ((idx + 1) % RMSG_Q_LEN)
+
+/* 0xf is just arbitrary, non-zero noise; this is sorta like poisoning */
+#define TAG_ENCODE(tag) (((tag) << 16) | 0xf)
+#define TAG_DECODE(tag) (((tag) >> 16) & 0x1f)
+#define TAG_VALID(tag) ((((tag) & 0xf) == 0xf) && (TAG_DECODE(tag) < 32))
+
+/* note: prints function name for you */
+#ifdef CARM_DEBUG
+#define DPRINTK(fmt, args...) printk(KERN_ERR "%s: " fmt, __func__, ## args)
+#ifdef CARM_VERBOSE_DEBUG
+#define VPRINTK(fmt, args...) printk(KERN_ERR "%s: " fmt, __func__, ## args)
+#else
+#define VPRINTK(fmt, args...)
+#endif /* CARM_VERBOSE_DEBUG */
+#else
+#define DPRINTK(fmt, args...)
+#define VPRINTK(fmt, args...)
+#endif /* CARM_DEBUG */
+
+#ifdef CARM_NDEBUG
+#define assert(expr)
+#else
+#define assert(expr) \
+ if(unlikely(!(expr))) { \
+ printk(KERN_ERR "Assertion failed! %s,%s,%s,line=%d\n", \
+ #expr, __FILE__, __func__, __LINE__); \
+ }
+#endif
+
+/* defines only for the constants which don't work well as enums */
+struct carm_host;
+
+enum {
+ /* adapter-wide limits */
+ CARM_MAX_PORTS = 8,
+ CARM_SHM_SIZE = (4096 << 7),
+ CARM_MINORS_PER_MAJOR = 256 / CARM_MAX_PORTS,
+ CARM_MAX_WAIT_Q = CARM_MAX_PORTS + 1,
+
+ /* command message queue limits */
+ CARM_MAX_REQ = 64, /* max command msgs per host */
+ CARM_MSG_LOW_WATER = (CARM_MAX_REQ / 4), /* refill mark */
+
+ /* S/G limits, host-wide and per-request */
+ CARM_MAX_REQ_SG = 32, /* max s/g entries per request */
+ CARM_MAX_HOST_SG = 600, /* max s/g entries per host */
+ CARM_SG_LOW_WATER = (CARM_MAX_HOST_SG / 4), /* re-fill mark */
+
+ /* hardware registers */
+ CARM_IHQP = 0x1c,
+ CARM_INT_STAT = 0x10, /* interrupt status */
+ CARM_INT_MASK = 0x14, /* interrupt mask */
+ CARM_HMUC = 0x18, /* host message unit control */
+ RBUF_ADDR_LO = 0x20, /* response msg DMA buf low 32 bits */
+ RBUF_ADDR_HI = 0x24, /* response msg DMA buf high 32 bits */
+ RBUF_BYTE_SZ = 0x28,
+ CARM_RESP_IDX = 0x2c,
+ CARM_CMS0 = 0x30, /* command message size reg 0 */
+ CARM_LMUC = 0x48,
+ CARM_HMPHA = 0x6c,
+ CARM_INITC = 0xb5,
+
+ /* bits in CARM_INT_{STAT,MASK} */
+ INT_RESERVED = 0xfffffff0,
+ INT_WATCHDOG = (1 << 3), /* watchdog timer */
+ INT_Q_OVERFLOW = (1 << 2), /* cmd msg q overflow */
+ INT_Q_AVAILABLE = (1 << 1), /* cmd msg q has free space */
+ INT_RESPONSE = (1 << 0), /* response msg available */
+ INT_ACK_MASK = INT_WATCHDOG | INT_Q_OVERFLOW,
+ INT_DEF_MASK = INT_RESERVED | INT_Q_OVERFLOW |
+ INT_RESPONSE,
+
+ /* command messages, and related register bits */
+ CARM_HAVE_RESP = 0x01,
+ CARM_MSG_READ = 1,
+ CARM_MSG_WRITE = 2,
+ CARM_MSG_VERIFY = 3,
+ CARM_MSG_GET_CAPACITY = 4,
+ CARM_MSG_FLUSH = 5,
+ CARM_MSG_IOCTL = 6,
+ CARM_MSG_ARRAY = 8,
+ CARM_MSG_MISC = 9,
+ CARM_CME = (1 << 2),
+ CARM_RME = (1 << 1),
+ CARM_WZBC = (1 << 0),
+ CARM_RMI = (1 << 0),
+ CARM_Q_FULL = (1 << 3),
+ CARM_MSG_SIZE = 288,
+ CARM_Q_LEN = 48,
+
+ /* CARM_MSG_IOCTL messages */
+ CARM_IOC_SCAN_CHAN = 5, /* scan channels for devices */
+ CARM_IOC_GET_TCQ = 13, /* get tcq/ncq depth */
+ CARM_IOC_SET_TCQ = 14, /* set tcq/ncq depth */
+
+ IOC_SCAN_CHAN_NODEV = 0x1f,
+ IOC_SCAN_CHAN_OFFSET = 0x40,
+
+ /* CARM_MSG_ARRAY messages */
+ CARM_ARRAY_INFO = 0,
+
+ ARRAY_NO_EXIST = (1 << 31),
+
+ /* response messages */
+ RMSG_SZ = 8, /* sizeof(struct carm_response) */
+ RMSG_Q_LEN = 48, /* resp. msg list length */
+ RMSG_OK = 1, /* bit indicating msg was successful */
+ /* length of entire resp. msg buffer */
+ RBUF_LEN = RMSG_SZ * RMSG_Q_LEN,
+
+ PDC_SHM_SIZE = (4096 << 7), /* length of entire h/w buffer */
+
+ /* CARM_MSG_MISC messages */
+ MISC_GET_FW_VER = 2,
+ MISC_ALLOC_MEM = 3,
+ MISC_SET_TIME = 5,
+
+ /* MISC_GET_FW_VER feature bits */
+ FW_VER_4PORT = (1 << 2), /* 1=4 ports, 0=8 ports */
+ FW_VER_NON_RAID = (1 << 1), /* 1=non-RAID firmware, 0=RAID */
+ FW_VER_ZCR = (1 << 0), /* zero channel RAID (whatever that is) */
+
+ /* carm_host flags */
+ FL_NON_RAID = FW_VER_NON_RAID,
+ FL_4PORT = FW_VER_4PORT,
+ FL_FW_VER_MASK = (FW_VER_NON_RAID | FW_VER_4PORT),
+ FL_DAC = (1 << 16),
+ FL_DYN_MAJOR = (1 << 17),
+};
+
+enum {
+ CARM_SG_BOUNDARY = 0xffffUL, /* s/g segment boundary */
+};
+
+enum scatter_gather_types {
+ SGT_32BIT = 0,
+ SGT_64BIT = 1,
+};
+
+enum host_states {
+ HST_INVALID, /* invalid state; never used */
+ HST_ALLOC_BUF, /* setting up master SHM area */
+ HST_ERROR, /* we never leave here */
+ HST_PORT_SCAN, /* start dev scan */
+ HST_DEV_SCAN_START, /* start per-device probe */
+ HST_DEV_SCAN, /* continue per-device probe */
+ HST_DEV_ACTIVATE, /* activate devices we found */
+ HST_PROBE_FINISHED, /* probe is complete */
+ HST_PROBE_START, /* initiate probe */
+ HST_SYNC_TIME, /* tell firmware what time it is */
+ HST_GET_FW_VER, /* get firmware version, adapter port cnt */
+};
+
+#ifdef CARM_DEBUG
+static const char *state_name[] = {
+ "HST_INVALID",
+ "HST_ALLOC_BUF",
+ "HST_ERROR",
+ "HST_PORT_SCAN",
+ "HST_DEV_SCAN_START",
+ "HST_DEV_SCAN",
+ "HST_DEV_ACTIVATE",
+ "HST_PROBE_FINISHED",
+ "HST_PROBE_START",
+ "HST_SYNC_TIME",
+ "HST_GET_FW_VER",
+};
+#endif
+
+struct carm_port {
+ unsigned int port_no;
+ struct gendisk *disk;
+ struct carm_host *host;
+
+ /* attached device characteristics */
+ u64 capacity;
+ char name[41];
+ u16 dev_geom_head;
+ u16 dev_geom_sect;
+ u16 dev_geom_cyl;
+};
+
+struct carm_request {
+ unsigned int tag;
+ int n_elem;
+ unsigned int msg_type;
+ unsigned int msg_subtype;
+ unsigned int msg_bucket;
+ struct request *rq;
+ struct carm_port *port;
+ struct scatterlist sg[CARM_MAX_REQ_SG];
+};
+
+struct carm_host {
+ unsigned long flags;
+ void __iomem *mmio;
+ void *shm;
+ dma_addr_t shm_dma;
+
+ int major;
+ int id;
+ char name[32];
+
+ spinlock_t lock;
+ struct pci_dev *pdev;
+ unsigned int state;
+ u32 fw_ver;
+
+ struct request_queue *oob_q;
+ unsigned int n_oob;
+
+ unsigned int hw_sg_used;
+
+ unsigned int resp_idx;
+
+ unsigned int wait_q_prod;
+ unsigned int wait_q_cons;
+ struct request_queue *wait_q[CARM_MAX_WAIT_Q];
+
+ unsigned int n_msgs;
+ u64 msg_alloc;
+ struct carm_request req[CARM_MAX_REQ];
+ void *msg_base;
+ dma_addr_t msg_dma;
+
+ int cur_scan_dev;
+ unsigned long dev_active;
+ unsigned long dev_present;
+ struct carm_port port[CARM_MAX_PORTS];
+
+ struct work_struct fsm_task;
+
+ struct completion probe_comp;
+};
+
+struct carm_response {
+ __le32 ret_handle;
+ __le32 status;
+} __attribute__((packed));
+
+struct carm_msg_sg {
+ __le32 start;
+ __le32 len;
+} __attribute__((packed));
+
+struct carm_msg_rw {
+ u8 type;
+ u8 id;
+ u8 sg_count;
+ u8 sg_type;
+ __le32 handle;
+ __le32 lba;
+ __le16 lba_count;
+ __le16 lba_high;
+ struct carm_msg_sg sg[32];
+} __attribute__((packed));
+
+struct carm_msg_allocbuf {
+ u8 type;
+ u8 subtype;
+ u8 n_sg;
+ u8 sg_type;
+ __le32 handle;
+ __le32 addr;
+ __le32 len;
+ __le32 evt_pool;
+ __le32 n_evt;
+ __le32 rbuf_pool;
+ __le32 n_rbuf;
+ __le32 msg_pool;
+ __le32 n_msg;
+ struct carm_msg_sg sg[8];
+} __attribute__((packed));
+
+struct carm_msg_ioctl {
+ u8 type;
+ u8 subtype;
+ u8 array_id;
+ u8 reserved1;
+ __le32 handle;
+ __le32 data_addr;
+ u32 reserved2;
+} __attribute__((packed));
+
+struct carm_msg_sync_time {
+ u8 type;
+ u8 subtype;
+ u16 reserved1;
+ __le32 handle;
+ u32 reserved2;
+ __le32 timestamp;
+} __attribute__((packed));
+
+struct carm_msg_get_fw_ver {
+ u8 type;
+ u8 subtype;
+ u16 reserved1;
+ __le32 handle;
+ __le32 data_addr;
+ u32 reserved2;
+} __attribute__((packed));
+
+struct carm_fw_ver {
+ __le32 version;
+ u8 features;
+ u8 reserved1;
+ u16 reserved2;
+} __attribute__((packed));
+
+struct carm_array_info {
+ __le32 size;
+
+ __le16 size_hi;
+ __le16 stripe_size;
+
+ __le32 mode;
+
+ __le16 stripe_blk_sz;
+ __le16 reserved1;
+
+ __le16 cyl;
+ __le16 head;
+
+ __le16 sect;
+ u8 array_id;
+ u8 reserved2;
+
+ char name[40];
+
+ __le32 array_status;
+
+ /* device list continues beyond this point? */
+} __attribute__((packed));
+
+static int carm_init_one (struct pci_dev *pdev, const struct pci_device_id *ent);
+static void carm_remove_one (struct pci_dev *pdev);
+static int carm_bdev_getgeo(struct block_device *bdev, struct hd_geometry *geo);
+
+static const struct pci_device_id carm_pci_tbl[] = {
+ { PCI_VENDOR_ID_PROMISE, 0x8000, PCI_ANY_ID, PCI_ANY_ID, 0, 0, },
+ { PCI_VENDOR_ID_PROMISE, 0x8002, PCI_ANY_ID, PCI_ANY_ID, 0, 0, },
+ { } /* terminate list */
+};
+MODULE_DEVICE_TABLE(pci, carm_pci_tbl);
+
+static struct pci_driver carm_driver = {
+ .name = DRV_NAME,
+ .id_table = carm_pci_tbl,
+ .probe = carm_init_one,
+ .remove = carm_remove_one,
+};
+
+static const struct block_device_operations carm_bd_ops = {
+ .owner = THIS_MODULE,
+ .getgeo = carm_bdev_getgeo,
+};
+
+static unsigned int carm_host_id;
+static unsigned long carm_major_alloc;
+
+
+
+static int carm_bdev_getgeo(struct block_device *bdev, struct hd_geometry *geo)
+{
+ struct carm_port *port = bdev->bd_disk->private_data;
+
+ geo->heads = (u8) port->dev_geom_head;
+ geo->sectors = (u8) port->dev_geom_sect;
+ geo->cylinders = port->dev_geom_cyl;
+ return 0;
+}
+
+static const u32 msg_sizes[] = { 32, 64, 128, CARM_MSG_SIZE };
+
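+/*
+ * Command messages live in fixed-size slots; the hardware understands four
+ * message sizes, programmed into CARM_CMS0..CMS3 by carm_init_buckets().
+ * carm_lookup_bucket() returns the index of the smallest bucket that can
+ * hold a message of the given size.
+ */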
+static inline int carm_lookup_bucket(u32 msg_size)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(msg_sizes); i++)
+ if (msg_size <= msg_sizes[i])
+ return i;
+
+ return -ENOENT;
+}
+
+static void carm_init_buckets(void __iomem *mmio)
+{
+ unsigned int i;
+
+ for (i = 0; i < ARRAY_SIZE(msg_sizes); i++)
+ writel(msg_sizes[i], mmio + CARM_CMS0 + (4 * i));
+}
+
+static inline void *carm_ref_msg(struct carm_host *host,
+ unsigned int msg_idx)
+{
+ return host->msg_base + (msg_idx * CARM_MSG_SIZE);
+}
+
+static inline dma_addr_t carm_ref_msg_dma(struct carm_host *host,
+ unsigned int msg_idx)
+{
+ return host->msg_dma + (msg_idx * CARM_MSG_SIZE);
+}
+
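+/*
+ * Hand a fully built message to the adapter by writing its DMA address,
+ * with the bucket index shifted into the low bits, to the inbound host
+ * queue port.  Returns -EBUSY if the hardware command queue is full;
+ * the caller is expected to requeue and retry later.
+ */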
+static int carm_send_msg(struct carm_host *host,
+ struct carm_request *crq)
+{
+ void __iomem *mmio = host->mmio;
+ u32 msg = (u32) carm_ref_msg_dma(host, crq->tag);
+ u32 cm_bucket = crq->msg_bucket;
+ u32 tmp;
+ int rc = 0;
+
+ VPRINTK("ENTER\n");
+
+ tmp = readl(mmio + CARM_HMUC);
+ if (tmp & CARM_Q_FULL) {
+#if 0
+ tmp = readl(mmio + CARM_INT_MASK);
+ tmp |= INT_Q_AVAILABLE;
+ writel(tmp, mmio + CARM_INT_MASK);
+ readl(mmio + CARM_INT_MASK); /* flush */
+#endif
+ DPRINTK("host msg queue full\n");
+ rc = -EBUSY;
+ } else {
+ writel(msg | (cm_bucket << 1), mmio + CARM_IHQP);
+ readl(mmio + CARM_IHQP); /* flush */
+ }
+
+ return rc;
+}
+
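+/*
+ * Allocate a free command tag.  Outstanding tags are tracked in the
+ * msg_alloc bitmap; allocation also honors the host-wide scatter/gather
+ * limit so that a request is never started without room for its S/G
+ * entries.
+ */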
+static struct carm_request *carm_get_request(struct carm_host *host)
+{
+ unsigned int i;
+
+ /* obey global hardware limit on S/G entries */
+ if (host->hw_sg_used >= (CARM_MAX_HOST_SG - CARM_MAX_REQ_SG))
+ return NULL;
+
+ for (i = 0; i < max_queue; i++)
+ if ((host->msg_alloc & (1ULL << i)) == 0) {
+ struct carm_request *crq = &host->req[i];
+ crq->port = NULL;
+ crq->n_elem = 0;
+
+ host->msg_alloc |= (1ULL << i);
+ host->n_msgs++;
+
+ assert(host->n_msgs <= CARM_MAX_REQ);
+ sg_init_table(crq->sg, CARM_MAX_REQ_SG);
+ return crq;
+ }
+
+ DPRINTK("no request available, returning NULL\n");
+ return NULL;
+}
+
+static int carm_put_request(struct carm_host *host, struct carm_request *crq)
+{
+ assert(crq->tag < max_queue);
+
+ if (unlikely((host->msg_alloc & (1ULL << crq->tag)) == 0))
+ return -EINVAL; /* tried to clear a tag that was not active */
+
+ assert(host->hw_sg_used >= crq->n_elem);
+
+ host->msg_alloc &= ~(1ULL << crq->tag);
+ host->hw_sg_used -= crq->n_elem;
+ host->n_msgs--;
+
+ return 0;
+}
+
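+/*
+ * Allocate a tag plus a block-layer request on the out-of-band queue for
+ * internal (management) commands.  Polls for a free tag for up to roughly
+ * 50 seconds (5000 tries, 10 ms apart) before giving up.
+ */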
+static struct carm_request *carm_get_special(struct carm_host *host)
+{
+ unsigned long flags;
+ struct carm_request *crq = NULL;
+ struct request *rq;
+ int tries = 5000;
+
+ while (tries-- > 0) {
+ spin_lock_irqsave(&host->lock, flags);
+ crq = carm_get_request(host);
+ spin_unlock_irqrestore(&host->lock, flags);
+
+ if (crq)
+ break;
+ msleep(10);
+ }
+
+ if (!crq)
+ return NULL;
+
+ rq = blk_get_request(host->oob_q, WRITE /* bogus */, GFP_KERNEL);
+ if (!rq) {
+ spin_lock_irqsave(&host->lock, flags);
+ carm_put_request(host, crq);
+ spin_unlock_irqrestore(&host->lock, flags);
+ return NULL;
+ }
+
+ crq->rq = rq;
+ return crq;
+}
+
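+/*
+ * Query one array's size and geometry with a CARM_MSG_ARRAY/CARM_ARRAY_INFO
+ * command on the out-of-band queue.  The reply is parsed by
+ * carm_handle_array_info(), which also advances the device-scan state
+ * machine.
+ */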
+static int carm_array_info (struct carm_host *host, unsigned int array_idx)
+{
+ struct carm_msg_ioctl *ioc;
+ unsigned int idx;
+ u32 msg_data;
+ dma_addr_t msg_dma;
+ struct carm_request *crq;
+ int rc;
+
+ crq = carm_get_special(host);
+ if (!crq) {
+ rc = -ENOMEM;
+ goto err_out;
+ }
+
+ idx = crq->tag;
+
+ ioc = carm_ref_msg(host, idx);
+ msg_dma = carm_ref_msg_dma(host, idx);
+ msg_data = (u32) (msg_dma + sizeof(struct carm_array_info));
+
+ crq->msg_type = CARM_MSG_ARRAY;
+ crq->msg_subtype = CARM_ARRAY_INFO;
+ rc = carm_lookup_bucket(sizeof(struct carm_msg_ioctl) +
+ sizeof(struct carm_array_info));
+ BUG_ON(rc < 0);
+ crq->msg_bucket = (u32) rc;
+
+ memset(ioc, 0, sizeof(*ioc));
+ ioc->type = CARM_MSG_ARRAY;
+ ioc->subtype = CARM_ARRAY_INFO;
+ ioc->array_id = (u8) array_idx;
+ ioc->handle = cpu_to_le32(TAG_ENCODE(idx));
+ ioc->data_addr = cpu_to_le32(msg_data);
+
+ spin_lock_irq(&host->lock);
+ assert(host->state == HST_DEV_SCAN_START ||
+ host->state == HST_DEV_SCAN);
+ spin_unlock_irq(&host->lock);
+
+ DPRINTK("blk_insert_request, tag == %u\n", idx);
+ blk_insert_request(host->oob_q, crq->rq, 1, crq);
+
+ return 0;
+
+err_out:
+ spin_lock_irq(&host->lock);
+ host->state = HST_ERROR;
+ spin_unlock_irq(&host->lock);
+ return rc;
+}
+
+typedef unsigned int (*carm_sspc_t)(struct carm_host *, unsigned int, void *);
+
+static int carm_send_special (struct carm_host *host, carm_sspc_t func)
+{
+ struct carm_request *crq;
+ struct carm_msg_ioctl *ioc;
+ void *mem;
+ unsigned int idx, msg_size;
+ int rc;
+
+ crq = carm_get_special(host);
+ if (!crq)
+ return -ENOMEM;
+
+ idx = crq->tag;
+
+ mem = carm_ref_msg(host, idx);
+
+ msg_size = func(host, idx, mem);
+
+ ioc = mem;
+ crq->msg_type = ioc->type;
+ crq->msg_subtype = ioc->subtype;
+ rc = carm_lookup_bucket(msg_size);
+ BUG_ON(rc < 0);
+ crq->msg_bucket = (u32) rc;
+
+ DPRINTK("blk_insert_request, tag == %u\n", idx);
+ blk_insert_request(host->oob_q, crq->rq, 1, crq);
+
+ return 0;
+}
+
+static unsigned int carm_fill_sync_time(struct carm_host *host,
+ unsigned int idx, void *mem)
+{
+ struct timeval tv;
+ struct carm_msg_sync_time *st = mem;
+
+ do_gettimeofday(&tv);
+
+ memset(st, 0, sizeof(*st));
+ st->type = CARM_MSG_MISC;
+ st->subtype = MISC_SET_TIME;
+ st->handle = cpu_to_le32(TAG_ENCODE(idx));
+ st->timestamp = cpu_to_le32(tv.tv_sec);
+
+ return sizeof(struct carm_msg_sync_time);
+}
+
+static unsigned int carm_fill_alloc_buf(struct carm_host *host,
+ unsigned int idx, void *mem)
+{
+ struct carm_msg_allocbuf *ab = mem;
+
+ memset(ab, 0, sizeof(*ab));
+ ab->type = CARM_MSG_MISC;
+ ab->subtype = MISC_ALLOC_MEM;
+ ab->handle = cpu_to_le32(TAG_ENCODE(idx));
+ ab->n_sg = 1;
+ ab->sg_type = SGT_32BIT;
+ ab->addr = cpu_to_le32(host->shm_dma + (PDC_SHM_SIZE >> 1));
+ ab->len = cpu_to_le32(PDC_SHM_SIZE >> 1);
+ ab->evt_pool = cpu_to_le32(host->shm_dma + (16 * 1024));
+ ab->n_evt = cpu_to_le32(1024);
+ ab->rbuf_pool = cpu_to_le32(host->shm_dma);
+ ab->n_rbuf = cpu_to_le32(RMSG_Q_LEN);
+ ab->msg_pool = cpu_to_le32(host->shm_dma + RBUF_LEN);
+ ab->n_msg = cpu_to_le32(CARM_Q_LEN);
+ ab->sg[0].start = cpu_to_le32(host->shm_dma + (PDC_SHM_SIZE >> 1));
+ ab->sg[0].len = cpu_to_le32(65536);
+
+ return sizeof(struct carm_msg_allocbuf);
+}
+
+static unsigned int carm_fill_scan_channels(struct carm_host *host,
+ unsigned int idx, void *mem)
+{
+ struct carm_msg_ioctl *ioc = mem;
+ u32 msg_data = (u32) (carm_ref_msg_dma(host, idx) +
+ IOC_SCAN_CHAN_OFFSET);
+
+ memset(ioc, 0, sizeof(*ioc));
+ ioc->type = CARM_MSG_IOCTL;
+ ioc->subtype = CARM_IOC_SCAN_CHAN;
+ ioc->handle = cpu_to_le32(TAG_ENCODE(idx));
+ ioc->data_addr = cpu_to_le32(msg_data);
+
+ /* fill output data area with "no device" default values */
+ mem += IOC_SCAN_CHAN_OFFSET;
+ memset(mem, IOC_SCAN_CHAN_NODEV, CARM_MAX_PORTS);
+
+ return IOC_SCAN_CHAN_OFFSET + CARM_MAX_PORTS;
+}
+
+static unsigned int carm_fill_get_fw_ver(struct carm_host *host,
+ unsigned int idx, void *mem)
+{
+ struct carm_msg_get_fw_ver *ioc = mem;
+ u32 msg_data = (u32) (carm_ref_msg_dma(host, idx) + sizeof(*ioc));
+
+ memset(ioc, 0, sizeof(*ioc));
+ ioc->type = CARM_MSG_MISC;
+ ioc->subtype = MISC_GET_FW_VER;
+ ioc->handle = cpu_to_le32(TAG_ENCODE(idx));
+ ioc->data_addr = cpu_to_le32(msg_data);
+
+ return sizeof(struct carm_msg_get_fw_ver) +
+ sizeof(struct carm_fw_ver);
+}
+
+static inline void carm_end_request_queued(struct carm_host *host,
+ struct carm_request *crq,
+ int error)
+{
+ struct request *req = crq->rq;
+ int rc;
+
+ __blk_end_request_all(req, error);
+
+ rc = carm_put_request(host, crq);
+ assert(rc == 0);
+}
+
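+/*
+ * When tags or S/G resources run out, the request queue is stopped and
+ * parked on a small ring of waiting queues.  carm_round_robin() restarts
+ * one parked queue once completions have freed enough resources.
+ */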
+static inline void carm_push_q (struct carm_host *host, struct request_queue *q)
+{
+ unsigned int idx = host->wait_q_prod % CARM_MAX_WAIT_Q;
+
+ blk_stop_queue(q);
+ VPRINTK("STOPPED QUEUE %p\n", q);
+
+ host->wait_q[idx] = q;
+ host->wait_q_prod++;
+ BUG_ON(host->wait_q_prod == host->wait_q_cons); /* overrun */
+}
+
+static inline struct request_queue *carm_pop_q(struct carm_host *host)
+{
+ unsigned int idx;
+
+ if (host->wait_q_prod == host->wait_q_cons)
+ return NULL;
+
+ idx = host->wait_q_cons % CARM_MAX_WAIT_Q;
+ host->wait_q_cons++;
+
+ return host->wait_q[idx];
+}
+
+static inline void carm_round_robin(struct carm_host *host)
+{
+ struct request_queue *q = carm_pop_q(host);
+ if (q) {
+ blk_start_queue(q);
+ VPRINTK("STARTED QUEUE %p\n", q);
+ }
+}
+
+static inline void carm_end_rq(struct carm_host *host, struct carm_request *crq,
+ int error)
+{
+ carm_end_request_queued(host, crq, error);
+ if (max_queue == 1)
+ carm_round_robin(host);
+ else if ((host->n_msgs <= CARM_MSG_LOW_WATER) &&
+ (host->hw_sg_used <= CARM_SG_LOW_WATER)) {
+ carm_round_robin(host);
+ }
+}
+
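+/*
+ * Request function for the internal out-of-band queue.  Requests arrive
+ * here via blk_insert_request() from carm_array_info()/carm_send_special()
+ * with the message already built, so they are simply handed to the
+ * hardware; on -EBUSY the request is requeued and the queue is parked.
+ */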
+static void carm_oob_rq_fn(struct request_queue *q)
+{
+ struct carm_host *host = q->queuedata;
+ struct carm_request *crq;
+ struct request *rq;
+ int rc;
+
+ while (1) {
+ DPRINTK("get req\n");
+ rq = blk_fetch_request(q);
+ if (!rq)
+ break;
+
+ crq = rq->special;
+ assert(crq != NULL);
+ assert(crq->rq == rq);
+
+ crq->n_elem = 0;
+
+ DPRINTK("send req\n");
+ rc = carm_send_msg(host, crq);
+ if (rc) {
+ blk_requeue_request(q, rq);
+ carm_push_q(host, q);
+ return; /* call us again later, eventually */
+ }
+ }
+}
+
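+/*
+ * Main request function for the per-port queues: for each request, grab a
+ * tag, map the scatterlist for DMA, build a read/write message with an
+ * inline S/G list, and queue it to the hardware.  If tags, S/G entries or
+ * queue slots run out, the request is requeued and the queue is parked on
+ * the wait ring until resources free up.
+ */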
+static void carm_rq_fn(struct request_queue *q)
+{
+ struct carm_port *port = q->queuedata;
+ struct carm_host *host = port->host;
+ struct carm_msg_rw *msg;
+ struct carm_request *crq;
+ struct request *rq;
+ struct scatterlist *sg;
+ int writing = 0, pci_dir, i, n_elem, rc;
+ u32 tmp;
+ unsigned int msg_size;
+
+queue_one_request:
+ VPRINTK("get req\n");
+ rq = blk_peek_request(q);
+ if (!rq)
+ return;
+
+ crq = carm_get_request(host);
+ if (!crq) {
+ carm_push_q(host, q);
+ return; /* call us again later, eventually */
+ }
+ crq->rq = rq;
+
+ blk_start_request(rq);
+
+ if (rq_data_dir(rq) == WRITE) {
+ writing = 1;
+ pci_dir = PCI_DMA_TODEVICE;
+ } else {
+ pci_dir = PCI_DMA_FROMDEVICE;
+ }
+
+ /* get scatterlist from block layer */
+ sg = &crq->sg[0];
+ n_elem = blk_rq_map_sg(q, rq, sg);
+ if (n_elem <= 0) {
+ carm_end_rq(host, crq, -EIO);
+ return; /* request with no s/g entries? */
+ }
+
+ /* map scatterlist to PCI bus addresses */
+ n_elem = pci_map_sg(host->pdev, sg, n_elem, pci_dir);
+ if (n_elem <= 0) {
+ carm_end_rq(host, crq, -EIO);
+ return; /* request with no s/g entries? */
+ }
+ crq->n_elem = n_elem;
+ crq->port = port;
+ host->hw_sg_used += n_elem;
+
+ /*
+ * build read/write message
+ */
+
+ VPRINTK("build msg\n");
+ msg = (struct carm_msg_rw *) carm_ref_msg(host, crq->tag);
+
+ if (writing) {
+ msg->type = CARM_MSG_WRITE;
+ crq->msg_type = CARM_MSG_WRITE;
+ } else {
+ msg->type = CARM_MSG_READ;
+ crq->msg_type = CARM_MSG_READ;
+ }
+
+ msg->id = port->port_no;
+ msg->sg_count = n_elem;
+ msg->sg_type = SGT_32BIT;
+ msg->handle = cpu_to_le32(TAG_ENCODE(crq->tag));
+ msg->lba = cpu_to_le32(blk_rq_pos(rq) & 0xffffffff);
+ tmp = (blk_rq_pos(rq) >> 16) >> 16;
+ msg->lba_high = cpu_to_le16( (u16) tmp );
+ msg->lba_count = cpu_to_le16(blk_rq_sectors(rq));
+
+ msg_size = sizeof(struct carm_msg_rw) - sizeof(msg->sg);
+ for (i = 0; i < n_elem; i++) {
+ struct carm_msg_sg *carm_sg = &msg->sg[i];
+ carm_sg->start = cpu_to_le32(sg_dma_address(&crq->sg[i]));
+ carm_sg->len = cpu_to_le32(sg_dma_len(&crq->sg[i]));
+ msg_size += sizeof(struct carm_msg_sg);
+ }
+
+ rc = carm_lookup_bucket(msg_size);
+ BUG_ON(rc < 0);
+ crq->msg_bucket = (u32) rc;
+
+ /*
+ * queue read/write message to hardware
+ */
+
+ VPRINTK("send msg, tag == %u\n", crq->tag);
+ rc = carm_send_msg(host, crq);
+ if (rc) {
+ carm_put_request(host, crq);
+ blk_requeue_request(q, rq);
+ carm_push_q(host, q);
+ return; /* call us again later, eventually */
+ }
+
+ goto queue_one_request;
+}
+
+static void carm_handle_array_info(struct carm_host *host,
+ struct carm_request *crq, u8 *mem,
+ int error)
+{
+ struct carm_port *port;
+ u8 *msg_data = mem + sizeof(struct carm_array_info);
+ struct carm_array_info *desc = (struct carm_array_info *) msg_data;
+ u64 lo, hi;
+ int cur_port;
+ size_t slen;
+
+ DPRINTK("ENTER\n");
+
+ carm_end_rq(host, crq, error);
+
+ if (error)
+ goto out;
+ if (le32_to_cpu(desc->array_status) & ARRAY_NO_EXIST)
+ goto out;
+
+ cur_port = host->cur_scan_dev;
+
+ /* should never occur */
+ if ((cur_port < 0) || (cur_port >= CARM_MAX_PORTS)) {
+ printk(KERN_ERR PFX "BUG: cur_scan_dev==%d, array_id==%d\n",
+ cur_port, (int) desc->array_id);
+ goto out;
+ }
+
+ port = &host->port[cur_port];
+
+ lo = (u64) le32_to_cpu(desc->size);
+ hi = (u64) le16_to_cpu(desc->size_hi);
+
+ port->capacity = lo | (hi << 32);
+ port->dev_geom_head = le16_to_cpu(desc->head);
+ port->dev_geom_sect = le16_to_cpu(desc->sect);
+ port->dev_geom_cyl = le16_to_cpu(desc->cyl);
+
+ host->dev_active |= (1 << cur_port);
+
+ strncpy(port->name, desc->name, sizeof(port->name));
+ port->name[sizeof(port->name) - 1] = 0;
+ slen = strlen(port->name);
+ while (slen && (port->name[slen - 1] == ' ')) {
+ port->name[slen - 1] = 0;
+ slen--;
+ }
+
+ printk(KERN_INFO DRV_NAME "(%s): port %u device %Lu sectors\n",
+ pci_name(host->pdev), port->port_no,
+ (unsigned long long) port->capacity);
+ printk(KERN_INFO DRV_NAME "(%s): port %u device \"%s\"\n",
+ pci_name(host->pdev), port->port_no, port->name);
+
+out:
+ assert(host->state == HST_DEV_SCAN);
+ schedule_work(&host->fsm_task);
+}
+
+static void carm_handle_scan_chan(struct carm_host *host,
+ struct carm_request *crq, u8 *mem,
+ int error)
+{
+ u8 *msg_data = mem + IOC_SCAN_CHAN_OFFSET;
+ unsigned int i, dev_count = 0;
+ int new_state = HST_DEV_SCAN_START;
+
+ DPRINTK("ENTER\n");
+
+ carm_end_rq(host, crq, error);
+
+ if (error) {
+ new_state = HST_ERROR;
+ goto out;
+ }
+
+ /* TODO: scan and support non-disk devices */
+ for (i = 0; i < 8; i++)
+ if (msg_data[i] == 0) { /* direct-access device (disk) */
+ host->dev_present |= (1 << i);
+ dev_count++;
+ }
+
+ printk(KERN_INFO DRV_NAME "(%s): found %u interesting devices\n",
+ pci_name(host->pdev), dev_count);
+
+out:
+ assert(host->state == HST_PORT_SCAN);
+ host->state = new_state;
+ schedule_work(&host->fsm_task);
+}
+
+static void carm_handle_generic(struct carm_host *host,
+ struct carm_request *crq, int error,
+ int cur_state, int next_state)
+{
+ DPRINTK("ENTER\n");
+
+ carm_end_rq(host, crq, error);
+
+ assert(host->state == cur_state);
+ if (error)
+ host->state = HST_ERROR;
+ else
+ host->state = next_state;
+ schedule_work(&host->fsm_task);
+}
+
+static inline void carm_handle_rw(struct carm_host *host,
+ struct carm_request *crq, int error)
+{
+ int pci_dir;
+
+ VPRINTK("ENTER\n");
+
+ if (rq_data_dir(crq->rq) == WRITE)
+ pci_dir = PCI_DMA_TODEVICE;
+ else
+ pci_dir = PCI_DMA_FROMDEVICE;
+
+ pci_unmap_sg(host->pdev, &crq->sg[0], crq->n_elem, pci_dir);
+
+ carm_end_rq(host, crq, error);
+}
+
+static inline void carm_handle_resp(struct carm_host *host,
+ __le32 ret_handle_le, u32 status)
+{
+ u32 handle = le32_to_cpu(ret_handle_le);
+ unsigned int msg_idx;
+ struct carm_request *crq;
+ int error = (status == RMSG_OK) ? 0 : -EIO;
+ u8 *mem;
+
+ VPRINTK("ENTER, handle == 0x%x\n", handle);
+
+ if (unlikely(!TAG_VALID(handle))) {
+ printk(KERN_ERR DRV_NAME "(%s): BUG: invalid tag 0x%x\n",
+ pci_name(host->pdev), handle);
+ return;
+ }
+
+ msg_idx = TAG_DECODE(handle);
+ VPRINTK("tag == %u\n", msg_idx);
+
+ crq = &host->req[msg_idx];
+
+ /* fast path */
+ if (likely(crq->msg_type == CARM_MSG_READ ||
+ crq->msg_type == CARM_MSG_WRITE)) {
+ carm_handle_rw(host, crq, error);
+ return;
+ }
+
+ mem = carm_ref_msg(host, msg_idx);
+
+ switch (crq->msg_type) {
+ case CARM_MSG_IOCTL: {
+ switch (crq->msg_subtype) {
+ case CARM_IOC_SCAN_CHAN:
+ carm_handle_scan_chan(host, crq, mem, error);
+ break;
+ default:
+ /* unknown / invalid response */
+ goto err_out;
+ }
+ break;
+ }
+
+ case CARM_MSG_MISC: {
+ switch (crq->msg_subtype) {
+ case MISC_ALLOC_MEM:
+ carm_handle_generic(host, crq, error,
+ HST_ALLOC_BUF, HST_SYNC_TIME);
+ break;
+ case MISC_SET_TIME:
+ carm_handle_generic(host, crq, error,
+ HST_SYNC_TIME, HST_GET_FW_VER);
+ break;
+ case MISC_GET_FW_VER: {
+ struct carm_fw_ver *ver = (struct carm_fw_ver *)
+ (mem + sizeof(struct carm_msg_get_fw_ver));
+ if (!error) {
+ host->fw_ver = le32_to_cpu(ver->version);
+ host->flags |= (ver->features & FL_FW_VER_MASK);
+ }
+ carm_handle_generic(host, crq, error,
+ HST_GET_FW_VER, HST_PORT_SCAN);
+ break;
+ }
+ default:
+ /* unknown / invalid response */
+ goto err_out;
+ }
+ break;
+ }
+
+ case CARM_MSG_ARRAY: {
+ switch (crq->msg_subtype) {
+ case CARM_ARRAY_INFO:
+ carm_handle_array_info(host, crq, mem, error);
+ break;
+ default:
+ /* unknown / invalid response */
+ goto err_out;
+ }
+ break;
+ }
+
+ default:
+ /* unknown / invalid response */
+ goto err_out;
+ }
+
+ return;
+
+err_out:
+ printk(KERN_WARNING DRV_NAME "(%s): BUG: unhandled message type %d/%d\n",
+ pci_name(host->pdev), crq->msg_type, crq->msg_subtype);
+ carm_end_rq(host, crq, -EIO);
+}
+
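+/*
+ * Walk the response ring in shared memory.  A status of 0xffffffff marks
+ * an unconsumed slot (the ring is pre-filled with that value).  Entries
+ * with bit 31 clear complete a message we sent; entries whose top status
+ * byte is 0x80 are asynchronous events thrown by the hardware.
+ */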
+static inline void carm_handle_responses(struct carm_host *host)
+{
+ void __iomem *mmio = host->mmio;
+ struct carm_response *resp = (struct carm_response *) host->shm;
+ unsigned int work = 0;
+ unsigned int idx = host->resp_idx % RMSG_Q_LEN;
+
+ while (1) {
+ u32 status = le32_to_cpu(resp[idx].status);
+
+ if (status == 0xffffffff) {
+ VPRINTK("ending response on index %u\n", idx);
+ writel(idx << 3, mmio + CARM_RESP_IDX);
+ break;
+ }
+
+ /* response to a message we sent */
+ else if ((status & (1 << 31)) == 0) {
+ VPRINTK("handling msg response on index %u\n", idx);
+ carm_handle_resp(host, resp[idx].ret_handle, status);
+ resp[idx].status = cpu_to_le32(0xffffffff);
+ }
+
+ /* asynchronous events the hardware throws our way */
+ else if ((status & 0xff000000) == (1 << 31)) {
+ u8 *evt_type_ptr = (u8 *) &resp[idx];
+ u8 evt_type = *evt_type_ptr;
+ printk(KERN_WARNING DRV_NAME "(%s): unhandled event type %d\n",
+ pci_name(host->pdev), (int) evt_type);
+ resp[idx].status = cpu_to_le32(0xffffffff);
+ }
+
+ idx = NEXT_RESP(idx);
+ work++;
+ }
+
+ VPRINTK("EXIT, work==%u\n", work);
+ host->resp_idx += work;
+}
+
+static irqreturn_t carm_interrupt(int irq, void *__host)
+{
+ struct carm_host *host = __host;
+ void __iomem *mmio;
+ u32 mask;
+ int handled = 0;
+ unsigned long flags;
+
+ if (!host) {
+ VPRINTK("no host\n");
+ return IRQ_NONE;
+ }
+
+ spin_lock_irqsave(&host->lock, flags);
+
+ mmio = host->mmio;
+
+ /* reading should also clear interrupts */
+ mask = readl(mmio + CARM_INT_STAT);
+
+ if (mask == 0 || mask == 0xffffffff) {
+ VPRINTK("no work, mask == 0x%x\n", mask);
+ goto out;
+ }
+
+ if (mask & INT_ACK_MASK)
+ writel(mask, mmio + CARM_INT_STAT);
+
+ if (unlikely(host->state == HST_INVALID)) {
+ VPRINTK("not initialized yet, mask = 0x%x\n", mask);
+ goto out;
+ }
+
+ if (mask & CARM_HAVE_RESP) {
+ handled = 1;
+ carm_handle_responses(host);
+ }
+
+out:
+ spin_unlock_irqrestore(&host->lock, flags);
+ VPRINTK("EXIT\n");
+ return IRQ_RETVAL(handled);
+}
+
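+/*
+ * Probe/initialization state machine, run from a workqueue.  Each state
+ * issues at most one special command; the completion path (via
+ * carm_handle_resp) updates host->state and reschedules this task, until
+ * HST_PROBE_FINISHED signals probe_comp back to carm_init_one().
+ */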
+static void carm_fsm_task (struct work_struct *work)
+{
+ struct carm_host *host =
+ container_of(work, struct carm_host, fsm_task);
+ unsigned long flags;
+ unsigned int state;
+ int rc, i, next_dev;
+ int reschedule = 0;
+ int new_state = HST_INVALID;
+
+ spin_lock_irqsave(&host->lock, flags);
+ state = host->state;
+ spin_unlock_irqrestore(&host->lock, flags);
+
+ DPRINTK("ENTER, state == %s\n", state_name[state]);
+
+ switch (state) {
+ case HST_PROBE_START:
+ new_state = HST_ALLOC_BUF;
+ reschedule = 1;
+ break;
+
+ case HST_ALLOC_BUF:
+ rc = carm_send_special(host, carm_fill_alloc_buf);
+ if (rc) {
+ new_state = HST_ERROR;
+ reschedule = 1;
+ }
+ break;
+
+ case HST_SYNC_TIME:
+ rc = carm_send_special(host, carm_fill_sync_time);
+ if (rc) {
+ new_state = HST_ERROR;
+ reschedule = 1;
+ }
+ break;
+
+ case HST_GET_FW_VER:
+ rc = carm_send_special(host, carm_fill_get_fw_ver);
+ if (rc) {
+ new_state = HST_ERROR;
+ reschedule = 1;
+ }
+ break;
+
+ case HST_PORT_SCAN:
+ rc = carm_send_special(host, carm_fill_scan_channels);
+ if (rc) {
+ new_state = HST_ERROR;
+ reschedule = 1;
+ }
+ break;
+
+ case HST_DEV_SCAN_START:
+ host->cur_scan_dev = -1;
+ new_state = HST_DEV_SCAN;
+ reschedule = 1;
+ break;
+
+ case HST_DEV_SCAN:
+ next_dev = -1;
+ for (i = host->cur_scan_dev + 1; i < CARM_MAX_PORTS; i++)
+ if (host->dev_present & (1 << i)) {
+ next_dev = i;
+ break;
+ }
+
+ if (next_dev >= 0) {
+ host->cur_scan_dev = next_dev;
+ rc = carm_array_info(host, next_dev);
+ if (rc) {
+ new_state = HST_ERROR;
+ reschedule = 1;
+ }
+ } else {
+ new_state = HST_DEV_ACTIVATE;
+ reschedule = 1;
+ }
+ break;
+
+ case HST_DEV_ACTIVATE: {
+ int activated = 0;
+ for (i = 0; i < CARM_MAX_PORTS; i++)
+ if (host->dev_active & (1 << i)) {
+ struct carm_port *port = &host->port[i];
+ struct gendisk *disk = port->disk;
+
+ set_capacity(disk, port->capacity);
+ add_disk(disk);
+ activated++;
+ }
+
+ printk(KERN_INFO DRV_NAME "(%s): %d ports activated\n",
+ pci_name(host->pdev), activated);
+
+ new_state = HST_PROBE_FINISHED;
+ reschedule = 1;
+ break;
+ }
+
+ case HST_PROBE_FINISHED:
+ complete(&host->probe_comp);
+ break;
+
+ case HST_ERROR:
+ /* FIXME: TODO */
+ break;
+
+ default:
+ /* should never occur */
+ printk(KERN_ERR PFX "BUG: unknown state %d\n", state);
+ assert(0);
+ break;
+ }
+
+ if (new_state != HST_INVALID) {
+ spin_lock_irqsave(&host->lock, flags);
+ host->state = new_state;
+ spin_unlock_irqrestore(&host->lock, flags);
+ }
+ if (reschedule)
+ schedule_work(&host->fsm_task);
+}
+
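+/*
+ * Poll the CARM_LMUC register for up to ~5 seconds (50000 iterations,
+ * 100 us apart), waiting for the given bits to become either all set
+ * (test_bit != 0) or all clear (test_bit == 0).
+ */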
+static int carm_init_wait(void __iomem *mmio, u32 bits, unsigned int test_bit)
+{
+ unsigned int i;
+
+ for (i = 0; i < 50000; i++) {
+ u32 tmp = readl(mmio + CARM_LMUC);
+ udelay(100);
+
+ if (test_bit) {
+ if ((tmp & bits) == bits)
+ return 0;
+ } else {
+ if ((tmp & bits) == 0)
+ return 0;
+ }
+
+ cond_resched();
+ }
+
+ printk(KERN_ERR PFX "carm_init_wait timeout, bits == 0x%x, test_bit == %s\n",
+ bits, test_bit ? "yes" : "no");
+ return -EBUSY;
+}
+
+static void carm_init_responses(struct carm_host *host)
+{
+ void __iomem *mmio = host->mmio;
+ unsigned int i;
+ struct carm_response *resp = (struct carm_response *) host->shm;
+
+ for (i = 0; i < RMSG_Q_LEN; i++)
+ resp[i].status = cpu_to_le32(0xffffffff);
+
+ writel(0, mmio + CARM_RESP_IDX);
+}
+
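+/*
+ * Bring the adapter to a known state: clear the interrupt mask, perform
+ * the INITC handshake if needed, wait for the CME/RME bits to settle,
+ * program the message-size buckets and the response buffer address,
+ * re-enable CME/RME, set the default interrupt mask and kick the probe
+ * state machine.
+ */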
+static int carm_init_host(struct carm_host *host)
+{
+ void __iomem *mmio = host->mmio;
+ u32 tmp;
+ u8 tmp8;
+ int rc;
+
+ DPRINTK("ENTER\n");
+
+ writel(0, mmio + CARM_INT_MASK);
+
+ tmp8 = readb(mmio + CARM_INITC);
+ if (tmp8 & 0x01) {
+ tmp8 &= ~0x01;
+ writeb(tmp8, mmio + CARM_INITC);
+ readb(mmio + CARM_INITC); /* flush */
+
+ DPRINTK("snooze...\n");
+ msleep(5000);
+ }
+
+ tmp = readl(mmio + CARM_HMUC);
+ if (tmp & CARM_CME) {
+ DPRINTK("CME bit present, waiting\n");
+ rc = carm_init_wait(mmio, CARM_CME, 1);
+ if (rc) {
+ DPRINTK("EXIT, carm_init_wait 1 failed\n");
+ return rc;
+ }
+ }
+ if (tmp & CARM_RME) {
+ DPRINTK("RME bit present, waiting\n");
+ rc = carm_init_wait(mmio, CARM_RME, 1);
+ if (rc) {
+ DPRINTK("EXIT, carm_init_wait 2 failed\n");
+ return rc;
+ }
+ }
+
+ tmp &= ~(CARM_RME | CARM_CME);
+ writel(tmp, mmio + CARM_HMUC);
+ readl(mmio + CARM_HMUC); /* flush */
+
+ rc = carm_init_wait(mmio, CARM_RME | CARM_CME, 0);
+ if (rc) {
+ DPRINTK("EXIT, carm_init_wait 3 failed\n");
+ return rc;
+ }
+
+ carm_init_buckets(mmio);
+
+ writel(host->shm_dma & 0xffffffff, mmio + RBUF_ADDR_LO);
+ writel((host->shm_dma >> 16) >> 16, mmio + RBUF_ADDR_HI);
+ writel(RBUF_LEN, mmio + RBUF_BYTE_SZ);
+
+ tmp = readl(mmio + CARM_HMUC);
+ tmp |= (CARM_RME | CARM_CME | CARM_WZBC);
+ writel(tmp, mmio + CARM_HMUC);
+ readl(mmio + CARM_HMUC); /* flush */
+
+ rc = carm_init_wait(mmio, CARM_RME | CARM_CME, 1);
+ if (rc) {
+ DPRINTK("EXIT, carm_init_wait 4 failed\n");
+ return rc;
+ }
+
+ writel(0, mmio + CARM_HMPHA);
+ writel(INT_DEF_MASK, mmio + CARM_INT_MASK);
+
+ carm_init_responses(host);
+
+ /* start initialization, probing state machine */
+ spin_lock_irq(&host->lock);
+ assert(host->state == HST_INVALID);
+ host->state = HST_PROBE_START;
+ spin_unlock_irq(&host->lock);
+ schedule_work(&host->fsm_task);
+
+ DPRINTK("EXIT\n");
+ return 0;
+}
+
+static int carm_init_disks(struct carm_host *host)
+{
+ unsigned int i;
+ int rc = 0;
+
+ for (i = 0; i < CARM_MAX_PORTS; i++) {
+ struct gendisk *disk;
+ struct request_queue *q;
+ struct carm_port *port;
+
+ port = &host->port[i];
+ port->host = host;
+ port->port_no = i;
+
+ disk = alloc_disk(CARM_MINORS_PER_MAJOR);
+ if (!disk) {
+ rc = -ENOMEM;
+ break;
+ }
+
+ port->disk = disk;
+ sprintf(disk->disk_name, DRV_NAME "/%u",
+ (unsigned int) (host->id * CARM_MAX_PORTS) + i);
+ disk->major = host->major;
+ disk->first_minor = i * CARM_MINORS_PER_MAJOR;
+ disk->fops = &carm_bd_ops;
+ disk->private_data = port;
+
+ q = blk_init_queue(carm_rq_fn, &host->lock);
+ if (!q) {
+ rc = -ENOMEM;
+ break;
+ }
+ disk->queue = q;
+ blk_queue_max_segments(q, CARM_MAX_REQ_SG);
+ blk_queue_segment_boundary(q, CARM_SG_BOUNDARY);
+
+ q->queuedata = port;
+ }
+
+ return rc;
+}
+
+static void carm_free_disks(struct carm_host *host)
+{
+ unsigned int i;
+
+ for (i = 0; i < CARM_MAX_PORTS; i++) {
+ struct gendisk *disk = host->port[i].disk;
+ if (disk) {
+ struct request_queue *q = disk->queue;
+
+ if (disk->flags & GENHD_FL_UP)
+ del_gendisk(disk);
+ if (q)
+ blk_cleanup_queue(q);
+ put_disk(disk);
+ }
+ }
+}
+
+static int carm_init_shm(struct carm_host *host)
+{
+ host->shm = pci_alloc_consistent(host->pdev, CARM_SHM_SIZE,
+ &host->shm_dma);
+ if (!host->shm)
+ return -ENOMEM;
+
+ host->msg_base = host->shm + RBUF_LEN;
+ host->msg_dma = host->shm_dma + RBUF_LEN;
+
+ memset(host->shm, 0xff, RBUF_LEN);
+ memset(host->msg_base, 0, PDC_SHM_SIZE - RBUF_LEN);
+
+ return 0;
+}
+
+static int carm_init_one (struct pci_dev *pdev, const struct pci_device_id *ent)
+{
+ struct carm_host *host;
+ unsigned int pci_dac;
+ int rc;
+ struct request_queue *q;
+ unsigned int i;
+
+ printk_once(KERN_DEBUG DRV_NAME " version " DRV_VERSION "\n");
+
+ rc = pci_enable_device(pdev);
+ if (rc)
+ return rc;
+
+ rc = pci_request_regions(pdev, DRV_NAME);
+ if (rc)
+ goto err_out;
+
+#ifdef IF_64BIT_DMA_IS_POSSIBLE /* grrrr... */
+ rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
+ if (!rc) {
+ rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
+ if (rc) {
+ printk(KERN_ERR DRV_NAME "(%s): consistent DMA mask failure\n",
+ pci_name(pdev));
+ goto err_out_regions;
+ }
+ pci_dac = 1;
+ } else {
+#endif
+ rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
+ if (rc) {
+ printk(KERN_ERR DRV_NAME "(%s): DMA mask failure\n",
+ pci_name(pdev));
+ goto err_out_regions;
+ }
+ pci_dac = 0;
+#ifdef IF_64BIT_DMA_IS_POSSIBLE /* grrrr... */
+ }
+#endif
+
+ host = kzalloc(sizeof(*host), GFP_KERNEL);
+ if (!host) {
+ printk(KERN_ERR DRV_NAME "(%s): memory alloc failure\n",
+ pci_name(pdev));
+ rc = -ENOMEM;
+ goto err_out_regions;
+ }
+
+ host->pdev = pdev;
+ host->flags = pci_dac ? FL_DAC : 0;
+ spin_lock_init(&host->lock);
+ INIT_WORK(&host->fsm_task, carm_fsm_task);
+ init_completion(&host->probe_comp);
+
+ for (i = 0; i < ARRAY_SIZE(host->req); i++)
+ host->req[i].tag = i;
+
+ host->mmio = ioremap(pci_resource_start(pdev, 0),
+ pci_resource_len(pdev, 0));
+ if (!host->mmio) {
+ printk(KERN_ERR DRV_NAME "(%s): MMIO alloc failure\n",
+ pci_name(pdev));
+ rc = -ENOMEM;
+ goto err_out_kfree;
+ }
+
+ rc = carm_init_shm(host);
+ if (rc) {
+ printk(KERN_ERR DRV_NAME "(%s): DMA SHM alloc failure\n",
+ pci_name(pdev));
+ goto err_out_iounmap;
+ }
+
+ q = blk_init_queue(carm_oob_rq_fn, &host->lock);
+ if (!q) {
+ printk(KERN_ERR DRV_NAME "(%s): OOB queue alloc failure\n",
+ pci_name(pdev));
+ rc = -ENOMEM;
+ goto err_out_pci_free;
+ }
+ host->oob_q = q;
+ q->queuedata = host;
+
+ /*
+ * Figure out which major to use: 160, 161, or dynamic
+ */
+ if (!test_and_set_bit(0, &carm_major_alloc))
+ host->major = 160;
+ else if (!test_and_set_bit(1, &carm_major_alloc))
+ host->major = 161;
+ else
+ host->flags |= FL_DYN_MAJOR;
+
+ host->id = carm_host_id;
+ sprintf(host->name, DRV_NAME "%d", carm_host_id);
+
+ rc = register_blkdev(host->major, host->name);
+ if (rc < 0)
+ goto err_out_free_majors;
+ if (host->flags & FL_DYN_MAJOR)
+ host->major = rc;
+
+ rc = carm_init_disks(host);
+ if (rc)
+ goto err_out_blkdev_disks;
+
+ pci_set_master(pdev);
+
+ rc = request_irq(pdev->irq, carm_interrupt, IRQF_SHARED, DRV_NAME, host);
+ if (rc) {
+ printk(KERN_ERR DRV_NAME "(%s): irq alloc failure\n",
+ pci_name(pdev));
+ goto err_out_blkdev_disks;
+ }
+
+ rc = carm_init_host(host);
+ if (rc)
+ goto err_out_free_irq;
+
+ DPRINTK("waiting for probe_comp\n");
+ wait_for_completion(&host->probe_comp);
+
+ printk(KERN_INFO "%s: pci %s, ports %d, io %llx, irq %u, major %d\n",
+ host->name, pci_name(pdev), (int) CARM_MAX_PORTS,
+ (unsigned long long)pci_resource_start(pdev, 0),
+ pdev->irq, host->major);
+
+ carm_host_id++;
+ pci_set_drvdata(pdev, host);
+ return 0;
+
+err_out_free_irq:
+ free_irq(pdev->irq, host);
+err_out_blkdev_disks:
+ carm_free_disks(host);
+ unregister_blkdev(host->major, host->name);
+err_out_free_majors:
+ if (host->major == 160)
+ clear_bit(0, &carm_major_alloc);
+ else if (host->major == 161)
+ clear_bit(1, &carm_major_alloc);
+ blk_cleanup_queue(host->oob_q);
+err_out_pci_free:
+ pci_free_consistent(pdev, CARM_SHM_SIZE, host->shm, host->shm_dma);
+err_out_iounmap:
+ iounmap(host->mmio);
+err_out_kfree:
+ kfree(host);
+err_out_regions:
+ pci_release_regions(pdev);
+err_out:
+ pci_disable_device(pdev);
+ return rc;
+}
+
+static void carm_remove_one (struct pci_dev *pdev)
+{
+ struct carm_host *host = pci_get_drvdata(pdev);
+
+ if (!host) {
+ printk(KERN_ERR PFX "BUG: no host data for PCI(%s)\n",
+ pci_name(pdev));
+ return;
+ }
+
+ free_irq(pdev->irq, host);
+ carm_free_disks(host);
+ unregister_blkdev(host->major, host->name);
+ if (host->major == 160)
+ clear_bit(0, &carm_major_alloc);
+ else if (host->major == 161)
+ clear_bit(1, &carm_major_alloc);
+ blk_cleanup_queue(host->oob_q);
+ pci_free_consistent(pdev, CARM_SHM_SIZE, host->shm, host->shm_dma);
+ iounmap(host->mmio);
+ kfree(host);
+ pci_release_regions(pdev);
+ pci_disable_device(pdev);
+ pci_set_drvdata(pdev, NULL);
+}
+
+static int __init carm_init(void)
+{
+ return pci_register_driver(&carm_driver);
+}
+
+static void __exit carm_exit(void)
+{
+ pci_unregister_driver(&carm_driver);
+}
+
+module_init(carm_init);
+module_exit(carm_exit);
+
+
diff --git a/drivers/block/ub.c b/drivers/block/ub.c
new file mode 100644
index 00000000..7333b9e4
--- /dev/null
+++ b/drivers/block/ub.c
@@ -0,0 +1,2509 @@
+/*
+ * The low performance USB storage driver (ub).
+ *
+ * Copyright (c) 1999, 2000 Matthew Dharm (mdharm-usb@one-eyed-alien.net)
+ * Copyright (C) 2004 Pete Zaitcev (zaitcev@yahoo.com)
+ *
+ * This work is a part of Linux kernel, is derived from it,
+ * and is not licensed separately. See file COPYING for details.
+ *
+ * TODO (sorted by decreasing priority)
+ * -- Return sense now that rq allows it (we always auto-sense anyway).
+ * -- set readonly flag for CDs, set removable flag for CF readers
+ * -- do inquiry and verify we got a disk and not a tape (for LUN mismatch)
+ * -- verify the 13 conditions and do bulk resets
+ * -- highmem
+ * -- move top_sense and work_bcs into separate allocations (if they survive)
+ * for cache purists and esoteric architectures.
+ * -- Allocate structure for LUN 0 before the first ub_sync_tur, avoid NULL. ?
+ * -- prune comments, they are too voluminous
+ * -- Resolve XXX's
+ * -- CLEAR, CLR2STS, CLRRS seem to be ripe for refactoring.
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/usb.h>
+#include <linux/usb_usual.h>
+#include <linux/blkdev.h>
+#include <linux/timer.h>
+#include <linux/scatterlist.h>
+#include <linux/slab.h>
+#include <linux/mutex.h>
+#include <scsi/scsi.h>
+
+#define DRV_NAME "ub"
+
+#define UB_MAJOR 180
+
+/*
+ * The command state machine is the key model for understanding of this driver.
+ *
+ * The general rule is that all transitions are done towards the bottom
+ * of the diagram, thus preventing any loops.
+ *
+ * An exception to that is how the STAT state is handled. A counter allows it
+ * to be re-entered along the path marked with [C].
+ *
+ * +--------+
+ * ! INIT !
+ * +--------+
+ * !
+ * ub_scsi_cmd_start fails ->--------------------------------------\
+ * ! !
+ * V !
+ * +--------+ !
+ * ! CMD ! !
+ * +--------+ !
+ * ! +--------+ !
+ * was -EPIPE -->-------------------------------->! CLEAR ! !
+ * ! +--------+ !
+ * ! ! !
+ * was error -->------------------------------------- ! --------->\
+ * ! ! !
+ * /--<-- cmd->dir == NONE ? ! !
+ * ! ! ! !
+ * ! V ! !
+ * ! +--------+ ! !
+ * ! ! DATA ! ! !
+ * ! +--------+ ! !
+ * ! ! +---------+ ! !
+ * ! was -EPIPE -->--------------->! CLR2STS ! ! !
+ * ! ! +---------+ ! !
+ * ! ! ! ! !
+ * ! ! was error -->---- ! --------->\
+ * ! was error -->--------------------- ! ------------- ! --------->\
+ * ! ! ! ! !
+ * ! V ! ! !
+ * \--->+--------+ ! ! !
+ * ! STAT !<--------------------------/ ! !
+ * /--->+--------+ ! !
+ * ! ! ! !
+ * [C] was -EPIPE -->-----------\ ! !
+ * ! ! ! ! !
+ * +<---- len == 0 ! ! !
+ * ! ! ! ! !
+ * ! was error -->--------------------------------------!---------->\
+ * ! ! ! ! !
+ * +<---- bad CSW ! ! !
+ * +<---- bad tag ! ! !
+ * ! ! V ! !
+ * ! ! +--------+ ! !
+ * ! ! ! CLRRS ! ! !
+ * ! ! +--------+ ! !
+ * ! ! ! ! !
+ * \------- ! --------------------[C]--------\ ! !
+ * ! ! ! !
+ * cmd->error---\ +--------+ ! !
+ * ! +--------------->! SENSE !<----------/ !
+ * STAT_FAIL----/ +--------+ !
+ * ! ! V
+ * ! V +--------+
+ * \--------------------------------\--------------------->! DONE !
+ * +--------+
+ */
+
+/*
+ * This many LUNs per USB device.
+ * Every one of them takes a host, see UB_MAX_HOSTS.
+ */
+#define UB_MAX_LUNS 9
+
+/*
+ */
+
+#define UB_PARTS_PER_LUN 8
+
+#define UB_MAX_CDB_SIZE 16 /* Corresponds to Bulk */
+
+#define UB_SENSE_SIZE 18
+
+/*
+ */
+
+/* command block wrapper */
+struct bulk_cb_wrap {
+ __le32 Signature; /* contains 'USBC' */
+ u32 Tag; /* unique per command id */
+ __le32 DataTransferLength; /* size of data */
+ u8 Flags; /* direction in bit 0 */
+ u8 Lun; /* LUN */
+ u8 Length; /* length of the CDB */
+ u8 CDB[UB_MAX_CDB_SIZE]; /* max command */
+};
+
+#define US_BULK_CB_WRAP_LEN 31
+#define US_BULK_CB_SIGN 0x43425355 /* spells out 'USBC' */
+#define US_BULK_FLAG_IN 1
+#define US_BULK_FLAG_OUT 0
+
+/* command status wrapper */
+struct bulk_cs_wrap {
+ __le32 Signature; /* should = 'USBS' */
+ u32 Tag; /* same as original command */
+ __le32 Residue; /* amount not transferred */
+ u8 Status; /* see below */
+};
+
+#define US_BULK_CS_WRAP_LEN 13
+#define US_BULK_CS_SIGN 0x53425355 /* spells out 'USBS' */
+#define US_BULK_STAT_OK 0
+#define US_BULK_STAT_FAIL 1
+#define US_BULK_STAT_PHASE 2
+
+/* bulk-only class specific requests */
+#define US_BULK_RESET_REQUEST 0xff
+#define US_BULK_GET_MAX_LUN 0xfe
+
+/*
+ */
+struct ub_dev;
+
+#define UB_MAX_REQ_SG 9 /* cdrecord requires 32KB and maybe a header */
+#define UB_MAX_SECTORS 64
+
+/*
+ * A second is more than enough for a 32K transfer (UB_MAX_SECTORS)
+ * even if a webcam hogs the bus, but some devices need time to spin up.
+ */
+#define UB_URB_TIMEOUT (HZ*2)
+#define UB_DATA_TIMEOUT (HZ*5) /* ZIP does spin-ups in the data phase */
+#define UB_STAT_TIMEOUT (HZ*5) /* Same spinups and eject for a dataless cmd. */
+#define UB_CTRL_TIMEOUT (HZ/2) /* 500ms ought to be enough to clear a stall */
+
+/*
+ * An instance of a SCSI command in transit.
+ */
+#define UB_DIR_NONE 0
+#define UB_DIR_READ 1
+#define UB_DIR_ILLEGAL2 2
+#define UB_DIR_WRITE 3
+
+#define UB_DIR_CHAR(c) (((c)==UB_DIR_WRITE)? 'w': \
+ (((c)==UB_DIR_READ)? 'r': 'n'))
+
+enum ub_scsi_cmd_state {
+ UB_CMDST_INIT, /* Initial state */
+ UB_CMDST_CMD, /* Command submitted */
+ UB_CMDST_DATA, /* Data phase */
+ UB_CMDST_CLR2STS, /* Clearing before requesting status */
+ UB_CMDST_STAT, /* Status phase */
+ UB_CMDST_CLEAR, /* Clearing a stall (halt, actually) */
+ UB_CMDST_CLRRS, /* Clearing before retrying status */
+ UB_CMDST_SENSE, /* Sending Request Sense */
+ UB_CMDST_DONE /* Final state */
+};
+
+struct ub_scsi_cmd {
+ unsigned char cdb[UB_MAX_CDB_SIZE];
+ unsigned char cdb_len;
+
+ unsigned char dir; /* 0 - none, 1 - read, 3 - write. */
+ enum ub_scsi_cmd_state state;
+ unsigned int tag;
+ struct ub_scsi_cmd *next;
+
+ int error; /* Return code - valid upon done */
+ unsigned int act_len; /* Return size */
+ unsigned char key, asc, ascq; /* May be valid if error==-EIO */
+
+ int stat_count; /* Retries getting status. */
+ unsigned int timeo; /* jiffies until rq->timeout changes */
+
+ unsigned int len; /* Requested length */
+ unsigned int current_sg;
+ unsigned int nsg; /* sgv[nsg] */
+ struct scatterlist sgv[UB_MAX_REQ_SG];
+
+ struct ub_lun *lun;
+ void (*done)(struct ub_dev *, struct ub_scsi_cmd *);
+ void *back;
+};
+
+struct ub_request {
+ struct request *rq;
+ unsigned int current_try;
+ unsigned int nsg; /* sgv[nsg] */
+ struct scatterlist sgv[UB_MAX_REQ_SG];
+};
+
+/*
+ */
+struct ub_capacity {
+ unsigned long nsec; /* Linux size - 512 byte sectors */
+ unsigned int bsize; /* Linux hardsect_size */
+ unsigned int bshift; /* Shift between 512 and hard sects */
+};
+
+/*
+ * This is a direct take-off from linux/include/completion.h
+ * The difference is that I do not wait on this thing, just poll.
+ * When I want to wait (ub_probe), I just use the stock completion.
+ *
+ * Note that INIT_COMPLETION takes no lock. It is correct. But why
+ * in the bloody hell that thing takes struct instead of pointer to struct
+ * is quite beyond me. I just copied it from the stock completion.
+ */
+struct ub_completion {
+ unsigned int done;
+ spinlock_t lock;
+};
+
+static DEFINE_MUTEX(ub_mutex);
+static inline void ub_init_completion(struct ub_completion *x)
+{
+ x->done = 0;
+ spin_lock_init(&x->lock);
+}
+
+#define UB_INIT_COMPLETION(x) ((x).done = 0)
+
+static void ub_complete(struct ub_completion *x)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&x->lock, flags);
+ x->done++;
+ spin_unlock_irqrestore(&x->lock, flags);
+}
+
+static int ub_is_completed(struct ub_completion *x)
+{
+ unsigned long flags;
+ int ret;
+
+ spin_lock_irqsave(&x->lock, flags);
+ ret = x->done;
+ spin_unlock_irqrestore(&x->lock, flags);
+ return ret;
+}
+
+/*
+ */
+struct ub_scsi_cmd_queue {
+ int qlen, qmax;
+ struct ub_scsi_cmd *head, *tail;
+};
+
+/*
+ * The block device instance (one per LUN).
+ */
+struct ub_lun {
+ struct ub_dev *udev;
+ struct list_head link;
+ struct gendisk *disk;
+ int id; /* Host index */
+ int num; /* LUN number */
+ char name[16];
+
+ int changed; /* Media was changed */
+ int removable;
+ int readonly;
+
+ struct ub_request urq;
+
+ /* Use Ingo's mempool if or when we have more than one command. */
+ /*
+ * Currently we never need more than one command for the whole device.
+ * However, giving every LUN a command is a cheap and automatic way
+ * to enforce fairness between them.
+ */
+ int cmda[1];
+ struct ub_scsi_cmd cmdv[1];
+
+ struct ub_capacity capacity;
+};
+
+/*
+ * The USB device instance.
+ */
+struct ub_dev {
+ spinlock_t *lock;
+ atomic_t poison; /* The USB device is disconnected */
+ int openc; /* protected by ub_lock! */
+ /* kref is too implicit for our taste */
+ int reset; /* Reset is running */
+ int bad_resid;
+ unsigned int tagcnt;
+ char name[12];
+ struct usb_device *dev;
+ struct usb_interface *intf;
+
+ struct list_head luns;
+
+ unsigned int send_bulk_pipe; /* cached pipe values */
+ unsigned int recv_bulk_pipe;
+ unsigned int send_ctrl_pipe;
+ unsigned int recv_ctrl_pipe;
+
+ struct tasklet_struct tasklet;
+
+ struct ub_scsi_cmd_queue cmd_queue;
+ struct ub_scsi_cmd top_rqs_cmd; /* REQUEST SENSE */
+ unsigned char top_sense[UB_SENSE_SIZE];
+
+ struct ub_completion work_done;
+ struct urb work_urb;
+ struct timer_list work_timer;
+ int last_pipe; /* What might need clearing */
+ __le32 signature; /* Learned signature */
+ struct bulk_cb_wrap work_bcb;
+ struct bulk_cs_wrap work_bcs;
+ struct usb_ctrlrequest work_cr;
+
+ struct work_struct reset_work;
+ wait_queue_head_t reset_wait;
+};
+
+/*
+ */
+static void ub_cleanup(struct ub_dev *sc);
+static int ub_request_fn_1(struct ub_lun *lun, struct request *rq);
+static void ub_cmd_build_block(struct ub_dev *sc, struct ub_lun *lun,
+ struct ub_scsi_cmd *cmd, struct ub_request *urq);
+static void ub_cmd_build_packet(struct ub_dev *sc, struct ub_lun *lun,
+ struct ub_scsi_cmd *cmd, struct ub_request *urq);
+static void ub_rw_cmd_done(struct ub_dev *sc, struct ub_scsi_cmd *cmd);
+static void ub_end_rq(struct request *rq, unsigned int status);
+static int ub_rw_cmd_retry(struct ub_dev *sc, struct ub_lun *lun,
+ struct ub_request *urq, struct ub_scsi_cmd *cmd);
+static int ub_submit_scsi(struct ub_dev *sc, struct ub_scsi_cmd *cmd);
+static void ub_urb_complete(struct urb *urb);
+static void ub_scsi_action(unsigned long _dev);
+static void ub_scsi_dispatch(struct ub_dev *sc);
+static void ub_scsi_urb_compl(struct ub_dev *sc, struct ub_scsi_cmd *cmd);
+static void ub_data_start(struct ub_dev *sc, struct ub_scsi_cmd *cmd);
+static void ub_state_done(struct ub_dev *sc, struct ub_scsi_cmd *cmd, int rc);
+static int __ub_state_stat(struct ub_dev *sc, struct ub_scsi_cmd *cmd);
+static void ub_state_stat(struct ub_dev *sc, struct ub_scsi_cmd *cmd);
+static void ub_state_stat_counted(struct ub_dev *sc, struct ub_scsi_cmd *cmd);
+static void ub_state_sense(struct ub_dev *sc, struct ub_scsi_cmd *cmd);
+static int ub_submit_clear_stall(struct ub_dev *sc, struct ub_scsi_cmd *cmd,
+ int stalled_pipe);
+static void ub_top_sense_done(struct ub_dev *sc, struct ub_scsi_cmd *scmd);
+static void ub_reset_enter(struct ub_dev *sc, int try);
+static void ub_reset_task(struct work_struct *work);
+static int ub_sync_tur(struct ub_dev *sc, struct ub_lun *lun);
+static int ub_sync_read_cap(struct ub_dev *sc, struct ub_lun *lun,
+ struct ub_capacity *ret);
+static int ub_sync_reset(struct ub_dev *sc);
+static int ub_probe_clear_stall(struct ub_dev *sc, int stalled_pipe);
+static int ub_probe_lun(struct ub_dev *sc, int lnum);
+
+/*
+ */
+#ifdef CONFIG_USB_LIBUSUAL
+
+#define ub_usb_ids usb_storage_usb_ids
+#else
+
+static const struct usb_device_id ub_usb_ids[] = {
+ { USB_INTERFACE_INFO(USB_CLASS_MASS_STORAGE, USB_SC_SCSI, USB_PR_BULK) },
+ { }
+};
+
+MODULE_DEVICE_TABLE(usb, ub_usb_ids);
+#endif /* CONFIG_USB_LIBUSUAL */
+
+/*
+ * Find me a way to identify "next free minor" for add_disk(),
+ * and the array disappears the next day. However, the number of
+ * hosts has something to do with the naming and /proc/partitions.
+ * This has to be thought out in detail before changing.
+ * If UB_MAX_HOST was 1000, we'd use a bitmap. Or a better data structure.
+ */
+#define UB_MAX_HOSTS 26
+static char ub_hostv[UB_MAX_HOSTS];
+
+#define UB_QLOCK_NUM 5
+static spinlock_t ub_qlockv[UB_QLOCK_NUM];
+static int ub_qlock_next = 0;
+
+static DEFINE_SPINLOCK(ub_lock); /* Locks globals and ->openc */
+
+/*
+ * The id allocator.
+ *
+ * This also stores the host for indexing by minor, which is somewhat dirty.
+ */
+static int ub_id_get(void)
+{
+ unsigned long flags;
+ int i;
+
+ spin_lock_irqsave(&ub_lock, flags);
+ for (i = 0; i < UB_MAX_HOSTS; i++) {
+ if (ub_hostv[i] == 0) {
+ ub_hostv[i] = 1;
+ spin_unlock_irqrestore(&ub_lock, flags);
+ return i;
+ }
+ }
+ spin_unlock_irqrestore(&ub_lock, flags);
+ return -1;
+}
+
+static void ub_id_put(int id)
+{
+ unsigned long flags;
+
+ if (id < 0 || id >= UB_MAX_HOSTS) {
+ printk(KERN_ERR DRV_NAME ": bad host ID %d\n", id);
+ return;
+ }
+
+ spin_lock_irqsave(&ub_lock, flags);
+ if (ub_hostv[id] == 0) {
+ spin_unlock_irqrestore(&ub_lock, flags);
+ printk(KERN_ERR DRV_NAME ": freeing free host ID %d\n", id);
+ return;
+ }
+ ub_hostv[id] = 0;
+ spin_unlock_irqrestore(&ub_lock, flags);
+}
+
+/*
+ * This is necessitated by the fact that blk_cleanup_queue does not
+ * necessarily destroy the queue. Instead, it may merely decrease q->refcnt.
+ * Since our blk_init_queue() passes a spinlock common with ub_dev,
+ * we have lifetime issues when ub_cleanup frees ub_dev.
+ */
+static spinlock_t *ub_next_lock(void)
+{
+ unsigned long flags;
+ spinlock_t *ret;
+
+ spin_lock_irqsave(&ub_lock, flags);
+ ret = &ub_qlockv[ub_qlock_next];
+ ub_qlock_next = (ub_qlock_next + 1) % UB_QLOCK_NUM;
+ spin_unlock_irqrestore(&ub_lock, flags);
+ return ret;
+}
+
+/*
+ * Downcount for deallocation. This rides on two assumptions:
+ * - once something is poisoned, its refcount cannot grow
+ * - opens cannot happen at this time (del_gendisk was done)
+ * If the above is true, we can drop the lock, which we need for
+ * blk_cleanup_queue(): the silly thing may attempt to sleep.
+ * [Actually, it never needs to sleep for us, but it calls might_sleep()]
+ */
+static void ub_put(struct ub_dev *sc)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&ub_lock, flags);
+ --sc->openc;
+ if (sc->openc == 0 && atomic_read(&sc->poison)) {
+ spin_unlock_irqrestore(&ub_lock, flags);
+ ub_cleanup(sc);
+ } else {
+ spin_unlock_irqrestore(&ub_lock, flags);
+ }
+}
+
+/*
+ * Final cleanup and deallocation.
+ */
+static void ub_cleanup(struct ub_dev *sc)
+{
+ struct list_head *p;
+ struct ub_lun *lun;
+ struct request_queue *q;
+
+ while (!list_empty(&sc->luns)) {
+ p = sc->luns.next;
+ lun = list_entry(p, struct ub_lun, link);
+ list_del(p);
+
+ /* I don't think queue can be NULL. But... Stolen from sx8.c */
+ if ((q = lun->disk->queue) != NULL)
+ blk_cleanup_queue(q);
+ /*
+ * If we zero disk->private_data BEFORE put_disk, we have
+ * to check for NULL all over the place in open, release,
+ * check_media and revalidate, because the block level
+ * semaphore is well inside the put_disk.
+ * But we cannot zero after the call, because *disk is gone.
+ * The sd.c is blatantly racy in this area.
+ */
+ /* disk->private_data = NULL; */
+ put_disk(lun->disk);
+ lun->disk = NULL;
+
+ ub_id_put(lun->id);
+ kfree(lun);
+ }
+
+ usb_set_intfdata(sc->intf, NULL);
+ usb_put_intf(sc->intf);
+ usb_put_dev(sc->dev);
+ kfree(sc);
+}
+
+/*
+ * The "command allocator".
+ */
+static struct ub_scsi_cmd *ub_get_cmd(struct ub_lun *lun)
+{
+ struct ub_scsi_cmd *ret;
+
+ if (lun->cmda[0])
+ return NULL;
+ ret = &lun->cmdv[0];
+ lun->cmda[0] = 1;
+ return ret;
+}
+
+static void ub_put_cmd(struct ub_lun *lun, struct ub_scsi_cmd *cmd)
+{
+ if (cmd != &lun->cmdv[0]) {
+ printk(KERN_WARNING "%s: releasing a foreign cmd %p\n",
+ lun->name, cmd);
+ return;
+ }
+ if (!lun->cmda[0]) {
+ printk(KERN_WARNING "%s: releasing a free cmd\n", lun->name);
+ return;
+ }
+ lun->cmda[0] = 0;
+}
+
+/*
+ * The command queue.
+ */
+static void ub_cmdq_add(struct ub_dev *sc, struct ub_scsi_cmd *cmd)
+{
+ struct ub_scsi_cmd_queue *t = &sc->cmd_queue;
+
+ if (t->qlen++ == 0) {
+ t->head = cmd;
+ t->tail = cmd;
+ } else {
+ t->tail->next = cmd;
+ t->tail = cmd;
+ }
+
+ if (t->qlen > t->qmax)
+ t->qmax = t->qlen;
+}
+
+static void ub_cmdq_insert(struct ub_dev *sc, struct ub_scsi_cmd *cmd)
+{
+ struct ub_scsi_cmd_queue *t = &sc->cmd_queue;
+
+ if (t->qlen++ == 0) {
+ t->head = cmd;
+ t->tail = cmd;
+ } else {
+ cmd->next = t->head;
+ t->head = cmd;
+ }
+
+ if (t->qlen > t->qmax)
+ t->qmax = t->qlen;
+}
+
+static struct ub_scsi_cmd *ub_cmdq_pop(struct ub_dev *sc)
+{
+ struct ub_scsi_cmd_queue *t = &sc->cmd_queue;
+ struct ub_scsi_cmd *cmd;
+
+ if (t->qlen == 0)
+ return NULL;
+ if (--t->qlen == 0)
+ t->tail = NULL;
+ cmd = t->head;
+ t->head = cmd->next;
+ cmd->next = NULL;
+ return cmd;
+}
+
+#define ub_cmdq_peek(sc) ((sc)->cmd_queue.head)
+
+/*
+ * The request function is our main entry point
+ */
+
+static void ub_request_fn(struct request_queue *q)
+{
+ struct ub_lun *lun = q->queuedata;
+ struct request *rq;
+
+ while ((rq = blk_peek_request(q)) != NULL) {
+ if (ub_request_fn_1(lun, rq) != 0) {
+ blk_stop_queue(q);
+ break;
+ }
+ }
+}
+
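+/*
+ * Handle one request.  Returns nonzero if the request cannot be started
+ * yet (the single per-LUN command slot is busy), in which case the caller
+ * stops the queue; it is restarted from ub_rw_cmd_done().
+ */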
+static int ub_request_fn_1(struct ub_lun *lun, struct request *rq)
+{
+ struct ub_dev *sc = lun->udev;
+ struct ub_scsi_cmd *cmd;
+ struct ub_request *urq;
+ int n_elem;
+
+ if (atomic_read(&sc->poison)) {
+ blk_start_request(rq);
+ ub_end_rq(rq, DID_NO_CONNECT << 16);
+ return 0;
+ }
+
+ if (lun->changed && rq->cmd_type != REQ_TYPE_BLOCK_PC) {
+ blk_start_request(rq);
+ ub_end_rq(rq, SAM_STAT_CHECK_CONDITION);
+ return 0;
+ }
+
+ if (lun->urq.rq != NULL)
+ return -1;
+ if ((cmd = ub_get_cmd(lun)) == NULL)
+ return -1;
+ memset(cmd, 0, sizeof(struct ub_scsi_cmd));
+
+ blk_start_request(rq);
+
+ urq = &lun->urq;
+ memset(urq, 0, sizeof(struct ub_request));
+ urq->rq = rq;
+
+ /*
+ * get scatterlist from block layer
+ */
+ sg_init_table(&urq->sgv[0], UB_MAX_REQ_SG);
+ n_elem = blk_rq_map_sg(lun->disk->queue, rq, &urq->sgv[0]);
+ if (n_elem < 0) {
+ /* Impossible, because blk_rq_map_sg should not hit ENOMEM. */
+ printk(KERN_INFO "%s: failed request map (%d)\n",
+ lun->name, n_elem);
+ goto drop;
+ }
+ if (n_elem > UB_MAX_REQ_SG) { /* Paranoia */
+ printk(KERN_WARNING "%s: request with %d segments\n",
+ lun->name, n_elem);
+ goto drop;
+ }
+ urq->nsg = n_elem;
+
+ if (rq->cmd_type == REQ_TYPE_BLOCK_PC) {
+ ub_cmd_build_packet(sc, lun, cmd, urq);
+ } else {
+ ub_cmd_build_block(sc, lun, cmd, urq);
+ }
+ cmd->state = UB_CMDST_INIT;
+ cmd->lun = lun;
+ cmd->done = ub_rw_cmd_done;
+ cmd->back = urq;
+
+ cmd->tag = sc->tagcnt++;
+ if (ub_submit_scsi(sc, cmd) != 0)
+ goto drop;
+
+ return 0;
+
+drop:
+ ub_put_cmd(lun, cmd);
+ ub_end_rq(rq, DID_ERROR << 16);
+ return 0;
+}
+
+static void ub_cmd_build_block(struct ub_dev *sc, struct ub_lun *lun,
+ struct ub_scsi_cmd *cmd, struct ub_request *urq)
+{
+ struct request *rq = urq->rq;
+ unsigned int block, nblks;
+
+ if (rq_data_dir(rq) == WRITE)
+ cmd->dir = UB_DIR_WRITE;
+ else
+ cmd->dir = UB_DIR_READ;
+
+ cmd->nsg = urq->nsg;
+ memcpy(cmd->sgv, urq->sgv, sizeof(struct scatterlist) * cmd->nsg);
+
+ /*
+ * build the command
+ *
+ * The call to blk_queue_logical_block_size() guarantees that request
+ * is aligned, but it is given in terms of 512 byte units, always.
+ */
+ block = blk_rq_pos(rq) >> lun->capacity.bshift;
+ nblks = blk_rq_sectors(rq) >> lun->capacity.bshift;
+
+ cmd->cdb[0] = (cmd->dir == UB_DIR_READ)? READ_10: WRITE_10;
+ /* 10-byte uses 4 bytes of LBA: 2147483648KB, 2097152MB, 2048GB */
+ cmd->cdb[2] = block >> 24;
+ cmd->cdb[3] = block >> 16;
+ cmd->cdb[4] = block >> 8;
+ cmd->cdb[5] = block;
+ cmd->cdb[7] = nblks >> 8;
+ cmd->cdb[8] = nblks;
+ cmd->cdb_len = 10;
+
+ cmd->len = blk_rq_bytes(rq);
+}
+
+static void ub_cmd_build_packet(struct ub_dev *sc, struct ub_lun *lun,
+ struct ub_scsi_cmd *cmd, struct ub_request *urq)
+{
+ struct request *rq = urq->rq;
+
+ if (blk_rq_bytes(rq) == 0) {
+ cmd->dir = UB_DIR_NONE;
+ } else {
+ if (rq_data_dir(rq) == WRITE)
+ cmd->dir = UB_DIR_WRITE;
+ else
+ cmd->dir = UB_DIR_READ;
+ }
+
+ cmd->nsg = urq->nsg;
+ memcpy(cmd->sgv, urq->sgv, sizeof(struct scatterlist) * cmd->nsg);
+
+ memcpy(&cmd->cdb, rq->cmd, rq->cmd_len);
+ cmd->cdb_len = rq->cmd_len;
+
+ cmd->len = blk_rq_bytes(rq);
+
+ /*
+ * To reapply this to every URB is not as incorrect as it looks.
+ * In return, we avoid any complicated tracking calculations.
+ */
+ cmd->timeo = rq->timeout;
+}
+
+static void ub_rw_cmd_done(struct ub_dev *sc, struct ub_scsi_cmd *cmd)
+{
+ struct ub_lun *lun = cmd->lun;
+ struct ub_request *urq = cmd->back;
+ struct request *rq;
+ unsigned int scsi_status;
+
+ rq = urq->rq;
+
+ if (cmd->error == 0) {
+ if (rq->cmd_type == REQ_TYPE_BLOCK_PC) {
+ if (cmd->act_len >= rq->resid_len)
+ rq->resid_len = 0;
+ else
+ rq->resid_len -= cmd->act_len;
+ scsi_status = 0;
+ } else {
+ if (cmd->act_len != cmd->len) {
+ scsi_status = SAM_STAT_CHECK_CONDITION;
+ } else {
+ scsi_status = 0;
+ }
+ }
+ } else {
+ if (rq->cmd_type == REQ_TYPE_BLOCK_PC) {
+ /* UB_SENSE_SIZE is smaller than SCSI_SENSE_BUFFERSIZE */
+ memcpy(rq->sense, sc->top_sense, UB_SENSE_SIZE);
+ rq->sense_len = UB_SENSE_SIZE;
+ if (sc->top_sense[0] != 0)
+ scsi_status = SAM_STAT_CHECK_CONDITION;
+ else
+ scsi_status = DID_ERROR << 16;
+ } else {
+ if (cmd->error == -EIO &&
+ (cmd->key == 0 ||
+ cmd->key == MEDIUM_ERROR ||
+ cmd->key == UNIT_ATTENTION)) {
+ if (ub_rw_cmd_retry(sc, lun, urq, cmd) == 0)
+ return;
+ }
+ scsi_status = SAM_STAT_CHECK_CONDITION;
+ }
+ }
+
+ urq->rq = NULL;
+
+ ub_put_cmd(lun, cmd);
+ ub_end_rq(rq, scsi_status);
+ blk_start_queue(lun->disk->queue);
+}
+
+static void ub_end_rq(struct request *rq, unsigned int scsi_status)
+{
+ int error;
+
+ if (scsi_status == 0) {
+ error = 0;
+ } else {
+ error = -EIO;
+ rq->errors = scsi_status;
+ }
+ __blk_end_request_all(rq, error);
+}
+
+static int ub_rw_cmd_retry(struct ub_dev *sc, struct ub_lun *lun,
+ struct ub_request *urq, struct ub_scsi_cmd *cmd)
+{
+
+ if (atomic_read(&sc->poison))
+ return -ENXIO;
+
+ ub_reset_enter(sc, urq->current_try);
+
+ if (urq->current_try >= 3)
+ return -EIO;
+ urq->current_try++;
+
+ /* Remove this if anyone complains of flooding. */
+ printk(KERN_DEBUG "%s: dir %c len/act %d/%d "
+ "[sense %x %02x %02x] retry %d\n",
+ sc->name, UB_DIR_CHAR(cmd->dir), cmd->len, cmd->act_len,
+ cmd->key, cmd->asc, cmd->ascq, urq->current_try);
+
+ memset(cmd, 0, sizeof(struct ub_scsi_cmd));
+ ub_cmd_build_block(sc, lun, cmd, urq);
+
+ cmd->state = UB_CMDST_INIT;
+ cmd->lun = lun;
+ cmd->done = ub_rw_cmd_done;
+ cmd->back = urq;
+
+ cmd->tag = sc->tagcnt++;
+
+#if 0 /* Wasteful */
+ return ub_submit_scsi(sc, cmd);
+#else
+ ub_cmdq_add(sc, cmd);
+ return 0;
+#endif
+}
+
+/*
+ * Submit a regular SCSI operation (not an auto-sense).
+ *
+ * The Iron Law of Good Submit Routine is:
+ * Zero return - callback is done, Nonzero return - callback is not done.
+ * No exceptions.
+ *
+ * Host is assumed locked.
+ */
+static int ub_submit_scsi(struct ub_dev *sc, struct ub_scsi_cmd *cmd)
+{
+
+ if (cmd->state != UB_CMDST_INIT ||
+ (cmd->dir != UB_DIR_NONE && cmd->len == 0)) {
+ return -EINVAL;
+ }
+
+ ub_cmdq_add(sc, cmd);
+ /*
+ * We can call ub_scsi_dispatch(sc) right away here, but it's a little
+ * safer to jump to a tasklet, in case upper layers do something silly.
+ */
+ tasklet_schedule(&sc->tasklet);
+ return 0;
+}
+
+/*
+ * Submit the first URB for the queued command.
+ * This function does not deal with queueing in any way.
+ */
+static int ub_scsi_cmd_start(struct ub_dev *sc, struct ub_scsi_cmd *cmd)
+{
+ struct bulk_cb_wrap *bcb;
+ int rc;
+
+ bcb = &sc->work_bcb;
+
+ /*
+ * ``If the allocation length is eighteen or greater, and a device
+ * server returns less than eighteen bytes of data, the application
+ * client should assume that the bytes not transferred would have been
+ * zeroes had the device server returned those bytes.''
+ *
+ * We zero sense for all commands so that when a packet request
+ * fails it does not return a stale sense.
+ */
+ memset(&sc->top_sense, 0, UB_SENSE_SIZE);
+
+ /* set up the command wrapper */
+ bcb->Signature = cpu_to_le32(US_BULK_CB_SIGN);
+ bcb->Tag = cmd->tag; /* Endianness is not important */
+ bcb->DataTransferLength = cpu_to_le32(cmd->len);
+ bcb->Flags = (cmd->dir == UB_DIR_READ) ? 0x80 : 0;
+ bcb->Lun = (cmd->lun != NULL) ? cmd->lun->num : 0;
+ bcb->Length = cmd->cdb_len;
+
+ /* copy the command payload */
+ memcpy(bcb->CDB, cmd->cdb, UB_MAX_CDB_SIZE);
+
+ UB_INIT_COMPLETION(sc->work_done);
+
+ sc->last_pipe = sc->send_bulk_pipe;
+ usb_fill_bulk_urb(&sc->work_urb, sc->dev, sc->send_bulk_pipe,
+ bcb, US_BULK_CB_WRAP_LEN, ub_urb_complete, sc);
+
+ if ((rc = usb_submit_urb(&sc->work_urb, GFP_ATOMIC)) != 0) {
+ /* XXX Clear stalls */
+ ub_complete(&sc->work_done);
+ return rc;
+ }
+
+ sc->work_timer.expires = jiffies + UB_URB_TIMEOUT;
+ add_timer(&sc->work_timer);
+
+ cmd->state = UB_CMDST_CMD;
+ return 0;
+}
+
+/*
+ * Timeout handler.
+ */
+static void ub_urb_timeout(unsigned long arg)
+{
+ struct ub_dev *sc = (struct ub_dev *) arg;
+ unsigned long flags;
+
+ spin_lock_irqsave(sc->lock, flags);
+ if (!ub_is_completed(&sc->work_done))
+ usb_unlink_urb(&sc->work_urb);
+ spin_unlock_irqrestore(sc->lock, flags);
+}
+
+/*
+ * Completion routine for the work URB.
+ *
+ * This can be called directly from usb_submit_urb (while we have
+ * the sc->lock taken) and from an interrupt (while we do NOT have
+ * the sc->lock taken). Therefore, bounce this off to a tasklet.
+ */
+static void ub_urb_complete(struct urb *urb)
+{
+ struct ub_dev *sc = urb->context;
+
+ ub_complete(&sc->work_done);
+ tasklet_schedule(&sc->tasklet);
+}
+
+static void ub_scsi_action(unsigned long _dev)
+{
+ struct ub_dev *sc = (struct ub_dev *) _dev;
+ unsigned long flags;
+
+ spin_lock_irqsave(sc->lock, flags);
+ ub_scsi_dispatch(sc);
+ spin_unlock_irqrestore(sc->lock, flags);
+}
+
+static void ub_scsi_dispatch(struct ub_dev *sc)
+{
+ struct ub_scsi_cmd *cmd;
+ int rc;
+
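+ /* Step the command at the head of the queue through its state machine */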
+ while (!sc->reset && (cmd = ub_cmdq_peek(sc)) != NULL) {
+ if (cmd->state == UB_CMDST_DONE) {
+ ub_cmdq_pop(sc);
+ (*cmd->done)(sc, cmd);
+ } else if (cmd->state == UB_CMDST_INIT) {
+ if ((rc = ub_scsi_cmd_start(sc, cmd)) == 0)
+ break;
+ cmd->error = rc;
+ cmd->state = UB_CMDST_DONE;
+ } else {
+ if (!ub_is_completed(&sc->work_done))
+ break;
+ del_timer(&sc->work_timer);
+ ub_scsi_urb_compl(sc, cmd);
+ }
+ }
+}
+
+static void ub_scsi_urb_compl(struct ub_dev *sc, struct ub_scsi_cmd *cmd)
+{
+ struct urb *urb = &sc->work_urb;
+ struct bulk_cs_wrap *bcs;
+ int endp;
+ int len;
+ int rc;
+
+ if (atomic_read(&sc->poison)) {
+ ub_state_done(sc, cmd, -ENODEV);
+ return;
+ }
+
+ endp = usb_pipeendpoint(sc->last_pipe);
+ if (usb_pipein(sc->last_pipe))
+ endp |= USB_DIR_IN;
+
+ if (cmd->state == UB_CMDST_CLEAR) {
+ if (urb->status == -EPIPE) {
+ /*
+ * STALL while clearing STALL.
+ * The control pipe clears itself - nothing to do.
+ */
+ printk(KERN_NOTICE "%s: stall on control pipe\n",
+ sc->name);
+ goto Bad_End;
+ }
+
+ /*
+ * We ignore the result for the halt clear.
+ */
+
+ usb_reset_endpoint(sc->dev, endp);
+
+ ub_state_sense(sc, cmd);
+
+ } else if (cmd->state == UB_CMDST_CLR2STS) {
+ if (urb->status == -EPIPE) {
+ printk(KERN_NOTICE "%s: stall on control pipe\n",
+ sc->name);
+ goto Bad_End;
+ }
+
+ /*
+ * We ignore the result for the halt clear.
+ */
+
+ usb_reset_endpoint(sc->dev, endp);
+
+ ub_state_stat(sc, cmd);
+
+ } else if (cmd->state == UB_CMDST_CLRRS) {
+ if (urb->status == -EPIPE) {
+ printk(KERN_NOTICE "%s: stall on control pipe\n",
+ sc->name);
+ goto Bad_End;
+ }
+
+ /*
+ * We ignore the result for the halt clear.
+ */
+
+ usb_reset_endpoint(sc->dev, endp);
+
+ ub_state_stat_counted(sc, cmd);
+
+ } else if (cmd->state == UB_CMDST_CMD) {
+ switch (urb->status) {
+ case 0:
+ break;
+ case -EOVERFLOW:
+ goto Bad_End;
+ case -EPIPE:
+ rc = ub_submit_clear_stall(sc, cmd, sc->last_pipe);
+ if (rc != 0) {
+ printk(KERN_NOTICE "%s: "
+ "unable to submit clear (%d)\n",
+ sc->name, rc);
+ /*
+ * This is typically ENOMEM or some other such shit.
+ * Retrying is pointless. Just do Bad End on it...
+ */
+ ub_state_done(sc, cmd, rc);
+ return;
+ }
+ cmd->state = UB_CMDST_CLEAR;
+ return;
+ case -ESHUTDOWN: /* unplug */
+ case -EILSEQ: /* unplug timeout on uhci */
+ ub_state_done(sc, cmd, -ENODEV);
+ return;
+ default:
+ goto Bad_End;
+ }
+ if (urb->actual_length != US_BULK_CB_WRAP_LEN) {
+ goto Bad_End;
+ }
+
+ if (cmd->dir == UB_DIR_NONE || cmd->nsg < 1) {
+ ub_state_stat(sc, cmd);
+ return;
+ }
+
+ // udelay(125); // usb-storage has this
+ ub_data_start(sc, cmd);
+
+ } else if (cmd->state == UB_CMDST_DATA) {
+ if (urb->status == -EPIPE) {
+ rc = ub_submit_clear_stall(sc, cmd, sc->last_pipe);
+ if (rc != 0) {
+ printk(KERN_NOTICE "%s: "
+ "unable to submit clear (%d)\n",
+ sc->name, rc);
+ ub_state_done(sc, cmd, rc);
+ return;
+ }
+ cmd->state = UB_CMDST_CLR2STS;
+ return;
+ }
+ if (urb->status == -EOVERFLOW) {
+ /*
+ * A babble? Failure, but we must transfer CSW now.
+ */
+ cmd->error = -EOVERFLOW; /* A cheap trick... */
+ ub_state_stat(sc, cmd);
+ return;
+ }
+
+ if (cmd->dir == UB_DIR_WRITE) {
+ /*
+ * Do not continue writes in case of a failure.
+ * Doing so would cause sectors to be mixed up,
+ * which is worse than sectors lost.
+ *
+ * We must try to read the CSW, or many devices
+ * get confused.
+ */
+ len = urb->actual_length;
+ if (urb->status != 0 ||
+ len != cmd->sgv[cmd->current_sg].length) {
+ cmd->act_len += len;
+
+ cmd->error = -EIO;
+ ub_state_stat(sc, cmd);
+ return;
+ }
+
+ } else {
+ /*
+ * If an error occurs on read, we record it, and
+ * continue to fetch data in order to avoid bubble.
+ *
+ * As a small shortcut, we stop if we detect that
+ * a CSW got mixed into the data.
+ */
+ if (urb->status != 0)
+ cmd->error = -EIO;
+
+ len = urb->actual_length;
+ if (urb->status != 0 ||
+ len != cmd->sgv[cmd->current_sg].length) {
+ if ((len & 0x1FF) == US_BULK_CS_WRAP_LEN)
+ goto Bad_End;
+ }
+ }
+
+ cmd->act_len += urb->actual_length;
+
+ if (++cmd->current_sg < cmd->nsg) {
+ ub_data_start(sc, cmd);
+ return;
+ }
+ ub_state_stat(sc, cmd);
+
+ } else if (cmd->state == UB_CMDST_STAT) {
+ if (urb->status == -EPIPE) {
+ rc = ub_submit_clear_stall(sc, cmd, sc->last_pipe);
+ if (rc != 0) {
+ printk(KERN_NOTICE "%s: "
+ "unable to submit clear (%d)\n",
+ sc->name, rc);
+ ub_state_done(sc, cmd, rc);
+ return;
+ }
+
+ /*
+ * Having a stall when getting CSW is an error, so
+ * make sure upper levels are not oblivious to it.
+ */
+ cmd->error = -EIO; /* A cheap trick... */
+
+ cmd->state = UB_CMDST_CLRRS;
+ return;
+ }
+
+ /* Catch everything, including -EOVERFLOW and other nasties. */
+ if (urb->status != 0)
+ goto Bad_End;
+
+ if (urb->actual_length == 0) {
+ ub_state_stat_counted(sc, cmd);
+ return;
+ }
+
+ /*
+ * Check the returned Bulk protocol status.
+ * The status block has to be validated first.
+ */
+
+ bcs = &sc->work_bcs;
+
+ if (sc->signature == cpu_to_le32(0)) {
+ /*
+ * This is the first reply, so do not perform the check.
+ * Instead, remember the signature the device uses
+ * for future checks. But do not allow a nul.
+ */
+ sc->signature = bcs->Signature;
+ if (sc->signature == cpu_to_le32(0)) {
+ ub_state_stat_counted(sc, cmd);
+ return;
+ }
+ } else {
+ if (bcs->Signature != sc->signature) {
+ ub_state_stat_counted(sc, cmd);
+ return;
+ }
+ }
+
+ if (bcs->Tag != cmd->tag) {
+ /*
+ * This usually happens when we disagree with the
+ * device's microcode about something. For instance,
+ * a few of them throw this after timeouts. They buffer
+ * commands and reply to commands that we timed out earlier.
+ * Without flushing these replies we loop forever.
+ */
+ ub_state_stat_counted(sc, cmd);
+ return;
+ }
+
+ if (!sc->bad_resid) {
+ len = le32_to_cpu(bcs->Residue);
+ if (len != cmd->len - cmd->act_len) {
+ /*
+ * Only start ignoring if this cmd ended well.
+ */
+ if (cmd->len == cmd->act_len) {
+ printk(KERN_NOTICE "%s: "
+ "bad residual %d of %d, ignoring\n",
+ sc->name, len, cmd->len);
+ sc->bad_resid = 1;
+ }
+ }
+ }
+
+ switch (bcs->Status) {
+ case US_BULK_STAT_OK:
+ break;
+ case US_BULK_STAT_FAIL:
+ ub_state_sense(sc, cmd);
+ return;
+ case US_BULK_STAT_PHASE:
+ goto Bad_End;
+ default:
+ printk(KERN_INFO "%s: unknown CSW status 0x%x\n",
+ sc->name, bcs->Status);
+ ub_state_done(sc, cmd, -EINVAL);
+ return;
+ }
+
+ /* Not zeroing error to preserve a babble indicator */
+ if (cmd->error != 0) {
+ ub_state_sense(sc, cmd);
+ return;
+ }
+ cmd->state = UB_CMDST_DONE;
+ ub_cmdq_pop(sc);
+ (*cmd->done)(sc, cmd);
+
+ } else if (cmd->state == UB_CMDST_SENSE) {
+ ub_state_done(sc, cmd, -EIO);
+
+ } else {
+ printk(KERN_WARNING "%s: wrong command state %d\n",
+ sc->name, cmd->state);
+ ub_state_done(sc, cmd, -EINVAL);
+ return;
+ }
+ return;
+
+Bad_End: /* Little Excel is dead */
+ ub_state_done(sc, cmd, -EIO);
+}
+
+/*
+ * Factorization helper for the command state machine:
+ * Initiate a data segment transfer.
+ */
+static void ub_data_start(struct ub_dev *sc, struct ub_scsi_cmd *cmd)
+{
+ struct scatterlist *sg = &cmd->sgv[cmd->current_sg];
+ int pipe;
+ int rc;
+
+ UB_INIT_COMPLETION(sc->work_done);
+
+ if (cmd->dir == UB_DIR_READ)
+ pipe = sc->recv_bulk_pipe;
+ else
+ pipe = sc->send_bulk_pipe;
+ sc->last_pipe = pipe;
+ usb_fill_bulk_urb(&sc->work_urb, sc->dev, pipe, sg_virt(sg),
+ sg->length, ub_urb_complete, sc);
+
+ if ((rc = usb_submit_urb(&sc->work_urb, GFP_ATOMIC)) != 0) {
+ /* XXX Clear stalls */
+ ub_complete(&sc->work_done);
+ ub_state_done(sc, cmd, rc);
+ return;
+ }
+
+ if (cmd->timeo)
+ sc->work_timer.expires = jiffies + cmd->timeo;
+ else
+ sc->work_timer.expires = jiffies + UB_DATA_TIMEOUT;
+ add_timer(&sc->work_timer);
+
+ cmd->state = UB_CMDST_DATA;
+}
+
+/*
+ * Factorization helper for the command state machine:
+ * Finish the command.
+ */
+static void ub_state_done(struct ub_dev *sc, struct ub_scsi_cmd *cmd, int rc)
+{
+
+ cmd->error = rc;
+ cmd->state = UB_CMDST_DONE;
+ ub_cmdq_pop(sc);
+ (*cmd->done)(sc, cmd);
+}
+
+/*
+ * Factorization helper for the command state machine:
+ * Submit a CSW read.
+ */
+static int __ub_state_stat(struct ub_dev *sc, struct ub_scsi_cmd *cmd)
+{
+ int rc;
+
+ UB_INIT_COMPLETION(sc->work_done);
+
+ sc->last_pipe = sc->recv_bulk_pipe;
+ usb_fill_bulk_urb(&sc->work_urb, sc->dev, sc->recv_bulk_pipe,
+ &sc->work_bcs, US_BULK_CS_WRAP_LEN, ub_urb_complete, sc);
+
+ if ((rc = usb_submit_urb(&sc->work_urb, GFP_ATOMIC)) != 0) {
+ /* XXX Clear stalls */
+ ub_complete(&sc->work_done);
+ ub_state_done(sc, cmd, rc);
+ return -1;
+ }
+
+ if (cmd->timeo)
+ sc->work_timer.expires = jiffies + cmd->timeo;
+ else
+ sc->work_timer.expires = jiffies + UB_STAT_TIMEOUT;
+ add_timer(&sc->work_timer);
+ return 0;
+}
+
+/*
+ * Factorization helper for the command state machine:
+ * Submit a CSW read and go to STAT state.
+ */
+static void ub_state_stat(struct ub_dev *sc, struct ub_scsi_cmd *cmd)
+{
+
+ if (__ub_state_stat(sc, cmd) != 0)
+ return;
+
+ cmd->stat_count = 0;
+ cmd->state = UB_CMDST_STAT;
+}
+
+/*
+ * Factorization helper for the command state machine:
+ * Submit a CSW read and go to STAT state with counter (along [C] path).
+ */
+static void ub_state_stat_counted(struct ub_dev *sc, struct ub_scsi_cmd *cmd)
+{
+
+ if (++cmd->stat_count >= 4) {
+ ub_state_sense(sc, cmd);
+ return;
+ }
+
+ if (__ub_state_stat(sc, cmd) != 0)
+ return;
+
+ cmd->state = UB_CMDST_STAT;
+}
+
+/*
+ * Factorization helper for the command state machine:
+ * Submit a REQUEST SENSE and go to SENSE state.
+ */
+static void ub_state_sense(struct ub_dev *sc, struct ub_scsi_cmd *cmd)
+{
+ struct ub_scsi_cmd *scmd;
+ struct scatterlist *sg;
+ int rc;
+
+ if (cmd->cdb[0] == REQUEST_SENSE) {
+ rc = -EPIPE;
+ goto error;
+ }
+
+ scmd = &sc->top_rqs_cmd;
+ memset(scmd, 0, sizeof(struct ub_scsi_cmd));
+ scmd->cdb[0] = REQUEST_SENSE;
+ scmd->cdb[4] = UB_SENSE_SIZE;
+ scmd->cdb_len = 6;
+ scmd->dir = UB_DIR_READ;
+ scmd->state = UB_CMDST_INIT;
+ scmd->nsg = 1;
+ sg = &scmd->sgv[0];
+ sg_init_table(sg, UB_MAX_REQ_SG);
+ sg_set_page(sg, virt_to_page(sc->top_sense), UB_SENSE_SIZE,
+ (unsigned long)sc->top_sense & (PAGE_SIZE-1));
+ scmd->len = UB_SENSE_SIZE;
+ scmd->lun = cmd->lun;
+ scmd->done = ub_top_sense_done;
+ scmd->back = cmd;
+
+ scmd->tag = sc->tagcnt++;
+
+ cmd->state = UB_CMDST_SENSE;
+
+ ub_cmdq_insert(sc, scmd);
+ return;
+
+error:
+ ub_state_done(sc, cmd, rc);
+}
+
+/*
+ * A helper for the command's state machine:
+ * Submit a stall clear.
+ */
+static int ub_submit_clear_stall(struct ub_dev *sc, struct ub_scsi_cmd *cmd,
+ int stalled_pipe)
+{
+ int endp;
+ struct usb_ctrlrequest *cr;
+ int rc;
+
+ endp = usb_pipeendpoint(stalled_pipe);
+ if (usb_pipein (stalled_pipe))
+ endp |= USB_DIR_IN;
+
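+ /* Build a CLEAR_FEATURE(ENDPOINT_HALT) control request for the stalled endpoint */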
+ cr = &sc->work_cr;
+ cr->bRequestType = USB_RECIP_ENDPOINT;
+ cr->bRequest = USB_REQ_CLEAR_FEATURE;
+ cr->wValue = cpu_to_le16(USB_ENDPOINT_HALT);
+ cr->wIndex = cpu_to_le16(endp);
+ cr->wLength = cpu_to_le16(0);
+
+ UB_INIT_COMPLETION(sc->work_done);
+
+ usb_fill_control_urb(&sc->work_urb, sc->dev, sc->send_ctrl_pipe,
+ (unsigned char*) cr, NULL, 0, ub_urb_complete, sc);
+
+ if ((rc = usb_submit_urb(&sc->work_urb, GFP_ATOMIC)) != 0) {
+ ub_complete(&sc->work_done);
+ return rc;
+ }
+
+ sc->work_timer.expires = jiffies + UB_CTRL_TIMEOUT;
+ add_timer(&sc->work_timer);
+ return 0;
+}
+
+/*
+ */
+static void ub_top_sense_done(struct ub_dev *sc, struct ub_scsi_cmd *scmd)
+{
+ unsigned char *sense = sc->top_sense;
+ struct ub_scsi_cmd *cmd;
+
+ /*
+ * Find the command which triggered the unit attention or a check,
+ * save the sense into it, and advance its state machine.
+ */
+ if ((cmd = ub_cmdq_peek(sc)) == NULL) {
+ printk(KERN_WARNING "%s: sense done while idle\n", sc->name);
+ return;
+ }
+ if (cmd != scmd->back) {
+ printk(KERN_WARNING "%s: "
+ "sense done for wrong command 0x%x\n",
+ sc->name, cmd->tag);
+ return;
+ }
+ if (cmd->state != UB_CMDST_SENSE) {
+ printk(KERN_WARNING "%s: sense done with bad cmd state %d\n",
+ sc->name, cmd->state);
+ return;
+ }
+
+ /*
+ * Ignoring scmd->act_len, because the buffer was pre-zeroed.
+ */
+ cmd->key = sense[2] & 0x0F;
+ cmd->asc = sense[12];
+ cmd->ascq = sense[13];
+
+ ub_scsi_urb_compl(sc, cmd);
+}
+
+/*
+ * Reset management
+ */
+
+static void ub_reset_enter(struct ub_dev *sc, int try)
+{
+
+ if (sc->reset) {
+ /* This happens often on multi-LUN devices. */
+ return;
+ }
+ sc->reset = try + 1;
+
+#if 0 /* Not needed because the disconnect waits for us. */
+ unsigned long flags;
+ spin_lock_irqsave(&ub_lock, flags);
+ sc->openc++;
+ spin_unlock_irqrestore(&ub_lock, flags);
+#endif
+
+#if 0 /* We let them stop themselves. */
+ struct ub_lun *lun;
+ list_for_each_entry(lun, &sc->luns, link) {
+ blk_stop_queue(lun->disk->queue);
+ }
+#endif
+
+ schedule_work(&sc->reset_work);
+}
+
+static void ub_reset_task(struct work_struct *work)
+{
+ struct ub_dev *sc = container_of(work, struct ub_dev, reset_work);
+ unsigned long flags;
+ struct ub_lun *lun;
+ int rc;
+
+ if (!sc->reset) {
+ printk(KERN_WARNING "%s: Running reset unrequested\n",
+ sc->name);
+ return;
+ }
+
+ if (atomic_read(&sc->poison)) {
+ ;
+ } else if ((sc->reset & 1) == 0) {
+ ub_sync_reset(sc);
+ msleep(700); /* usb-storage sleeps 6s (!) */
+ ub_probe_clear_stall(sc, sc->recv_bulk_pipe);
+ ub_probe_clear_stall(sc, sc->send_bulk_pipe);
+ } else if (sc->dev->actconfig->desc.bNumInterfaces != 1) {
+ ;
+ } else {
+ rc = usb_lock_device_for_reset(sc->dev, sc->intf);
+ if (rc < 0) {
+ printk(KERN_NOTICE
+ "%s: usb_lock_device_for_reset failed (%d)\n",
+ sc->name, rc);
+ } else {
+ rc = usb_reset_device(sc->dev);
+ if (rc < 0) {
+ printk(KERN_NOTICE "%s: "
+ "usb_lock_device_for_reset failed (%d)\n",
+ sc->name, rc);
+ }
+ usb_unlock_device(sc->dev);
+ }
+ }
+
+ /*
+ * In theory, no commands can be running while reset is active,
+ * so nobody can ask for another reset, and so we do not need any
+ * queues of resets or anything. We do need a spinlock though,
+ * to interact with the block layer.
+ */
+ spin_lock_irqsave(sc->lock, flags);
+ sc->reset = 0;
+ tasklet_schedule(&sc->tasklet);
+ list_for_each_entry(lun, &sc->luns, link) {
+ blk_start_queue(lun->disk->queue);
+ }
+ wake_up(&sc->reset_wait);
+ spin_unlock_irqrestore(sc->lock, flags);
+}
+
+/*
+ * XXX Reset brackets are too much hassle to implement, so just stub them
+ * in order to prevent forced unbinding (which deadlocks solid when our
+ * ->disconnect method waits for the reset to complete and this kills keventd).
+ *
+ * XXX Tell Alan to move usb_unlock_device inside of usb_reset_device,
+ * or else the post_reset is invoked and restarts I/O on a locked device.
+ */
+static int ub_pre_reset(struct usb_interface *iface) {
+ return 0;
+}
+
+static int ub_post_reset(struct usb_interface *iface) {
+ return 0;
+}
+
+/*
+ * This is called from a process context.
+ */
+static void ub_revalidate(struct ub_dev *sc, struct ub_lun *lun)
+{
+
+ lun->readonly = 0; /* XXX Query this from the device */
+
+ lun->capacity.nsec = 0;
+ lun->capacity.bsize = 512;
+ lun->capacity.bshift = 0;
+
+ if (ub_sync_tur(sc, lun) != 0)
+ return; /* Not ready */
+ lun->changed = 0;
+
+ if (ub_sync_read_cap(sc, lun, &lun->capacity) != 0) {
+ /*
+ * The retry here means something is wrong, either with the
+ * device, with the transport, or with our code.
+ * We keep this because sd.c has retries for capacity.
+ */
+ if (ub_sync_read_cap(sc, lun, &lun->capacity) != 0) {
+ lun->capacity.nsec = 0;
+ lun->capacity.bsize = 512;
+ lun->capacity.bshift = 0;
+ }
+ }
+}
+
+/*
+ * The open function.
+ * This is mostly needed to keep refcounting, but also to support
+ * media checks on removable media drives.
+ */
+static int ub_bd_open(struct block_device *bdev, fmode_t mode)
+{
+ struct ub_lun *lun = bdev->bd_disk->private_data;
+ struct ub_dev *sc = lun->udev;
+ unsigned long flags;
+ int rc;
+
+ spin_lock_irqsave(&ub_lock, flags);
+ if (atomic_read(&sc->poison)) {
+ spin_unlock_irqrestore(&ub_lock, flags);
+ return -ENXIO;
+ }
+ sc->openc++;
+ spin_unlock_irqrestore(&ub_lock, flags);
+
+ if (lun->removable || lun->readonly)
+ check_disk_change(bdev);
+
+ /*
+ * sd.c considers ->media_present and ->changed not equivalent,
+ * under some pretty murky conditions (a failure of READ CAPACITY).
+ * We may need it one day.
+ */
+ if (lun->removable && lun->changed && !(mode & FMODE_NDELAY)) {
+ rc = -ENOMEDIUM;
+ goto err_open;
+ }
+
+ if (lun->readonly && (mode & FMODE_WRITE)) {
+ rc = -EROFS;
+ goto err_open;
+ }
+
+ return 0;
+
+err_open:
+ ub_put(sc);
+ return rc;
+}
+
+static int ub_bd_unlocked_open(struct block_device *bdev, fmode_t mode)
+{
+ int ret;
+
+ mutex_lock(&ub_mutex);
+ ret = ub_bd_open(bdev, mode);
+ mutex_unlock(&ub_mutex);
+
+ return ret;
+}
+
+
+/*
+ */
+static int ub_bd_release(struct gendisk *disk, fmode_t mode)
+{
+ struct ub_lun *lun = disk->private_data;
+ struct ub_dev *sc = lun->udev;
+
+ mutex_lock(&ub_mutex);
+ ub_put(sc);
+ mutex_unlock(&ub_mutex);
+
+ return 0;
+}
+
+/*
+ * The ioctl interface.
+ */
+static int ub_bd_ioctl(struct block_device *bdev, fmode_t mode,
+ unsigned int cmd, unsigned long arg)
+{
+ void __user *usermem = (void __user *) arg;
+ int ret;
+
+ mutex_lock(&ub_mutex);
+ ret = scsi_cmd_blk_ioctl(bdev, mode, cmd, usermem);
+ mutex_unlock(&ub_mutex);
+
+ return ret;
+}
+
+/*
+ * This is called by check_disk_change if we reported a media change.
+ * The main objective here is to discover the features of the media, such as
+ * the capacity, read-only status, etc. USB storage generally does not
+ * need to be spun up, but if we needed it, this would be the place.
+ *
+ * This call can sleep.
+ *
+ * The return code is not used.
+ */
+static int ub_bd_revalidate(struct gendisk *disk)
+{
+ struct ub_lun *lun = disk->private_data;
+
+ ub_revalidate(lun->udev, lun);
+
+ /* XXX Support sector size switching like in sr.c */
+ blk_queue_logical_block_size(disk->queue, lun->capacity.bsize);
+ set_capacity(disk, lun->capacity.nsec);
+ // set_disk_ro(sdkp->disk, lun->readonly);
+
+ return 0;
+}
+
+/*
+ * The check is called by the block layer to verify if the media
+ * is still available. It is supposed to be harmless, lightweight and
+ * non-intrusive in case the media was not changed.
+ *
+ * This call can sleep.
+ *
+ * The return code is a set of DISK_EVENT_* bits.
+ */
+static unsigned int ub_bd_check_events(struct gendisk *disk,
+ unsigned int clearing)
+{
+ struct ub_lun *lun = disk->private_data;
+
+ if (!lun->removable)
+ return 0;
+
+ /*
+ * We clear check conditions after every command, so this is not
+ * as dangerous as it looks. If TEST_UNIT_READY fails here,
+ * the device really is not ready and needs operator or software
+ * intervention. One dangerous case is a drive which spins itself
+ * down: come the time to write dirty pages, the write fails and
+ * the block layer discards the data. Since we never spin drives
+ * up, such devices simply cannot be used with ub anyway.
+ */
+ if (ub_sync_tur(lun->udev, lun) != 0) {
+ lun->changed = 1;
+ return DISK_EVENT_MEDIA_CHANGE;
+ }
+
+ return lun->changed ? DISK_EVENT_MEDIA_CHANGE : 0;
+}
+
+static const struct block_device_operations ub_bd_fops = {
+ .owner = THIS_MODULE,
+ .open = ub_bd_unlocked_open,
+ .release = ub_bd_release,
+ .ioctl = ub_bd_ioctl,
+ .check_events = ub_bd_check_events,
+ .revalidate_disk = ub_bd_revalidate,
+};
+
+/*
+ * Common ->done routine for commands executed synchronously.
+ */
+static void ub_probe_done(struct ub_dev *sc, struct ub_scsi_cmd *cmd)
+{
+ struct completion *cop = cmd->back;
+ complete(cop);
+}
+
+/*
+ * Test if the device has a check condition on it, synchronously.
+ */
+static int ub_sync_tur(struct ub_dev *sc, struct ub_lun *lun)
+{
+ struct ub_scsi_cmd *cmd;
+ enum { ALLOC_SIZE = sizeof(struct ub_scsi_cmd) };
+ unsigned long flags;
+ struct completion compl;
+ int rc;
+
+ init_completion(&compl);
+
+ rc = -ENOMEM;
+ if ((cmd = kzalloc(ALLOC_SIZE, GFP_KERNEL)) == NULL)
+ goto err_alloc;
+
+ cmd->cdb[0] = TEST_UNIT_READY;
+ cmd->cdb_len = 6;
+ cmd->dir = UB_DIR_NONE;
+ cmd->state = UB_CMDST_INIT;
+ cmd->lun = lun; /* This may be NULL, but that's ok */
+ cmd->done = ub_probe_done;
+ cmd->back = &compl;
+
+ spin_lock_irqsave(sc->lock, flags);
+ cmd->tag = sc->tagcnt++;
+
+ rc = ub_submit_scsi(sc, cmd);
+ spin_unlock_irqrestore(sc->lock, flags);
+
+ if (rc != 0)
+ goto err_submit;
+
+ wait_for_completion(&compl);
+
+ rc = cmd->error;
+
+ if (rc == -EIO && cmd->key != 0) /* Retries for benh's key */
+ rc = cmd->key;
+
+err_submit:
+ kfree(cmd);
+err_alloc:
+ return rc;
+}
+
+/*
+ * Read the SCSI capacity synchronously (for probing).
+ */
+static int ub_sync_read_cap(struct ub_dev *sc, struct ub_lun *lun,
+ struct ub_capacity *ret)
+{
+ struct ub_scsi_cmd *cmd;
+ struct scatterlist *sg;
+ char *p;
+ enum { ALLOC_SIZE = sizeof(struct ub_scsi_cmd) + 8 };
+ unsigned long flags;
+ unsigned int bsize, shift;
+ unsigned long nsec;
+ struct completion compl;
+ int rc;
+
+ init_completion(&compl);
+
+ rc = -ENOMEM;
+ if ((cmd = kzalloc(ALLOC_SIZE, GFP_KERNEL)) == NULL)
+ goto err_alloc;
+ p = (char *)cmd + sizeof(struct ub_scsi_cmd);
+
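+ /* READ CAPACITY(10) */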
+ cmd->cdb[0] = 0x25;
+ cmd->cdb_len = 10;
+ cmd->dir = UB_DIR_READ;
+ cmd->state = UB_CMDST_INIT;
+ cmd->nsg = 1;
+ sg = &cmd->sgv[0];
+ sg_init_table(sg, UB_MAX_REQ_SG);
+ sg_set_page(sg, virt_to_page(p), 8, (unsigned long)p & (PAGE_SIZE-1));
+ cmd->len = 8;
+ cmd->lun = lun;
+ cmd->done = ub_probe_done;
+ cmd->back = &compl;
+
+ spin_lock_irqsave(sc->lock, flags);
+ cmd->tag = sc->tagcnt++;
+
+ rc = ub_submit_scsi(sc, cmd);
+ spin_unlock_irqrestore(sc->lock, flags);
+
+ if (rc != 0)
+ goto err_submit;
+
+ wait_for_completion(&compl);
+
+ if (cmd->error != 0) {
+ rc = -EIO;
+ goto err_read;
+ }
+ if (cmd->act_len != 8) {
+ rc = -EIO;
+ goto err_read;
+ }
+
+ /* sd.c special-cases sector size of 0 to mean 512. Needed? Safe? */
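+ /* The reply carries the last LBA followed by the block size, both big-endian; nsec is kept in 512-byte units */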
+ nsec = be32_to_cpu(*(__be32 *)p) + 1;
+ bsize = be32_to_cpu(*(__be32 *)(p + 4));
+ switch (bsize) {
+ case 512: shift = 0; break;
+ case 1024: shift = 1; break;
+ case 2048: shift = 2; break;
+ case 4096: shift = 3; break;
+ default:
+ rc = -EDOM;
+ goto err_inv_bsize;
+ }
+
+ ret->bsize = bsize;
+ ret->bshift = shift;
+ ret->nsec = nsec << shift;
+ rc = 0;
+
+err_inv_bsize:
+err_read:
+err_submit:
+ kfree(cmd);
+err_alloc:
+ return rc;
+}
+
+/*
+ */
+static void ub_probe_urb_complete(struct urb *urb)
+{
+ struct completion *cop = urb->context;
+ complete(cop);
+}
+
+static void ub_probe_timeout(unsigned long arg)
+{
+ struct completion *cop = (struct completion *) arg;
+ complete(cop);
+}
+
+/*
+ * Reset with a Bulk reset.
+ */
+static int ub_sync_reset(struct ub_dev *sc)
+{
+ int ifnum = sc->intf->cur_altsetting->desc.bInterfaceNumber;
+ struct usb_ctrlrequest *cr;
+ struct completion compl;
+ struct timer_list timer;
+ int rc;
+
+ init_completion(&compl);
+
+ cr = &sc->work_cr;
+ cr->bRequestType = USB_TYPE_CLASS | USB_RECIP_INTERFACE;
+ cr->bRequest = US_BULK_RESET_REQUEST;
+ cr->wValue = cpu_to_le16(0);
+ cr->wIndex = cpu_to_le16(ifnum);
+ cr->wLength = cpu_to_le16(0);
+
+ usb_fill_control_urb(&sc->work_urb, sc->dev, sc->send_ctrl_pipe,
+ (unsigned char*) cr, NULL, 0, ub_probe_urb_complete, &compl);
+
+ if ((rc = usb_submit_urb(&sc->work_urb, GFP_KERNEL)) != 0) {
+ printk(KERN_WARNING
+ "%s: Unable to submit a bulk reset (%d)\n", sc->name, rc);
+ return rc;
+ }
+
+ init_timer(&timer);
+ timer.function = ub_probe_timeout;
+ timer.data = (unsigned long) &compl;
+ timer.expires = jiffies + UB_CTRL_TIMEOUT;
+ add_timer(&timer);
+
+ wait_for_completion(&compl);
+
+ del_timer_sync(&timer);
+ usb_kill_urb(&sc->work_urb);
+
+ return sc->work_urb.status;
+}
+
+/*
+ * Get number of LUNs by the way of Bulk GetMaxLUN command.
+ */
+static int ub_sync_getmaxlun(struct ub_dev *sc)
+{
+ int ifnum = sc->intf->cur_altsetting->desc.bInterfaceNumber;
+ unsigned char *p;
+ enum { ALLOC_SIZE = 1 };
+ struct usb_ctrlrequest *cr;
+ struct completion compl;
+ struct timer_list timer;
+ int nluns;
+ int rc;
+
+ init_completion(&compl);
+
+ rc = -ENOMEM;
+ if ((p = kmalloc(ALLOC_SIZE, GFP_KERNEL)) == NULL)
+ goto err_alloc;
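+ /* Pre-fill the buffer with a sentinel; if the device does not overwrite it, fall back to a single LUN */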
+ *p = 55;
+
+ cr = &sc->work_cr;
+ cr->bRequestType = USB_DIR_IN | USB_TYPE_CLASS | USB_RECIP_INTERFACE;
+ cr->bRequest = US_BULK_GET_MAX_LUN;
+ cr->wValue = cpu_to_le16(0);
+ cr->wIndex = cpu_to_le16(ifnum);
+ cr->wLength = cpu_to_le16(1);
+
+ usb_fill_control_urb(&sc->work_urb, sc->dev, sc->recv_ctrl_pipe,
+ (unsigned char*) cr, p, 1, ub_probe_urb_complete, &compl);
+
+ if ((rc = usb_submit_urb(&sc->work_urb, GFP_KERNEL)) != 0)
+ goto err_submit;
+
+ init_timer(&timer);
+ timer.function = ub_probe_timeout;
+ timer.data = (unsigned long) &compl;
+ timer.expires = jiffies + UB_CTRL_TIMEOUT;
+ add_timer(&timer);
+
+ wait_for_completion(&compl);
+
+ del_timer_sync(&timer);
+ usb_kill_urb(&sc->work_urb);
+
+ if ((rc = sc->work_urb.status) < 0)
+ goto err_io;
+
+ if (sc->work_urb.actual_length != 1) {
+ nluns = 0;
+ } else {
+ if ((nluns = *p) == 55) {
+ nluns = 0;
+ } else {
+ /* GetMaxLUN returns the maximum LUN number */
+ nluns += 1;
+ if (nluns > UB_MAX_LUNS)
+ nluns = UB_MAX_LUNS;
+ }
+ }
+
+ kfree(p);
+ return nluns;
+
+err_io:
+err_submit:
+ kfree(p);
+err_alloc:
+ return rc;
+}
+
+/*
+ * Clear initial stalls.
+ */
+static int ub_probe_clear_stall(struct ub_dev *sc, int stalled_pipe)
+{
+ int endp;
+ struct usb_ctrlrequest *cr;
+ struct completion compl;
+ struct timer_list timer;
+ int rc;
+
+ init_completion(&compl);
+
+ endp = usb_pipeendpoint(stalled_pipe);
+ if (usb_pipein (stalled_pipe))
+ endp |= USB_DIR_IN;
+
+ cr = &sc->work_cr;
+ cr->bRequestType = USB_RECIP_ENDPOINT;
+ cr->bRequest = USB_REQ_CLEAR_FEATURE;
+ cr->wValue = cpu_to_le16(USB_ENDPOINT_HALT);
+ cr->wIndex = cpu_to_le16(endp);
+ cr->wLength = cpu_to_le16(0);
+
+ usb_fill_control_urb(&sc->work_urb, sc->dev, sc->send_ctrl_pipe,
+ (unsigned char*) cr, NULL, 0, ub_probe_urb_complete, &compl);
+
+ if ((rc = usb_submit_urb(&sc->work_urb, GFP_KERNEL)) != 0) {
+ printk(KERN_WARNING
+ "%s: Unable to submit a probe clear (%d)\n", sc->name, rc);
+ return rc;
+ }
+
+ init_timer(&timer);
+ timer.function = ub_probe_timeout;
+ timer.data = (unsigned long) &compl;
+ timer.expires = jiffies + UB_CTRL_TIMEOUT;
+ add_timer(&timer);
+
+ wait_for_completion(&compl);
+
+ del_timer_sync(&timer);
+ usb_kill_urb(&sc->work_urb);
+
+ usb_reset_endpoint(sc->dev, endp);
+
+ return 0;
+}
+
+/*
+ * Get the pipe settings.
+ */
+static int ub_get_pipes(struct ub_dev *sc, struct usb_device *dev,
+ struct usb_interface *intf)
+{
+ struct usb_host_interface *altsetting = intf->cur_altsetting;
+ struct usb_endpoint_descriptor *ep_in = NULL;
+ struct usb_endpoint_descriptor *ep_out = NULL;
+ struct usb_endpoint_descriptor *ep;
+ int i;
+
+ /*
+ * Find the endpoints we need.
+ * We are expecting a minimum of 2 endpoints - in and out (bulk).
+ * We will ignore any others.
+ */
+ for (i = 0; i < altsetting->desc.bNumEndpoints; i++) {
+ ep = &altsetting->endpoint[i].desc;
+
+ /* Is it a BULK endpoint? */
+ if (usb_endpoint_xfer_bulk(ep)) {
+ /* BULK in or out? */
+ if (usb_endpoint_dir_in(ep)) {
+ if (ep_in == NULL)
+ ep_in = ep;
+ } else {
+ if (ep_out == NULL)
+ ep_out = ep;
+ }
+ }
+ }
+
+ if (ep_in == NULL || ep_out == NULL) {
+ printk(KERN_NOTICE "%s: failed endpoint check\n", sc->name);
+ return -ENODEV;
+ }
+
+ /* Calculate and store the pipe values */
+ sc->send_ctrl_pipe = usb_sndctrlpipe(dev, 0);
+ sc->recv_ctrl_pipe = usb_rcvctrlpipe(dev, 0);
+ sc->send_bulk_pipe = usb_sndbulkpipe(dev,
+ usb_endpoint_num(ep_out));
+ sc->recv_bulk_pipe = usb_rcvbulkpipe(dev,
+ usb_endpoint_num(ep_in));
+
+ return 0;
+}
+
+/*
+ * Probing is done in the process context, which allows us to cheat
+ * and not to build a state machine for the discovery.
+ */
+static int ub_probe(struct usb_interface *intf,
+ const struct usb_device_id *dev_id)
+{
+ struct ub_dev *sc;
+ int nluns;
+ int rc;
+ int i;
+
+ if (usb_usual_check_type(dev_id, USB_US_TYPE_UB))
+ return -ENXIO;
+
+ rc = -ENOMEM;
+ if ((sc = kzalloc(sizeof(struct ub_dev), GFP_KERNEL)) == NULL)
+ goto err_core;
+ sc->lock = ub_next_lock();
+ INIT_LIST_HEAD(&sc->luns);
+ usb_init_urb(&sc->work_urb);
+ tasklet_init(&sc->tasklet, ub_scsi_action, (unsigned long)sc);
+ atomic_set(&sc->poison, 0);
+ INIT_WORK(&sc->reset_work, ub_reset_task);
+ init_waitqueue_head(&sc->reset_wait);
+
+ init_timer(&sc->work_timer);
+ sc->work_timer.data = (unsigned long) sc;
+ sc->work_timer.function = ub_urb_timeout;
+
+ ub_init_completion(&sc->work_done);
+ sc->work_done.done = 1; /* A little yuk, but oh well... */
+
+ sc->dev = interface_to_usbdev(intf);
+ sc->intf = intf;
+ // sc->ifnum = intf->cur_altsetting->desc.bInterfaceNumber;
+ usb_set_intfdata(intf, sc);
+ usb_get_dev(sc->dev);
+ /*
+ * Since we give the interface struct to the block level through
+ * disk->driverfs_dev, we have to pin it. Otherwise, block_uevent
+ * oopses on close after a disconnect (kernels 2.6.16 and up).
+ */
+ usb_get_intf(sc->intf);
+
+ snprintf(sc->name, 12, DRV_NAME "(%d.%d)",
+ sc->dev->bus->busnum, sc->dev->devnum);
+
+ /* XXX Verify that we can handle the device (from descriptors) */
+
+ if (ub_get_pipes(sc, sc->dev, intf) != 0)
+ goto err_dev_desc;
+
+ /*
+ * At this point, all USB initialization is done, do upper layer.
+ * We really hate halfway initialized structures, so from the
+ * invariants perspective, this ub_dev is fully constructed at
+ * this point.
+ */
+
+ /*
+ * This is needed to clear toggles. It is a problem only if we do
+ * `rmmod ub && modprobe ub` without disconnects, but we like that.
+ */
+#if 0 /* iPod Mini fails if we do this (big white iPod works) */
+ ub_probe_clear_stall(sc, sc->recv_bulk_pipe);
+ ub_probe_clear_stall(sc, sc->send_bulk_pipe);
+#endif
+
+ /*
+ * The way this is used by the startup code is a little specific.
+ * A SCSI check causes a USB stall. Our common case code sees it
+ * and clears the check, after which the device is ready for use.
+ * But if a check was not present, any command other than
+ * TEST_UNIT_READY ends with a lockup (including REQUEST_SENSE).
+ *
+ * If we neglect to clear the SCSI check, the first real command fails
+ * (which is the capacity readout). We clear that and retry, but why
+ * cause spurious retries for no reason?
+ *
+ * Revalidation may start with its own TEST_UNIT_READY, but that one
+ * has to succeed, so we clear checks with an additional one here.
+ * In any case it's not our business how revalidation is implemented.
+ */
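+ /* A return of 0x6 is the UNIT ATTENTION sense key; give the device a few tries to clear it */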
+ for (i = 0; i < 3; i++) { /* Retries for the schwag key from KS'04 */
+ if ((rc = ub_sync_tur(sc, NULL)) <= 0) break;
+ if (rc != 0x6) break;
+ msleep(10);
+ }
+
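+ /* Default to a single LUN; GetMaxLUN may report more, and a return of 0 means "try again" */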
+ nluns = 1;
+ for (i = 0; i < 3; i++) {
+ if ((rc = ub_sync_getmaxlun(sc)) < 0)
+ break;
+ if (rc != 0) {
+ nluns = rc;
+ break;
+ }
+ msleep(100);
+ }
+
+ for (i = 0; i < nluns; i++) {
+ ub_probe_lun(sc, i);
+ }
+ return 0;
+
+err_dev_desc:
+ usb_set_intfdata(intf, NULL);
+ usb_put_intf(sc->intf);
+ usb_put_dev(sc->dev);
+ kfree(sc);
+err_core:
+ return rc;
+}
+
+static int ub_probe_lun(struct ub_dev *sc, int lnum)
+{
+ struct ub_lun *lun;
+ struct request_queue *q;
+ struct gendisk *disk;
+ int rc;
+
+ rc = -ENOMEM;
+ if ((lun = kzalloc(sizeof(struct ub_lun), GFP_KERNEL)) == NULL)
+ goto err_alloc;
+ lun->num = lnum;
+
+ rc = -ENOSR;
+ if ((lun->id = ub_id_get()) == -1)
+ goto err_id;
+
+ lun->udev = sc;
+
+ snprintf(lun->name, 16, DRV_NAME "%c(%d.%d.%d)",
+ lun->id + 'a', sc->dev->bus->busnum, sc->dev->devnum, lun->num);
+
+ lun->removable = 1; /* XXX Query this from the device */
+ lun->changed = 1; /* ub_revalidate clears only */
+ ub_revalidate(sc, lun);
+
+ rc = -ENOMEM;
+ if ((disk = alloc_disk(UB_PARTS_PER_LUN)) == NULL)
+ goto err_diskalloc;
+
+ sprintf(disk->disk_name, DRV_NAME "%c", lun->id + 'a');
+ disk->major = UB_MAJOR;
+ disk->first_minor = lun->id * UB_PARTS_PER_LUN;
+ disk->fops = &ub_bd_fops;
+ disk->private_data = lun;
+ disk->driverfs_dev = &sc->intf->dev;
+
+ rc = -ENOMEM;
+ if ((q = blk_init_queue(ub_request_fn, sc->lock)) == NULL)
+ goto err_blkqinit;
+
+ disk->queue = q;
+
+ blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH);
+ blk_queue_max_segments(q, UB_MAX_REQ_SG);
+ blk_queue_segment_boundary(q, 0xffffffff); /* Dubious. */
+ blk_queue_max_hw_sectors(q, UB_MAX_SECTORS);
+ blk_queue_logical_block_size(q, lun->capacity.bsize);
+
+ lun->disk = disk;
+ q->queuedata = lun;
+ list_add(&lun->link, &sc->luns);
+
+ set_capacity(disk, lun->capacity.nsec);
+ if (lun->removable)
+ disk->flags |= GENHD_FL_REMOVABLE;
+
+ add_disk(disk);
+
+ return 0;
+
+err_blkqinit:
+ put_disk(disk);
+err_diskalloc:
+ ub_id_put(lun->id);
+err_id:
+ kfree(lun);
+err_alloc:
+ return rc;
+}
+
+static void ub_disconnect(struct usb_interface *intf)
+{
+ struct ub_dev *sc = usb_get_intfdata(intf);
+ struct ub_lun *lun;
+ unsigned long flags;
+
+ /*
+ * Prevent ub_bd_release from pulling the rug from under us.
+ * XXX This is starting to look like a kref.
+ * XXX Why not to take this ref at probe time?
+ */
+ spin_lock_irqsave(&ub_lock, flags);
+ sc->openc++;
+ spin_unlock_irqrestore(&ub_lock, flags);
+
+ /*
+ * Fence stall clearings, operations triggered by unlinkings and so on.
+ * We do not attempt to unlink any URBs, because we do not trust the
+ * unlink paths in HC drivers. Also, we get -84 upon disconnect anyway.
+ */
+ atomic_set(&sc->poison, 1);
+
+ /*
+ * Wait for reset to end, if any.
+ */
+ wait_event(sc->reset_wait, !sc->reset);
+
+ /*
+ * Blow away queued commands.
+ *
+ * Actually, this never works, because before we get here
+ * the HCD terminates outstanding URB(s). It causes our
+ * SCSI command queue to advance, commands fail to submit,
+ * and the whole queue drains. So, we just use this code to
+ * print warnings.
+ */
+ spin_lock_irqsave(sc->lock, flags);
+ {
+ struct ub_scsi_cmd *cmd;
+ int cnt = 0;
+ while ((cmd = ub_cmdq_peek(sc)) != NULL) {
+ cmd->error = -ENOTCONN;
+ cmd->state = UB_CMDST_DONE;
+ ub_cmdq_pop(sc);
+ (*cmd->done)(sc, cmd);
+ cnt++;
+ }
+ if (cnt != 0) {
+ printk(KERN_WARNING "%s: "
+ "%d was queued after shutdown\n", sc->name, cnt);
+ }
+ }
+ spin_unlock_irqrestore(sc->lock, flags);
+
+ /*
+ * Unregister the upper layer.
+ */
+ list_for_each_entry(lun, &sc->luns, link) {
+ del_gendisk(lun->disk);
+ /*
+ * I wish I could do:
+ * queue_flag_set(QUEUE_FLAG_DEAD, q);
+ * As it is, we rely on our internal poisoning and let
+ * the upper levels spin furiously, failing all the I/O.
+ */
+ }
+
+ /*
+ * Testing for -EINPROGRESS is always a bug, so we are bending
+ * the rules a little.
+ */
+ spin_lock_irqsave(sc->lock, flags);
+ if (sc->work_urb.status == -EINPROGRESS) { /* janitors: ignore */
+ printk(KERN_WARNING "%s: "
+ "URB is active after disconnect\n", sc->name);
+ }
+ spin_unlock_irqrestore(sc->lock, flags);
+
+ /*
+ * There is virtually no chance that another CPU runs a timeout so long
+ * after ub_urb_complete should have called del_timer, but only if HCD
+ * didn't forget to deliver a callback on unlink.
+ */
+ del_timer_sync(&sc->work_timer);
+
+ /*
+ * At this point there must be no commands coming from anyone
+ * and no URBs left in transit.
+ */
+
+ ub_put(sc);
+}
+
+static struct usb_driver ub_driver = {
+ .name = "ub",
+ .probe = ub_probe,
+ .disconnect = ub_disconnect,
+ .id_table = ub_usb_ids,
+ .pre_reset = ub_pre_reset,
+ .post_reset = ub_post_reset,
+};
+
+static int __init ub_init(void)
+{
+ int rc;
+ int i;
+
+ for (i = 0; i < UB_QLOCK_NUM; i++)
+ spin_lock_init(&ub_qlockv[i]);
+
+ if ((rc = register_blkdev(UB_MAJOR, DRV_NAME)) != 0)
+ goto err_regblkdev;
+
+ if ((rc = usb_register(&ub_driver)) != 0)
+ goto err_register;
+
+ usb_usual_set_present(USB_US_TYPE_UB);
+ return 0;
+
+err_register:
+ unregister_blkdev(UB_MAJOR, DRV_NAME);
+err_regblkdev:
+ return rc;
+}
+
+static void __exit ub_exit(void)
+{
+ usb_deregister(&ub_driver);
+
+ unregister_blkdev(UB_MAJOR, DRV_NAME);
+ usb_usual_clear_present(USB_US_TYPE_UB);
+}
+
+module_init(ub_init);
+module_exit(ub_exit);
+
+MODULE_LICENSE("GPL");
diff --git a/drivers/block/umem.c b/drivers/block/umem.c
new file mode 100644
index 00000000..031ca720
--- /dev/null
+++ b/drivers/block/umem.c
@@ -0,0 +1,1123 @@
+/*
+ * mm.c - Micro Memory(tm) PCI memory board block device driver - v2.3
+ *
+ * (C) 2001 San Mehat <nettwerk@valinux.com>
+ * (C) 2001 Johannes Erdfelt <jerdfelt@valinux.com>
+ * (C) 2001 NeilBrown <neilb@cse.unsw.edu.au>
+ *
+ * This driver for the Micro Memory PCI Memory Module with Battery Backup
+ * is Copyright Micro Memory Inc 2001-2002. All rights reserved.
+ *
+ * This driver is released to the public under the terms of the
+ * GNU GENERAL PUBLIC LICENSE version 2
+ * See the file COPYING for details.
+ *
+ * This driver provides a standard block device interface for Micro Memory(tm)
+ * PCI based RAM boards.
+ * 10/05/01: Phap Nguyen - Rebuilt the driver
+ * 10/22/01: Phap Nguyen - v2.1 Added disk partitioning
+ * 29oct2001:NeilBrown - Use make_request_fn instead of request_fn
+ * - use standard disk partitioning (so fdisk works).
+ * 08nov2001:NeilBrown - change driver name from "mm" to "umem"
+ * - incorporate into main kernel
+ * 08apr2002:NeilBrown - Move some of interrupt handle to tasklet
+ * - use spin_lock_bh instead of _irq
+ * - Never block on make_request. queue
+ * bh's instead.
+ * - unregister umem from devfs at mod unload
+ * - Change version to 2.3
+ * 07Nov2001:Phap Nguyen - Select pci read command: 06, 12, 15 (Decimal)
+ * 07Jan2002: P. Nguyen - Used PCI Memory Write & Invalidate for DMA
+ * 15May2002:NeilBrown - convert to bio for 2.5
+ * 17May2002:NeilBrown - remove init_mem initialisation. Instead detect
+ * - a sequence of writes that cover the card, and
+ * - set initialised bit then.
+ */
+
+#undef DEBUG /* #define DEBUG if you want debugging info (pr_debug) */
+#include <linux/fs.h>
+#include <linux/bio.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/mman.h>
+#include <linux/gfp.h>
+#include <linux/ioctl.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/timer.h>
+#include <linux/pci.h>
+#include <linux/dma-mapping.h>
+
+#include <linux/fcntl.h> /* O_ACCMODE */
+#include <linux/hdreg.h> /* HDIO_GETGEO */
+
+#include "umem.h"
+
+#include <asm/uaccess.h>
+#include <asm/io.h>
+
+#define MM_MAXCARDS 4
+#define MM_RAHEAD 2 /* two sectors */
+#define MM_BLKSIZE 1024 /* 1k blocks */
+#define MM_HARDSECT 512 /* 512-byte hardware sectors */
+#define MM_SHIFT 6 /* max 64 partitions on 4 cards */
+
+/*
+ * Version Information
+ */
+
+#define DRIVER_NAME "umem"
+#define DRIVER_VERSION "v2.3"
+#define DRIVER_AUTHOR "San Mehat, Johannes Erdfelt, NeilBrown"
+#define DRIVER_DESC "Micro Memory(tm) PCI memory board block driver"
+
+static int debug;
+/* #define HW_TRACE(x) writeb(x,cards[0].csr_remap + MEMCTRLSTATUS_MAGIC) */
+#define HW_TRACE(x)
+
+#define DEBUG_LED_ON_TRANSFER 0x01
+#define DEBUG_BATTERY_POLLING 0x02
+
+module_param(debug, int, 0644);
+MODULE_PARM_DESC(debug, "Debug bitmask");
+
+static int pci_read_cmd = 0x0C; /* Read Multiple */
+module_param(pci_read_cmd, int, 0);
+MODULE_PARM_DESC(pci_read_cmd, "PCI read command");
+
+static int pci_write_cmd = 0x0F; /* Write and Invalidate */
+module_param(pci_write_cmd, int, 0);
+MODULE_PARM_DESC(pci_write_cmd, "PCI write command");
+
+static int pci_cmds;
+
+static int major_nr;
+
+#include <linux/blkdev.h>
+#include <linux/blkpg.h>
+
+struct cardinfo {
+ struct pci_dev *dev;
+
+ unsigned char __iomem *csr_remap;
+ unsigned int mm_size; /* size in kbytes */
+
+ unsigned int init_size; /* initial segment, in sectors,
+ * that we know to
+ * have been written
+ */
+ struct bio *bio, *currentbio, **biotail;
+ int current_idx;
+ sector_t current_sector;
+
+ struct request_queue *queue;
+
+ struct mm_page {
+ dma_addr_t page_dma;
+ struct mm_dma_desc *desc;
+ int cnt, headcnt;
+ struct bio *bio, **biotail;
+ int idx;
+ } mm_pages[2];
+#define DESC_PER_PAGE ((PAGE_SIZE*2)/sizeof(struct mm_dma_desc))
+
+ int Active, Ready;
+
+ struct tasklet_struct tasklet;
+ unsigned int dma_status;
+
+ struct {
+ int good;
+ int warned;
+ unsigned long last_change;
+ } battery[2];
+
+ spinlock_t lock;
+ int check_batteries;
+
+ int flags;
+};
+
+static struct cardinfo cards[MM_MAXCARDS];
+static struct timer_list battery_timer;
+
+static int num_cards;
+
+static struct gendisk *mm_gendisk[MM_MAXCARDS];
+
+static void check_batteries(struct cardinfo *card);
+
+static int get_userbit(struct cardinfo *card, int bit)
+{
+ unsigned char led;
+
+ led = readb(card->csr_remap + MEMCTRLCMD_LEDCTRL);
+ return led & bit;
+}
+
+static int set_userbit(struct cardinfo *card, int bit, unsigned char state)
+{
+ unsigned char led;
+
+ led = readb(card->csr_remap + MEMCTRLCMD_LEDCTRL);
+ if (state)
+ led |= bit;
+ else
+ led &= ~bit;
+ writeb(led, card->csr_remap + MEMCTRLCMD_LEDCTRL);
+
+ return 0;
+}
+
+/*
+ * NOTE: For the power LED, use the LED_POWER_* macros since they differ
+ */
+static void set_led(struct cardinfo *card, int shift, unsigned char state)
+{
+ unsigned char led;
+
+ led = readb(card->csr_remap + MEMCTRLCMD_LEDCTRL);
+ if (state == LED_FLIP)
+ led ^= (1<<shift);
+ else {
+ led &= ~(0x03 << shift);
+ led |= (state << shift);
+ }
+ writeb(led, card->csr_remap + MEMCTRLCMD_LEDCTRL);
+
+}
+
+#ifdef MM_DIAG
+static void dump_regs(struct cardinfo *card)
+{
+ unsigned char *p;
+ int i, i1;
+
+ p = card->csr_remap;
+ for (i = 0; i < 8; i++) {
+ printk(KERN_DEBUG "%p ", p);
+
+ for (i1 = 0; i1 < 16; i1++)
+ printk("%02x ", *p++);
+
+ printk("\n");
+ }
+}
+#endif
+
+static void dump_dmastat(struct cardinfo *card, unsigned int dmastat)
+{
+ dev_printk(KERN_DEBUG, &card->dev->dev, "DMAstat - ");
+ if (dmastat & DMASCR_ANY_ERR)
+ printk(KERN_CONT "ANY_ERR ");
+ if (dmastat & DMASCR_MBE_ERR)
+ printk(KERN_CONT "MBE_ERR ");
+ if (dmastat & DMASCR_PARITY_ERR_REP)
+ printk(KERN_CONT "PARITY_ERR_REP ");
+ if (dmastat & DMASCR_PARITY_ERR_DET)
+ printk(KERN_CONT "PARITY_ERR_DET ");
+ if (dmastat & DMASCR_SYSTEM_ERR_SIG)
+ printk(KERN_CONT "SYSTEM_ERR_SIG ");
+ if (dmastat & DMASCR_TARGET_ABT)
+ printk(KERN_CONT "TARGET_ABT ");
+ if (dmastat & DMASCR_MASTER_ABT)
+ printk(KERN_CONT "MASTER_ABT ");
+ if (dmastat & DMASCR_CHAIN_COMPLETE)
+ printk(KERN_CONT "CHAIN_COMPLETE ");
+ if (dmastat & DMASCR_DMA_COMPLETE)
+ printk(KERN_CONT "DMA_COMPLETE ");
+ printk("\n");
+}
+
+/*
+ * Theory of request handling
+ *
+ * Each bio is assigned to one mm_dma_desc - which may not be enough FIXME
+ * We have two pages of mm_dma_desc, holding about 64 descriptors
+ * each. These are allocated at init time.
+ * One page is "Ready" and is either full, or can have request added.
+ * The other page might be "Active", which DMA is happening on it.
+ *
+ * Whenever IO on the active page completes, the Ready page is activated
+ * and the ex-Active page is clean out and made Ready.
+ * Otherwise the Ready page is only activated when it becomes full.
+ *
+ * If a request arrives while both pages a full, it is queued, and b_rdev is
+ * overloaded to record whether it was a read or a write.
+ *
+ * The interrupt handler only polls the device to clear the interrupt.
+ * The processing of the result is done in a tasklet.
+ */
+
+static void mm_start_io(struct cardinfo *card)
+{
+ /* we have the lock, we know there is
+ * no IO active, and we know that card->Active
+ * is set
+ */
+ struct mm_dma_desc *desc;
+ struct mm_page *page;
+ int offset;
+
+ /* make the last descriptor end the chain */
+ page = &card->mm_pages[card->Active];
+ pr_debug("start_io: %d %d->%d\n",
+ card->Active, page->headcnt, page->cnt - 1);
+ desc = &page->desc[page->cnt-1];
+
+ desc->control_bits |= cpu_to_le32(DMASCR_CHAIN_COMP_EN);
+ desc->control_bits &= ~cpu_to_le32(DMASCR_CHAIN_EN);
+ desc->sem_control_bits = desc->control_bits;
+
+
+ if (debug & DEBUG_LED_ON_TRANSFER)
+ set_led(card, LED_REMOVE, LED_ON);
+
+ desc = &page->desc[page->headcnt];
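+ /* Clear the direct DMA address/size registers; the chained descriptors carry the real values */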
+ writel(0, card->csr_remap + DMA_PCI_ADDR);
+ writel(0, card->csr_remap + DMA_PCI_ADDR + 4);
+
+ writel(0, card->csr_remap + DMA_LOCAL_ADDR);
+ writel(0, card->csr_remap + DMA_LOCAL_ADDR + 4);
+
+ writel(0, card->csr_remap + DMA_TRANSFER_SIZE);
+ writel(0, card->csr_remap + DMA_TRANSFER_SIZE + 4);
+
+ writel(0, card->csr_remap + DMA_SEMAPHORE_ADDR);
+ writel(0, card->csr_remap + DMA_SEMAPHORE_ADDR + 4);
+
+ offset = ((char *)desc) - ((char *)page->desc);
+ writel(cpu_to_le32((page->page_dma+offset) & 0xffffffff),
+ card->csr_remap + DMA_DESCRIPTOR_ADDR);
+ /* Force the value to u64 before shifting otherwise >> 32 is undefined C
+ * and on some ports will do nothing ! */
+ writel(cpu_to_le32(((u64)page->page_dma)>>32),
+ card->csr_remap + DMA_DESCRIPTOR_ADDR + 4);
+
+ /* Go, go, go */
+ writel(cpu_to_le32(DMASCR_GO | DMASCR_CHAIN_EN | pci_cmds),
+ card->csr_remap + DMA_STATUS_CTRL);
+}
+
+static int add_bio(struct cardinfo *card);
+
+static void activate(struct cardinfo *card)
+{
+ /* If no page is Active, and the Ready page is
+ * not empty, then switch the Ready page
+ * to Active and start IO.
+ * Then add any bios that are available to Ready.
+ */
+
+ do {
+ while (add_bio(card))
+ ;
+
+ if (card->Active == -1 &&
+ card->mm_pages[card->Ready].cnt > 0) {
+ card->Active = card->Ready;
+ card->Ready = 1-card->Ready;
+ mm_start_io(card);
+ }
+
+ } while (card->Active == -1 && add_bio(card));
+}
+
+static inline void reset_page(struct mm_page *page)
+{
+ page->cnt = 0;
+ page->headcnt = 0;
+ page->bio = NULL;
+ page->biotail = &page->bio;
+}
+
+/*
+ * If there is room on the Ready page, pull the next bio off the list
+ * or add one segment of the current bio to the page.
+ * Return 1 if anything was added, else 0.
+ */
+static int add_bio(struct cardinfo *card)
+{
+ struct mm_page *p;
+ struct mm_dma_desc *desc;
+ dma_addr_t dma_handle;
+ int offset;
+ struct bio *bio;
+ struct bio_vec *vec;
+ int idx;
+ int rw;
+ int len;
+
+ bio = card->currentbio;
+ if (!bio && card->bio) {
+ card->currentbio = card->bio;
+ card->current_idx = card->bio->bi_idx;
+ card->current_sector = card->bio->bi_sector;
+ card->bio = card->bio->bi_next;
+ if (card->bio == NULL)
+ card->biotail = &card->bio;
+ card->currentbio->bi_next = NULL;
+ return 1;
+ }
+ if (!bio)
+ return 0;
+ idx = card->current_idx;
+
+ rw = bio_rw(bio);
+ if (card->mm_pages[card->Ready].cnt >= DESC_PER_PAGE)
+ return 0;
+
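+ /* Map one bio segment and build one DMA descriptor for it */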
+ vec = bio_iovec_idx(bio, idx);
+ len = vec->bv_len;
+ dma_handle = pci_map_page(card->dev,
+ vec->bv_page,
+ vec->bv_offset,
+ len,
+ (rw == READ) ?
+ PCI_DMA_FROMDEVICE : PCI_DMA_TODEVICE);
+
+ p = &card->mm_pages[card->Ready];
+ desc = &p->desc[p->cnt];
+ p->cnt++;
+ if (p->bio == NULL)
+ p->idx = idx;
+ if ((p->biotail) != &bio->bi_next) {
+ *(p->biotail) = bio;
+ p->biotail = &(bio->bi_next);
+ bio->bi_next = NULL;
+ }
+
+ desc->data_dma_handle = dma_handle;
+
+ desc->pci_addr = cpu_to_le64((u64)desc->data_dma_handle);
+ desc->local_addr = cpu_to_le64(card->current_sector << 9);
+ desc->transfer_size = cpu_to_le32(len);
+ offset = (((char *)&desc->sem_control_bits) - ((char *)p->desc));
+ desc->sem_addr = cpu_to_le64((u64)(p->page_dma+offset));
+ desc->zero1 = desc->zero2 = 0;
+ offset = (((char *)(desc+1)) - ((char *)p->desc));
+ desc->next_desc_addr = cpu_to_le64(p->page_dma+offset);
+ desc->control_bits = cpu_to_le32(DMASCR_GO|DMASCR_ERR_INT_EN|
+ DMASCR_PARITY_INT_EN|
+ DMASCR_CHAIN_EN |
+ DMASCR_SEM_EN |
+ pci_cmds);
+ if (rw == WRITE)
+ desc->control_bits |= cpu_to_le32(DMASCR_TRANSFER_READ);
+ desc->sem_control_bits = desc->control_bits;
+
+ card->current_sector += (len >> 9);
+ idx++;
+ card->current_idx = idx;
+ if (idx >= bio->bi_vcnt)
+ card->currentbio = NULL;
+
+ return 1;
+}
+
+static void process_page(unsigned long data)
+{
+ /* check if any of the requests in the page are DMA_COMPLETE,
+ * and deal with them appropriately.
+ * If we find a descriptor without DMA_COMPLETE in the semaphore, then
+ * dma must have hit an error on that descriptor, so use dma_status
+ * instead and assume that all following descriptors must be re-tried.
+ */
+ struct mm_page *page;
+ struct bio *return_bio = NULL;
+ struct cardinfo *card = (struct cardinfo *)data;
+ unsigned int dma_status = card->dma_status;
+
+ spin_lock_bh(&card->lock);
+ if (card->Active < 0)
+ goto out_unlock;
+ page = &card->mm_pages[card->Active];
+
+ while (page->headcnt < page->cnt) {
+ struct bio *bio = page->bio;
+ struct mm_dma_desc *desc = &page->desc[page->headcnt];
+ int control = le32_to_cpu(desc->sem_control_bits);
+ int last = 0;
+ int idx;
+
+ if (!(control & DMASCR_DMA_COMPLETE)) {
+ control = dma_status;
+ last = 1;
+ }
+ page->headcnt++;
+ idx = page->idx;
+ page->idx++;
+ if (page->idx >= bio->bi_vcnt) {
+ page->bio = bio->bi_next;
+ if (page->bio)
+ page->idx = page->bio->bi_idx;
+ }
+
+ pci_unmap_page(card->dev, desc->data_dma_handle,
+ bio_iovec_idx(bio, idx)->bv_len,
+ (control & DMASCR_TRANSFER_READ) ?
+ PCI_DMA_TODEVICE : PCI_DMA_FROMDEVICE);
+ if (control & DMASCR_HARD_ERROR) {
+ /* error */
+ clear_bit(BIO_UPTODATE, &bio->bi_flags);
+ dev_printk(KERN_WARNING, &card->dev->dev,
+ "I/O error on sector %d/%d\n",
+ le32_to_cpu(desc->local_addr)>>9,
+ le32_to_cpu(desc->transfer_size));
+ dump_dmastat(card, control);
+ } else if ((bio->bi_rw & REQ_WRITE) &&
+ le32_to_cpu(desc->local_addr) >> 9 ==
+ card->init_size) {
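+ /* A sequential write extends the initialised prefix; once it covers the card, set the bit */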
+ card->init_size += le32_to_cpu(desc->transfer_size) >> 9;
+ if (card->init_size >> 1 >= card->mm_size) {
+ dev_printk(KERN_INFO, &card->dev->dev,
+ "memory now initialised\n");
+ set_userbit(card, MEMORY_INITIALIZED, 1);
+ }
+ }
+ if (bio != page->bio) {
+ bio->bi_next = return_bio;
+ return_bio = bio;
+ }
+
+ if (last)
+ break;
+ }
+
+ if (debug & DEBUG_LED_ON_TRANSFER)
+ set_led(card, LED_REMOVE, LED_OFF);
+
+ if (card->check_batteries) {
+ card->check_batteries = 0;
+ check_batteries(card);
+ }
+ if (page->headcnt >= page->cnt) {
+ reset_page(page);
+ card->Active = -1;
+ activate(card);
+ } else {
+ /* haven't finished with this one yet */
+ pr_debug("do some more\n");
+ mm_start_io(card);
+ }
+ out_unlock:
+ spin_unlock_bh(&card->lock);
+
+ while (return_bio) {
+ struct bio *bio = return_bio;
+
+ return_bio = bio->bi_next;
+ bio->bi_next = NULL;
+ bio_endio(bio, 0);
+ }
+}
+
+static int mm_make_request(struct request_queue *q, struct bio *bio)
+{
+ struct cardinfo *card = q->queuedata;
+ pr_debug("mm_make_request %llu %u\n",
+ (unsigned long long)bio->bi_sector, bio->bi_size);
+
+ spin_lock_irq(&card->lock);
+ *card->biotail = bio;
+ bio->bi_next = NULL;
+ card->biotail = &bio->bi_next;
+ spin_unlock_irq(&card->lock);
+
+ return 0;
+}
+
+static irqreturn_t mm_interrupt(int irq, void *__card)
+{
+ struct cardinfo *card = (struct cardinfo *) __card;
+ unsigned int dma_status;
+ unsigned short cfg_status;
+
+HW_TRACE(0x30);
+
+ dma_status = le32_to_cpu(readl(card->csr_remap + DMA_STATUS_CTRL));
+
+ if (!(dma_status & (DMASCR_ERROR_MASK | DMASCR_CHAIN_COMPLETE))) {
+ /* interrupt wasn't for me ... */
+ return IRQ_NONE;
+ }
+
+ /* clear COMPLETION interrupts */
+ if (card->flags & UM_FLAG_NO_BYTE_STATUS)
+ writel(cpu_to_le32(DMASCR_DMA_COMPLETE|DMASCR_CHAIN_COMPLETE),
+ card->csr_remap + DMA_STATUS_CTRL);
+ else
+ writeb((DMASCR_DMA_COMPLETE|DMASCR_CHAIN_COMPLETE) >> 16,
+ card->csr_remap + DMA_STATUS_CTRL + 2);
+
+ /* log errors and clear interrupt status */
+ if (dma_status & DMASCR_ANY_ERR) {
+ unsigned int data_log1, data_log2;
+ unsigned int addr_log1, addr_log2;
+ unsigned char stat, count, syndrome, check;
+
+ stat = readb(card->csr_remap + MEMCTRLCMD_ERRSTATUS);
+
+ data_log1 = le32_to_cpu(readl(card->csr_remap +
+ ERROR_DATA_LOG));
+ data_log2 = le32_to_cpu(readl(card->csr_remap +
+ ERROR_DATA_LOG + 4));
+ addr_log1 = le32_to_cpu(readl(card->csr_remap +
+ ERROR_ADDR_LOG));
+ addr_log2 = readb(card->csr_remap + ERROR_ADDR_LOG + 4);
+
+ count = readb(card->csr_remap + ERROR_COUNT);
+ syndrome = readb(card->csr_remap + ERROR_SYNDROME);
+ check = readb(card->csr_remap + ERROR_CHECK);
+
+ dump_dmastat(card, dma_status);
+
+ if (stat & 0x01)
+ dev_printk(KERN_ERR, &card->dev->dev,
+ "Memory access error detected (err count %d)\n",
+ count);
+ if (stat & 0x02)
+ dev_printk(KERN_ERR, &card->dev->dev,
+ "Multi-bit EDC error\n");
+
+ dev_printk(KERN_ERR, &card->dev->dev,
+ "Fault Address 0x%02x%08x, Fault Data 0x%08x%08x\n",
+ addr_log2, addr_log1, data_log2, data_log1);
+ dev_printk(KERN_ERR, &card->dev->dev,
+ "Fault Check 0x%02x, Fault Syndrome 0x%02x\n",
+ check, syndrome);
+
+ writeb(0, card->csr_remap + ERROR_COUNT);
+ }
+
+ if (dma_status & DMASCR_PARITY_ERR_REP) {
+ dev_printk(KERN_ERR, &card->dev->dev,
+ "PARITY ERROR REPORTED\n");
+ pci_read_config_word(card->dev, PCI_STATUS, &cfg_status);
+ pci_write_config_word(card->dev, PCI_STATUS, cfg_status);
+ }
+
+ if (dma_status & DMASCR_PARITY_ERR_DET) {
+ dev_printk(KERN_ERR, &card->dev->dev,
+ "PARITY ERROR DETECTED\n");
+ pci_read_config_word(card->dev, PCI_STATUS, &cfg_status);
+ pci_write_config_word(card->dev, PCI_STATUS, cfg_status);
+ }
+
+ if (dma_status & DMASCR_SYSTEM_ERR_SIG) {
+ dev_printk(KERN_ERR, &card->dev->dev, "SYSTEM ERROR\n");
+ pci_read_config_word(card->dev, PCI_STATUS, &cfg_status);
+ pci_write_config_word(card->dev, PCI_STATUS, cfg_status);
+ }
+
+ if (dma_status & DMASCR_TARGET_ABT) {
+ dev_printk(KERN_ERR, &card->dev->dev, "TARGET ABORT\n");
+ pci_read_config_word(card->dev, PCI_STATUS, &cfg_status);
+ pci_write_config_word(card->dev, PCI_STATUS, cfg_status);
+ }
+
+ if (dma_status & DMASCR_MASTER_ABT) {
+ dev_printk(KERN_ERR, &card->dev->dev, "MASTER ABORT\n");
+ pci_read_config_word(card->dev, PCI_STATUS, &cfg_status);
+ pci_write_config_word(card->dev, PCI_STATUS, cfg_status);
+ }
+
+ /* and process the DMA descriptors */
+ card->dma_status = dma_status;
+ tasklet_schedule(&card->tasklet);
+
+HW_TRACE(0x36);
+
+ return IRQ_HANDLED;
+}
+
+/*
+ * If both batteries are good, no LED
+ * If either battery has been warned, solid LED
+ * If both batteries are bad, flash the LED quickly
+ * If either battery is bad, flash the LED semi quickly
+ */
+static void set_fault_to_battery_status(struct cardinfo *card)
+{
+ if (card->battery[0].good && card->battery[1].good)
+ set_led(card, LED_FAULT, LED_OFF);
+ else if (card->battery[0].warned || card->battery[1].warned)
+ set_led(card, LED_FAULT, LED_ON);
+ else if (!card->battery[0].good && !card->battery[1].good)
+ set_led(card, LED_FAULT, LED_FLASH_7_0);
+ else
+ set_led(card, LED_FAULT, LED_FLASH_3_5);
+}
+
+static void init_battery_timer(void);
+
+static int check_battery(struct cardinfo *card, int battery, int status)
+{
+ if (status != card->battery[battery].good) {
+ card->battery[battery].good = !card->battery[battery].good;
+ card->battery[battery].last_change = jiffies;
+
+ if (card->battery[battery].good) {
+ dev_printk(KERN_ERR, &card->dev->dev,
+ "Battery %d now good\n", battery + 1);
+ card->battery[battery].warned = 0;
+ } else
+ dev_printk(KERN_ERR, &card->dev->dev,
+ "Battery %d now FAILED\n", battery + 1);
+
+ return 1;
+ } else if (!card->battery[battery].good &&
+ !card->battery[battery].warned &&
+ time_after_eq(jiffies, card->battery[battery].last_change +
+ (HZ * 60 * 60 * 5))) {
+ dev_printk(KERN_ERR, &card->dev->dev,
+ "Battery %d still FAILED after 5 hours\n", battery + 1);
+ card->battery[battery].warned = 1;
+
+ return 1;
+ }
+
+ return 0;
+}
+
+static void check_batteries(struct cardinfo *card)
+{
+ /* NOTE: this must *never* be called while the card
+ * is doing (bus-to-card) DMA, or you will need the
+ * reset switch
+ */
+ unsigned char status;
+ int ret1, ret2;
+
+ status = readb(card->csr_remap + MEMCTRLSTATUS_BATTERY);
+ if (debug & DEBUG_BATTERY_POLLING)
+ dev_printk(KERN_DEBUG, &card->dev->dev,
+ "checking battery status, 1 = %s, 2 = %s\n",
+ (status & BATTERY_1_FAILURE) ? "FAILURE" : "OK",
+ (status & BATTERY_2_FAILURE) ? "FAILURE" : "OK");
+
+ ret1 = check_battery(card, 0, !(status & BATTERY_1_FAILURE));
+ ret2 = check_battery(card, 1, !(status & BATTERY_2_FAILURE));
+
+ if (ret1 || ret2)
+ set_fault_to_battery_status(card);
+}
+
+static void check_all_batteries(unsigned long ptr)
+{
+ int i;
+
+ for (i = 0; i < num_cards; i++)
+ if (!(cards[i].flags & UM_FLAG_NO_BATT)) {
+ struct cardinfo *card = &cards[i];
+ spin_lock_bh(&card->lock);
+ if (card->Active >= 0)
+ card->check_batteries = 1;
+ else
+ check_batteries(card);
+ spin_unlock_bh(&card->lock);
+ }
+
+ init_battery_timer();
+}
+
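+/*
+ * Poll the batteries once a minute; check_all_batteries() above re-arms
+ * this timer each time it runs.
+ */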
+static void init_battery_timer(void)
+{
+ init_timer(&battery_timer);
+ battery_timer.function = check_all_batteries;
+ battery_timer.expires = jiffies + (HZ * 60);
+ add_timer(&battery_timer);
+}
+
+static void del_battery_timer(void)
+{
+ del_timer(&battery_timer);
+}
+
+/*
+ * Note that no locks are taken here. In the worst case we could drop
+ * a chunk of system memory. But that should never happen, since validation
+ * happens at open or mount time, when locks are held.
+ *
+ * That assumption is shaky, though: revalidating while some partitions are
+ * open or mounted can still give really nasty results.
+ */
+static int mm_revalidate(struct gendisk *disk)
+{
+ struct cardinfo *card = disk->private_data;
+ set_capacity(disk, card->mm_size << 1);
+ return 0;
+}
+
+static int mm_getgeo(struct block_device *bdev, struct hd_geometry *geo)
+{
+ struct cardinfo *card = bdev->bd_disk->private_data;
+ int size = card->mm_size * (1024 / MM_HARDSECT);
+
+ /*
+ * get geometry: we have to fake one... trim the size to a
+	 * multiple of 2048 (1M): say we have 32 sectors and 64 heads,
+	 * and however many cylinders that works out to.
+ */
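+	/*
+	 * For example (assuming MM_HARDSECT is 512): a 1 GB module has
+	 * mm_size = 1048576 KB = 2097152 sectors, which reports as
+	 * 1024 cylinders of 64 heads x 32 sectors.
+	 */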
+ geo->heads = 64;
+ geo->sectors = 32;
+ geo->cylinders = size / (geo->heads * geo->sectors);
+ return 0;
+}
+
+static const struct block_device_operations mm_fops = {
+ .owner = THIS_MODULE,
+ .getgeo = mm_getgeo,
+ .revalidate_disk = mm_revalidate,
+};
+
+static int __devinit mm_pci_probe(struct pci_dev *dev,
+ const struct pci_device_id *id)
+{
+ int ret = -ENODEV;
+ struct cardinfo *card = &cards[num_cards];
+ unsigned char mem_present;
+ unsigned char batt_status;
+ unsigned int saved_bar, data;
+ unsigned long csr_base;
+ unsigned long csr_len;
+ int magic_number;
+ static int printed_version;
+
+ if (!printed_version++)
+ printk(KERN_INFO DRIVER_VERSION " : " DRIVER_DESC "\n");
+
+ ret = pci_enable_device(dev);
+ if (ret)
+ return ret;
+
+ pci_write_config_byte(dev, PCI_LATENCY_TIMER, 0xF8);
+ pci_set_master(dev);
+
+ card->dev = dev;
+
+ csr_base = pci_resource_start(dev, 0);
+ csr_len = pci_resource_len(dev, 0);
+ if (!csr_base || !csr_len)
+ return -ENODEV;
+
+ dev_printk(KERN_INFO, &dev->dev,
+ "Micro Memory(tm) controller found (PCI Mem Module (Battery Backup))\n");
+
+ if (pci_set_dma_mask(dev, DMA_BIT_MASK(64)) &&
+ pci_set_dma_mask(dev, DMA_BIT_MASK(32))) {
+ dev_printk(KERN_WARNING, &dev->dev, "NO suitable DMA found\n");
+ return -ENOMEM;
+ }
+
+ ret = pci_request_regions(dev, DRIVER_NAME);
+ if (ret) {
+ dev_printk(KERN_ERR, &card->dev->dev,
+ "Unable to request memory region\n");
+ goto failed_req_csr;
+ }
+
+ card->csr_remap = ioremap_nocache(csr_base, csr_len);
+ if (!card->csr_remap) {
+ dev_printk(KERN_ERR, &card->dev->dev,
+ "Unable to remap memory region\n");
+ ret = -ENOMEM;
+
+ goto failed_remap_csr;
+ }
+
+ dev_printk(KERN_INFO, &card->dev->dev,
+ "CSR 0x%08lx -> 0x%p (0x%lx)\n",
+ csr_base, card->csr_remap, csr_len);
+
+ switch (card->dev->device) {
+ case 0x5415:
+ card->flags |= UM_FLAG_NO_BYTE_STATUS | UM_FLAG_NO_BATTREG;
+ magic_number = 0x59;
+ break;
+
+ case 0x5425:
+ card->flags |= UM_FLAG_NO_BYTE_STATUS;
+ magic_number = 0x5C;
+ break;
+
+ case 0x6155:
+ card->flags |= UM_FLAG_NO_BYTE_STATUS |
+ UM_FLAG_NO_BATTREG | UM_FLAG_NO_BATT;
+ magic_number = 0x99;
+ break;
+
+ default:
+ magic_number = 0x100;
+ break;
+ }
+
+ if (readb(card->csr_remap + MEMCTRLSTATUS_MAGIC) != magic_number) {
+ dev_printk(KERN_ERR, &card->dev->dev, "Magic number invalid\n");
+ ret = -ENOMEM;
+ goto failed_magic;
+ }
+
+ card->mm_pages[0].desc = pci_alloc_consistent(card->dev,
+ PAGE_SIZE * 2,
+ &card->mm_pages[0].page_dma);
+ card->mm_pages[1].desc = pci_alloc_consistent(card->dev,
+ PAGE_SIZE * 2,
+ &card->mm_pages[1].page_dma);
+ if (card->mm_pages[0].desc == NULL ||
+ card->mm_pages[1].desc == NULL) {
+ dev_printk(KERN_ERR, &card->dev->dev, "alloc failed\n");
+ goto failed_alloc;
+ }
+ reset_page(&card->mm_pages[0]);
+ reset_page(&card->mm_pages[1]);
+ card->Ready = 0; /* page 0 is ready */
+ card->Active = -1; /* no page is active */
+ card->bio = NULL;
+ card->biotail = &card->bio;
+
+ card->queue = blk_alloc_queue(GFP_KERNEL);
+ if (!card->queue)
+ goto failed_alloc;
+
+ blk_queue_make_request(card->queue, mm_make_request);
+ card->queue->queue_lock = &card->lock;
+ card->queue->queuedata = card;
+
+ tasklet_init(&card->tasklet, process_page, (unsigned long)card);
+
+ card->check_batteries = 0;
+
+ mem_present = readb(card->csr_remap + MEMCTRLSTATUS_MEMORY);
+ switch (mem_present) {
+ case MEM_128_MB:
+ card->mm_size = 1024 * 128;
+ break;
+ case MEM_256_MB:
+ card->mm_size = 1024 * 256;
+ break;
+ case MEM_512_MB:
+ card->mm_size = 1024 * 512;
+ break;
+ case MEM_1_GB:
+ card->mm_size = 1024 * 1024;
+ break;
+ case MEM_2_GB:
+ card->mm_size = 1024 * 2048;
+ break;
+ default:
+ card->mm_size = 0;
+ break;
+ }
+
+ /* Clear the LED's we control */
+ set_led(card, LED_REMOVE, LED_OFF);
+ set_led(card, LED_FAULT, LED_OFF);
+
+ batt_status = readb(card->csr_remap + MEMCTRLSTATUS_BATTERY);
+
+ card->battery[0].good = !(batt_status & BATTERY_1_FAILURE);
+ card->battery[1].good = !(batt_status & BATTERY_2_FAILURE);
+ card->battery[0].last_change = card->battery[1].last_change = jiffies;
+
+ if (card->flags & UM_FLAG_NO_BATT)
+ dev_printk(KERN_INFO, &card->dev->dev,
+ "Size %d KB\n", card->mm_size);
+ else {
+ dev_printk(KERN_INFO, &card->dev->dev,
+ "Size %d KB, Battery 1 %s (%s), Battery 2 %s (%s)\n",
+ card->mm_size,
+ batt_status & BATTERY_1_DISABLED ? "Disabled" : "Enabled",
+ card->battery[0].good ? "OK" : "FAILURE",
+ batt_status & BATTERY_2_DISABLED ? "Disabled" : "Enabled",
+ card->battery[1].good ? "OK" : "FAILURE");
+
+ set_fault_to_battery_status(card);
+ }
+
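+	/*
+	 * Size BAR1 the standard way: save it, write all ones, read back
+	 * the address mask, restore the original value, then invert the
+	 * mask and add one to get the window size reported below.
+	 */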
+ pci_read_config_dword(dev, PCI_BASE_ADDRESS_1, &saved_bar);
+ data = 0xffffffff;
+ pci_write_config_dword(dev, PCI_BASE_ADDRESS_1, data);
+ pci_read_config_dword(dev, PCI_BASE_ADDRESS_1, &data);
+ pci_write_config_dword(dev, PCI_BASE_ADDRESS_1, saved_bar);
+ data &= 0xfffffff0;
+ data = ~data;
+ data += 1;
+
+ if (request_irq(dev->irq, mm_interrupt, IRQF_SHARED, DRIVER_NAME,
+ card)) {
+ dev_printk(KERN_ERR, &card->dev->dev,
+ "Unable to allocate IRQ\n");
+ ret = -ENODEV;
+ goto failed_req_irq;
+ }
+
+ dev_printk(KERN_INFO, &card->dev->dev,
+ "Window size %d bytes, IRQ %d\n", data, dev->irq);
+
+ spin_lock_init(&card->lock);
+
+ pci_set_drvdata(dev, card);
+
+ if (pci_write_cmd != 0x0F) /* If not Memory Write & Invalidate */
+ pci_write_cmd = 0x07; /* then Memory Write command */
+
+ if (pci_write_cmd & 0x08) { /* use Memory Write and Invalidate */
+ unsigned short cfg_command;
+ pci_read_config_word(dev, PCI_COMMAND, &cfg_command);
+ cfg_command |= 0x10; /* Memory Write & Invalidate Enable */
+ pci_write_config_word(dev, PCI_COMMAND, cfg_command);
+ }
+ pci_cmds = (pci_read_cmd << 28) | (pci_write_cmd << 24);
+
+ num_cards++;
+
+ if (!get_userbit(card, MEMORY_INITIALIZED)) {
+ dev_printk(KERN_INFO, &card->dev->dev,
+			   "memory NOT initialized. Consider overwriting the whole device.\n");
+ card->init_size = 0;
+ } else {
+ dev_printk(KERN_INFO, &card->dev->dev,
+ "memory already initialized\n");
+ card->init_size = card->mm_size;
+ }
+
+ /* Enable ECC */
+ writeb(EDC_STORE_CORRECT, card->csr_remap + MEMCTRLCMD_ERRCTRL);
+
+ return 0;
+
+ failed_req_irq:
+ failed_alloc:
+ if (card->mm_pages[0].desc)
+ pci_free_consistent(card->dev, PAGE_SIZE*2,
+ card->mm_pages[0].desc,
+ card->mm_pages[0].page_dma);
+ if (card->mm_pages[1].desc)
+ pci_free_consistent(card->dev, PAGE_SIZE*2,
+ card->mm_pages[1].desc,
+ card->mm_pages[1].page_dma);
+ failed_magic:
+ iounmap(card->csr_remap);
+ failed_remap_csr:
+ pci_release_regions(dev);
+ failed_req_csr:
+
+ return ret;
+}
+
+static void mm_pci_remove(struct pci_dev *dev)
+{
+ struct cardinfo *card = pci_get_drvdata(dev);
+
+ tasklet_kill(&card->tasklet);
+ free_irq(dev->irq, card);
+ iounmap(card->csr_remap);
+
+ if (card->mm_pages[0].desc)
+ pci_free_consistent(card->dev, PAGE_SIZE*2,
+ card->mm_pages[0].desc,
+ card->mm_pages[0].page_dma);
+ if (card->mm_pages[1].desc)
+ pci_free_consistent(card->dev, PAGE_SIZE*2,
+ card->mm_pages[1].desc,
+ card->mm_pages[1].page_dma);
+ blk_cleanup_queue(card->queue);
+
+ pci_release_regions(dev);
+ pci_disable_device(dev);
+}
+
+static const struct pci_device_id mm_pci_ids[] = {
+ {PCI_DEVICE(PCI_VENDOR_ID_MICRO_MEMORY, PCI_DEVICE_ID_MICRO_MEMORY_5415CN)},
+ {PCI_DEVICE(PCI_VENDOR_ID_MICRO_MEMORY, PCI_DEVICE_ID_MICRO_MEMORY_5425CN)},
+ {PCI_DEVICE(PCI_VENDOR_ID_MICRO_MEMORY, PCI_DEVICE_ID_MICRO_MEMORY_6155)},
+ {
+ .vendor = 0x8086,
+ .device = 0xB555,
+ .subvendor = 0x1332,
+ .subdevice = 0x5460,
+ .class = 0x050000,
+ .class_mask = 0,
+ }, { /* end: all zeroes */ }
+};
+
+MODULE_DEVICE_TABLE(pci, mm_pci_ids);
+
+static struct pci_driver mm_pci_driver = {
+ .name = DRIVER_NAME,
+ .id_table = mm_pci_ids,
+ .probe = mm_pci_probe,
+ .remove = mm_pci_remove,
+};
+
+static int __init mm_init(void)
+{
+ int retval, i;
+ int err;
+
+ retval = pci_register_driver(&mm_pci_driver);
+ if (retval)
+ return -ENOMEM;
+
+ err = major_nr = register_blkdev(0, DRIVER_NAME);
+ if (err < 0) {
+ pci_unregister_driver(&mm_pci_driver);
+ return -EIO;
+ }
+
+ for (i = 0; i < num_cards; i++) {
+ mm_gendisk[i] = alloc_disk(1 << MM_SHIFT);
+ if (!mm_gendisk[i])
+ goto out;
+ }
+
+ for (i = 0; i < num_cards; i++) {
+ struct gendisk *disk = mm_gendisk[i];
+ sprintf(disk->disk_name, "umem%c", 'a'+i);
+ spin_lock_init(&cards[i].lock);
+ disk->major = major_nr;
+ disk->first_minor = i << MM_SHIFT;
+ disk->fops = &mm_fops;
+ disk->private_data = &cards[i];
+ disk->queue = cards[i].queue;
+ set_capacity(disk, cards[i].mm_size << 1);
+ add_disk(disk);
+ }
+
+ init_battery_timer();
+ printk(KERN_INFO "MM: desc_per_page = %ld\n", DESC_PER_PAGE);
+/* printk("mm_init: Done. 10-19-01 9:00\n"); */
+ return 0;
+
+out:
+ pci_unregister_driver(&mm_pci_driver);
+ unregister_blkdev(major_nr, DRIVER_NAME);
+ while (i--)
+ put_disk(mm_gendisk[i]);
+ return -ENOMEM;
+}
+
+static void __exit mm_cleanup(void)
+{
+ int i;
+
+ del_battery_timer();
+
+ for (i = 0; i < num_cards ; i++) {
+ del_gendisk(mm_gendisk[i]);
+ put_disk(mm_gendisk[i]);
+ }
+
+ pci_unregister_driver(&mm_pci_driver);
+
+ unregister_blkdev(major_nr, DRIVER_NAME);
+}
+
+module_init(mm_init);
+module_exit(mm_cleanup);
+
+MODULE_AUTHOR(DRIVER_AUTHOR);
+MODULE_DESCRIPTION(DRIVER_DESC);
+MODULE_LICENSE("GPL");
diff --git a/drivers/block/umem.h b/drivers/block/umem.h
new file mode 100644
index 00000000..375c6897
--- /dev/null
+++ b/drivers/block/umem.h
@@ -0,0 +1,133 @@
+
+/*
+ * This file contains defines for the
+ * Micro Memory MM5415
+ * family PCI Memory Module with Battery Backup.
+ *
+ * Copyright Micro Memory INC 2001. All rights reserved.
+ * Released under the terms of the GNU GENERAL PUBLIC LICENSE version 2.
+ * See the file COPYING.
+ */
+
+#ifndef _DRIVERS_BLOCK_MM_H
+#define _DRIVERS_BLOCK_MM_H
+
+
+#define IRQ_TIMEOUT (1 * HZ)
+
+/* CSR register definition */
+#define MEMCTRLSTATUS_MAGIC 0x00
+#define MM_MAGIC_VALUE (unsigned char)0x59
+
+#define MEMCTRLSTATUS_BATTERY 0x04
+#define BATTERY_1_DISABLED 0x01
+#define BATTERY_1_FAILURE 0x02
+#define BATTERY_2_DISABLED 0x04
+#define BATTERY_2_FAILURE 0x08
+
+#define MEMCTRLSTATUS_MEMORY 0x07
+#define MEM_128_MB 0xfe
+#define MEM_256_MB 0xfc
+#define MEM_512_MB 0xf8
+#define MEM_1_GB 0xf0
+#define MEM_2_GB 0xe0
+
+#define MEMCTRLCMD_LEDCTRL 0x08
+#define LED_REMOVE 2
+#define LED_FAULT 4
+#define LED_POWER 6
+#define LED_FLIP 255
+#define LED_OFF 0x00
+#define LED_ON 0x01
+#define LED_FLASH_3_5 0x02
+#define LED_FLASH_7_0 0x03
+#define LED_POWER_ON 0x00
+#define LED_POWER_OFF 0x01
+#define USER_BIT1 0x01
+#define USER_BIT2 0x02
+
+#define MEMORY_INITIALIZED USER_BIT1
+
+#define MEMCTRLCMD_ERRCTRL 0x0C
+#define EDC_NONE_DEFAULT 0x00
+#define EDC_NONE 0x01
+#define EDC_STORE_READ 0x02
+#define EDC_STORE_CORRECT 0x03
+
+#define MEMCTRLCMD_ERRCNT 0x0D
+#define MEMCTRLCMD_ERRSTATUS 0x0E
+
+#define ERROR_DATA_LOG 0x20
+#define ERROR_ADDR_LOG 0x28
+#define ERROR_COUNT 0x3D
+#define ERROR_SYNDROME 0x3E
+#define ERROR_CHECK 0x3F
+
+#define DMA_PCI_ADDR 0x40
+#define DMA_LOCAL_ADDR 0x48
+#define DMA_TRANSFER_SIZE 0x50
+#define DMA_DESCRIPTOR_ADDR 0x58
+#define DMA_SEMAPHORE_ADDR 0x60
+#define DMA_STATUS_CTRL 0x68
+#define DMASCR_GO 0x00001
+#define DMASCR_TRANSFER_READ 0x00002
+#define DMASCR_CHAIN_EN 0x00004
+#define DMASCR_SEM_EN 0x00010
+#define DMASCR_DMA_COMP_EN 0x00020
+#define DMASCR_CHAIN_COMP_EN 0x00040
+#define DMASCR_ERR_INT_EN 0x00080
+#define DMASCR_PARITY_INT_EN 0x00100
+#define DMASCR_ANY_ERR 0x00800
+#define DMASCR_MBE_ERR 0x01000
+#define DMASCR_PARITY_ERR_REP 0x02000
+#define DMASCR_PARITY_ERR_DET 0x04000
+#define DMASCR_SYSTEM_ERR_SIG 0x08000
+#define DMASCR_TARGET_ABT 0x10000
+#define DMASCR_MASTER_ABT 0x20000
+#define DMASCR_DMA_COMPLETE 0x40000
+#define DMASCR_CHAIN_COMPLETE 0x80000
+
+/*
+3. Some PCs have host bridges which apparently do not correctly handle
+read-line (0xE) or read-multiple (0xC) PCI command codes during DMA
+transfers. In other systems these command codes will cause the host bridge
+to allow longer bursts during DMA read operations. The upper four bits
+(31..28) of the DMA CSR have been made programmable, so that either a 0x6,
+an 0xE or a 0xC can be written to them to set the command code used during
+DMA read operations.
+*/
+#define DMASCR_READ 0x60000000
+#define DMASCR_READLINE 0xE0000000
+#define DMASCR_READMULTI 0xC0000000
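+/* i.e. PCI read command code 0x6, 0xE or 0xC placed in DMA CSR bits 31..28 */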
+
+
+#define DMASCR_ERROR_MASK (DMASCR_MASTER_ABT | DMASCR_TARGET_ABT | DMASCR_SYSTEM_ERR_SIG | DMASCR_PARITY_ERR_DET | DMASCR_MBE_ERR | DMASCR_ANY_ERR)
+#define DMASCR_HARD_ERROR (DMASCR_MASTER_ABT | DMASCR_TARGET_ABT | DMASCR_SYSTEM_ERR_SIG | DMASCR_PARITY_ERR_DET | DMASCR_MBE_ERR)
+
+#define WINDOWMAP_WINNUM 0x7B
+
+#define DMA_READ_FROM_HOST 0
+#define DMA_WRITE_TO_HOST 1
+
+struct mm_dma_desc {
+ __le64 pci_addr;
+ __le64 local_addr;
+ __le32 transfer_size;
+ u32 zero1;
+ __le64 next_desc_addr;
+ __le64 sem_addr;
+ __le32 control_bits;
+ u32 zero2;
+
+ dma_addr_t data_dma_handle;
+
+ /* Copy of the bits */
+ __le64 sem_control_bits;
+} __attribute__((aligned(8)));
+
+/* bits for card->flags */
+#define UM_FLAG_DMA_IN_REGS 1
+#define UM_FLAG_NO_BYTE_STATUS 2
+#define UM_FLAG_NO_BATTREG 4
+#define UM_FLAG_NO_BATT 8
+#endif
diff --git a/drivers/block/viodasd.c b/drivers/block/viodasd.c
new file mode 100644
index 00000000..9a5b2a2d
--- /dev/null
+++ b/drivers/block/viodasd.c
@@ -0,0 +1,809 @@
+/* -*- linux-c -*-
+ * viodasd.c
+ * Authors: Dave Boutcher <boutcher@us.ibm.com>
+ * Ryan Arnold <ryanarn@us.ibm.com>
+ * Colin Devilbiss <devilbis@us.ibm.com>
+ * Stephen Rothwell
+ *
+ * (C) Copyright 2000-2004 IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * This routine provides access to disk space (termed "DASD" in historical
+ * IBM terms) owned and managed by an OS/400 partition running on the
+ * same box as this Linux partition.
+ *
+ * All disk operations are performed by sending messages back and forth to
+ * the OS/400 partition.
+ */
+
+#define pr_fmt(fmt) "viod: " fmt
+
+#include <linux/major.h>
+#include <linux/fs.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/blkdev.h>
+#include <linux/genhd.h>
+#include <linux/hdreg.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/string.h>
+#include <linux/mutex.h>
+#include <linux/dma-mapping.h>
+#include <linux/completion.h>
+#include <linux/device.h>
+#include <linux/scatterlist.h>
+
+#include <asm/uaccess.h>
+#include <asm/vio.h>
+#include <asm/iseries/hv_types.h>
+#include <asm/iseries/hv_lp_event.h>
+#include <asm/iseries/hv_lp_config.h>
+#include <asm/iseries/vio.h>
+#include <asm/firmware.h>
+
+MODULE_DESCRIPTION("iSeries Virtual DASD");
+MODULE_AUTHOR("Dave Boutcher");
+MODULE_LICENSE("GPL");
+
+/*
+ * We only support 7 partitions per physical disk, so with minor
+ * numbers 0-255 we get a maximum of 32 disks.
+ */
+#define VIOD_GENHD_NAME "iseries/vd"
+
+#define VIOD_VERS "1.64"
+
+enum {
+ PARTITION_SHIFT = 3,
+ MAX_DISKNO = HVMAXARCHITECTEDVIRTUALDISKS,
+ MAX_DISK_NAME = FIELD_SIZEOF(struct gendisk, disk_name)
+};
+
+static DEFINE_MUTEX(viodasd_mutex);
+static DEFINE_SPINLOCK(viodasd_spinlock);
+
+#define VIOMAXREQ 16
+
+#define DEVICE_NO(cell) ((struct viodasd_device *)(cell) - &viodasd_devices[0])
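+/* A device's number is simply its index in the viodasd_devices[] array. */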
+
+struct viodasd_waitevent {
+ struct completion com;
+ int rc;
+ u16 sub_result;
+ int max_disk; /* open */
+};
+
+static const struct vio_error_entry viodasd_err_table[] = {
+ { 0x0201, EINVAL, "Invalid Range" },
+ { 0x0202, EINVAL, "Invalid Token" },
+ { 0x0203, EIO, "DMA Error" },
+ { 0x0204, EIO, "Use Error" },
+ { 0x0205, EIO, "Release Error" },
+ { 0x0206, EINVAL, "Invalid Disk" },
+ { 0x0207, EBUSY, "Can't Lock" },
+ { 0x0208, EIO, "Already Locked" },
+ { 0x0209, EIO, "Already Unlocked" },
+ { 0x020A, EIO, "Invalid Arg" },
+ { 0x020B, EIO, "Bad IFS File" },
+ { 0x020C, EROFS, "Read Only Device" },
+ { 0x02FF, EIO, "Internal Error" },
+ { 0x0000, 0, NULL },
+};
+
+/*
+ * Figure out the biggest I/O request (in sectors) we can accept
+ */
+#define VIODASD_MAXSECTORS (4096 / 512 * VIOMAXBLOCKDMA)
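+/* i.e. 8 sectors for each of the VIOMAXBLOCKDMA per-event DMA entries */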
+
+/*
+ * Number of disk I/O requests we've sent to OS/400
+ */
+static int num_req_outstanding;
+
+/*
+ * This is our internal structure for keeping track of disk devices
+ */
+struct viodasd_device {
+ u16 cylinders;
+ u16 tracks;
+ u16 sectors;
+ u16 bytes_per_sector;
+ u64 size;
+ int read_only;
+ spinlock_t q_lock;
+ struct gendisk *disk;
+ struct device *dev;
+} viodasd_devices[MAX_DISKNO];
+
+/*
+ * External open entry point.
+ */
+static int viodasd_open(struct block_device *bdev, fmode_t mode)
+{
+ struct viodasd_device *d = bdev->bd_disk->private_data;
+ HvLpEvent_Rc hvrc;
+ struct viodasd_waitevent we;
+ u16 flags = 0;
+
+ if (d->read_only) {
+ if (mode & FMODE_WRITE)
+ return -EROFS;
+ flags = vioblockflags_ro;
+ }
+
+ init_completion(&we.com);
+
+ /* Send the open event to OS/400 */
+ hvrc = HvCallEvent_signalLpEventFast(viopath_hostLp,
+ HvLpEvent_Type_VirtualIo,
+ viomajorsubtype_blockio | vioblockopen,
+ HvLpEvent_AckInd_DoAck, HvLpEvent_AckType_ImmediateAck,
+ viopath_sourceinst(viopath_hostLp),
+ viopath_targetinst(viopath_hostLp),
+ (u64)(unsigned long)&we, VIOVERSION << 16,
+ ((u64)DEVICE_NO(d) << 48) | ((u64)flags << 32),
+ 0, 0, 0);
+ if (hvrc != 0) {
+ pr_warning("HV open failed %d\n", (int)hvrc);
+ return -EIO;
+ }
+
+ wait_for_completion(&we.com);
+
+ /* Check the return code */
+ if (we.rc != 0) {
+ const struct vio_error_entry *err =
+ vio_lookup_rc(viodasd_err_table, we.sub_result);
+
+ pr_warning("bad rc opening disk: %d:0x%04x (%s)\n",
+ (int)we.rc, we.sub_result, err->msg);
+ return -EIO;
+ }
+
+ return 0;
+}
+
+static int viodasd_unlocked_open(struct block_device *bdev, fmode_t mode)
+{
+ int ret;
+
+ mutex_lock(&viodasd_mutex);
+ ret = viodasd_open(bdev, mode);
+ mutex_unlock(&viodasd_mutex);
+
+ return ret;
+}
+
+
+/*
+ * External release entry point.
+ */
+static int viodasd_release(struct gendisk *disk, fmode_t mode)
+{
+ struct viodasd_device *d = disk->private_data;
+ HvLpEvent_Rc hvrc;
+
+ mutex_lock(&viodasd_mutex);
+ /* Send the event to OS/400. We DON'T expect a response */
+ hvrc = HvCallEvent_signalLpEventFast(viopath_hostLp,
+ HvLpEvent_Type_VirtualIo,
+ viomajorsubtype_blockio | vioblockclose,
+ HvLpEvent_AckInd_NoAck, HvLpEvent_AckType_ImmediateAck,
+ viopath_sourceinst(viopath_hostLp),
+ viopath_targetinst(viopath_hostLp),
+ 0, VIOVERSION << 16,
+ ((u64)DEVICE_NO(d) << 48) /* | ((u64)flags << 32) */,
+ 0, 0, 0);
+ if (hvrc != 0)
+ pr_warning("HV close call failed %d\n", (int)hvrc);
+
+ mutex_unlock(&viodasd_mutex);
+
+ return 0;
+}
+
+
+/* External ioctl entry point.
+ */
+static int viodasd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
+{
+ struct gendisk *disk = bdev->bd_disk;
+ struct viodasd_device *d = disk->private_data;
+
+ geo->sectors = d->sectors ? d->sectors : 32;
+ geo->heads = d->tracks ? d->tracks : 64;
+ geo->cylinders = d->cylinders ? d->cylinders :
+ get_capacity(disk) / (geo->sectors * geo->heads);
+
+ return 0;
+}
+
+/*
+ * Our file operations table
+ */
+static const struct block_device_operations viodasd_fops = {
+ .owner = THIS_MODULE,
+ .open = viodasd_unlocked_open,
+ .release = viodasd_release,
+ .getgeo = viodasd_getgeo,
+};
+
+/*
+ * End a request
+ */
+static void viodasd_end_request(struct request *req, int error,
+ int num_sectors)
+{
+ __blk_end_request(req, error, num_sectors << 9);
+}
+
+/*
+ * Send an actual I/O request to OS/400
+ */
+static int send_request(struct request *req)
+{
+ u64 start;
+ int direction;
+ int nsg;
+ u16 viocmd;
+ HvLpEvent_Rc hvrc;
+ struct vioblocklpevent *bevent;
+ struct HvLpEvent *hev;
+ struct scatterlist sg[VIOMAXBLOCKDMA];
+ int sgindex;
+ struct viodasd_device *d;
+ unsigned long flags;
+
+ start = (u64)blk_rq_pos(req) << 9;
+
+ if (rq_data_dir(req) == READ) {
+ direction = DMA_FROM_DEVICE;
+ viocmd = viomajorsubtype_blockio | vioblockread;
+ } else {
+ direction = DMA_TO_DEVICE;
+ viocmd = viomajorsubtype_blockio | vioblockwrite;
+ }
+
+ d = req->rq_disk->private_data;
+
+ /* Now build the scatter-gather list */
+ sg_init_table(sg, VIOMAXBLOCKDMA);
+ nsg = blk_rq_map_sg(req->q, req, sg);
+ nsg = dma_map_sg(d->dev, sg, nsg, direction);
+
+ spin_lock_irqsave(&viodasd_spinlock, flags);
+ num_req_outstanding++;
+
+ /* This optimization handles a single DMA block */
+ if (nsg == 1)
+ hvrc = HvCallEvent_signalLpEventFast(viopath_hostLp,
+ HvLpEvent_Type_VirtualIo, viocmd,
+ HvLpEvent_AckInd_DoAck,
+ HvLpEvent_AckType_ImmediateAck,
+ viopath_sourceinst(viopath_hostLp),
+ viopath_targetinst(viopath_hostLp),
+ (u64)(unsigned long)req, VIOVERSION << 16,
+ ((u64)DEVICE_NO(d) << 48), start,
+ ((u64)sg_dma_address(&sg[0])) << 32,
+ sg_dma_len(&sg[0]));
+ else {
+ bevent = (struct vioblocklpevent *)
+ vio_get_event_buffer(viomajorsubtype_blockio);
+ if (bevent == NULL) {
+ pr_warning("error allocating disk event buffer\n");
+ goto error_ret;
+ }
+
+ /*
+ * Now build up the actual request. Note that we store
+ * the pointer to the request in the correlation
+ * token so we can match the response up later
+ */
+ memset(bevent, 0, sizeof(struct vioblocklpevent));
+ hev = &bevent->event;
+ hev->flags = HV_LP_EVENT_VALID | HV_LP_EVENT_DO_ACK |
+ HV_LP_EVENT_INT;
+ hev->xType = HvLpEvent_Type_VirtualIo;
+ hev->xSubtype = viocmd;
+ hev->xSourceLp = HvLpConfig_getLpIndex();
+ hev->xTargetLp = viopath_hostLp;
+ hev->xSizeMinus1 =
+ offsetof(struct vioblocklpevent, u.rw_data.dma_info) +
+ (sizeof(bevent->u.rw_data.dma_info[0]) * nsg) - 1;
+ hev->xSourceInstanceId = viopath_sourceinst(viopath_hostLp);
+ hev->xTargetInstanceId = viopath_targetinst(viopath_hostLp);
+ hev->xCorrelationToken = (u64)req;
+ bevent->version = VIOVERSION;
+ bevent->disk = DEVICE_NO(d);
+ bevent->u.rw_data.offset = start;
+
+ /*
+ * Copy just the dma information from the sg list
+ * into the request
+ */
+ for (sgindex = 0; sgindex < nsg; sgindex++) {
+ bevent->u.rw_data.dma_info[sgindex].token =
+ sg_dma_address(&sg[sgindex]);
+ bevent->u.rw_data.dma_info[sgindex].len =
+ sg_dma_len(&sg[sgindex]);
+ }
+
+ /* Send the request */
+ hvrc = HvCallEvent_signalLpEvent(&bevent->event);
+ vio_free_event_buffer(viomajorsubtype_blockio, bevent);
+ }
+
+ if (hvrc != HvLpEvent_Rc_Good) {
+ pr_warning("error sending disk event to OS/400 (rc %d)\n",
+ (int)hvrc);
+ goto error_ret;
+ }
+ spin_unlock_irqrestore(&viodasd_spinlock, flags);
+ return 0;
+
+error_ret:
+ num_req_outstanding--;
+ spin_unlock_irqrestore(&viodasd_spinlock, flags);
+ dma_unmap_sg(d->dev, sg, nsg, direction);
+ return -1;
+}
+
+/*
+ * This is the external request processing routine
+ */
+static void do_viodasd_request(struct request_queue *q)
+{
+ struct request *req;
+
+ /*
+ * If we already have the maximum number of requests
+ * outstanding to OS/400 just bail out. We'll come
+ * back later.
+ */
+ while (num_req_outstanding < VIOMAXREQ) {
+ req = blk_fetch_request(q);
+ if (req == NULL)
+ return;
+ /* check that request contains a valid command */
+ if (req->cmd_type != REQ_TYPE_FS) {
+ viodasd_end_request(req, -EIO, blk_rq_sectors(req));
+ continue;
+ }
+ /* Try sending the request */
+ if (send_request(req) != 0)
+ viodasd_end_request(req, -EIO, blk_rq_sectors(req));
+ }
+}
+
+/*
+ * Probe a single disk and fill in the viodasd_device structure
+ * for it.
+ */
+static int probe_disk(struct viodasd_device *d)
+{
+ HvLpEvent_Rc hvrc;
+ struct viodasd_waitevent we;
+ int dev_no = DEVICE_NO(d);
+ struct gendisk *g;
+ struct request_queue *q;
+ u16 flags = 0;
+
+retry:
+ init_completion(&we.com);
+
+ /* Send the open event to OS/400 */
+ hvrc = HvCallEvent_signalLpEventFast(viopath_hostLp,
+ HvLpEvent_Type_VirtualIo,
+ viomajorsubtype_blockio | vioblockopen,
+ HvLpEvent_AckInd_DoAck, HvLpEvent_AckType_ImmediateAck,
+ viopath_sourceinst(viopath_hostLp),
+ viopath_targetinst(viopath_hostLp),
+ (u64)(unsigned long)&we, VIOVERSION << 16,
+ ((u64)dev_no << 48) | ((u64)flags<< 32),
+ 0, 0, 0);
+ if (hvrc != 0) {
+ pr_warning("bad rc on HV open %d\n", (int)hvrc);
+ return 0;
+ }
+
+ wait_for_completion(&we.com);
+
+ if (we.rc != 0) {
+ if (flags != 0)
+ return 0;
+ /* try again with read only flag set */
+ flags = vioblockflags_ro;
+ goto retry;
+ }
+ if (we.max_disk > (MAX_DISKNO - 1)) {
+ printk_once(KERN_INFO pr_fmt("Only examining the first %d of %d disks connected\n"),
+ MAX_DISKNO, we.max_disk + 1);
+ }
+
+ /* Send the close event to OS/400. We DON'T expect a response */
+ hvrc = HvCallEvent_signalLpEventFast(viopath_hostLp,
+ HvLpEvent_Type_VirtualIo,
+ viomajorsubtype_blockio | vioblockclose,
+ HvLpEvent_AckInd_NoAck, HvLpEvent_AckType_ImmediateAck,
+ viopath_sourceinst(viopath_hostLp),
+ viopath_targetinst(viopath_hostLp),
+ 0, VIOVERSION << 16,
+ ((u64)dev_no << 48) | ((u64)flags << 32),
+ 0, 0, 0);
+ if (hvrc != 0) {
+ pr_warning("bad rc sending event to OS/400 %d\n", (int)hvrc);
+ return 0;
+ }
+
+ if (d->dev == NULL) {
+ /* this is when we reprobe for new disks */
+ if (vio_create_viodasd(dev_no) == NULL) {
+ pr_warning("cannot allocate virtual device for disk %d\n",
+ dev_no);
+ return 0;
+ }
+ /*
+ * The vio_create_viodasd will have recursed into this
+ * routine with d->dev set to the new vio device and
+ * will finish the setup of the disk below.
+ */
+ return 1;
+ }
+
+ /* create the request queue for the disk */
+ spin_lock_init(&d->q_lock);
+ q = blk_init_queue(do_viodasd_request, &d->q_lock);
+ if (q == NULL) {
+ pr_warning("cannot allocate queue for disk %d\n", dev_no);
+ return 0;
+ }
+ g = alloc_disk(1 << PARTITION_SHIFT);
+ if (g == NULL) {
+ pr_warning("cannot allocate disk structure for disk %d\n",
+ dev_no);
+ blk_cleanup_queue(q);
+ return 0;
+ }
+
+ d->disk = g;
+ blk_queue_max_segments(q, VIOMAXBLOCKDMA);
+ blk_queue_max_hw_sectors(q, VIODASD_MAXSECTORS);
+ g->major = VIODASD_MAJOR;
+ g->first_minor = dev_no << PARTITION_SHIFT;
+ if (dev_no >= 26)
+ snprintf(g->disk_name, sizeof(g->disk_name),
+ VIOD_GENHD_NAME "%c%c",
+ 'a' + (dev_no / 26) - 1, 'a' + (dev_no % 26));
+ else
+ snprintf(g->disk_name, sizeof(g->disk_name),
+ VIOD_GENHD_NAME "%c", 'a' + (dev_no % 26));
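+	/* e.g. disk 0 is "iseries/vda", disk 25 "iseries/vdz", disk 26 "iseries/vdaa" */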
+ g->fops = &viodasd_fops;
+ g->queue = q;
+ g->private_data = d;
+ g->driverfs_dev = d->dev;
+ set_capacity(g, d->size >> 9);
+
+ pr_info("disk %d: %lu sectors (%lu MB) CHS=%d/%d/%d sector size %d%s\n",
+ dev_no, (unsigned long)(d->size >> 9),
+ (unsigned long)(d->size >> 20),
+ (int)d->cylinders, (int)d->tracks,
+ (int)d->sectors, (int)d->bytes_per_sector,
+ d->read_only ? " (RO)" : "");
+
+ /* register us in the global list */
+ add_disk(g);
+ return 1;
+}
+
+/* returns the total number of scatterlist elements converted */
+static int block_event_to_scatterlist(const struct vioblocklpevent *bevent,
+ struct scatterlist *sg, int *total_len)
+{
+ int i, numsg;
+ const struct rw_data *rw_data = &bevent->u.rw_data;
+ static const int offset =
+ offsetof(struct vioblocklpevent, u.rw_data.dma_info);
+ static const int element_size = sizeof(rw_data->dma_info[0]);
+
+ numsg = ((bevent->event.xSizeMinus1 + 1) - offset) / element_size;
+ if (numsg > VIOMAXBLOCKDMA)
+ numsg = VIOMAXBLOCKDMA;
+
+ *total_len = 0;
+ sg_init_table(sg, VIOMAXBLOCKDMA);
+ for (i = 0; (i < numsg) && (rw_data->dma_info[i].len > 0); ++i) {
+ sg_dma_address(&sg[i]) = rw_data->dma_info[i].token;
+ sg_dma_len(&sg[i]) = rw_data->dma_info[i].len;
+ *total_len += rw_data->dma_info[i].len;
+ }
+ return i;
+}
+
+/*
+ * Restart all queues, starting with the one _after_ the disk given,
+ * thus reducing the chance of starvation of higher numbered disks.
+ */
+static void viodasd_restart_all_queues_starting_from(int first_index)
+{
+ int i;
+
+ for (i = first_index + 1; i < MAX_DISKNO; ++i)
+ if (viodasd_devices[i].disk)
+ blk_run_queue(viodasd_devices[i].disk->queue);
+ for (i = 0; i <= first_index; ++i)
+ if (viodasd_devices[i].disk)
+ blk_run_queue(viodasd_devices[i].disk->queue);
+}
+
+/*
+ * For read and write requests, decrement the number of outstanding requests
+ * and free the DMA buffers we allocated.
+ */
+static int viodasd_handle_read_write(struct vioblocklpevent *bevent)
+{
+ int num_sg, num_sect, pci_direction, total_len;
+ struct request *req;
+ struct scatterlist sg[VIOMAXBLOCKDMA];
+ struct HvLpEvent *event = &bevent->event;
+ unsigned long irq_flags;
+ struct viodasd_device *d;
+ int error;
+ spinlock_t *qlock;
+
+ num_sg = block_event_to_scatterlist(bevent, sg, &total_len);
+ num_sect = total_len >> 9;
+ if (event->xSubtype == (viomajorsubtype_blockio | vioblockread))
+ pci_direction = DMA_FROM_DEVICE;
+ else
+ pci_direction = DMA_TO_DEVICE;
+ req = (struct request *)bevent->event.xCorrelationToken;
+ d = req->rq_disk->private_data;
+
+ dma_unmap_sg(d->dev, sg, num_sg, pci_direction);
+
+ /*
+ * Since this is running in interrupt mode, we need to make sure
+ * we're not stepping on any global I/O operations
+ */
+ spin_lock_irqsave(&viodasd_spinlock, irq_flags);
+ num_req_outstanding--;
+ spin_unlock_irqrestore(&viodasd_spinlock, irq_flags);
+
+ error = (event->xRc == HvLpEvent_Rc_Good) ? 0 : -EIO;
+ if (error) {
+ const struct vio_error_entry *err;
+ err = vio_lookup_rc(viodasd_err_table, bevent->sub_result);
+ pr_warning("read/write error %d:0x%04x (%s)\n",
+ event->xRc, bevent->sub_result, err->msg);
+ num_sect = blk_rq_sectors(req);
+ }
+ qlock = req->q->queue_lock;
+ spin_lock_irqsave(qlock, irq_flags);
+ viodasd_end_request(req, error, num_sect);
+ spin_unlock_irqrestore(qlock, irq_flags);
+
+ /* Finally, try to get more requests off of this device's queue */
+ viodasd_restart_all_queues_starting_from(DEVICE_NO(d));
+
+ return 0;
+}
+
+/* This routine handles incoming block LP events */
+static void handle_block_event(struct HvLpEvent *event)
+{
+ struct vioblocklpevent *bevent = (struct vioblocklpevent *)event;
+ struct viodasd_waitevent *pwe;
+
+ if (event == NULL)
+ /* Notification that a partition went away! */
+ return;
+ /* First, we should NEVER get an int here...only acks */
+ if (hvlpevent_is_int(event)) {
+ pr_warning("Yikes! got an int in viodasd event handler!\n");
+ if (hvlpevent_need_ack(event)) {
+ event->xRc = HvLpEvent_Rc_InvalidSubtype;
+ HvCallEvent_ackLpEvent(event);
+ }
+ }
+
+ switch (event->xSubtype & VIOMINOR_SUBTYPE_MASK) {
+ case vioblockopen:
+ /*
+ * Handle a response to an open request. We get all the
+ * disk information in the response, so update it. The
+ * correlation token contains a pointer to a waitevent
+		 * structure that has a completion in it. Update the
+		 * return code in the waitevent structure and post the
+		 * completion to wake up the guy who sent the request.
+ */
+ pwe = (struct viodasd_waitevent *)event->xCorrelationToken;
+ pwe->rc = event->xRc;
+ pwe->sub_result = bevent->sub_result;
+ if (event->xRc == HvLpEvent_Rc_Good) {
+ const struct open_data *data = &bevent->u.open_data;
+ struct viodasd_device *device =
+ &viodasd_devices[bevent->disk];
+ device->read_only =
+ bevent->flags & vioblockflags_ro;
+ device->size = data->disk_size;
+ device->cylinders = data->cylinders;
+ device->tracks = data->tracks;
+ device->sectors = data->sectors;
+ device->bytes_per_sector = data->bytes_per_sector;
+ pwe->max_disk = data->max_disk;
+ }
+ complete(&pwe->com);
+ break;
+ case vioblockclose:
+ break;
+ case vioblockread:
+ case vioblockwrite:
+ viodasd_handle_read_write(bevent);
+ break;
+
+ default:
+		pr_warning("invalid subtype!\n");
+ if (hvlpevent_need_ack(event)) {
+ event->xRc = HvLpEvent_Rc_InvalidSubtype;
+ HvCallEvent_ackLpEvent(event);
+ }
+ }
+}
+
+/*
+ * Get the driver to reprobe for more disks.
+ */
+static ssize_t probe_disks(struct device_driver *drv, const char *buf,
+ size_t count)
+{
+ struct viodasd_device *d;
+
+ for (d = viodasd_devices; d < &viodasd_devices[MAX_DISKNO]; d++) {
+ if (d->disk == NULL)
+ probe_disk(d);
+ }
+ return count;
+}
+static DRIVER_ATTR(probe, S_IWUSR, NULL, probe_disks);
+
+static int viodasd_probe(struct vio_dev *vdev, const struct vio_device_id *id)
+{
+ struct viodasd_device *d = &viodasd_devices[vdev->unit_address];
+
+ d->dev = &vdev->dev;
+ if (!probe_disk(d))
+ return -ENODEV;
+ return 0;
+}
+
+static int viodasd_remove(struct vio_dev *vdev)
+{
+ struct viodasd_device *d;
+
+ d = &viodasd_devices[vdev->unit_address];
+ if (d->disk) {
+ del_gendisk(d->disk);
+ blk_cleanup_queue(d->disk->queue);
+ put_disk(d->disk);
+ d->disk = NULL;
+ }
+ d->dev = NULL;
+ return 0;
+}
+
+/**
+ * viodasd_device_table: Used by vio.c to match devices that we
+ * support.
+ */
+static struct vio_device_id viodasd_device_table[] __devinitdata = {
+ { "block", "IBM,iSeries-viodasd" },
+ { "", "" }
+};
+MODULE_DEVICE_TABLE(vio, viodasd_device_table);
+
+static struct vio_driver viodasd_driver = {
+ .id_table = viodasd_device_table,
+ .probe = viodasd_probe,
+ .remove = viodasd_remove,
+ .driver = {
+ .name = "viodasd",
+ .owner = THIS_MODULE,
+ }
+};
+
+static int need_delete_probe;
+
+/*
+ * Initialize the whole device driver. Handle module and non-module
+ * versions
+ */
+static int __init viodasd_init(void)
+{
+ int rc;
+
+ if (!firmware_has_feature(FW_FEATURE_ISERIES)) {
+ rc = -ENODEV;
+ goto early_fail;
+ }
+
+ /* Try to open to our host lp */
+ if (viopath_hostLp == HvLpIndexInvalid)
+ vio_set_hostlp();
+
+ if (viopath_hostLp == HvLpIndexInvalid) {
+ pr_warning("invalid hosting partition\n");
+ rc = -EIO;
+ goto early_fail;
+ }
+
+ pr_info("vers " VIOD_VERS ", hosting partition %d\n", viopath_hostLp);
+
+ /* register the block device */
+ rc = register_blkdev(VIODASD_MAJOR, VIOD_GENHD_NAME);
+ if (rc) {
+ pr_warning("Unable to get major number %d for %s\n",
+ VIODASD_MAJOR, VIOD_GENHD_NAME);
+ goto early_fail;
+ }
+ /* Actually open the path to the hosting partition */
+ rc = viopath_open(viopath_hostLp, viomajorsubtype_blockio,
+ VIOMAXREQ + 2);
+ if (rc) {
+ pr_warning("error opening path to host partition %d\n",
+ viopath_hostLp);
+ goto unregister_blk;
+ }
+
+ /* Initialize our request handler */
+ vio_setHandler(viomajorsubtype_blockio, handle_block_event);
+
+ rc = vio_register_driver(&viodasd_driver);
+ if (rc) {
+ pr_warning("vio_register_driver failed\n");
+ goto unset_handler;
+ }
+
+ /*
+ * If this call fails, it just means that we cannot dynamically
+ * add virtual disks, but the driver will still work fine for
+ * all existing disks, so ignore the failure.
+ */
+ if (!driver_create_file(&viodasd_driver.driver, &driver_attr_probe))
+ need_delete_probe = 1;
+
+ return 0;
+
+unset_handler:
+ vio_clearHandler(viomajorsubtype_blockio);
+ viopath_close(viopath_hostLp, viomajorsubtype_blockio, VIOMAXREQ + 2);
+unregister_blk:
+ unregister_blkdev(VIODASD_MAJOR, VIOD_GENHD_NAME);
+early_fail:
+ return rc;
+}
+module_init(viodasd_init);
+
+void __exit viodasd_exit(void)
+{
+ if (need_delete_probe)
+ driver_remove_file(&viodasd_driver.driver, &driver_attr_probe);
+ vio_unregister_driver(&viodasd_driver);
+ vio_clearHandler(viomajorsubtype_blockio);
+ viopath_close(viopath_hostLp, viomajorsubtype_blockio, VIOMAXREQ + 2);
+ unregister_blkdev(VIODASD_MAJOR, VIOD_GENHD_NAME);
+}
+module_exit(viodasd_exit);
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
new file mode 100644
index 00000000..5d7a9340
--- /dev/null
+++ b/drivers/block/virtio_blk.c
@@ -0,0 +1,607 @@
+//#define DEBUG
+#include <linux/spinlock.h>
+#include <linux/slab.h>
+#include <linux/blkdev.h>
+#include <linux/hdreg.h>
+#include <linux/virtio.h>
+#include <linux/virtio_blk.h>
+#include <linux/scatterlist.h>
+#include <linux/string_helpers.h>
+#include <scsi/scsi_cmnd.h>
+
+#define PART_BITS 4
+
+static int major, index;
+struct workqueue_struct *virtblk_wq;
+
+struct virtio_blk
+{
+ spinlock_t lock;
+
+ struct virtio_device *vdev;
+ struct virtqueue *vq;
+
+ /* The disk structure for the kernel. */
+ struct gendisk *disk;
+
+ /* Request tracking. */
+ struct list_head reqs;
+
+ mempool_t *pool;
+
+ /* Process context for config space updates */
+ struct work_struct config_work;
+
+ /* What host tells us, plus 2 for header & tailer. */
+ unsigned int sg_elems;
+
+ /* Scatterlist: can be too big for stack. */
+ struct scatterlist sg[/*sg_elems*/];
+};
+
+struct virtblk_req
+{
+ struct list_head list;
+ struct request *req;
+ struct virtio_blk_outhdr out_hdr;
+ struct virtio_scsi_inhdr in_hdr;
+ u8 status;
+};
+
+static void blk_done(struct virtqueue *vq)
+{
+ struct virtio_blk *vblk = vq->vdev->priv;
+ struct virtblk_req *vbr;
+ unsigned int len;
+ unsigned long flags;
+
+ spin_lock_irqsave(&vblk->lock, flags);
+ while ((vbr = virtqueue_get_buf(vblk->vq, &len)) != NULL) {
+ int error;
+
+ switch (vbr->status) {
+ case VIRTIO_BLK_S_OK:
+ error = 0;
+ break;
+ case VIRTIO_BLK_S_UNSUPP:
+ error = -ENOTTY;
+ break;
+ default:
+ error = -EIO;
+ break;
+ }
+
+ switch (vbr->req->cmd_type) {
+ case REQ_TYPE_BLOCK_PC:
+ vbr->req->resid_len = vbr->in_hdr.residual;
+ vbr->req->sense_len = vbr->in_hdr.sense_len;
+ vbr->req->errors = vbr->in_hdr.errors;
+ break;
+ case REQ_TYPE_SPECIAL:
+ vbr->req->errors = (error != 0);
+ break;
+ default:
+ break;
+ }
+
+ __blk_end_request_all(vbr->req, error);
+ list_del(&vbr->list);
+ mempool_free(vbr, vblk->pool);
+ }
+ /* In case queue is stopped waiting for more buffers. */
+ blk_start_queue(vblk->disk->queue);
+ spin_unlock_irqrestore(&vblk->lock, flags);
+}
+
+static bool do_req(struct request_queue *q, struct virtio_blk *vblk,
+ struct request *req)
+{
+ unsigned long num, out = 0, in = 0;
+ struct virtblk_req *vbr;
+
+ vbr = mempool_alloc(vblk->pool, GFP_ATOMIC);
+ if (!vbr)
+ /* When another request finishes we'll try again. */
+ return false;
+
+ vbr->req = req;
+
+ if (req->cmd_flags & REQ_FLUSH) {
+ vbr->out_hdr.type = VIRTIO_BLK_T_FLUSH;
+ vbr->out_hdr.sector = 0;
+ vbr->out_hdr.ioprio = req_get_ioprio(vbr->req);
+ } else {
+ switch (req->cmd_type) {
+ case REQ_TYPE_FS:
+ vbr->out_hdr.type = 0;
+ vbr->out_hdr.sector = blk_rq_pos(vbr->req);
+ vbr->out_hdr.ioprio = req_get_ioprio(vbr->req);
+ break;
+ case REQ_TYPE_BLOCK_PC:
+ vbr->out_hdr.type = VIRTIO_BLK_T_SCSI_CMD;
+ vbr->out_hdr.sector = 0;
+ vbr->out_hdr.ioprio = req_get_ioprio(vbr->req);
+ break;
+ case REQ_TYPE_SPECIAL:
+ vbr->out_hdr.type = VIRTIO_BLK_T_GET_ID;
+ vbr->out_hdr.sector = 0;
+ vbr->out_hdr.ioprio = req_get_ioprio(vbr->req);
+ break;
+ default:
+ /* We don't put anything else in the queue. */
+ BUG();
+ }
+ }
+
+ sg_set_buf(&vblk->sg[out++], &vbr->out_hdr, sizeof(vbr->out_hdr));
+
+ /*
+ * If this is a packet command we need a couple of additional headers.
+	 * Behind the normal outhdr we put a segment with the scsi command
+	 * block, and before the normal inhdr we put the sense data and an
+	 * inhdr with additional status information.
+ */
+ if (vbr->req->cmd_type == REQ_TYPE_BLOCK_PC)
+ sg_set_buf(&vblk->sg[out++], vbr->req->cmd, vbr->req->cmd_len);
+
+ num = blk_rq_map_sg(q, vbr->req, vblk->sg + out);
+
+ if (vbr->req->cmd_type == REQ_TYPE_BLOCK_PC) {
+ sg_set_buf(&vblk->sg[num + out + in++], vbr->req->sense, SCSI_SENSE_BUFFERSIZE);
+ sg_set_buf(&vblk->sg[num + out + in++], &vbr->in_hdr,
+ sizeof(vbr->in_hdr));
+ }
+
+ sg_set_buf(&vblk->sg[num + out + in++], &vbr->status,
+ sizeof(vbr->status));
+
+ if (num) {
+ if (rq_data_dir(vbr->req) == WRITE) {
+ vbr->out_hdr.type |= VIRTIO_BLK_T_OUT;
+ out += num;
+ } else {
+ vbr->out_hdr.type |= VIRTIO_BLK_T_IN;
+ in += num;
+ }
+ }
+
+ if (virtqueue_add_buf(vblk->vq, vblk->sg, out, in, vbr) < 0) {
+ mempool_free(vbr, vblk->pool);
+ return false;
+ }
+
+ list_add_tail(&vbr->list, &vblk->reqs);
+ return true;
+}
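+/*
+ * The scatterlist built above is laid out as: out_hdr, the SCSI command
+ * block (packet commands only), the data segments, then the sense buffer
+ * and in_hdr (packet commands only), and finally the one-byte status that
+ * the host fills in.  Write data counts as "out" buffers, read data as "in".
+ */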
+
+static void do_virtblk_request(struct request_queue *q)
+{
+ struct virtio_blk *vblk = q->queuedata;
+ struct request *req;
+ unsigned int issued = 0;
+
+ while ((req = blk_peek_request(q)) != NULL) {
+ BUG_ON(req->nr_phys_segments + 2 > vblk->sg_elems);
+
+ /* If this request fails, stop queue and wait for something to
+ finish to restart it. */
+ if (!do_req(q, vblk, req)) {
+ blk_stop_queue(q);
+ break;
+ }
+ blk_start_request(req);
+ issued++;
+ }
+
+ if (issued)
+ virtqueue_kick(vblk->vq);
+}
+
+/* return id (s/n) string for *disk to *id_str
+ */
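+/* (id_str must have room for VIRTIO_BLK_ID_BYTES; the caller pre-terminates
+ * it, as virtblk_serial_show() below does)
+ */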
+static int virtblk_get_id(struct gendisk *disk, char *id_str)
+{
+ struct virtio_blk *vblk = disk->private_data;
+ struct request *req;
+ struct bio *bio;
+ int err;
+
+ bio = bio_map_kern(vblk->disk->queue, id_str, VIRTIO_BLK_ID_BYTES,
+ GFP_KERNEL);
+ if (IS_ERR(bio))
+ return PTR_ERR(bio);
+
+ req = blk_make_request(vblk->disk->queue, bio, GFP_KERNEL);
+ if (IS_ERR(req)) {
+ bio_put(bio);
+ return PTR_ERR(req);
+ }
+
+ req->cmd_type = REQ_TYPE_SPECIAL;
+ err = blk_execute_rq(vblk->disk->queue, vblk->disk, req, false);
+ blk_put_request(req);
+
+ return err;
+}
+
+static int virtblk_ioctl(struct block_device *bdev, fmode_t mode,
+ unsigned int cmd, unsigned long data)
+{
+ struct gendisk *disk = bdev->bd_disk;
+ struct virtio_blk *vblk = disk->private_data;
+
+ /*
+ * Only allow the generic SCSI ioctls if the host can support it.
+ */
+ if (!virtio_has_feature(vblk->vdev, VIRTIO_BLK_F_SCSI))
+ return -ENOTTY;
+
+ return scsi_cmd_blk_ioctl(bdev, mode, cmd,
+ (void __user *)data);
+}
+
+/* We provide getgeo only to please some old bootloader/partitioning tools */
+static int virtblk_getgeo(struct block_device *bd, struct hd_geometry *geo)
+{
+ struct virtio_blk *vblk = bd->bd_disk->private_data;
+ struct virtio_blk_geometry vgeo;
+ int err;
+
+ /* see if the host passed in geometry config */
+ err = virtio_config_val(vblk->vdev, VIRTIO_BLK_F_GEOMETRY,
+ offsetof(struct virtio_blk_config, geometry),
+ &vgeo);
+
+ if (!err) {
+ geo->heads = vgeo.heads;
+ geo->sectors = vgeo.sectors;
+ geo->cylinders = vgeo.cylinders;
+ } else {
+ /* some standard values, similar to sd */
+ geo->heads = 1 << 6;
+ geo->sectors = 1 << 5;
+ geo->cylinders = get_capacity(bd->bd_disk) >> 11;
+ }
+ return 0;
+}
+
+static const struct block_device_operations virtblk_fops = {
+ .ioctl = virtblk_ioctl,
+ .owner = THIS_MODULE,
+ .getgeo = virtblk_getgeo,
+};
+
+static int index_to_minor(int index)
+{
+ return index << PART_BITS;
+}
+
+static ssize_t virtblk_serial_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct gendisk *disk = dev_to_disk(dev);
+ int err;
+
+ /* sysfs gives us a PAGE_SIZE buffer */
+ BUILD_BUG_ON(PAGE_SIZE < VIRTIO_BLK_ID_BYTES);
+
+ buf[VIRTIO_BLK_ID_BYTES] = '\0';
+ err = virtblk_get_id(disk, buf);
+ if (!err)
+ return strlen(buf);
+
+ if (err == -EIO) /* Unsupported? Make it empty. */
+ return 0;
+
+ return err;
+}
+DEVICE_ATTR(serial, S_IRUGO, virtblk_serial_show, NULL);
+
+static void virtblk_config_changed_work(struct work_struct *work)
+{
+ struct virtio_blk *vblk =
+ container_of(work, struct virtio_blk, config_work);
+ struct virtio_device *vdev = vblk->vdev;
+ struct request_queue *q = vblk->disk->queue;
+ char cap_str_2[10], cap_str_10[10];
+ u64 capacity, size;
+
+ /* Host must always specify the capacity. */
+ vdev->config->get(vdev, offsetof(struct virtio_blk_config, capacity),
+ &capacity, sizeof(capacity));
+
+ /* If capacity is too big, truncate with warning. */
+ if ((sector_t)capacity != capacity) {
+ dev_warn(&vdev->dev, "Capacity %llu too large: truncating\n",
+ (unsigned long long)capacity);
+ capacity = (sector_t)-1;
+ }
+
+ size = capacity * queue_logical_block_size(q);
+ string_get_size(size, STRING_UNITS_2, cap_str_2, sizeof(cap_str_2));
+ string_get_size(size, STRING_UNITS_10, cap_str_10, sizeof(cap_str_10));
+
+ dev_notice(&vdev->dev,
+ "new size: %llu %d-byte logical blocks (%s/%s)\n",
+ (unsigned long long)capacity,
+ queue_logical_block_size(q),
+ cap_str_10, cap_str_2);
+
+ set_capacity(vblk->disk, capacity);
+}
+
+static void virtblk_config_changed(struct virtio_device *vdev)
+{
+ struct virtio_blk *vblk = vdev->priv;
+
+ queue_work(virtblk_wq, &vblk->config_work);
+}
+
+static int __devinit virtblk_probe(struct virtio_device *vdev)
+{
+ struct virtio_blk *vblk;
+ struct request_queue *q;
+ int err;
+ u64 cap;
+ u32 v, blk_size, sg_elems, opt_io_size;
+ u16 min_io_size;
+ u8 physical_block_exp, alignment_offset;
+
+ if (index_to_minor(index) >= 1 << MINORBITS)
+ return -ENOSPC;
+
+ /* We need to know how many segments before we allocate. */
+ err = virtio_config_val(vdev, VIRTIO_BLK_F_SEG_MAX,
+ offsetof(struct virtio_blk_config, seg_max),
+ &sg_elems);
+
+ /* We need at least one SG element, whatever they say. */
+ if (err || !sg_elems)
+ sg_elems = 1;
+
+	/* We need extra sg elements at head and tail. */
+ sg_elems += 2;
+ vdev->priv = vblk = kmalloc(sizeof(*vblk) +
+ sizeof(vblk->sg[0]) * sg_elems, GFP_KERNEL);
+ if (!vblk) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ INIT_LIST_HEAD(&vblk->reqs);
+ spin_lock_init(&vblk->lock);
+ vblk->vdev = vdev;
+ vblk->sg_elems = sg_elems;
+ sg_init_table(vblk->sg, vblk->sg_elems);
+ INIT_WORK(&vblk->config_work, virtblk_config_changed_work);
+
+ /* We expect one virtqueue, for output. */
+ vblk->vq = virtio_find_single_vq(vdev, blk_done, "requests");
+ if (IS_ERR(vblk->vq)) {
+ err = PTR_ERR(vblk->vq);
+ goto out_free_vblk;
+ }
+
+	vblk->pool = mempool_create_kmalloc_pool(1, sizeof(struct virtblk_req));
+ if (!vblk->pool) {
+ err = -ENOMEM;
+ goto out_free_vq;
+ }
+
+ /* FIXME: How many partitions? How long is a piece of string? */
+ vblk->disk = alloc_disk(1 << PART_BITS);
+ if (!vblk->disk) {
+ err = -ENOMEM;
+ goto out_mempool;
+ }
+
+ q = vblk->disk->queue = blk_init_queue(do_virtblk_request, &vblk->lock);
+ if (!q) {
+ err = -ENOMEM;
+ goto out_put_disk;
+ }
+
+ q->queuedata = vblk;
+
+ if (index < 26) {
+ sprintf(vblk->disk->disk_name, "vd%c", 'a' + index % 26);
+ } else if (index < (26 + 1) * 26) {
+ sprintf(vblk->disk->disk_name, "vd%c%c",
+ 'a' + index / 26 - 1, 'a' + index % 26);
+ } else {
+ const unsigned int m1 = (index / 26 - 1) / 26 - 1;
+ const unsigned int m2 = (index / 26 - 1) % 26;
+ const unsigned int m3 = index % 26;
+ sprintf(vblk->disk->disk_name, "vd%c%c%c",
+ 'a' + m1, 'a' + m2, 'a' + m3);
+ }
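+	/* e.g. index 0 -> "vda", 25 -> "vdz", 26 -> "vdaa", 701 -> "vdzz", 702 -> "vdaaa" */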
+
+ vblk->disk->major = major;
+ vblk->disk->first_minor = index_to_minor(index);
+ vblk->disk->private_data = vblk;
+ vblk->disk->fops = &virtblk_fops;
+ vblk->disk->driverfs_dev = &vdev->dev;
+ index++;
+
+ /* configure queue flush support */
+ if (virtio_has_feature(vdev, VIRTIO_BLK_F_FLUSH))
+ blk_queue_flush(q, REQ_FLUSH);
+
+ /* If disk is read-only in the host, the guest should obey */
+ if (virtio_has_feature(vdev, VIRTIO_BLK_F_RO))
+ set_disk_ro(vblk->disk, 1);
+
+ /* Host must always specify the capacity. */
+ vdev->config->get(vdev, offsetof(struct virtio_blk_config, capacity),
+ &cap, sizeof(cap));
+
+ /* If capacity is too big, truncate with warning. */
+ if ((sector_t)cap != cap) {
+ dev_warn(&vdev->dev, "Capacity %llu too large: truncating\n",
+ (unsigned long long)cap);
+ cap = (sector_t)-1;
+ }
+ set_capacity(vblk->disk, cap);
+
+ /* We can handle whatever the host told us to handle. */
+ blk_queue_max_segments(q, vblk->sg_elems-2);
+
+ /* No need to bounce any requests */
+ blk_queue_bounce_limit(q, BLK_BOUNCE_ANY);
+
+ /* No real sector limit. */
+ blk_queue_max_hw_sectors(q, -1U);
+
+ /* Host can optionally specify maximum segment size and number of
+ * segments. */
+ err = virtio_config_val(vdev, VIRTIO_BLK_F_SIZE_MAX,
+ offsetof(struct virtio_blk_config, size_max),
+ &v);
+ if (!err)
+ blk_queue_max_segment_size(q, v);
+ else
+ blk_queue_max_segment_size(q, -1U);
+
+ /* Host can optionally specify the block size of the device */
+ err = virtio_config_val(vdev, VIRTIO_BLK_F_BLK_SIZE,
+ offsetof(struct virtio_blk_config, blk_size),
+ &blk_size);
+ if (!err)
+ blk_queue_logical_block_size(q, blk_size);
+ else
+ blk_size = queue_logical_block_size(q);
+
+ /* Use topology information if available */
+ err = virtio_config_val(vdev, VIRTIO_BLK_F_TOPOLOGY,
+ offsetof(struct virtio_blk_config, physical_block_exp),
+ &physical_block_exp);
+ if (!err && physical_block_exp)
+ blk_queue_physical_block_size(q,
+ blk_size * (1 << physical_block_exp));
+
+ err = virtio_config_val(vdev, VIRTIO_BLK_F_TOPOLOGY,
+ offsetof(struct virtio_blk_config, alignment_offset),
+ &alignment_offset);
+ if (!err && alignment_offset)
+ blk_queue_alignment_offset(q, blk_size * alignment_offset);
+
+ err = virtio_config_val(vdev, VIRTIO_BLK_F_TOPOLOGY,
+ offsetof(struct virtio_blk_config, min_io_size),
+ &min_io_size);
+ if (!err && min_io_size)
+ blk_queue_io_min(q, blk_size * min_io_size);
+
+ err = virtio_config_val(vdev, VIRTIO_BLK_F_TOPOLOGY,
+ offsetof(struct virtio_blk_config, opt_io_size),
+ &opt_io_size);
+ if (!err && opt_io_size)
+ blk_queue_io_opt(q, blk_size * opt_io_size);
+
+
+ add_disk(vblk->disk);
+ err = device_create_file(disk_to_dev(vblk->disk), &dev_attr_serial);
+ if (err)
+ goto out_del_disk;
+
+ return 0;
+
+out_del_disk:
+ del_gendisk(vblk->disk);
+ blk_cleanup_queue(vblk->disk->queue);
+out_put_disk:
+ put_disk(vblk->disk);
+out_mempool:
+ mempool_destroy(vblk->pool);
+out_free_vq:
+ vdev->config->del_vqs(vdev);
+out_free_vblk:
+ kfree(vblk);
+out:
+ return err;
+}
+
+static void __devexit virtblk_remove(struct virtio_device *vdev)
+{
+ struct virtio_blk *vblk = vdev->priv;
+
+ flush_work(&vblk->config_work);
+
+ /* Nothing should be pending. */
+ BUG_ON(!list_empty(&vblk->reqs));
+
+ /* Stop all the virtqueues. */
+ vdev->config->reset(vdev);
+
+ del_gendisk(vblk->disk);
+ blk_cleanup_queue(vblk->disk->queue);
+ put_disk(vblk->disk);
+ mempool_destroy(vblk->pool);
+ vdev->config->del_vqs(vdev);
+ kfree(vblk);
+}
+
+static const struct virtio_device_id id_table[] = {
+ { VIRTIO_ID_BLOCK, VIRTIO_DEV_ANY_ID },
+ { 0 },
+};
+
+static unsigned int features[] = {
+ VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX, VIRTIO_BLK_F_GEOMETRY,
+ VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE, VIRTIO_BLK_F_SCSI,
+ VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_TOPOLOGY
+};
+
+/*
+ * virtio_blk causes spurious section mismatch warning by
+ * simultaneously referring to a __devinit and a __devexit function.
+ * Use __refdata to avoid this warning.
+ */
+static struct virtio_driver __refdata virtio_blk = {
+ .feature_table = features,
+ .feature_table_size = ARRAY_SIZE(features),
+ .driver.name = KBUILD_MODNAME,
+ .driver.owner = THIS_MODULE,
+ .id_table = id_table,
+ .probe = virtblk_probe,
+ .remove = __devexit_p(virtblk_remove),
+ .config_changed = virtblk_config_changed,
+};
+
+static int __init init(void)
+{
+ int error;
+
+ virtblk_wq = alloc_workqueue("virtio-blk", 0, 0);
+ if (!virtblk_wq)
+ return -ENOMEM;
+
+ major = register_blkdev(0, "virtblk");
+ if (major < 0) {
+ error = major;
+ goto out_destroy_workqueue;
+ }
+
+ error = register_virtio_driver(&virtio_blk);
+ if (error)
+ goto out_unregister_blkdev;
+ return 0;
+
+out_unregister_blkdev:
+ unregister_blkdev(major, "virtblk");
+out_destroy_workqueue:
+ destroy_workqueue(virtblk_wq);
+ return error;
+}
+
+static void __exit fini(void)
+{
+ unregister_blkdev(major, "virtblk");
+ unregister_virtio_driver(&virtio_blk);
+ destroy_workqueue(virtblk_wq);
+}
+module_init(init);
+module_exit(fini);
+
+MODULE_DEVICE_TABLE(virtio, id_table);
+MODULE_DESCRIPTION("Virtio block driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/block/xd.c b/drivers/block/xd.c
new file mode 100644
index 00000000..4abd2bcd
--- /dev/null
+++ b/drivers/block/xd.c
@@ -0,0 +1,1124 @@
+/*
+ * This file contains the driver for an XT hard disk controller
+ * (at least the DTC 5150X) for Linux.
+ *
+ * Author: Pat Mackinlay, pat@it.com.au
+ * Date: 29/09/92
+ *
+ * Revised: 01/01/93, ...
+ *
+ * Ref: DTC 5150X Controller Specification (thanks to Kevin Fowler,
+ * kevinf@agora.rain.com)
+ * Also thanks to: Salvador Abreu, Dave Thaler, Risto Kankkunen and
+ * Wim Van Dorst.
+ *
+ * Revised: 04/04/94 by Risto Kankkunen
+ * Moved the detection code from xd_init() to xd_geninit() as it needed
+ * interrupts enabled and Linus didn't want to enable them in that first
+ * phase. xd_geninit() is the place to do these kinds of things anyway,
+ * he says.
+ *
+ * Modularized: 04/10/96 by Todd Fries, tfries@umr.edu
+ *
+ * Revised: 13/12/97 by Andrzej Krzysztofowicz, ankry@mif.pg.gda.pl
+ * Fixed some problems with disk initialization and module initiation.
+ * Added support for manual geometry setting (except Seagate controllers)
+ * in form:
+ * xd_geo=<cyl_xda>,<head_xda>,<sec_xda>[,<cyl_xdb>,<head_xdb>,<sec_xdb>]
+ * Recovered DMA access. Abridged messages. Added support for DTC5051CX,
+ * WD1002-27X & XEBEC controllers. The driver now uses some jumper settings.
+ * Extended ioctl() support.
+ *
+ * Bugfix: 15/02/01, Paul G. - inform queue layer of tiny xd_maxsect.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/mm.h>
+#include <linux/fs.h>
+#include <linux/kernel.h>
+#include <linux/timer.h>
+#include <linux/genhd.h>
+#include <linux/hdreg.h>
+#include <linux/ioport.h>
+#include <linux/init.h>
+#include <linux/wait.h>
+#include <linux/blkdev.h>
+#include <linux/mutex.h>
+#include <linux/blkpg.h>
+#include <linux/delay.h>
+#include <linux/io.h>
+#include <linux/gfp.h>
+
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <asm/dma.h>
+
+#include "xd.h"
+
+static DEFINE_MUTEX(xd_mutex);
+static void __init do_xd_setup (int *integers);
+#ifdef MODULE
+static int xd[5] = { -1,-1,-1,-1, };
+#endif
+
+#define XD_DONT_USE_DMA		0	/* Initial value; may be overridden using
+					   the "nodma" module option */
+#define XD_INIT_DISK_DELAY (30) /* 30 ms delay during disk initialization */
+
+/* The above delay may need to be increased if problems appear with detection
+   of the second drive (ST11M controller) or with resetting a controller (WD) */
+
+static XD_INFO xd_info[XD_MAXDRIVES];
+
+/* If you try this driver and find that your card is not detected by the driver at bootup, you need to add your BIOS
+ signature and details to the following list of signatures. A BIOS signature is a string embedded into the first
+ few bytes of your controller's on-board ROM BIOS. To find out what yours is, use something like MS-DOS's DEBUG
+ command. Run DEBUG, and then you can examine your BIOS signature with:
+
+ d xxxx:0000
+
+ where xxxx is the segment of your controller (like C800 or D000 or something). On the ASCII dump at the right, you should
+ be able to see a string mentioning the manufacturer's copyright etc. Add this string into the table below. The parameters
+ in the table are, in order:
+
+ offset ; this is the offset (in bytes) from the start of your ROM where the signature starts
+ signature ; this is the actual text of the signature
+ xd_?_init_controller ; this is the controller init routine used by your controller
+ xd_?_init_drive ; this is the drive init routine used by your controller
+
+ The controllers directly supported at the moment are: DTC 5150x, WD 1004A27X, ST11M/R and override. If your controller is
+ made by the same manufacturer as one of these, try using the same init routines as they do. If that doesn't work, your
+ best bet is to use the "override" routines. These routines use a "portable" method of getting the disk's geometry, and
+ may work with your card. If none of these seem to work, try sending me some email and I'll see what I can do <grin>.
+
+ NOTE: You can now specify your XT controller's parameters from the command line in the form xd=TYPE,IRQ,IO,DMA. The driver
+ should be able to detect your drive's geometry from this info. (eg: xd=0,5,0x320,3 is the "standard"). */
+
+#include <asm/page.h>
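+/* The bounce buffer must be ISA-DMA capable: __get_dma_pages() allocates from
+   low memory, rounded up to whole pages by get_order(). */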
+#define xd_dma_mem_alloc(size) __get_dma_pages(GFP_KERNEL,get_order(size))
+#define xd_dma_mem_free(addr, size) free_pages(addr, get_order(size))
+static char *xd_dma_buffer;
+
+static XD_SIGNATURE xd_sigs[] __initdata = {
+ { 0x0000,"Override geometry handler",NULL,xd_override_init_drive,"n unknown" }, /* Pat Mackinlay, pat@it.com.au */
+ { 0x0008,"[BXD06 (C) DTC 17-MAY-1985]",xd_dtc_init_controller,xd_dtc5150cx_init_drive," DTC 5150CX" }, /* Andrzej Krzysztofowicz, ankry@mif.pg.gda.pl */
+ { 0x000B,"CRD18A Not an IBM rom. (C) Copyright Data Technology Corp. 05/31/88",xd_dtc_init_controller,xd_dtc_init_drive," DTC 5150X" }, /* Todd Fries, tfries@umr.edu */
+ { 0x000B,"CXD23A Not an IBM ROM (C)Copyright Data Technology Corp 12/03/88",xd_dtc_init_controller,xd_dtc_init_drive," DTC 5150X" }, /* Pat Mackinlay, pat@it.com.au */
+ { 0x0008,"07/15/86(C) Copyright 1986 Western Digital Corp.",xd_wd_init_controller,xd_wd_init_drive," Western Dig. 1002-27X" }, /* Andrzej Krzysztofowicz, ankry@mif.pg.gda.pl */
+ { 0x0008,"06/24/88(C) Copyright 1988 Western Digital Corp.",xd_wd_init_controller,xd_wd_init_drive," Western Dig. WDXT-GEN2" }, /* Dan Newcombe, newcombe@aa.csc.peachnet.edu */
+ { 0x0015,"SEAGATE ST11 BIOS REVISION",xd_seagate_init_controller,xd_seagate_init_drive," Seagate ST11M/R" }, /* Salvador Abreu, spa@fct.unl.pt */
+ { 0x0010,"ST11R BIOS",xd_seagate_init_controller,xd_seagate_init_drive," Seagate ST11M/R" }, /* Risto Kankkunen, risto.kankkunen@cs.helsinki.fi */
+ { 0x0010,"ST11 BIOS v1.7",xd_seagate_init_controller,xd_seagate_init_drive," Seagate ST11R" }, /* Alan Hourihane, alanh@fairlite.demon.co.uk */
+ { 0x1000,"(c)Copyright 1987 SMS",xd_omti_init_controller,xd_omti_init_drive,"n OMTI 5520" }, /* Dirk Melchers, dirk@merlin.nbg.sub.org */
+ { 0x0006,"COPYRIGHT XEBEC (C) 1984",xd_xebec_init_controller,xd_xebec_init_drive," XEBEC" }, /* Andrzej Krzysztofowicz, ankry@mif.pg.gda.pl */
+ { 0x0008,"(C) Copyright 1984 Western Digital Corp", xd_wd_init_controller, xd_wd_init_drive," Western Dig. 1002s-wx2" },
+ { 0x0008,"(C) Copyright 1986 Western Digital Corporation", xd_wd_init_controller, xd_wd_init_drive," 1986 Western Digital" }, /* jfree@sovereign.org */
+};
+
+static unsigned int xd_bases[] __initdata =
+{
+ 0xC8000, 0xCA000, 0xCC000,
+ 0xCE000, 0xD0000, 0xD2000,
+ 0xD4000, 0xD6000, 0xD8000,
+ 0xDA000, 0xDC000, 0xDE000,
+ 0xE0000
+};
+
+static DEFINE_SPINLOCK(xd_lock);
+
+static struct gendisk *xd_gendisk[2];
+
+static int xd_getgeo(struct block_device *bdev, struct hd_geometry *geo);
+
+static const struct block_device_operations xd_fops = {
+ .owner = THIS_MODULE,
+ .ioctl = xd_ioctl,
+ .getgeo = xd_getgeo,
+};
+static DECLARE_WAIT_QUEUE_HEAD(xd_wait_int);
+static u_char xd_drives, xd_irq = 5, xd_dma = 3, xd_maxsectors;
+static u_char xd_override __initdata = 0, xd_type __initdata = 0;
+static u_short xd_iobase = 0x320;
+static int xd_geo[XD_MAXDRIVES*3] __initdata = { 0, };
+
+static volatile int xdc_busy;
+static struct timer_list xd_watchdog_int;
+
+static volatile u_char xd_error;
+static int nodma = XD_DONT_USE_DMA;
+
+static struct request_queue *xd_queue;
+
+/* xd_init: register the block device number and set up pointer tables */
+static int __init xd_init(void)
+{
+ u_char i,controller;
+ unsigned int address;
+ int err;
+
+#ifdef MODULE
+ {
+ u_char count = 0;
+ for (i = 4; i > 0; i--)
+ if (((xd[i] = xd[i-1]) >= 0) && !count)
+ count = i;
+ if ((xd[0] = count))
+ do_xd_setup(xd);
+ }
+#endif
+
+	init_timer(&xd_watchdog_int);
+	xd_watchdog_int.function = xd_watchdog;
+
+ err = -EBUSY;
+ if (register_blkdev(XT_DISK_MAJOR, "xd"))
+ goto out1;
+
+ err = -ENOMEM;
+ xd_queue = blk_init_queue(do_xd_request, &xd_lock);
+ if (!xd_queue)
+ goto out1a;
+
+ if (xd_detect(&controller,&address)) {
+
+ printk("Detected a%s controller (type %d) at address %06x\n",
+ xd_sigs[controller].name,controller,address);
+ if (!request_region(xd_iobase,4,"xd")) {
+ printk("xd: Ports at 0x%x are not available\n",
+ xd_iobase);
+ goto out2;
+ }
+ if (controller)
+ xd_sigs[controller].init_controller(address);
+ xd_drives = xd_initdrives(xd_sigs[controller].init_drive);
+
+ printk("Detected %d hard drive%s (using IRQ%d & DMA%d)\n",
+ xd_drives,xd_drives == 1 ? "" : "s",xd_irq,xd_dma);
+ }
+
+ /*
+ * With the drive detected, xd_maxsectors should now be known.
+ * If xd_maxsectors is 0, nothing was detected and we fall through
+ * to return -ENODEV
+ */
+ if (!xd_dma_buffer && xd_maxsectors) {
+ xd_dma_buffer = (char *)xd_dma_mem_alloc(xd_maxsectors * 0x200);
+ if (!xd_dma_buffer) {
+ printk(KERN_ERR "xd: Out of memory.\n");
+ goto out3;
+ }
+ }
+
+ err = -ENODEV;
+ if (!xd_drives)
+ goto out3;
+
+ for (i = 0; i < xd_drives; i++) {
+ XD_INFO *p = &xd_info[i];
+ struct gendisk *disk = alloc_disk(64);
+ if (!disk)
+ goto Enomem;
+ p->unit = i;
+ disk->major = XT_DISK_MAJOR;
+ disk->first_minor = i<<6;
+ sprintf(disk->disk_name, "xd%c", i+'a');
+ disk->fops = &xd_fops;
+ disk->private_data = p;
+ disk->queue = xd_queue;
+ set_capacity(disk, p->heads * p->cylinders * p->sectors);
+ printk(" %s: CHS=%d/%d/%d\n", disk->disk_name,
+ p->cylinders, p->heads, p->sectors);
+ xd_gendisk[i] = disk;
+ }
+
+ err = -EBUSY;
+ if (request_irq(xd_irq,xd_interrupt_handler, 0, "XT hard disk", NULL)) {
+ printk("xd: unable to get IRQ%d\n",xd_irq);
+ goto out4;
+ }
+
+ if (request_dma(xd_dma,"xd")) {
+ printk("xd: unable to get DMA%d\n",xd_dma);
+ goto out5;
+ }
+
+ /* xd_maxsectors depends on controller - so set after detection */
+ blk_queue_max_hw_sectors(xd_queue, xd_maxsectors);
+
+ for (i = 0; i < xd_drives; i++)
+ add_disk(xd_gendisk[i]);
+
+ return 0;
+
+out5:
+ free_irq(xd_irq, NULL);
+out4:
+ for (i = 0; i < xd_drives; i++)
+ put_disk(xd_gendisk[i]);
+out3:
+ if (xd_maxsectors)
+ release_region(xd_iobase,4);
+
+ if (xd_dma_buffer)
+ xd_dma_mem_free((unsigned long)xd_dma_buffer,
+ xd_maxsectors * 0x200);
+out2:
+ blk_cleanup_queue(xd_queue);
+out1a:
+ unregister_blkdev(XT_DISK_MAJOR, "xd");
+out1:
+ return err;
+Enomem:
+ err = -ENOMEM;
+ while (i--)
+ put_disk(xd_gendisk[i]);
+ goto out3;
+}
+
+/* xd_detect: scan the possible BIOS ROM locations for the signature strings */
+static u_char __init xd_detect (u_char *controller, unsigned int *address)
+{
+ int i, j;
+
+ if (xd_override)
+ {
+ *controller = xd_type;
+ *address = 0;
+ return(1);
+ }
+
+ for (i = 0; i < ARRAY_SIZE(xd_bases); i++) {
+ void __iomem *p = ioremap(xd_bases[i], 0x2000);
+ if (!p)
+ continue;
+ for (j = 1; j < ARRAY_SIZE(xd_sigs); j++) {
+ const char *s = xd_sigs[j].string;
+ if (check_signature(p + xd_sigs[j].offset, s, strlen(s))) {
+ *controller = j;
+ xd_type = j;
+ *address = xd_bases[i];
+ iounmap(p);
+ return 1;
+ }
+ }
+ iounmap(p);
+ }
+ return 0;
+}
+
+/* do_xd_request: handle an incoming request */
+static void do_xd_request (struct request_queue * q)
+{
+ struct request *req;
+
+ if (xdc_busy)
+ return;
+
+ req = blk_fetch_request(q);
+ while (req) {
+ unsigned block = blk_rq_pos(req);
+ unsigned count = blk_rq_cur_sectors(req);
+ XD_INFO *disk = req->rq_disk->private_data;
+ int res = -EIO;
+ int retry;
+
+ if (req->cmd_type != REQ_TYPE_FS)
+ goto done;
+ if (block + count > get_capacity(req->rq_disk))
+ goto done;
+		/* retry while the transfer keeps failing; stop on the first success */
+		for (retry = 0; (retry < XD_RETRIES) && res; retry++)
+ res = xd_readwrite(rq_data_dir(req), disk, req->buffer,
+ block, count);
+ done:
+ /* wrap up, 0 = success, -errno = fail */
+ if (!__blk_end_request_cur(req, res))
+ req = blk_fetch_request(q);
+ }
+}
+
+static int xd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
+{
+ XD_INFO *p = bdev->bd_disk->private_data;
+
+ geo->heads = p->heads;
+ geo->sectors = p->sectors;
+ geo->cylinders = p->cylinders;
+ return 0;
+}
+
+/* xd_ioctl: handle device ioctl's */
+static int xd_locked_ioctl(struct block_device *bdev, fmode_t mode, u_int cmd, u_long arg)
+{
+ switch (cmd) {
+ case HDIO_SET_DMA:
+ if (!capable(CAP_SYS_ADMIN)) return -EACCES;
+ if (xdc_busy) return -EBUSY;
+ nodma = !arg;
+ if (nodma && xd_dma_buffer) {
+ xd_dma_mem_free((unsigned long)xd_dma_buffer,
+ xd_maxsectors * 0x200);
+ xd_dma_buffer = NULL;
+ } else if (!nodma && !xd_dma_buffer) {
+ xd_dma_buffer = (char *)xd_dma_mem_alloc(xd_maxsectors * 0x200);
+ if (!xd_dma_buffer) {
+ nodma = XD_DONT_USE_DMA;
+ return -ENOMEM;
+ }
+ }
+ return 0;
+ case HDIO_GET_DMA:
+ return put_user(!nodma, (long __user *) arg);
+ case HDIO_GET_MULTCOUNT:
+ return put_user(xd_maxsectors, (long __user *) arg);
+ default:
+ return -EINVAL;
+ }
+}
+
+static int xd_ioctl(struct block_device *bdev, fmode_t mode,
+ unsigned int cmd, unsigned long param)
+{
+ int ret;
+
+ mutex_lock(&xd_mutex);
+ ret = xd_locked_ioctl(bdev, mode, cmd, param);
+ mutex_unlock(&xd_mutex);
+
+ return ret;
+}
+
+/* xd_readwrite: handle a read/write request */
+static int xd_readwrite (u_char operation,XD_INFO *p,char *buffer,u_int block,u_int count)
+{
+ int drive = p->unit;
+ u_char cmdblk[6],sense[4];
+ u_short track,cylinder;
+ u_char head,sector,control,mode = PIO_MODE,temp;
+ char **real_buffer;
+ register int i;
+
+#ifdef DEBUG_READWRITE
+ printk("xd_readwrite: operation = %s, drive = %d, buffer = 0x%X, block = %d, count = %d\n",operation == READ ? "read" : "write",drive,buffer,block,count);
+#endif /* DEBUG_READWRITE */
+
+ spin_unlock_irq(&xd_lock);
+
+ control = p->control;
+ if (!xd_dma_buffer)
+ xd_dma_buffer = (char *)xd_dma_mem_alloc(xd_maxsectors * 0x200);
+ while (count) {
+ temp = count < xd_maxsectors ? count : xd_maxsectors;
+
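+		/* convert the linear block number into CHS for the controller */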
+ track = block / p->sectors;
+ head = track % p->heads;
+ cylinder = track / p->heads;
+ sector = block % p->sectors;
+
+#ifdef DEBUG_READWRITE
+ printk("xd_readwrite: drive = %d, head = %d, cylinder = %d, sector = %d, count = %d\n",drive,head,cylinder,sector,temp);
+#endif /* DEBUG_READWRITE */
+
+ if (xd_dma_buffer) {
+ mode = xd_setup_dma(operation == READ ? DMA_MODE_READ : DMA_MODE_WRITE,(u_char *)(xd_dma_buffer),temp * 0x200);
+ real_buffer = &xd_dma_buffer;
+ for (i=0; i < (temp * 0x200); i++)
+ xd_dma_buffer[i] = buffer[i];
+ }
+ else
+ real_buffer = &buffer;
+
+ xd_build(cmdblk,operation == READ ? CMD_READ : CMD_WRITE,drive,head,cylinder,sector,temp & 0xFF,control);
+
+ switch (xd_command(cmdblk,mode,(u_char *)(*real_buffer),(u_char *)(*real_buffer),sense,XD_TIMEOUT)) {
+ case 1:
+ printk("xd%c: %s timeout, recalibrating drive\n",'a'+drive,(operation == READ ? "read" : "write"));
+ xd_recalibrate(drive);
+ spin_lock_irq(&xd_lock);
+ return -EIO;
+ case 2:
+ if (sense[0] & 0x30) {
+ printk("xd%c: %s - ",'a'+drive,(operation == READ ? "reading" : "writing"));
+ switch ((sense[0] & 0x30) >> 4) {
+ case 0: printk("drive error, code = 0x%X",sense[0] & 0x0F);
+ break;
+ case 1: printk("controller error, code = 0x%X",sense[0] & 0x0F);
+ break;
+ case 2: printk("command error, code = 0x%X",sense[0] & 0x0F);
+ break;
+ case 3: printk("miscellaneous error, code = 0x%X",sense[0] & 0x0F);
+ break;
+ }
+ }
+ if (sense[0] & 0x80)
+ printk(" - CHS = %d/%d/%d\n",((sense[2] & 0xC0) << 2) | sense[3],sense[1] & 0x1F,sense[2] & 0x3F);
+ /* reported drive number = (sense[1] & 0xE0) >> 5 */
+ else
+ printk(" - no valid disk address\n");
+ spin_lock_irq(&xd_lock);
+ return -EIO;
+ }
+ if (xd_dma_buffer)
+ for (i=0; i < (temp * 0x200); i++)
+ buffer[i] = xd_dma_buffer[i];
+
+ count -= temp, buffer += temp * 0x200, block += temp;
+ }
+ spin_lock_irq(&xd_lock);
+ return 0;
+}
+
+/* xd_recalibrate: recalibrate a given drive and reset controller if necessary */
+static void xd_recalibrate (u_char drive)
+{
+ u_char cmdblk[6];
+
+ xd_build(cmdblk,CMD_RECALIBRATE,drive,0,0,0,0,0);
+ if (xd_command(cmdblk,PIO_MODE,NULL,NULL,NULL,XD_TIMEOUT * 8))
+ printk("xd%c: warning! error recalibrating, controller may be unstable\n", 'a'+drive);
+}
+
+/* xd_interrupt_handler: interrupt service routine */
+static irqreturn_t xd_interrupt_handler(int irq, void *dev_id)
+{
+ if (inb(XD_STATUS) & STAT_INTERRUPT) { /* check if it was our device */
+#ifdef DEBUG_OTHER
+ printk("xd_interrupt_handler: interrupt detected\n");
+#endif /* DEBUG_OTHER */
+ outb(0,XD_CONTROL); /* acknowledge interrupt */
+ wake_up(&xd_wait_int); /* and wake up sleeping processes */
+ return IRQ_HANDLED;
+ }
+ else
+ printk("xd: unexpected interrupt\n");
+ return IRQ_NONE;
+}
+
+/* xd_setup_dma: set up the DMA controller for a data transfer */
+static u_char xd_setup_dma (u_char mode,u_char *buffer,u_int count)
+{
+ unsigned long f;
+
+ if (nodma)
+ return (PIO_MODE);
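+	/* ISA DMA cannot cross a 64 KiB physical boundary; fall back to PIO
+	   if this transfer would */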
+ if (((unsigned long) buffer & 0xFFFF0000) != (((unsigned long) buffer + count) & 0xFFFF0000)) {
+#ifdef DEBUG_OTHER
+ printk("xd_setup_dma: using PIO, transfer overlaps 64k boundary\n");
+#endif /* DEBUG_OTHER */
+ return (PIO_MODE);
+ }
+
+ f=claim_dma_lock();
+ disable_dma(xd_dma);
+ clear_dma_ff(xd_dma);
+ set_dma_mode(xd_dma,mode);
+ set_dma_addr(xd_dma, (unsigned long) buffer);
+ set_dma_count(xd_dma,count);
+
+ release_dma_lock(f);
+
+ return (DMA_MODE); /* use DMA and INT */
+}
+
+/* xd_build: put stuff into an array in a format suitable for the controller */
+static u_char *xd_build (u_char *cmdblk,u_char command,u_char drive,u_char head,u_short cylinder,u_char sector,u_char count,u_char control)
+{
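+	/* pack the 6-byte command block: byte 1 holds drive (bits 5-7) and
+	   head (bits 0-4), byte 2 holds cylinder bits 8-9 (in bits 6-7) and
+	   sector (bits 0-5), byte 3 the low cylinder byte */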
+ cmdblk[0] = command;
+ cmdblk[1] = ((drive & 0x07) << 5) | (head & 0x1F);
+ cmdblk[2] = ((cylinder & 0x300) >> 2) | (sector & 0x3F);
+ cmdblk[3] = cylinder & 0xFF;
+ cmdblk[4] = count;
+ cmdblk[5] = control;
+
+ return (cmdblk);
+}
+
+static void xd_watchdog (unsigned long unused)
+{
+ xd_error = 1;
+ wake_up(&xd_wait_int);
+}
+
+/* xd_waitport: waits until port & mask == flags or a timeout occurs. return 1 for a timeout */
+static inline u_char xd_waitport (u_short port,u_char flags,u_char mask,u_long timeout)
+{
+ u_long expiry = jiffies + timeout;
+ int success;
+
+ xdc_busy = 1;
+ while ((success = ((inb(port) & mask) != flags)) && time_before(jiffies, expiry))
+ schedule_timeout_uninterruptible(1);
+ xdc_busy = 0;
+ return (success);
+}
+
+static inline u_int xd_wait_for_IRQ (void)
+{
+ unsigned long flags;
+ xd_watchdog_int.expires = jiffies + 8 * HZ;
+ add_timer(&xd_watchdog_int);
+
+ flags=claim_dma_lock();
+ enable_dma(xd_dma);
+ release_dma_lock(flags);
+
+ sleep_on(&xd_wait_int);
+ del_timer(&xd_watchdog_int);
+ xdc_busy = 0;
+
+ flags=claim_dma_lock();
+ disable_dma(xd_dma);
+ release_dma_lock(flags);
+
+ if (xd_error) {
+ printk("xd: missed IRQ - command aborted\n");
+ xd_error = 0;
+ return (1);
+ }
+ return (0);
+}
+
+/* xd_command: handle all data transfers necessary for a single command */
+static u_int xd_command (u_char *command,u_char mode,u_char *indata,u_char *outdata,u_char *sense,u_long timeout)
+{
+ u_char cmdblk[6],csb,complete = 0;
+
+#ifdef DEBUG_COMMAND
+ printk("xd_command: command = 0x%X, mode = 0x%X, indata = 0x%X, outdata = 0x%X, sense = 0x%X\n",command,mode,indata,outdata,sense);
+#endif /* DEBUG_COMMAND */
+
+ outb(0,XD_SELECT);
+ outb(mode,XD_CONTROL);
+
+ if (xd_waitport(XD_STATUS,STAT_SELECT,STAT_SELECT,timeout))
+ return (1);
+
+ while (!complete) {
+ if (xd_waitport(XD_STATUS,STAT_READY,STAT_READY,timeout))
+ return (1);
+
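+		/* the status bits indicate the controller's current phase:
+		   data out, data in, next command byte wanted, or complete */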
+ switch (inb(XD_STATUS) & (STAT_COMMAND | STAT_INPUT)) {
+ case 0:
+ if (mode == DMA_MODE) {
+ if (xd_wait_for_IRQ())
+ return (1);
+ } else
+ outb(outdata ? *outdata++ : 0,XD_DATA);
+ break;
+ case STAT_INPUT:
+ if (mode == DMA_MODE) {
+ if (xd_wait_for_IRQ())
+ return (1);
+ } else
+ if (indata)
+ *indata++ = inb(XD_DATA);
+ else
+ inb(XD_DATA);
+ break;
+ case STAT_COMMAND:
+ outb(command ? *command++ : 0,XD_DATA);
+ break;
+ case STAT_COMMAND | STAT_INPUT:
+ complete = 1;
+ break;
+ }
+ }
+ csb = inb(XD_DATA);
+
+ if (xd_waitport(XD_STATUS,0,STAT_SELECT,timeout)) /* wait until deselected */
+ return (1);
+
+ if (csb & CSB_ERROR) { /* read sense data if error */
+ xd_build(cmdblk,CMD_SENSE,(csb & CSB_LUN) >> 5,0,0,0,0,0);
+ if (xd_command(cmdblk,0,sense,NULL,NULL,XD_TIMEOUT))
+ printk("xd: warning! sense command failed!\n");
+ }
+
+#ifdef DEBUG_COMMAND
+ printk("xd_command: completed with csb = 0x%X\n",csb);
+#endif /* DEBUG_COMMAND */
+
+ return (csb & CSB_ERROR);
+}
+
+static u_char __init xd_initdrives (void (*init_drive)(u_char drive))
+{
+ u_char cmdblk[6],i,count = 0;
+
+ for (i = 0; i < XD_MAXDRIVES; i++) {
+ xd_build(cmdblk,CMD_TESTREADY,i,0,0,0,0,0);
+ if (!xd_command(cmdblk,PIO_MODE,NULL,NULL,NULL,XD_TIMEOUT*8)) {
+ msleep_interruptible(XD_INIT_DISK_DELAY);
+
+ init_drive(count);
+ count++;
+
+ msleep_interruptible(XD_INIT_DISK_DELAY);
+ }
+ }
+ return (count);
+}
+
+static void __init xd_manual_geo_set (u_char drive)
+{
+ xd_info[drive].heads = (u_char)(xd_geo[3 * drive + 1]);
+ xd_info[drive].cylinders = (u_short)(xd_geo[3 * drive]);
+ xd_info[drive].sectors = (u_char)(xd_geo[3 * drive + 2]);
+}
+
+static void __init xd_dtc_init_controller (unsigned int address)
+{
+ switch (address) {
+ case 0x00000:
+ case 0xC8000: break; /*initial: 0x320 */
+ case 0xCA000: xd_iobase = 0x324;
+ case 0xD0000: /*5150CX*/
+ case 0xD8000: break; /*5150CX & 5150XL*/
+ default: printk("xd_dtc_init_controller: unsupported BIOS address %06x\n",address);
+ break;
+ }
+ xd_maxsectors = 0x01; /* my card seems to have trouble doing multi-block transfers? */
+
+ outb(0,XD_RESET); /* reset the controller */
+}
+
+
+static void __init xd_dtc5150cx_init_drive (u_char drive)
+{
+ /* values from controller's BIOS - BIOS chip may be removed */
+ static u_short geometry_table[][4] = {
+ {0x200,8,0x200,0x100},
+ {0x267,2,0x267,0x267},
+ {0x264,4,0x264,0x80},
+ {0x132,4,0x132,0x0},
+ {0x132,2,0x80, 0x132},
+ {0x177,8,0x177,0x0},
+ {0x132,8,0x84, 0x0},
+ {}, /* not used */
+ {0x132,6,0x80, 0x100},
+ {0x200,6,0x100,0x100},
+ {0x264,2,0x264,0x80},
+ {0x280,4,0x280,0x100},
+ {0x2B9,3,0x2B9,0x2B9},
+ {0x2B9,5,0x2B9,0x2B9},
+ {0x280,6,0x280,0x100},
+ {0x132,4,0x132,0x0}};
+ u_char n;
+
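+	/* read the jumper block and fold this drive's two jumper bit pairs
+	   into a 4-bit index into geometry_table */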
+ n = inb(XD_JUMPER);
+ n = (drive ? n : (n >> 2)) & 0x33;
+ n = (n | (n >> 2)) & 0x0F;
+ if (xd_geo[3*drive])
+ xd_manual_geo_set(drive);
+ else
+ if (n != 7) {
+ xd_info[drive].heads = (u_char)(geometry_table[n][1]); /* heads */
+ xd_info[drive].cylinders = geometry_table[n][0]; /* cylinders */
+ xd_info[drive].sectors = 17; /* sectors */
+#if 0
+ xd_info[drive].rwrite = geometry_table[n][2]; /* reduced write */
+			xd_info[drive].precomp = geometry_table[n][3];	/* write precomp */
+ xd_info[drive].ecc = 0x0B; /* ecc length */
+#endif /* 0 */
+ }
+ else {
+ printk("xd%c: undetermined drive geometry\n",'a'+drive);
+ return;
+ }
+ xd_info[drive].control = 5; /* control byte */
+ xd_setparam(CMD_DTCSETPARAM,drive,xd_info[drive].heads,xd_info[drive].cylinders,geometry_table[n][2],geometry_table[n][3],0x0B);
+ xd_recalibrate(drive);
+}
+
+static void __init xd_dtc_init_drive (u_char drive)
+{
+ u_char cmdblk[6],buf[64];
+
+ xd_build(cmdblk,CMD_DTCGETGEOM,drive,0,0,0,0,0);
+ if (!xd_command(cmdblk,PIO_MODE,buf,NULL,NULL,XD_TIMEOUT * 2)) {
+ xd_info[drive].heads = buf[0x0A]; /* heads */
+ xd_info[drive].cylinders = ((u_short *) (buf))[0x04]; /* cylinders */
+ xd_info[drive].sectors = 17; /* sectors */
+ if (xd_geo[3*drive])
+ xd_manual_geo_set(drive);
+#if 0
+ xd_info[drive].rwrite = ((u_short *) (buf + 1))[0x05]; /* reduced write */
+ xd_info[drive].precomp = ((u_short *) (buf + 1))[0x06]; /* write precomp */
+ xd_info[drive].ecc = buf[0x0F]; /* ecc length */
+#endif /* 0 */
+ xd_info[drive].control = 0; /* control byte */
+
+ xd_setparam(CMD_DTCSETPARAM,drive,xd_info[drive].heads,xd_info[drive].cylinders,((u_short *) (buf + 1))[0x05],((u_short *) (buf + 1))[0x06],buf[0x0F]);
+ xd_build(cmdblk,CMD_DTCSETSTEP,drive,0,0,0,0,7);
+ if (xd_command(cmdblk,PIO_MODE,NULL,NULL,NULL,XD_TIMEOUT * 2))
+ printk("xd_dtc_init_drive: error setting step rate for xd%c\n", 'a'+drive);
+ }
+ else
+ printk("xd_dtc_init_drive: error reading geometry for xd%c\n", 'a'+drive);
+}
+
+static void __init xd_wd_init_controller (unsigned int address)
+{
+ switch (address) {
+ case 0x00000:
+ case 0xC8000: break; /*initial: 0x320 */
+ case 0xCA000: xd_iobase = 0x324; break;
+ case 0xCC000: xd_iobase = 0x328; break;
+ case 0xCE000: xd_iobase = 0x32C; break;
+ case 0xD0000: xd_iobase = 0x328; break; /* ? */
+ case 0xD8000: xd_iobase = 0x32C; break; /* ? */
+ default: printk("xd_wd_init_controller: unsupported BIOS address %06x\n",address);
+ break;
+ }
+ xd_maxsectors = 0x01; /* this one doesn't wrap properly either... */
+
+ outb(0,XD_RESET); /* reset the controller */
+
+ msleep(XD_INIT_DISK_DELAY);
+}
+
+static void __init xd_wd_init_drive (u_char drive)
+{
+ /* values from controller's BIOS - BIOS may be disabled */
+ static u_short geometry_table[][4] = {
+ {0x264,4,0x1C2,0x1C2}, /* common part */
+ {0x132,4,0x099,0x0},
+ {0x267,2,0x1C2,0x1C2},
+ {0x267,4,0x1C2,0x1C2},
+
+ {0x334,6,0x335,0x335}, /* 1004 series RLL */
+ {0x30E,4,0x30F,0x3DC},
+ {0x30E,2,0x30F,0x30F},
+ {0x267,4,0x268,0x268},
+
+ {0x3D5,5,0x3D6,0x3D6}, /* 1002 series RLL */
+ {0x3DB,7,0x3DC,0x3DC},
+ {0x264,4,0x265,0x265},
+ {0x267,4,0x268,0x268}};
+
+ u_char cmdblk[6],buf[0x200];
+ u_char n = 0,rll,jumper_state,use_jumper_geo;
+ u_char wd_1002 = (xd_sigs[xd_type].string[7] == '6');
+
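+	/* jumpers are read back inverted from port 0x322: bit 6 selects IRQ 9,
+	   bits 4-5 flag an RLL drive type */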
+ jumper_state = ~(inb(0x322));
+ if (jumper_state & 0x40)
+ xd_irq = 9;
+ rll = (jumper_state & 0x30) ? (0x04 << wd_1002) : 0;
+ xd_build(cmdblk,CMD_READ,drive,0,0,0,1,0);
+ if (!xd_command(cmdblk,PIO_MODE,buf,NULL,NULL,XD_TIMEOUT * 2)) {
+ xd_info[drive].heads = buf[0x1AF]; /* heads */
+ xd_info[drive].cylinders = ((u_short *) (buf + 1))[0xD6]; /* cylinders */
+ xd_info[drive].sectors = 17; /* sectors */
+ if (xd_geo[3*drive])
+ xd_manual_geo_set(drive);
+#if 0
+ xd_info[drive].rwrite = ((u_short *) (buf))[0xD8]; /* reduced write */
+ xd_info[drive].wprecomp = ((u_short *) (buf))[0xDA]; /* write precomp */
+ xd_info[drive].ecc = buf[0x1B4]; /* ecc length */
+#endif /* 0 */
+ xd_info[drive].control = buf[0x1B5]; /* control byte */
+ use_jumper_geo = !(xd_info[drive].heads) || !(xd_info[drive].cylinders);
+ if (xd_geo[3*drive]) {
+ xd_manual_geo_set(drive);
+ xd_info[drive].control = rll ? 7 : 5;
+ }
+ else if (use_jumper_geo) {
+ n = (((jumper_state & 0x0F) >> (drive << 1)) & 0x03) | rll;
+ xd_info[drive].cylinders = geometry_table[n][0];
+ xd_info[drive].heads = (u_char)(geometry_table[n][1]);
+ xd_info[drive].control = rll ? 7 : 5;
+#if 0
+ xd_info[drive].rwrite = geometry_table[n][2];
+ xd_info[drive].wprecomp = geometry_table[n][3];
+ xd_info[drive].ecc = 0x0B;
+#endif /* 0 */
+ }
+ if (!wd_1002) {
+ if (use_jumper_geo)
+ xd_setparam(CMD_WDSETPARAM,drive,xd_info[drive].heads,xd_info[drive].cylinders,
+ geometry_table[n][2],geometry_table[n][3],0x0B);
+ else
+ xd_setparam(CMD_WDSETPARAM,drive,xd_info[drive].heads,xd_info[drive].cylinders,
+ ((u_short *) (buf))[0xD8],((u_short *) (buf))[0xDA],buf[0x1B4]);
+ }
+		/* A 1002-based RLL controller expects converted addressing but reports
+		   physical geometry (physical 26 sectors, logical 17 sectors);
+		   1004-based behaviour is unknown */
+		if (rll && wd_1002) {
+ if ((xd_info[drive].cylinders *= 26,
+ xd_info[drive].cylinders /= 17) > 1023)
+ xd_info[drive].cylinders = 1023; /* 1024 ? */
+#if 0
+ xd_info[drive].rwrite *= 26;
+ xd_info[drive].rwrite /= 17;
+			xd_info[drive].wprecomp *= 26;
+ xd_info[drive].wprecomp /= 17;
+#endif /* 0 */
+ }
+ }
+ else
+ printk("xd_wd_init_drive: error reading geometry for xd%c\n",'a'+drive);
+
+}
+
+static void __init xd_seagate_init_controller (unsigned int address)
+{
+ switch (address) {
+ case 0x00000:
+ case 0xC8000: break; /*initial: 0x320 */
+ case 0xD0000: xd_iobase = 0x324; break;
+ case 0xD8000: xd_iobase = 0x328; break;
+ case 0xE0000: xd_iobase = 0x32C; break;
+ default: printk("xd_seagate_init_controller: unsupported BIOS address %06x\n",address);
+ break;
+ }
+ xd_maxsectors = 0x40;
+
+ outb(0,XD_RESET); /* reset the controller */
+}
+
+static void __init xd_seagate_init_drive (u_char drive)
+{
+ u_char cmdblk[6],buf[0x200];
+
+ xd_build(cmdblk,CMD_ST11GETGEOM,drive,0,0,0,1,0);
+ if (!xd_command(cmdblk,PIO_MODE,buf,NULL,NULL,XD_TIMEOUT * 2)) {
+ xd_info[drive].heads = buf[0x04]; /* heads */
+ xd_info[drive].cylinders = (buf[0x02] << 8) | buf[0x03]; /* cylinders */
+ xd_info[drive].sectors = buf[0x05]; /* sectors */
+ xd_info[drive].control = 0; /* control byte */
+ }
+ else
+ printk("xd_seagate_init_drive: error reading geometry from xd%c\n", 'a'+drive);
+}
+
+/* Omti support courtesy Dirk Melchers */
+static void __init xd_omti_init_controller (unsigned int address)
+{
+ switch (address) {
+ case 0x00000:
+ case 0xC8000: break; /*initial: 0x320 */
+ case 0xD0000: xd_iobase = 0x324; break;
+ case 0xD8000: xd_iobase = 0x328; break;
+ case 0xE0000: xd_iobase = 0x32C; break;
+ default: printk("xd_omti_init_controller: unsupported BIOS address %06x\n",address);
+ break;
+ }
+
+ xd_maxsectors = 0x40;
+
+ outb(0,XD_RESET); /* reset the controller */
+}
+
+static void __init xd_omti_init_drive (u_char drive)
+{
+	/* get geometry information from the drive itself */
+ xd_override_init_drive(drive);
+
+	/* set other parameters; hardcoded, not that nice :-) */
+ xd_info[drive].control = 2;
+}
+
+/* Xebec support (AK) */
+static void __init xd_xebec_init_controller (unsigned int address)
+{
+/* iobase may be set manually in the range 0x300 - 0x33C
+   irq may be set manually to 2(9),3,4,5,6,7
+   dma may be set manually to 1,2,3
+   (How to detect them ???)
+   BIOS address may be set manually in the range 0x0 - 0xF8000
+   If you need non-standard settings use the xd=... command */
+
+ switch (address) {
+ case 0x00000:
+ case 0xC8000: /* initially: xd_iobase==0x320 */
+ case 0xD0000:
+ case 0xD2000:
+ case 0xD4000:
+ case 0xD6000:
+ case 0xD8000:
+ case 0xDA000:
+ case 0xDC000:
+ case 0xDE000:
+ case 0xE0000: break;
+ default: printk("xd_xebec_init_controller: unsupported BIOS address %06x\n",address);
+ break;
+ }
+
+ xd_maxsectors = 0x01;
+ outb(0,XD_RESET); /* reset the controller */
+
+ msleep(XD_INIT_DISK_DELAY);
+}
+
+static void __init xd_xebec_init_drive (u_char drive)
+{
+ /* values from controller's BIOS - BIOS chip may be removed */
+ static u_short geometry_table[][5] = {
+ {0x132,4,0x080,0x080,0x7},
+ {0x132,4,0x080,0x080,0x17},
+ {0x264,2,0x100,0x100,0x7},
+ {0x264,2,0x100,0x100,0x17},
+ {0x132,8,0x080,0x080,0x7},
+ {0x132,8,0x080,0x080,0x17},
+ {0x264,4,0x100,0x100,0x6},
+ {0x264,4,0x100,0x100,0x17},
+ {0x2BC,5,0x2BC,0x12C,0x6},
+ {0x3A5,4,0x3A5,0x3A5,0x7},
+ {0x26C,6,0x26C,0x26C,0x7},
+ {0x200,8,0x200,0x100,0x17},
+ {0x400,5,0x400,0x400,0x7},
+ {0x400,6,0x400,0x400,0x7},
+ {0x264,8,0x264,0x200,0x17},
+ {0x33E,7,0x33E,0x200,0x7}};
+ u_char n;
+
+ n = inb(XD_JUMPER) & 0x0F; /* BIOS's drive number: same geometry
+ is assumed for BOTH drives */
+ if (xd_geo[3*drive])
+ xd_manual_geo_set(drive);
+ else {
+ xd_info[drive].heads = (u_char)(geometry_table[n][1]); /* heads */
+ xd_info[drive].cylinders = geometry_table[n][0]; /* cylinders */
+ xd_info[drive].sectors = 17; /* sectors */
+#if 0
+ xd_info[drive].rwrite = geometry_table[n][2]; /* reduced write */
+		xd_info[drive].precomp = geometry_table[n][3];	/* write precomp */
+ xd_info[drive].ecc = 0x0B; /* ecc length */
+#endif /* 0 */
+ }
+ xd_info[drive].control = geometry_table[n][4]; /* control byte */
+ xd_setparam(CMD_XBSETPARAM,drive,xd_info[drive].heads,xd_info[drive].cylinders,geometry_table[n][2],geometry_table[n][3],0x0B);
+ xd_recalibrate(drive);
+}
+
+/* xd_override_init_drive: this finds disk geometry in a "binary search" style, narrowing in on the "correct" number of heads
+ etc. by trying values until it gets the highest successful value. Idea courtesy Salvador Abreu (spa@fct.unl.pt). */
+static void __init xd_override_init_drive (u_char drive)
+{
+ u_short min[] = { 0,0,0 },max[] = { 16,1024,64 },test[] = { 0,0,0 };
+ u_char cmdblk[6],i;
+
+ if (xd_geo[3*drive])
+ xd_manual_geo_set(drive);
+ else {
+ for (i = 0; i < 3; i++) {
+ while (min[i] != max[i] - 1) {
+ test[i] = (min[i] + max[i]) / 2;
+ xd_build(cmdblk,CMD_SEEK,drive,(u_char) test[0],(u_short) test[1],(u_char) test[2],0,0);
+ if (!xd_command(cmdblk,PIO_MODE,NULL,NULL,NULL,XD_TIMEOUT * 2))
+ min[i] = test[i];
+ else
+ max[i] = test[i];
+ }
+ test[i] = min[i];
+ }
+ xd_info[drive].heads = (u_char) min[0] + 1;
+ xd_info[drive].cylinders = (u_short) min[1] + 1;
+ xd_info[drive].sectors = (u_char) min[2] + 1;
+ }
+ xd_info[drive].control = 0;
+}
+
+/* xd_setup: initialise controller from command line parameters */
+static void __init do_xd_setup (int *integers)
+{
+ switch (integers[0]) {
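+	/* each case deliberately falls through so that higher parameter counts
+	   also apply the lower-numbered settings */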
+ case 4: if (integers[4] < 0)
+ nodma = 1;
+ else if (integers[4] < 8)
+ xd_dma = integers[4];
+ case 3: if ((integers[3] > 0) && (integers[3] <= 0x3FC))
+ xd_iobase = integers[3];
+ case 2: if ((integers[2] > 0) && (integers[2] < 16))
+ xd_irq = integers[2];
+ case 1: xd_override = 1;
+ if ((integers[1] >= 0) && (integers[1] < ARRAY_SIZE(xd_sigs)))
+ xd_type = integers[1];
+ case 0: break;
+ default:printk("xd: too many parameters for xd\n");
+ }
+ xd_maxsectors = 0x01;
+}
+
+/* xd_setparam: set the drive characteristics */
+static void __init xd_setparam (u_char command,u_char drive,u_char heads,u_short cylinders,u_short rwrite,u_short wprecomp,u_char ecc)
+{
+ u_char cmdblk[14];
+
+ xd_build(cmdblk,command,drive,0,0,0,0,0);
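+	/* bytes 6-13 hold the drive characteristics: cylinders (10 bits, high
+	   bits first), heads (5 bits), reduced-write and write-precompensation
+	   start cylinders (10 bits each), and the ECC burst length */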
+ cmdblk[6] = (u_char) (cylinders >> 8) & 0x03;
+ cmdblk[7] = (u_char) (cylinders & 0xFF);
+ cmdblk[8] = heads & 0x1F;
+ cmdblk[9] = (u_char) (rwrite >> 8) & 0x03;
+ cmdblk[10] = (u_char) (rwrite & 0xFF);
+ cmdblk[11] = (u_char) (wprecomp >> 8) & 0x03;
+ cmdblk[12] = (u_char) (wprecomp & 0xFF);
+ cmdblk[13] = ecc;
+
+ /* Some controllers require geometry info as data, not command */
+
+ if (xd_command(cmdblk,PIO_MODE,NULL,&cmdblk[6],NULL,XD_TIMEOUT * 2))
+ printk("xd: error setting characteristics for xd%c\n", 'a'+drive);
+}
+
+
+#ifdef MODULE
+
+module_param_array(xd, int, NULL, 0);
+module_param_array(xd_geo, int, NULL, 0);
+module_param(nodma, bool, 0);
+
+MODULE_LICENSE("GPL");
+
+void cleanup_module(void)
+{
+ int i;
+ unregister_blkdev(XT_DISK_MAJOR, "xd");
+ for (i = 0; i < xd_drives; i++) {
+ del_gendisk(xd_gendisk[i]);
+ put_disk(xd_gendisk[i]);
+ }
+ blk_cleanup_queue(xd_queue);
+ release_region(xd_iobase,4);
+ if (xd_drives) {
+ free_irq(xd_irq, NULL);
+ free_dma(xd_dma);
+ if (xd_dma_buffer)
+ xd_dma_mem_free((unsigned long)xd_dma_buffer, xd_maxsectors * 0x200);
+ }
+}
+#else
+
+static int __init xd_setup (char *str)
+{
+ int ints[5];
+ get_options (str, ARRAY_SIZE (ints), ints);
+ do_xd_setup (ints);
+ return 1;
+}
+
+/* xd_manual_geo_init: initialise drive geometry from command line parameters
+ (used only for WD drives) */
+static int __init xd_manual_geo_init (char *str)
+{
+ int i, integers[1 + 3*XD_MAXDRIVES];
+
+ get_options (str, ARRAY_SIZE (integers), integers);
+ if (integers[0]%3 != 0) {
+ printk("xd: incorrect number of parameters for xd_geo\n");
+ return 1;
+ }
+ for (i = 0; (i < integers[0]) && (i < 3*XD_MAXDRIVES); i++)
+ xd_geo[i] = integers[i+1];
+ return 1;
+}
+
+__setup ("xd=", xd_setup);
+__setup ("xd_geo=", xd_manual_geo_init);
+
+#endif /* MODULE */
+
+module_init(xd_init);
+MODULE_ALIAS_BLOCKDEV_MAJOR(XT_DISK_MAJOR);
diff --git a/drivers/block/xd.h b/drivers/block/xd.h
new file mode 100644
index 00000000..37cacef1
--- /dev/null
+++ b/drivers/block/xd.h
@@ -0,0 +1,134 @@
+#ifndef _LINUX_XD_H
+#define _LINUX_XD_H
+
+/*
+ * This file contains the definitions for the IO ports and errors etc. for XT hard disk controllers (at least the DTC 5150X).
+ *
+ * Author: Pat Mackinlay, pat@it.com.au
+ * Date: 29/09/92
+ *
+ * Revised: 01/01/93, ...
+ *
+ * Ref: DTC 5150X Controller Specification (thanks to Kevin Fowler, kevinf@agora.rain.com)
+ * Also thanks to: Salvador Abreu, Dave Thaler, Risto Kankkunen and Wim Van Dorst.
+ */
+
+#include <linux/interrupt.h>
+
+/* XT hard disk controller registers */
+#define XD_DATA (xd_iobase + 0x00) /* data RW register */
+#define XD_RESET (xd_iobase + 0x01) /* reset WO register */
+#define XD_STATUS (xd_iobase + 0x01) /* status RO register */
+#define XD_SELECT (xd_iobase + 0x02) /* select WO register */
+#define XD_JUMPER (xd_iobase + 0x02) /* jumper RO register */
+#define XD_CONTROL (xd_iobase + 0x03) /* DMAE/INTE WO register */
+#define XD_RESERVED (xd_iobase + 0x03) /* reserved */
+
+/* XT hard disk controller commands (incomplete list) */
+#define CMD_TESTREADY 0x00 /* test drive ready */
+#define CMD_RECALIBRATE 0x01 /* recalibrate drive */
+#define CMD_SENSE 0x03 /* request sense */
+#define CMD_FORMATDRV 0x04 /* format drive */
+#define CMD_VERIFY 0x05 /* read verify */
+#define CMD_FORMATTRK 0x06 /* format track */
+#define CMD_FORMATBAD 0x07 /* format bad track */
+#define CMD_READ 0x08 /* read */
+#define CMD_WRITE 0x0A /* write */
+#define CMD_SEEK 0x0B /* seek */
+
+/* Controller specific commands */
+#define CMD_DTCSETPARAM 0x0C /* set drive parameters (DTC 5150X & CX only?) */
+#define CMD_DTCGETECC 0x0D /* get ecc error length (DTC 5150X only?) */
+#define CMD_DTCREADBUF 0x0E /* read sector buffer (DTC 5150X only?) */
+#define CMD_DTCWRITEBUF 0x0F /* write sector buffer (DTC 5150X only?) */
+#define CMD_DTCREMAPTRK 0x11 /* assign alternate track (DTC 5150X only?) */
+#define CMD_DTCGETPARAM 0xFB /* get drive parameters (DTC 5150X only?) */
+#define CMD_DTCSETSTEP 0xFC /* set step rate (DTC 5150X only?) */
+#define CMD_DTCSETGEOM 0xFE /* set geometry data (DTC 5150X only?) */
+#define CMD_DTCGETGEOM 0xFF /* get geometry data (DTC 5150X only?) */
+#define CMD_ST11GETGEOM 0xF8 /* get geometry data (Seagate ST11R/M only?) */
+#define CMD_WDSETPARAM 0x0C /* set drive parameters (WD 1004A27X only?) */
+#define CMD_XBSETPARAM 0x0C /* set drive parameters (XEBEC only?) */
+
+/* Bits for command status byte */
+#define CSB_ERROR 0x02 /* error */
+#define CSB_LUN 0x20 /* logical Unit Number */
+
+/* XT hard disk controller status bits */
+#define STAT_READY 0x01 /* controller is ready */
+#define STAT_INPUT 0x02 /* data flowing from controller to host */
+#define STAT_COMMAND 0x04 /* controller in command phase */
+#define STAT_SELECT 0x08 /* controller is selected */
+#define STAT_REQUEST 0x10 /* controller requesting data */
+#define STAT_INTERRUPT 0x20 /* controller requesting interrupt */
+
+/* XT hard disk controller control bits */
+#define PIO_MODE 0x00 /* control bits to set for PIO */
+#define DMA_MODE 0x03 /* control bits to set for DMA & interrupt */
+
+#define XD_MAXDRIVES 2 /* maximum 2 drives */
+#define XD_TIMEOUT HZ /* 1 second timeout */
+#define XD_RETRIES 4 /* maximum 4 retries */
+
+#undef DEBUG /* define for debugging output */
+
+#ifdef DEBUG
+ #define DEBUG_STARTUP /* debug driver initialisation */
+ #define DEBUG_OVERRIDE /* debug override geometry detection */
+ #define DEBUG_READWRITE /* debug each read/write command */
+ #define DEBUG_OTHER /* debug misc. interrupt/DMA stuff */
+ #define DEBUG_COMMAND /* debug each controller command */
+#endif /* DEBUG */
+
+/* this structure defines the XT drives and their types */
+typedef struct {
+ u_char heads;
+ u_short cylinders;
+ u_char sectors;
+ u_char control;
+ int unit;
+} XD_INFO;
+
+/* this structure defines a ROM BIOS signature */
+typedef struct {
+ unsigned int offset;
+ const char *string;
+ void (*init_controller)(unsigned int address);
+ void (*init_drive)(u_char drive);
+ const char *name;
+} XD_SIGNATURE;
+
+#ifndef MODULE
+static int xd_manual_geo_init (char *command);
+#endif /* MODULE */
+static u_char xd_detect (u_char *controller, unsigned int *address);
+static u_char xd_initdrives (void (*init_drive)(u_char drive));
+
+static void do_xd_request (struct request_queue * q);
+static int xd_ioctl (struct block_device *bdev,fmode_t mode,unsigned int cmd,unsigned long arg);
+static int xd_readwrite (u_char operation,XD_INFO *disk,char *buffer,u_int block,u_int count);
+static void xd_recalibrate (u_char drive);
+
+static irqreturn_t xd_interrupt_handler(int irq, void *dev_id);
+static u_char xd_setup_dma (u_char opcode,u_char *buffer,u_int count);
+static u_char *xd_build (u_char *cmdblk,u_char command,u_char drive,u_char head,u_short cylinder,u_char sector,u_char count,u_char control);
+static void xd_watchdog (unsigned long unused);
+static inline u_char xd_waitport (u_short port,u_char flags,u_char mask,u_long timeout);
+static u_int xd_command (u_char *command,u_char mode,u_char *indata,u_char *outdata,u_char *sense,u_long timeout);
+
+/* card specific setup and geometry gathering code */
+static void xd_dtc_init_controller (unsigned int address);
+static void xd_dtc5150cx_init_drive (u_char drive);
+static void xd_dtc_init_drive (u_char drive);
+static void xd_wd_init_controller (unsigned int address);
+static void xd_wd_init_drive (u_char drive);
+static void xd_seagate_init_controller (unsigned int address);
+static void xd_seagate_init_drive (u_char drive);
+static void xd_omti_init_controller (unsigned int address);
+static void xd_omti_init_drive (u_char drive);
+static void xd_xebec_init_controller (unsigned int address);
+static void xd_xebec_init_drive (u_char drive);
+static void xd_setparam (u_char command,u_char drive,u_char heads,u_short cylinders,u_short rwrite,u_short wprecomp,u_char ecc);
+static void xd_override_init_drive (u_char drive);
+
+#endif /* _LINUX_XD_H */
diff --git a/drivers/block/xen-blkback/Makefile b/drivers/block/xen-blkback/Makefile
new file mode 100644
index 00000000..e491c1b7
--- /dev/null
+++ b/drivers/block/xen-blkback/Makefile
@@ -0,0 +1,3 @@
+obj-$(CONFIG_XEN_BLKDEV_BACKEND) := xen-blkback.o
+
+xen-blkback-y := blkback.o xenbus.o
diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
new file mode 100644
index 00000000..54139d0f
--- /dev/null
+++ b/drivers/block/xen-blkback/blkback.c
@@ -0,0 +1,826 @@
+/******************************************************************************
+ *
+ * Back-end of the driver for virtual block devices. This portion of the
+ * driver exports a 'unified' block-device interface that can be accessed
+ * by any operating system that implements a compatible front end. A
+ * reference front-end implementation can be found in:
+ * drivers/block/xen-blkfront.c
+ *
+ * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
+ * Copyright (c) 2005, Christopher Clark
+ *
+ * This program is free software; you can redistribute it and/or
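+	/* map each candidate 8 KiB BIOS window and compare every known ROM
+	   signature at its documented offset */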
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <linux/spinlock.h>
+#include <linux/kthread.h>
+#include <linux/list.h>
+#include <linux/delay.h>
+#include <linux/freezer.h>
+
+#include <xen/events.h>
+#include <xen/page.h>
+#include <asm/xen/hypervisor.h>
+#include <asm/xen/hypercall.h>
+#include "common.h"
+
+/*
+ * These are rather arbitrary. They are fairly large because adjacent requests
+ * pulled from a communication ring are quite likely to end up being part of
+ * the same scatter/gather request at the disc.
+ *
+ * ** TRY INCREASING 'xen_blkif_reqs' IF WRITE SPEEDS SEEM TOO LOW **
+ *
+ * This will increase the chances of being able to write whole tracks.
+ * 64 should be enough to keep us competitive with Linux.
+ */
+static int xen_blkif_reqs = 64;
+module_param_named(reqs, xen_blkif_reqs, int, 0);
+MODULE_PARM_DESC(reqs, "Number of blkback requests to allocate");
+
+/* Run-time switchable: /sys/module/blkback/parameters/ */
+static unsigned int log_stats;
+module_param(log_stats, int, 0644);
+
+/*
+ * Each outstanding request that we've passed to the lower device layers has a
+ * 'pending_req' allocated to it. Each buffer_head that completes decrements
+ * the pendcnt towards zero. When it hits zero, the specified domain has a
+ * response queued for it, with the saved 'id' passed back.
+ */
+struct pending_req {
+ struct xen_blkif *blkif;
+ u64 id;
+ int nr_pages;
+ atomic_t pendcnt;
+ unsigned short operation;
+ int status;
+ struct list_head free_list;
+};
+
+#define BLKBACK_INVALID_HANDLE (~0)
+
+struct xen_blkbk {
+ struct pending_req *pending_reqs;
+ /* List of all 'pending_req' available */
+ struct list_head pending_free;
+ /* And its spinlock. */
+ spinlock_t pending_free_lock;
+ wait_queue_head_t pending_free_wq;
+ /* The list of all pages that are available. */
+ struct page **pending_pages;
+ /* And the grant handles that are available. */
+ grant_handle_t *pending_grant_handles;
+};
+
+static struct xen_blkbk *blkbk;
+
+/*
+ * Little helpful macro to figure out the index and virtual address of the
+ * pending_pages[..]. For each 'pending_req' we have up to
+ * BLKIF_MAX_SEGMENTS_PER_REQUEST (11) pages. The seg would be from 0 through
+ * 10 and would index in the pending_pages[..].
+ */
+static inline int vaddr_pagenr(struct pending_req *req, int seg)
+{
+ return (req - blkbk->pending_reqs) *
+ BLKIF_MAX_SEGMENTS_PER_REQUEST + seg;
+}
+
+#define pending_page(req, seg) pending_pages[vaddr_pagenr(req, seg)]
+
+static inline unsigned long vaddr(struct pending_req *req, int seg)
+{
+ unsigned long pfn = page_to_pfn(blkbk->pending_page(req, seg));
+ return (unsigned long)pfn_to_kaddr(pfn);
+}
+
+#define pending_handle(_req, _seg) \
+ (blkbk->pending_grant_handles[vaddr_pagenr(_req, _seg)])
+
+
+static int do_block_io_op(struct xen_blkif *blkif);
+static int dispatch_rw_block_io(struct xen_blkif *blkif,
+ struct blkif_request *req,
+ struct pending_req *pending_req);
+static void make_response(struct xen_blkif *blkif, u64 id,
+ unsigned short op, int st);
+
+/*
+ * Retrieve from the 'pending_reqs' a free pending_req structure to be used.
+ */
+static struct pending_req *alloc_req(void)
+{
+ struct pending_req *req = NULL;
+ unsigned long flags;
+
+ spin_lock_irqsave(&blkbk->pending_free_lock, flags);
+ if (!list_empty(&blkbk->pending_free)) {
+ req = list_entry(blkbk->pending_free.next, struct pending_req,
+ free_list);
+ list_del(&req->free_list);
+ }
+ spin_unlock_irqrestore(&blkbk->pending_free_lock, flags);
+ return req;
+}
+
+/*
+ * Return the 'pending_req' structure back to the freepool. We also
+ * wake up the thread if it was waiting for a free page.
+ */
+static void free_req(struct pending_req *req)
+{
+ unsigned long flags;
+ int was_empty;
+
+ spin_lock_irqsave(&blkbk->pending_free_lock, flags);
+ was_empty = list_empty(&blkbk->pending_free);
+ list_add(&req->free_list, &blkbk->pending_free);
+ spin_unlock_irqrestore(&blkbk->pending_free_lock, flags);
+ if (was_empty)
+ wake_up(&blkbk->pending_free_wq);
+}
+
+/*
+ * Routines for managing virtual block devices (vbds).
+ */
+static int xen_vbd_translate(struct phys_req *req, struct xen_blkif *blkif,
+ int operation)
+{
+ struct xen_vbd *vbd = &blkif->vbd;
+ int rc = -EACCES;
+
+ if ((operation != READ) && vbd->readonly)
+ goto out;
+
+ if (likely(req->nr_sects)) {
+ blkif_sector_t end = req->sector_number + req->nr_sects;
+
+ if (unlikely(end < req->sector_number))
+ goto out;
+ if (unlikely(end > vbd_sz(vbd)))
+ goto out;
+ }
+
+ req->dev = vbd->pdevice;
+ req->bdev = vbd->bdev;
+ rc = 0;
+
+ out:
+ return rc;
+}
+
+static void xen_vbd_resize(struct xen_blkif *blkif)
+{
+ struct xen_vbd *vbd = &blkif->vbd;
+ struct xenbus_transaction xbt;
+ int err;
+ struct xenbus_device *dev = xen_blkbk_xenbus(blkif->be);
+ unsigned long long new_size = vbd_sz(vbd);
+
+ pr_info(DRV_PFX "VBD Resize: Domid: %d, Device: (%d, %d)\n",
+ blkif->domid, MAJOR(vbd->pdevice), MINOR(vbd->pdevice));
+ pr_info(DRV_PFX "VBD Resize: new size %llu\n", new_size);
+ vbd->size = new_size;
+again:
+ err = xenbus_transaction_start(&xbt);
+ if (err) {
+ pr_warn(DRV_PFX "Error starting transaction");
+ return;
+ }
+ err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu",
+ (unsigned long long)vbd_sz(vbd));
+ if (err) {
+ pr_warn(DRV_PFX "Error writing new size");
+ goto abort;
+ }
+ /*
+ * Write the current state; we will use this to synchronize
+ * the front-end. If the current state is "connected" the
+ * front-end will get the new size information online.
+ */
+ err = xenbus_printf(xbt, dev->nodename, "state", "%d", dev->state);
+ if (err) {
+ pr_warn(DRV_PFX "Error writing the state");
+ goto abort;
+ }
+
+ err = xenbus_transaction_end(xbt, 0);
+ if (err == -EAGAIN)
+ goto again;
+ if (err)
+ pr_warn(DRV_PFX "Error ending transaction");
+ return;
+abort:
+ xenbus_transaction_end(xbt, 1);
+}
+
+/*
+ * Notification from the guest OS.
+ */
+static void blkif_notify_work(struct xen_blkif *blkif)
+{
+ blkif->waiting_reqs = 1;
+ wake_up(&blkif->wq);
+}
+
+irqreturn_t xen_blkif_be_int(int irq, void *dev_id)
+{
+ blkif_notify_work(dev_id);
+ return IRQ_HANDLED;
+}
+
+/*
+ * SCHEDULER FUNCTIONS
+ */
+
+static void print_stats(struct xen_blkif *blkif)
+{
+ pr_info("xen-blkback (%s): oo %3d | rd %4d | wr %4d | f %4d\n",
+ current->comm, blkif->st_oo_req,
+ blkif->st_rd_req, blkif->st_wr_req, blkif->st_f_req);
+ blkif->st_print = jiffies + msecs_to_jiffies(10 * 1000);
+ blkif->st_rd_req = 0;
+ blkif->st_wr_req = 0;
+ blkif->st_oo_req = 0;
+}
+
+int xen_blkif_schedule(void *arg)
+{
+ struct xen_blkif *blkif = arg;
+ struct xen_vbd *vbd = &blkif->vbd;
+
+ xen_blkif_get(blkif);
+
+ while (!kthread_should_stop()) {
+ if (try_to_freeze())
+ continue;
+ if (unlikely(vbd->size != vbd_sz(vbd)))
+ xen_vbd_resize(blkif);
+
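+		/* wait until the frontend signals work, then until a pending_req
+		 * slot is free, before draining the ring */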
+ wait_event_interruptible(
+ blkif->wq,
+ blkif->waiting_reqs || kthread_should_stop());
+ wait_event_interruptible(
+ blkbk->pending_free_wq,
+ !list_empty(&blkbk->pending_free) ||
+ kthread_should_stop());
+
+ blkif->waiting_reqs = 0;
+ smp_mb(); /* clear flag *before* checking for work */
+
+ if (do_block_io_op(blkif))
+ blkif->waiting_reqs = 1;
+
+ if (log_stats && time_after(jiffies, blkif->st_print))
+ print_stats(blkif);
+ }
+
+ if (log_stats)
+ print_stats(blkif);
+
+ blkif->xenblkd = NULL;
+ xen_blkif_put(blkif);
+
+ return 0;
+}
+
+struct seg_buf {
+ unsigned long buf;
+ unsigned int nsec;
+};
+/*
+ * Unmap the grant references, and also remove the M2P over-rides
+ * used in the 'pending_req'.
+ */
+static void xen_blkbk_unmap(struct pending_req *req)
+{
+ struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+ unsigned int i, invcount = 0;
+ grant_handle_t handle;
+ int ret;
+
+ for (i = 0; i < req->nr_pages; i++) {
+ handle = pending_handle(req, i);
+ if (handle == BLKBACK_INVALID_HANDLE)
+ continue;
+ gnttab_set_unmap_op(&unmap[invcount], vaddr(req, i),
+ GNTMAP_host_map, handle);
+ pending_handle(req, i) = BLKBACK_INVALID_HANDLE;
+ invcount++;
+ }
+
+ ret = HYPERVISOR_grant_table_op(
+ GNTTABOP_unmap_grant_ref, unmap, invcount);
+ BUG_ON(ret);
+ /*
+	 * Note, we use invcount here, not nr_pages, so we can't index
+	 * using vaddr(req, i).
+ */
+ for (i = 0; i < invcount; i++) {
+ ret = m2p_remove_override(
+ virt_to_page(unmap[i].host_addr), false);
+ if (ret) {
+ pr_alert(DRV_PFX "Failed to remove M2P override for %lx\n",
+ (unsigned long)unmap[i].host_addr);
+ continue;
+ }
+ }
+}
+
+static int xen_blkbk_map(struct blkif_request *req,
+ struct pending_req *pending_req,
+ struct seg_buf seg[])
+{
+ struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+ int i;
+ int nseg = req->nr_segments;
+ int ret = 0;
+
+ /*
+	 * Fill out preq.nr_sects with the proper number of sectors, and set up
+	 * map[..] with the PFN of the page in our domain and the
+ * corresponding grant reference for each page.
+ */
+ for (i = 0; i < nseg; i++) {
+ uint32_t flags;
+
+ flags = GNTMAP_host_map;
+ if (pending_req->operation != BLKIF_OP_READ)
+ flags |= GNTMAP_readonly;
+ gnttab_set_map_op(&map[i], vaddr(pending_req, i), flags,
+ req->u.rw.seg[i].gref,
+ pending_req->blkif->domid);
+ }
+
+ ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, map, nseg);
+ BUG_ON(ret);
+
+ /*
+ * Now swizzle the MFN in our domain with the MFN from the other domain
+ * so that when we access vaddr(pending_req,i) it has the contents of
+ * the page from the other domain.
+ */
+ for (i = 0; i < nseg; i++) {
+ if (unlikely(map[i].status != 0)) {
+ pr_debug(DRV_PFX "invalid buffer -- could not remap it\n");
+ map[i].handle = BLKBACK_INVALID_HANDLE;
+ ret |= 1;
+ }
+
+ pending_handle(pending_req, i) = map[i].handle;
+
+ if (ret)
+ continue;
+
+ ret = m2p_add_override(PFN_DOWN(map[i].dev_bus_addr),
+ blkbk->pending_page(pending_req, i), false);
+ if (ret) {
+ pr_alert(DRV_PFX "Failed to install M2P override for %lx (ret: %d)\n",
+ (unsigned long)map[i].dev_bus_addr, ret);
+ /* We could switch over to GNTTABOP_copy */
+ continue;
+ }
+
+ seg[i].buf = map[i].dev_bus_addr |
+ (req->u.rw.seg[i].first_sect << 9);
+ }
+ return ret;
+}
+
+/*
+ * Completion callback on the bio's. Called as bh->b_end_io()
+ */
+
+static void __end_block_io_op(struct pending_req *pending_req, int error)
+{
+ /* An error fails the entire request. */
+ if ((pending_req->operation == BLKIF_OP_FLUSH_DISKCACHE) &&
+ (error == -EOPNOTSUPP)) {
+ pr_debug(DRV_PFX "flush diskcache op failed, not supported\n");
+ xen_blkbk_flush_diskcache(XBT_NIL, pending_req->blkif->be, 0);
+ pending_req->status = BLKIF_RSP_EOPNOTSUPP;
+ } else if (error) {
+ pr_debug(DRV_PFX "Buffer not up-to-date at end of operation,"
+ " error=%d\n", error);
+ pending_req->status = BLKIF_RSP_ERROR;
+ }
+
+ /*
+ * If all of the bio's have completed it is time to unmap
+ * the grant references associated with 'request' and provide
+ * the proper response on the ring.
+ */
+ if (atomic_dec_and_test(&pending_req->pendcnt)) {
+ xen_blkbk_unmap(pending_req);
+ make_response(pending_req->blkif, pending_req->id,
+ pending_req->operation, pending_req->status);
+ xen_blkif_put(pending_req->blkif);
+ free_req(pending_req);
+ }
+}
+
+/*
+ * bio callback.
+ */
+static void end_block_io_op(struct bio *bio, int error)
+{
+ __end_block_io_op(bio->bi_private, error);
+ bio_put(bio);
+}
+
+
+
+/*
+ * Function to copy the 'struct blkif_request' from the ring buffer
+ * (which has the sectors we want, the number of them, grant references, etc),
+ * and transmute it to the block API to hand it over to the proper block disk.
+ */
+static int do_block_io_op(struct xen_blkif *blkif)
+{
+ union blkif_back_rings *blk_rings = &blkif->blk_rings;
+ struct blkif_request req;
+ struct pending_req *pending_req;
+ RING_IDX rc, rp;
+ int more_to_do = 0;
+
+ rc = blk_rings->common.req_cons;
+ rp = blk_rings->common.sring->req_prod;
+ rmb(); /* Ensure we see queued requests up to 'rp'. */
+
+ while (rc != rp) {
+
+ if (RING_REQUEST_CONS_OVERFLOW(&blk_rings->common, rc))
+ break;
+
+ if (kthread_should_stop()) {
+ more_to_do = 1;
+ break;
+ }
+
+ pending_req = alloc_req();
+ if (NULL == pending_req) {
+ blkif->st_oo_req++;
+ more_to_do = 1;
+ break;
+ }
+
+ switch (blkif->blk_protocol) {
+ case BLKIF_PROTOCOL_NATIVE:
+ memcpy(&req, RING_GET_REQUEST(&blk_rings->native, rc), sizeof(req));
+ break;
+ case BLKIF_PROTOCOL_X86_32:
+ blkif_get_x86_32_req(&req, RING_GET_REQUEST(&blk_rings->x86_32, rc));
+ break;
+ case BLKIF_PROTOCOL_X86_64:
+ blkif_get_x86_64_req(&req, RING_GET_REQUEST(&blk_rings->x86_64, rc));
+ break;
+ default:
+ BUG();
+ }
+ blk_rings->common.req_cons = ++rc; /* before make_response() */
+
+ /* Apply all sanity checks to /private copy/ of request. */
+ barrier();
+
+ if (dispatch_rw_block_io(blkif, &req, pending_req))
+ break;
+
+ /* Yield point for this unbounded loop. */
+ cond_resched();
+ }
+
+ return more_to_do;
+}
+
+/*
+ * Transmutation of the 'struct blkif_request' to a proper 'struct bio'
+ * and call the 'submit_bio' to pass it to the underlying storage.
+ */
+static int dispatch_rw_block_io(struct xen_blkif *blkif,
+ struct blkif_request *req,
+ struct pending_req *pending_req)
+{
+ struct phys_req preq;
+ struct seg_buf seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+ unsigned int nseg;
+ struct bio *bio = NULL;
+ struct bio *biolist[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+ int i, nbio = 0;
+ int operation;
+ struct blk_plug plug;
+
+ switch (req->operation) {
+ case BLKIF_OP_READ:
+ blkif->st_rd_req++;
+ operation = READ;
+ break;
+ case BLKIF_OP_WRITE:
+ blkif->st_wr_req++;
+ operation = WRITE_ODIRECT;
+ break;
+ case BLKIF_OP_FLUSH_DISKCACHE:
+ blkif->st_f_req++;
+ operation = WRITE_FLUSH;
+ break;
+ case BLKIF_OP_WRITE_BARRIER:
+ default:
+ operation = 0; /* make gcc happy */
+ goto fail_response;
+ break;
+ }
+
+ /* Check that the number of segments is sane. */
+ nseg = req->nr_segments;
+ if (unlikely(nseg == 0 && operation != WRITE_FLUSH) ||
+ unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) {
+ pr_debug(DRV_PFX "Bad number of segments in request (%d)\n",
+ nseg);
+ /* Haven't submitted any bio's yet. */
+ goto fail_response;
+ }
+
+ preq.dev = req->handle;
+ preq.sector_number = req->u.rw.sector_number;
+ preq.nr_sects = 0;
+
+ pending_req->blkif = blkif;
+ pending_req->id = req->id;
+ pending_req->operation = req->operation;
+ pending_req->status = BLKIF_RSP_OKAY;
+ pending_req->nr_pages = nseg;
+
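+	/*
+	 * Each segment describes a range of 512-byte sectors within a single
+	 * granted page, so reject a segment that runs past the page or whose
+	 * last sector precedes its first.
+	 */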
+ for (i = 0; i < nseg; i++) {
+ seg[i].nsec = req->u.rw.seg[i].last_sect -
+ req->u.rw.seg[i].first_sect + 1;
+ if ((req->u.rw.seg[i].last_sect >= (PAGE_SIZE >> 9)) ||
+ (req->u.rw.seg[i].last_sect < req->u.rw.seg[i].first_sect))
+ goto fail_response;
+ preq.nr_sects += seg[i].nsec;
+
+ }
+
+ if (xen_vbd_translate(&preq, blkif, operation) != 0) {
+ pr_debug(DRV_PFX "access denied: %s of [%llu,%llu] on dev=%04x\n",
+ operation == READ ? "read" : "write",
+ preq.sector_number,
+ preq.sector_number + preq.nr_sects, preq.dev);
+ goto fail_response;
+ }
+
+ /*
+ * This check _MUST_ be done after xen_vbd_translate as the preq.bdev
+ * is set there.
+ */
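+	/* Both the start sector and every segment length must be a multiple
+	 * of the device's logical block size, expressed in 512-byte sectors. */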
+ for (i = 0; i < nseg; i++) {
+ if (((int)preq.sector_number|(int)seg[i].nsec) &
+ ((bdev_logical_block_size(preq.bdev) >> 9) - 1)) {
+ pr_debug(DRV_PFX "Misaligned I/O request from domain %d",
+ blkif->domid);
+ goto fail_response;
+ }
+ }
+
+ /*
+ * If we have failed at this point, we need to undo the M2P override,
+ * set gnttab_set_unmap_op on all of the grant references and perform
+ * the hypercall to unmap the grants - that is all done in
+ * xen_blkbk_unmap.
+ */
+ if (xen_blkbk_map(req, pending_req, seg))
+ goto fail_flush;
+
+ /* This corresponding xen_blkif_put is done in __end_block_io_op */
+ xen_blkif_get(blkif);
+
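+	/*
+	 * Pack as many segments as possible into each bio; when bio_add_page
+	 * refuses another page, allocate a fresh bio for the remaining
+	 * segments, so one request may be split across several bios.
+	 */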
+ for (i = 0; i < nseg; i++) {
+ while ((bio == NULL) ||
+ (bio_add_page(bio,
+ blkbk->pending_page(pending_req, i),
+ seg[i].nsec << 9,
+ seg[i].buf & ~PAGE_MASK) == 0)) {
+
+ bio = bio_alloc(GFP_KERNEL, nseg-i);
+ if (unlikely(bio == NULL))
+ goto fail_put_bio;
+
+ biolist[nbio++] = bio;
+ bio->bi_bdev = preq.bdev;
+ bio->bi_private = pending_req;
+ bio->bi_end_io = end_block_io_op;
+ bio->bi_sector = preq.sector_number;
+ }
+
+ preq.sector_number += seg[i].nsec;
+ }
+
+ /* This will be hit if the operation was a flush. */
+ if (!bio) {
+ BUG_ON(operation != WRITE_FLUSH);
+
+ bio = bio_alloc(GFP_KERNEL, 0);
+ if (unlikely(bio == NULL))
+ goto fail_put_bio;
+
+ biolist[nbio++] = bio;
+ bio->bi_bdev = preq.bdev;
+ bio->bi_private = pending_req;
+ bio->bi_end_io = end_block_io_op;
+ }
+
+ /*
+	 * Set the pending count to the number of bios up front, so that each
+	 * bio completion only has to decrement it and the last one to finish
+	 * triggers the response.
+ */
+ atomic_set(&pending_req->pendcnt, nbio);
+
+ /* Get a reference count for the disk queue and start sending I/O */
+ blk_start_plug(&plug);
+
+ for (i = 0; i < nbio; i++)
+ submit_bio(operation, biolist[i]);
+
+ /* Let the I/Os go.. */
+ blk_finish_plug(&plug);
+
+ if (operation == READ)
+ blkif->st_rd_sect += preq.nr_sects;
+ else if (operation & WRITE)
+ blkif->st_wr_sect += preq.nr_sects;
+
+ return 0;
+
+ fail_flush:
+ xen_blkbk_unmap(pending_req);
+ fail_response:
+ /* Haven't submitted any bio's yet. */
+ make_response(blkif, req->id, req->operation, BLKIF_RSP_ERROR);
+ free_req(pending_req);
+ msleep(1); /* back off a bit */
+ return -EIO;
+
+ fail_put_bio:
+ for (i = 0; i < nbio; i++)
+ bio_put(biolist[i]);
+ __end_block_io_op(pending_req, -EINVAL);
+ msleep(1); /* back off a bit */
+ return -EIO;
+}
+
+
+
+/*
+ * Put a response on the ring on how the operation fared.
+ */
+static void make_response(struct xen_blkif *blkif, u64 id,
+ unsigned short op, int st)
+{
+ struct blkif_response resp;
+ unsigned long flags;
+ union blkif_back_rings *blk_rings = &blkif->blk_rings;
+ int more_to_do = 0;
+ int notify;
+
+ resp.id = id;
+ resp.operation = op;
+ resp.status = st;
+
+ spin_lock_irqsave(&blkif->blk_ring_lock, flags);
+ /* Place on the response ring for the relevant domain. */
+ switch (blkif->blk_protocol) {
+ case BLKIF_PROTOCOL_NATIVE:
+ memcpy(RING_GET_RESPONSE(&blk_rings->native, blk_rings->native.rsp_prod_pvt),
+ &resp, sizeof(resp));
+ break;
+ case BLKIF_PROTOCOL_X86_32:
+ memcpy(RING_GET_RESPONSE(&blk_rings->x86_32, blk_rings->x86_32.rsp_prod_pvt),
+ &resp, sizeof(resp));
+ break;
+ case BLKIF_PROTOCOL_X86_64:
+ memcpy(RING_GET_RESPONSE(&blk_rings->x86_64, blk_rings->x86_64.rsp_prod_pvt),
+ &resp, sizeof(resp));
+ break;
+ default:
+ BUG();
+ }
+ blk_rings->common.rsp_prod_pvt++;
+ RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&blk_rings->common, notify);
+ if (blk_rings->common.rsp_prod_pvt == blk_rings->common.req_cons) {
+ /*
+ * Tail check for pending requests. Allows frontend to avoid
+ * notifications if requests are already in flight (lower
+ * overheads and promotes batching).
+ */
+ RING_FINAL_CHECK_FOR_REQUESTS(&blk_rings->common, more_to_do);
+
+ } else if (RING_HAS_UNCONSUMED_REQUESTS(&blk_rings->common)) {
+ more_to_do = 1;
+ }
+
+ spin_unlock_irqrestore(&blkif->blk_ring_lock, flags);
+
+ if (more_to_do)
+ blkif_notify_work(blkif);
+ if (notify)
+ notify_remote_via_irq(blkif->irq);
+}
+
+static int __init xen_blkif_init(void)
+{
+ int i, mmap_pages;
+ int rc = 0;
+
+ if (!xen_pv_domain())
+ return -ENODEV;
+
+ blkbk = kzalloc(sizeof(struct xen_blkbk), GFP_KERNEL);
+ if (!blkbk) {
+ pr_alert(DRV_PFX "%s: out of memory!\n", __func__);
+ return -ENOMEM;
+ }
+
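+	/* Reserve one page per possible segment of every in-flight request. */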
+ mmap_pages = xen_blkif_reqs * BLKIF_MAX_SEGMENTS_PER_REQUEST;
+
+ blkbk->pending_reqs = kmalloc(sizeof(blkbk->pending_reqs[0]) *
+ xen_blkif_reqs, GFP_KERNEL);
+ blkbk->pending_grant_handles = kzalloc(sizeof(blkbk->pending_grant_handles[0]) *
+ mmap_pages, GFP_KERNEL);
+ blkbk->pending_pages = kzalloc(sizeof(blkbk->pending_pages[0]) *
+ mmap_pages, GFP_KERNEL);
+
+ if (!blkbk->pending_reqs || !blkbk->pending_grant_handles ||
+ !blkbk->pending_pages) {
+ rc = -ENOMEM;
+ goto out_of_memory;
+ }
+
+ for (i = 0; i < mmap_pages; i++) {
+ blkbk->pending_grant_handles[i] = BLKBACK_INVALID_HANDLE;
+ blkbk->pending_pages[i] = alloc_page(GFP_KERNEL);
+ if (blkbk->pending_pages[i] == NULL) {
+ rc = -ENOMEM;
+ goto out_of_memory;
+ }
+ }
+ rc = xen_blkif_interface_init();
+ if (rc)
+ goto failed_init;
+
+	memset(blkbk->pending_reqs, 0,
+	       xen_blkif_reqs * sizeof(blkbk->pending_reqs[0]));
+
+ INIT_LIST_HEAD(&blkbk->pending_free);
+ spin_lock_init(&blkbk->pending_free_lock);
+ init_waitqueue_head(&blkbk->pending_free_wq);
+
+ for (i = 0; i < xen_blkif_reqs; i++)
+ list_add_tail(&blkbk->pending_reqs[i].free_list,
+ &blkbk->pending_free);
+
+ rc = xen_blkif_xenbus_init();
+ if (rc)
+ goto failed_init;
+
+ return 0;
+
+ out_of_memory:
+ pr_alert(DRV_PFX "%s: out of memory\n", __func__);
+ failed_init:
+ kfree(blkbk->pending_reqs);
+ kfree(blkbk->pending_grant_handles);
+ if (blkbk->pending_pages) {
+ for (i = 0; i < mmap_pages; i++) {
+ if (blkbk->pending_pages[i])
+ __free_page(blkbk->pending_pages[i]);
+ }
+ kfree(blkbk->pending_pages);
+ }
+ kfree(blkbk);
+ blkbk = NULL;
+ return rc;
+}
+
+module_init(xen_blkif_init);
+
+MODULE_LICENSE("Dual BSD/GPL");
diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h
new file mode 100644
index 00000000..9e40b283
--- /dev/null
+++ b/drivers/block/xen-blkback/common.h
@@ -0,0 +1,233 @@
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef __XEN_BLKIF__BACKEND__COMMON_H__
+#define __XEN_BLKIF__BACKEND__COMMON_H__
+
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/slab.h>
+#include <linux/blkdev.h>
+#include <linux/vmalloc.h>
+#include <linux/wait.h>
+#include <linux/io.h>
+#include <asm/setup.h>
+#include <asm/pgalloc.h>
+#include <asm/hypervisor.h>
+#include <xen/grant_table.h>
+#include <xen/xenbus.h>
+#include <xen/interface/io/ring.h>
+#include <xen/interface/io/blkif.h>
+#include <xen/interface/io/protocols.h>
+
+#define DRV_PFX "xen-blkback:"
+#define DPRINTK(fmt, args...) \
+ pr_debug(DRV_PFX "(%s:%d) " fmt ".\n", \
+ __func__, __LINE__, ##args)
+
+
+/* Not a real protocol. Used to generate ring structs which contain
+ * the elements common to all protocols only. This way we get a
+ * compiler-checkable way to use common struct elements, so we can
+ * avoid using switch(protocol) in a number of places. */
+struct blkif_common_request {
+ char dummy;
+};
+struct blkif_common_response {
+ char dummy;
+};
+
+/* i386 protocol version */
+#pragma pack(push, 4)
+struct blkif_x86_32_request {
+ uint8_t operation; /* BLKIF_OP_??? */
+ uint8_t nr_segments; /* number of segments */
+ blkif_vdev_t handle; /* only for read/write requests */
+ uint64_t id; /* private guest value, echoed in resp */
+ blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */
+ struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+};
+struct blkif_x86_32_response {
+ uint64_t id; /* copied from request */
+ uint8_t operation; /* copied from request */
+ int16_t status; /* BLKIF_RSP_??? */
+};
+#pragma pack(pop)
+
+/* x86_64 protocol version */
+struct blkif_x86_64_request {
+ uint8_t operation; /* BLKIF_OP_??? */
+ uint8_t nr_segments; /* number of segments */
+ blkif_vdev_t handle; /* only for read/write requests */
+ uint64_t __attribute__((__aligned__(8))) id;
+ blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */
+ struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+};
+struct blkif_x86_64_response {
+ uint64_t __attribute__((__aligned__(8))) id;
+ uint8_t operation; /* copied from request */
+ int16_t status; /* BLKIF_RSP_??? */
+};
+
+DEFINE_RING_TYPES(blkif_common, struct blkif_common_request,
+ struct blkif_common_response);
+DEFINE_RING_TYPES(blkif_x86_32, struct blkif_x86_32_request,
+ struct blkif_x86_32_response);
+DEFINE_RING_TYPES(blkif_x86_64, struct blkif_x86_64_request,
+ struct blkif_x86_64_response);
+
+union blkif_back_rings {
+ struct blkif_back_ring native;
+ struct blkif_common_back_ring common;
+ struct blkif_x86_32_back_ring x86_32;
+ struct blkif_x86_64_back_ring x86_64;
+};
+
+enum blkif_protocol {
+ BLKIF_PROTOCOL_NATIVE = 1,
+ BLKIF_PROTOCOL_X86_32 = 2,
+ BLKIF_PROTOCOL_X86_64 = 3,
+};
+
+struct xen_vbd {
+ /* What the domain refers to this vbd as. */
+ blkif_vdev_t handle;
+ /* Non-zero -> read-only */
+ unsigned char readonly;
+ /* VDISK_xxx */
+ unsigned char type;
+ /* phys device that this vbd maps to. */
+ u32 pdevice;
+ struct block_device *bdev;
+ /* Cached size parameter. */
+ sector_t size;
+ bool flush_support;
+};
+
+struct backend_info;
+
+struct xen_blkif {
+ /* Unique identifier for this interface. */
+ domid_t domid;
+ unsigned int handle;
+ /* Physical parameters of the comms window. */
+ unsigned int irq;
+ /* Comms information. */
+ enum blkif_protocol blk_protocol;
+ union blkif_back_rings blk_rings;
+ struct vm_struct *blk_ring_area;
+ /* The VBD attached to this interface. */
+ struct xen_vbd vbd;
+ /* Back pointer to the backend_info. */
+ struct backend_info *be;
+ /* Private fields. */
+ spinlock_t blk_ring_lock;
+ atomic_t refcnt;
+
+ wait_queue_head_t wq;
+ /* One thread per one blkif. */
+ struct task_struct *xenblkd;
+ unsigned int waiting_reqs;
+
+ /* statistics */
+ unsigned long st_print;
+ int st_rd_req;
+ int st_wr_req;
+ int st_oo_req;
+ int st_f_req;
+ int st_rd_sect;
+ int st_wr_sect;
+
+ wait_queue_head_t waiting_to_free;
+
+ grant_handle_t shmem_handle;
+ grant_ref_t shmem_ref;
+};
+
+
+#define vbd_sz(_v) ((_v)->bdev->bd_part ? \
+ (_v)->bdev->bd_part->nr_sects : \
+ get_capacity((_v)->bdev->bd_disk))
+
+#define xen_blkif_get(_b) (atomic_inc(&(_b)->refcnt))
+#define xen_blkif_put(_b) \
+ do { \
+ if (atomic_dec_and_test(&(_b)->refcnt)) \
+ wake_up(&(_b)->waiting_to_free);\
+ } while (0)
+
+struct phys_req {
+ unsigned short dev;
+ unsigned short nr_sects;
+ struct block_device *bdev;
+ blkif_sector_t sector_number;
+};
+int xen_blkif_interface_init(void);
+
+int xen_blkif_xenbus_init(void);
+
+irqreturn_t xen_blkif_be_int(int irq, void *dev_id);
+int xen_blkif_schedule(void *arg);
+
+int xen_blkbk_flush_diskcache(struct xenbus_transaction xbt,
+ struct backend_info *be, int state);
+
+struct xenbus_device *xen_blkbk_xenbus(struct backend_info *be);
+
+static inline void blkif_get_x86_32_req(struct blkif_request *dst,
+ struct blkif_x86_32_request *src)
+{
+ int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST;
+ dst->operation = src->operation;
+ dst->nr_segments = src->nr_segments;
+ dst->handle = src->handle;
+ dst->id = src->id;
+ dst->u.rw.sector_number = src->sector_number;
+ barrier();
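+	/* Bound the copy by the private copy of nr_segments, not by the
+	 * frontend-writable value still sitting in the shared ring. */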
+ if (n > dst->nr_segments)
+ n = dst->nr_segments;
+ for (i = 0; i < n; i++)
+ dst->u.rw.seg[i] = src->seg[i];
+}
+
+static inline void blkif_get_x86_64_req(struct blkif_request *dst,
+ struct blkif_x86_64_request *src)
+{
+ int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST;
+ dst->operation = src->operation;
+ dst->nr_segments = src->nr_segments;
+ dst->handle = src->handle;
+ dst->id = src->id;
+ dst->u.rw.sector_number = src->sector_number;
+ barrier();
+ if (n > dst->nr_segments)
+ n = dst->nr_segments;
+ for (i = 0; i < n; i++)
+ dst->u.rw.seg[i] = src->seg[i];
+}
+
+#endif /* __XEN_BLKIF__BACKEND__COMMON_H__ */
diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c
new file mode 100644
index 00000000..6cc0db1b
--- /dev/null
+++ b/drivers/block/xen-blkback/xenbus.c
@@ -0,0 +1,767 @@
+/* Xenbus code for blkif backend
+ Copyright (C) 2005 Rusty Russell <rusty@rustcorp.com.au>
+ Copyright (C) 2005 XenSource Ltd
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+*/
+
+#include <stdarg.h>
+#include <linux/module.h>
+#include <linux/kthread.h>
+#include <xen/events.h>
+#include <xen/grant_table.h>
+#include "common.h"
+
+struct backend_info {
+ struct xenbus_device *dev;
+ struct xen_blkif *blkif;
+ struct xenbus_watch backend_watch;
+ unsigned major;
+ unsigned minor;
+ char *mode;
+};
+
+static struct kmem_cache *xen_blkif_cachep;
+static void connect(struct backend_info *);
+static int connect_ring(struct backend_info *);
+static void backend_changed(struct xenbus_watch *, const char **,
+ unsigned int);
+
+struct xenbus_device *xen_blkbk_xenbus(struct backend_info *be)
+{
+ return be->dev;
+}
+
+static int blkback_name(struct xen_blkif *blkif, char *buf)
+{
+ char *devpath, *devname;
+ struct xenbus_device *dev = blkif->be->dev;
+
+ devpath = xenbus_read(XBT_NIL, dev->nodename, "dev", NULL);
+ if (IS_ERR(devpath))
+ return PTR_ERR(devpath);
+
+ devname = strstr(devpath, "/dev/");
+ if (devname != NULL)
+ devname += strlen("/dev/");
+ else
+ devname = devpath;
+
+ snprintf(buf, TASK_COMM_LEN, "blkback.%d.%s", blkif->domid, devname);
+ kfree(devpath);
+
+ return 0;
+}
+
+static void xen_update_blkif_status(struct xen_blkif *blkif)
+{
+ int err;
+ char name[TASK_COMM_LEN];
+
+ /* Not ready to connect? */
+ if (!blkif->irq || !blkif->vbd.bdev)
+ return;
+
+ /* Already connected? */
+ if (blkif->be->dev->state == XenbusStateConnected)
+ return;
+
+ /* Attempt to connect: exit if we fail to. */
+ connect(blkif->be);
+ if (blkif->be->dev->state != XenbusStateConnected)
+ return;
+
+ err = blkback_name(blkif, name);
+ if (err) {
+ xenbus_dev_error(blkif->be->dev, err, "get blkback dev name");
+ return;
+ }
+
+ err = filemap_write_and_wait(blkif->vbd.bdev->bd_inode->i_mapping);
+ if (err) {
+ xenbus_dev_error(blkif->be->dev, err, "block flush");
+ return;
+ }
+ invalidate_inode_pages2(blkif->vbd.bdev->bd_inode->i_mapping);
+
+ blkif->xenblkd = kthread_run(xen_blkif_schedule, blkif, name);
+ if (IS_ERR(blkif->xenblkd)) {
+ err = PTR_ERR(blkif->xenblkd);
+ blkif->xenblkd = NULL;
+ xenbus_dev_error(blkif->be->dev, err, "start xenblkd");
+ }
+}
+
+static struct xen_blkif *xen_blkif_alloc(domid_t domid)
+{
+ struct xen_blkif *blkif;
+
+ blkif = kmem_cache_alloc(xen_blkif_cachep, GFP_KERNEL);
+ if (!blkif)
+ return ERR_PTR(-ENOMEM);
+
+ memset(blkif, 0, sizeof(*blkif));
+ blkif->domid = domid;
+ spin_lock_init(&blkif->blk_ring_lock);
+ atomic_set(&blkif->refcnt, 1);
+ init_waitqueue_head(&blkif->wq);
+ blkif->st_print = jiffies;
+ init_waitqueue_head(&blkif->waiting_to_free);
+
+ return blkif;
+}
+
+static int map_frontend_page(struct xen_blkif *blkif, unsigned long shared_page)
+{
+ struct gnttab_map_grant_ref op;
+
+ gnttab_set_map_op(&op, (unsigned long)blkif->blk_ring_area->addr,
+ GNTMAP_host_map, shared_page, blkif->domid);
+
+ if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1))
+ BUG();
+
+ if (op.status) {
+ DPRINTK("Grant table operation failure !\n");
+ return op.status;
+ }
+
+ blkif->shmem_ref = shared_page;
+ blkif->shmem_handle = op.handle;
+
+ return 0;
+}
+
+static void unmap_frontend_page(struct xen_blkif *blkif)
+{
+ struct gnttab_unmap_grant_ref op;
+
+ gnttab_set_unmap_op(&op, (unsigned long)blkif->blk_ring_area->addr,
+ GNTMAP_host_map, blkif->shmem_handle);
+
+ if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1))
+ BUG();
+}
+
+static int xen_blkif_map(struct xen_blkif *blkif, unsigned long shared_page,
+ unsigned int evtchn)
+{
+ int err;
+
+ /* Already connected through? */
+ if (blkif->irq)
+ return 0;
+
+ blkif->blk_ring_area = alloc_vm_area(PAGE_SIZE);
+ if (!blkif->blk_ring_area)
+ return -ENOMEM;
+
+ err = map_frontend_page(blkif, shared_page);
+ if (err) {
+ free_vm_area(blkif->blk_ring_area);
+ return err;
+ }
+
+ switch (blkif->blk_protocol) {
+ case BLKIF_PROTOCOL_NATIVE:
+ {
+ struct blkif_sring *sring;
+ sring = (struct blkif_sring *)blkif->blk_ring_area->addr;
+ BACK_RING_INIT(&blkif->blk_rings.native, sring, PAGE_SIZE);
+ break;
+ }
+ case BLKIF_PROTOCOL_X86_32:
+ {
+ struct blkif_x86_32_sring *sring_x86_32;
+ sring_x86_32 = (struct blkif_x86_32_sring *)blkif->blk_ring_area->addr;
+ BACK_RING_INIT(&blkif->blk_rings.x86_32, sring_x86_32, PAGE_SIZE);
+ break;
+ }
+ case BLKIF_PROTOCOL_X86_64:
+ {
+ struct blkif_x86_64_sring *sring_x86_64;
+ sring_x86_64 = (struct blkif_x86_64_sring *)blkif->blk_ring_area->addr;
+ BACK_RING_INIT(&blkif->blk_rings.x86_64, sring_x86_64, PAGE_SIZE);
+ break;
+ }
+ default:
+ BUG();
+ }
+
+ err = bind_interdomain_evtchn_to_irqhandler(blkif->domid, evtchn,
+ xen_blkif_be_int, 0,
+ "blkif-backend", blkif);
+ if (err < 0) {
+ unmap_frontend_page(blkif);
+ free_vm_area(blkif->blk_ring_area);
+ blkif->blk_rings.common.sring = NULL;
+ return err;
+ }
+ blkif->irq = err;
+
+ return 0;
+}
+
+static void xen_blkif_disconnect(struct xen_blkif *blkif)
+{
+ if (blkif->xenblkd) {
+ kthread_stop(blkif->xenblkd);
+ blkif->xenblkd = NULL;
+ }
+
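+	/*
+	 * Drop our initial reference, wait for every in-flight request to
+	 * drop its own, then take the reference back so the final
+	 * xen_blkif_free() can release it.
+	 */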
+ atomic_dec(&blkif->refcnt);
+ wait_event(blkif->waiting_to_free, atomic_read(&blkif->refcnt) == 0);
+ atomic_inc(&blkif->refcnt);
+
+ if (blkif->irq) {
+ unbind_from_irqhandler(blkif->irq, blkif);
+ blkif->irq = 0;
+ }
+
+ if (blkif->blk_rings.common.sring) {
+ unmap_frontend_page(blkif);
+ free_vm_area(blkif->blk_ring_area);
+ blkif->blk_rings.common.sring = NULL;
+ }
+}
+
+void xen_blkif_free(struct xen_blkif *blkif)
+{
+ if (!atomic_dec_and_test(&blkif->refcnt))
+ BUG();
+ kmem_cache_free(xen_blkif_cachep, blkif);
+}
+
+int __init xen_blkif_interface_init(void)
+{
+ xen_blkif_cachep = kmem_cache_create("blkif_cache",
+ sizeof(struct xen_blkif),
+ 0, 0, NULL);
+ if (!xen_blkif_cachep)
+ return -ENOMEM;
+
+ return 0;
+}
+
+/*
+ * sysfs interface for VBD I/O requests
+ */
+
+#define VBD_SHOW(name, format, args...) \
+ static ssize_t show_##name(struct device *_dev, \
+ struct device_attribute *attr, \
+ char *buf) \
+ { \
+ struct xenbus_device *dev = to_xenbus_device(_dev); \
+ struct backend_info *be = dev_get_drvdata(&dev->dev); \
+ \
+ return sprintf(buf, format, ##args); \
+ } \
+ static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL)
+
+VBD_SHOW(oo_req, "%d\n", be->blkif->st_oo_req);
+VBD_SHOW(rd_req, "%d\n", be->blkif->st_rd_req);
+VBD_SHOW(wr_req, "%d\n", be->blkif->st_wr_req);
+VBD_SHOW(f_req, "%d\n", be->blkif->st_f_req);
+VBD_SHOW(rd_sect, "%d\n", be->blkif->st_rd_sect);
+VBD_SHOW(wr_sect, "%d\n", be->blkif->st_wr_sect);
+
+static struct attribute *xen_vbdstat_attrs[] = {
+ &dev_attr_oo_req.attr,
+ &dev_attr_rd_req.attr,
+ &dev_attr_wr_req.attr,
+ &dev_attr_f_req.attr,
+ &dev_attr_rd_sect.attr,
+ &dev_attr_wr_sect.attr,
+ NULL
+};
+
+static struct attribute_group xen_vbdstat_group = {
+ .name = "statistics",
+ .attrs = xen_vbdstat_attrs,
+};
+
+VBD_SHOW(physical_device, "%x:%x\n", be->major, be->minor);
+VBD_SHOW(mode, "%s\n", be->mode);
+
+int xenvbd_sysfs_addif(struct xenbus_device *dev)
+{
+ int error;
+
+ error = device_create_file(&dev->dev, &dev_attr_physical_device);
+ if (error)
+ goto fail1;
+
+ error = device_create_file(&dev->dev, &dev_attr_mode);
+ if (error)
+ goto fail2;
+
+ error = sysfs_create_group(&dev->dev.kobj, &xen_vbdstat_group);
+ if (error)
+ goto fail3;
+
+ return 0;
+
+fail3: sysfs_remove_group(&dev->dev.kobj, &xen_vbdstat_group);
+fail2: device_remove_file(&dev->dev, &dev_attr_mode);
+fail1: device_remove_file(&dev->dev, &dev_attr_physical_device);
+ return error;
+}
+
+void xenvbd_sysfs_delif(struct xenbus_device *dev)
+{
+ sysfs_remove_group(&dev->dev.kobj, &xen_vbdstat_group);
+ device_remove_file(&dev->dev, &dev_attr_mode);
+ device_remove_file(&dev->dev, &dev_attr_physical_device);
+}
+
+
+static void xen_vbd_free(struct xen_vbd *vbd)
+{
+ if (vbd->bdev)
+ blkdev_put(vbd->bdev, vbd->readonly ? FMODE_READ : FMODE_WRITE);
+ vbd->bdev = NULL;
+}
+
+static int xen_vbd_create(struct xen_blkif *blkif, blkif_vdev_t handle,
+ unsigned major, unsigned minor, int readonly,
+ int cdrom)
+{
+ struct xen_vbd *vbd;
+ struct block_device *bdev;
+ struct request_queue *q;
+
+ vbd = &blkif->vbd;
+ vbd->handle = handle;
+ vbd->readonly = readonly;
+ vbd->type = 0;
+
+ vbd->pdevice = MKDEV(major, minor);
+
+ bdev = blkdev_get_by_dev(vbd->pdevice, vbd->readonly ?
+ FMODE_READ : FMODE_WRITE, NULL);
+
+ if (IS_ERR(bdev)) {
+ DPRINTK("xen_vbd_create: device %08x could not be opened.\n",
+ vbd->pdevice);
+ return -ENOENT;
+ }
+
+ vbd->bdev = bdev;
+ if (vbd->bdev->bd_disk == NULL) {
+ DPRINTK("xen_vbd_create: device %08x doesn't exist.\n",
+ vbd->pdevice);
+ xen_vbd_free(vbd);
+ return -ENOENT;
+ }
+ vbd->size = vbd_sz(vbd);
+
+ if (vbd->bdev->bd_disk->flags & GENHD_FL_CD || cdrom)
+ vbd->type |= VDISK_CDROM;
+ if (vbd->bdev->bd_disk->flags & GENHD_FL_REMOVABLE)
+ vbd->type |= VDISK_REMOVABLE;
+
+ q = bdev_get_queue(bdev);
+ if (q && q->flush_flags)
+ vbd->flush_support = true;
+
+ DPRINTK("Successful creation of handle=%04x (dom=%u)\n",
+ handle, blkif->domid);
+ return 0;
+}
+static int xen_blkbk_remove(struct xenbus_device *dev)
+{
+ struct backend_info *be = dev_get_drvdata(&dev->dev);
+
+ DPRINTK("");
+
+ if (be->major || be->minor)
+ xenvbd_sysfs_delif(dev);
+
+ if (be->backend_watch.node) {
+ unregister_xenbus_watch(&be->backend_watch);
+ kfree(be->backend_watch.node);
+ be->backend_watch.node = NULL;
+ }
+
+ if (be->blkif) {
+ xen_blkif_disconnect(be->blkif);
+ xen_vbd_free(&be->blkif->vbd);
+ xen_blkif_free(be->blkif);
+ be->blkif = NULL;
+ }
+
+ kfree(be);
+ dev_set_drvdata(&dev->dev, NULL);
+ return 0;
+}
+
+int xen_blkbk_flush_diskcache(struct xenbus_transaction xbt,
+ struct backend_info *be, int state)
+{
+ struct xenbus_device *dev = be->dev;
+ int err;
+
+ err = xenbus_printf(xbt, dev->nodename, "feature-flush-cache",
+ "%d", state);
+ if (err)
+ xenbus_dev_fatal(dev, err, "writing feature-flush-cache");
+
+ return err;
+}
+
+/*
+ * Entry point to this code when a new device is created. Allocate the basic
+ * structures, and watch the store waiting for the hotplug scripts to tell us
+ * the device's physical major and minor numbers. Switch to InitWait.
+ */
+static int xen_blkbk_probe(struct xenbus_device *dev,
+ const struct xenbus_device_id *id)
+{
+ int err;
+ struct backend_info *be = kzalloc(sizeof(struct backend_info),
+ GFP_KERNEL);
+ if (!be) {
+ xenbus_dev_fatal(dev, -ENOMEM,
+ "allocating backend structure");
+ return -ENOMEM;
+ }
+ be->dev = dev;
+ dev_set_drvdata(&dev->dev, be);
+
+ be->blkif = xen_blkif_alloc(dev->otherend_id);
+ if (IS_ERR(be->blkif)) {
+ err = PTR_ERR(be->blkif);
+ be->blkif = NULL;
+ xenbus_dev_fatal(dev, err, "creating block interface");
+ goto fail;
+ }
+
+ /* setup back pointer */
+ be->blkif->be = be;
+
+ err = xenbus_watch_pathfmt(dev, &be->backend_watch, backend_changed,
+ "%s/%s", dev->nodename, "physical-device");
+ if (err)
+ goto fail;
+
+ err = xenbus_switch_state(dev, XenbusStateInitWait);
+ if (err)
+ goto fail;
+
+ return 0;
+
+fail:
+ DPRINTK("failed");
+ xen_blkbk_remove(dev);
+ return err;
+}
+
+
+/*
+ * Callback received when the hotplug scripts have placed the physical-device
+ * node. Read it and the mode node, and create a vbd. If the frontend is
+ * ready, connect.
+ */
+static void backend_changed(struct xenbus_watch *watch,
+ const char **vec, unsigned int len)
+{
+ int err;
+ unsigned major;
+ unsigned minor;
+ struct backend_info *be
+ = container_of(watch, struct backend_info, backend_watch);
+ struct xenbus_device *dev = be->dev;
+ int cdrom = 0;
+ char *device_type;
+
+ DPRINTK("");
+
+ err = xenbus_scanf(XBT_NIL, dev->nodename, "physical-device", "%x:%x",
+ &major, &minor);
+ if (XENBUS_EXIST_ERR(err)) {
+ /*
+ * Since this watch will fire once immediately after it is
+ * registered, we expect this. Ignore it, and wait for the
+ * hotplug scripts.
+ */
+ return;
+ }
+ if (err != 2) {
+ xenbus_dev_fatal(dev, err, "reading physical-device");
+ return;
+ }
+
+ if ((be->major || be->minor) &&
+ ((be->major != major) || (be->minor != minor))) {
+ pr_warn(DRV_PFX "changing physical device (from %x:%x to %x:%x) not supported.\n",
+ be->major, be->minor, major, minor);
+ return;
+ }
+
+ be->mode = xenbus_read(XBT_NIL, dev->nodename, "mode", NULL);
+ if (IS_ERR(be->mode)) {
+ err = PTR_ERR(be->mode);
+ be->mode = NULL;
+ xenbus_dev_fatal(dev, err, "reading mode");
+ return;
+ }
+
+ device_type = xenbus_read(XBT_NIL, dev->otherend, "device-type", NULL);
+ if (!IS_ERR(device_type)) {
+ cdrom = strcmp(device_type, "cdrom") == 0;
+ kfree(device_type);
+ }
+
+ if (be->major == 0 && be->minor == 0) {
+ /* Front end dir is a number, which is used as the handle. */
+
+ char *p = strrchr(dev->otherend, '/') + 1;
+ long handle;
+ err = strict_strtoul(p, 0, &handle);
+ if (err)
+ return;
+
+ be->major = major;
+ be->minor = minor;
+
+ err = xen_vbd_create(be->blkif, handle, major, minor,
+ (NULL == strchr(be->mode, 'w')), cdrom);
+ if (err) {
+ be->major = 0;
+ be->minor = 0;
+ xenbus_dev_fatal(dev, err, "creating vbd structure");
+ return;
+ }
+
+ err = xenvbd_sysfs_addif(dev);
+ if (err) {
+ xen_vbd_free(&be->blkif->vbd);
+ be->major = 0;
+ be->minor = 0;
+ xenbus_dev_fatal(dev, err, "creating sysfs entries");
+ return;
+ }
+
+ /* We're potentially connected now */
+ xen_update_blkif_status(be->blkif);
+ }
+}
+
+
+/*
+ * Callback received when the frontend's state changes.
+ */
+static void frontend_changed(struct xenbus_device *dev,
+ enum xenbus_state frontend_state)
+{
+ struct backend_info *be = dev_get_drvdata(&dev->dev);
+ int err;
+
+ DPRINTK("%s", xenbus_strstate(frontend_state));
+
+ switch (frontend_state) {
+ case XenbusStateInitialising:
+ if (dev->state == XenbusStateClosed) {
+ pr_info(DRV_PFX "%s: prepare for reconnect\n",
+ dev->nodename);
+ xenbus_switch_state(dev, XenbusStateInitWait);
+ }
+ break;
+
+ case XenbusStateInitialised:
+ case XenbusStateConnected:
+ /*
+ * Ensure we connect even when two watches fire in
+		 * close succession and we miss the intermediate value
+ * of frontend_state.
+ */
+ if (dev->state == XenbusStateConnected)
+ break;
+
+ /*
+ * Enforce precondition before potential leak point.
+ * blkif_disconnect() is idempotent.
+ */
+ xen_blkif_disconnect(be->blkif);
+
+ err = connect_ring(be);
+ if (err)
+ break;
+ xen_update_blkif_status(be->blkif);
+ break;
+
+ case XenbusStateClosing:
+ xen_blkif_disconnect(be->blkif);
+ xenbus_switch_state(dev, XenbusStateClosing);
+ break;
+
+ case XenbusStateClosed:
+ xenbus_switch_state(dev, XenbusStateClosed);
+ if (xenbus_dev_is_online(dev))
+ break;
+ /* fall through if not online */
+ case XenbusStateUnknown:
+ /* implies blkif_disconnect() via blkback_remove() */
+ device_unregister(&dev->dev);
+ break;
+
+ default:
+ xenbus_dev_fatal(dev, -EINVAL, "saw state %d at frontend",
+ frontend_state);
+ break;
+ }
+}
+
+
+/* ** Connection ** */
+
+
+/*
+ * Write the physical details regarding the block device to the store, and
+ * switch to Connected state.
+ */
+static void connect(struct backend_info *be)
+{
+ struct xenbus_transaction xbt;
+ int err;
+ struct xenbus_device *dev = be->dev;
+
+ DPRINTK("%s", dev->otherend);
+
+ /* Supply the information about the device the frontend needs */
+again:
+ err = xenbus_transaction_start(&xbt);
+ if (err) {
+ xenbus_dev_fatal(dev, err, "starting transaction");
+ return;
+ }
+
+ err = xen_blkbk_flush_diskcache(xbt, be, be->blkif->vbd.flush_support);
+ if (err)
+ goto abort;
+
+ err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu",
+ (unsigned long long)vbd_sz(&be->blkif->vbd));
+ if (err) {
+ xenbus_dev_fatal(dev, err, "writing %s/sectors",
+ dev->nodename);
+ goto abort;
+ }
+
+ /* FIXME: use a typename instead */
+ err = xenbus_printf(xbt, dev->nodename, "info", "%u",
+ be->blkif->vbd.type |
+ (be->blkif->vbd.readonly ? VDISK_READONLY : 0));
+ if (err) {
+ xenbus_dev_fatal(dev, err, "writing %s/info",
+ dev->nodename);
+ goto abort;
+ }
+ err = xenbus_printf(xbt, dev->nodename, "sector-size", "%lu",
+ (unsigned long)
+ bdev_logical_block_size(be->blkif->vbd.bdev));
+ if (err) {
+ xenbus_dev_fatal(dev, err, "writing %s/sector-size",
+ dev->nodename);
+ goto abort;
+ }
+
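+	/* -EAGAIN means the transaction raced with another xenstore writer; retry. */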
+ err = xenbus_transaction_end(xbt, 0);
+ if (err == -EAGAIN)
+ goto again;
+ if (err)
+ xenbus_dev_fatal(dev, err, "ending transaction");
+
+ err = xenbus_switch_state(dev, XenbusStateConnected);
+ if (err)
+ xenbus_dev_fatal(dev, err, "switching to Connected state",
+ dev->nodename);
+
+ return;
+ abort:
+ xenbus_transaction_end(xbt, 1);
+}
+
+
+static int connect_ring(struct backend_info *be)
+{
+ struct xenbus_device *dev = be->dev;
+ unsigned long ring_ref;
+ unsigned int evtchn;
+ char protocol[64] = "";
+ int err;
+
+ DPRINTK("%s", dev->otherend);
+
+ err = xenbus_gather(XBT_NIL, dev->otherend, "ring-ref", "%lu",
+ &ring_ref, "event-channel", "%u", &evtchn, NULL);
+ if (err) {
+ xenbus_dev_fatal(dev, err,
+ "reading %s/ring-ref and event-channel",
+ dev->otherend);
+ return err;
+ }
+
+ be->blkif->blk_protocol = BLKIF_PROTOCOL_NATIVE;
+ err = xenbus_gather(XBT_NIL, dev->otherend, "protocol",
+ "%63s", protocol, NULL);
+ if (err)
+ strcpy(protocol, "unspecified, assuming native");
+ else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_NATIVE))
+ be->blkif->blk_protocol = BLKIF_PROTOCOL_NATIVE;
+ else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_X86_32))
+ be->blkif->blk_protocol = BLKIF_PROTOCOL_X86_32;
+ else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_X86_64))
+ be->blkif->blk_protocol = BLKIF_PROTOCOL_X86_64;
+ else {
+ xenbus_dev_fatal(dev, err, "unknown fe protocol %s", protocol);
+ return -1;
+ }
+ pr_info(DRV_PFX "ring-ref %ld, event-channel %d, protocol %d (%s)\n",
+ ring_ref, evtchn, be->blkif->blk_protocol, protocol);
+
+ /* Map the shared frame, irq etc. */
+ err = xen_blkif_map(be->blkif, ring_ref, evtchn);
+ if (err) {
+ xenbus_dev_fatal(dev, err, "mapping ring-ref %lu port %u",
+ ring_ref, evtchn);
+ return err;
+ }
+
+ return 0;
+}
+
+
+/* ** Driver Registration ** */
+
+
+static const struct xenbus_device_id xen_blkbk_ids[] = {
+ { "vbd" },
+ { "" }
+};
+
+
+static struct xenbus_driver xen_blkbk = {
+ .name = "vbd",
+ .owner = THIS_MODULE,
+ .ids = xen_blkbk_ids,
+ .probe = xen_blkbk_probe,
+ .remove = xen_blkbk_remove,
+ .otherend_changed = frontend_changed
+};
+
+
+int xen_blkif_xenbus_init(void)
+{
+ return xenbus_register_backend(&xen_blkbk);
+}
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
new file mode 100644
index 00000000..9ea8c257
--- /dev/null
+++ b/drivers/block/xen-blkfront.c
@@ -0,0 +1,1412 @@
+/*
+ * blkfront.c
+ *
+ * XenLinux virtual block device driver.
+ *
+ * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
+ * Modifications by Mark A. Williamson are (c) Intel Research Cambridge
+ * Copyright (c) 2004, Christian Limpach
+ * Copyright (c) 2004, Andrew Warfield
+ * Copyright (c) 2005, Christopher Clark
+ * Copyright (c) 2005, XenSource Ltd
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <linux/interrupt.h>
+#include <linux/blkdev.h>
+#include <linux/hdreg.h>
+#include <linux/cdrom.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/mutex.h>
+#include <linux/scatterlist.h>
+
+#include <xen/xen.h>
+#include <xen/xenbus.h>
+#include <xen/grant_table.h>
+#include <xen/events.h>
+#include <xen/page.h>
+#include <xen/platform_pci.h>
+
+#include <xen/interface/grant_table.h>
+#include <xen/interface/io/blkif.h>
+#include <xen/interface/io/protocols.h>
+
+#include <asm/xen/hypervisor.h>
+
+enum blkif_state {
+ BLKIF_STATE_DISCONNECTED,
+ BLKIF_STATE_CONNECTED,
+ BLKIF_STATE_SUSPENDED,
+};
+
+struct blk_shadow {
+ struct blkif_request req;
+ struct request *request;
+ unsigned long frame[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+};
+
+static DEFINE_MUTEX(blkfront_mutex);
+static const struct block_device_operations xlvbd_block_fops;
+
+#define BLK_RING_SIZE __CONST_RING_SIZE(blkif, PAGE_SIZE)
+
+/*
+ * We have one of these per vbd, whether ide, scsi or 'other'. They
+ * hang in private_data off the gendisk structure. We may end up
+ * putting all kinds of interesting stuff here :-)
+ */
+struct blkfront_info
+{
+ struct mutex mutex;
+ struct xenbus_device *xbdev;
+ struct gendisk *gd;
+ int vdevice;
+ blkif_vdev_t handle;
+ enum blkif_state connected;
+ int ring_ref;
+ struct blkif_front_ring ring;
+ struct scatterlist sg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+ unsigned int evtchn, irq;
+ struct request_queue *rq;
+ struct work_struct work;
+ struct gnttab_free_callback callback;
+ struct blk_shadow shadow[BLK_RING_SIZE];
+ unsigned long shadow_free;
+ unsigned int feature_flush;
+ unsigned int flush_op;
+ int is_ready;
+};
+
+static DEFINE_SPINLOCK(blkif_io_lock);
+
+static unsigned int nr_minors;
+static unsigned long *minors;
+static DEFINE_SPINLOCK(minor_lock);
+
+#define MAXIMUM_OUTSTANDING_BLOCK_REQS \
+ (BLKIF_MAX_SEGMENTS_PER_REQUEST * BLK_RING_SIZE)
+#define GRANT_INVALID_REF 0
+
+#define PARTS_PER_DISK 16
+#define PARTS_PER_EXT_DISK 256
+
+#define BLKIF_MAJOR(dev) ((dev)>>8)
+#define BLKIF_MINOR(dev) ((dev) & 0xff)
+
+#define EXT_SHIFT 28
+#define EXTENDED (1<<EXT_SHIFT)
+#define VDEV_IS_EXTENDED(dev) ((dev)&(EXTENDED))
+#define BLKIF_MINOR_EXT(dev) ((dev)&(~EXTENDED))
+#define EMULATED_HD_DISK_MINOR_OFFSET (0)
+#define EMULATED_HD_DISK_NAME_OFFSET (EMULATED_HD_DISK_MINOR_OFFSET / 256)
+#define EMULATED_SD_DISK_MINOR_OFFSET (0)
+#define EMULATED_SD_DISK_NAME_OFFSET (EMULATED_SD_DISK_MINOR_OFFSET / 256)
+
+#define DEV_NAME "xvd" /* name in /dev */
+
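+/*
+ * The shadow free list is threaded through the req.id fields of unused
+ * shadow entries; shadow_free indexes the head of the list.
+ */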
+static int get_id_from_freelist(struct blkfront_info *info)
+{
+ unsigned long free = info->shadow_free;
+ BUG_ON(free >= BLK_RING_SIZE);
+ info->shadow_free = info->shadow[free].req.id;
+ info->shadow[free].req.id = 0x0fffffee; /* debug */
+ return free;
+}
+
+static void add_id_to_freelist(struct blkfront_info *info,
+ unsigned long id)
+{
+ info->shadow[id].req.id = info->shadow_free;
+ info->shadow[id].request = NULL;
+ info->shadow_free = id;
+}
+
+static int xlbd_reserve_minors(unsigned int minor, unsigned int nr)
+{
+ unsigned int end = minor + nr;
+ int rc;
+
+ if (end > nr_minors) {
+ unsigned long *bitmap, *old;
+
+ bitmap = kzalloc(BITS_TO_LONGS(end) * sizeof(*bitmap),
+ GFP_KERNEL);
+ if (bitmap == NULL)
+ return -ENOMEM;
+
+ spin_lock(&minor_lock);
+ if (end > nr_minors) {
+ old = minors;
+ memcpy(bitmap, minors,
+ BITS_TO_LONGS(nr_minors) * sizeof(*bitmap));
+ minors = bitmap;
+ nr_minors = BITS_TO_LONGS(end) * BITS_PER_LONG;
+ } else
+ old = bitmap;
+ spin_unlock(&minor_lock);
+ kfree(old);
+ }
+
+ spin_lock(&minor_lock);
+ if (find_next_bit(minors, end, minor) >= end) {
+ for (; minor < end; ++minor)
+ __set_bit(minor, minors);
+ rc = 0;
+ } else
+ rc = -EBUSY;
+ spin_unlock(&minor_lock);
+
+ return rc;
+}
+
+static void xlbd_release_minors(unsigned int minor, unsigned int nr)
+{
+ unsigned int end = minor + nr;
+
+ BUG_ON(end > nr_minors);
+ spin_lock(&minor_lock);
+ for (; minor < end; ++minor)
+ __clear_bit(minor, minors);
+ spin_unlock(&minor_lock);
+}
+
+static void blkif_restart_queue_callback(void *arg)
+{
+ struct blkfront_info *info = (struct blkfront_info *)arg;
+ schedule_work(&info->work);
+}
+
+static int blkif_getgeo(struct block_device *bd, struct hd_geometry *hg)
+{
+ /* We don't have real geometry info, but let's at least return
+ values consistent with the size of the device */
+ sector_t nsect = get_capacity(bd->bd_disk);
+ sector_t cylinders = nsect;
+
+ hg->heads = 0xff;
+ hg->sectors = 0x3f;
+ sector_div(cylinders, hg->heads * hg->sectors);
+ hg->cylinders = cylinders;
+ if ((sector_t)(hg->cylinders + 1) * hg->heads * hg->sectors < nsect)
+ hg->cylinders = 0xffff;
+ return 0;
+}
+
+static int blkif_ioctl(struct block_device *bdev, fmode_t mode,
+ unsigned command, unsigned long argument)
+{
+ struct blkfront_info *info = bdev->bd_disk->private_data;
+ int i;
+
+ dev_dbg(&info->xbdev->dev, "command: 0x%x, argument: 0x%lx\n",
+ command, (long)argument);
+
+ switch (command) {
+ case CDROMMULTISESSION:
+ dev_dbg(&info->xbdev->dev, "FIXME: support multisession CDs later\n");
+ for (i = 0; i < sizeof(struct cdrom_multisession); i++)
+ if (put_user(0, (char __user *)(argument + i)))
+ return -EFAULT;
+ return 0;
+
+ case CDROM_GET_CAPABILITY: {
+ struct gendisk *gd = info->gd;
+ if (gd->flags & GENHD_FL_CD)
+ return 0;
+ return -EINVAL;
+ }
+
+ default:
+ /*printk(KERN_ALERT "ioctl %08x not supported by Xen blkdev\n",
+ command);*/
+ return -EINVAL; /* same return as native Linux */
+ }
+
+ return 0;
+}
+
+/*
+ * Generate a Xen blkfront IO request from a blk layer request. Reads
+ * and writes are handled as expected.
+ *
+ * @req: a request struct
+ */
+static int blkif_queue_request(struct request *req)
+{
+ struct blkfront_info *info = req->rq_disk->private_data;
+ unsigned long buffer_mfn;
+ struct blkif_request *ring_req;
+ unsigned long id;
+ unsigned int fsect, lsect;
+ int i, ref;
+ grant_ref_t gref_head;
+ struct scatterlist *sg;
+
+ if (unlikely(info->connected != BLKIF_STATE_CONNECTED))
+ return 1;
+
+ if (gnttab_alloc_grant_references(
+ BLKIF_MAX_SEGMENTS_PER_REQUEST, &gref_head) < 0) {
+ gnttab_request_free_callback(
+ &info->callback,
+ blkif_restart_queue_callback,
+ info,
+ BLKIF_MAX_SEGMENTS_PER_REQUEST);
+ return 1;
+ }
+
+ /* Fill out a communications ring structure. */
+ ring_req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt);
+ id = get_id_from_freelist(info);
+ info->shadow[id].request = req;
+
+ ring_req->id = id;
+ ring_req->u.rw.sector_number = (blkif_sector_t)blk_rq_pos(req);
+ ring_req->handle = info->handle;
+
+ ring_req->operation = rq_data_dir(req) ?
+ BLKIF_OP_WRITE : BLKIF_OP_READ;
+
+ if (req->cmd_flags & (REQ_FLUSH | REQ_FUA)) {
+ /*
+		 * Ideally we can do an unordered flush-to-disk. In case the
+		 * backend only supports barriers, use that. A barrier request is
+		 * a superset of FUA, so we can implement it the same
+ * way. (It's also a FLUSH+FUA, since it is
+ * guaranteed ordered WRT previous writes.)
+ */
+ ring_req->operation = info->flush_op;
+ }
+
+ ring_req->nr_segments = blk_rq_map_sg(req->q, req, info->sg);
+ BUG_ON(ring_req->nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST);
+
+ for_each_sg(info->sg, sg, ring_req->nr_segments, i) {
+ buffer_mfn = pfn_to_mfn(page_to_pfn(sg_page(sg)));
+ fsect = sg->offset >> 9;
+ lsect = fsect + (sg->length >> 9) - 1;
+ /* install a grant reference. */
+ ref = gnttab_claim_grant_reference(&gref_head);
+ BUG_ON(ref == -ENOSPC);
+
+ gnttab_grant_foreign_access_ref(
+ ref,
+ info->xbdev->otherend_id,
+ buffer_mfn,
+ rq_data_dir(req) );
+
+ info->shadow[id].frame[i] = mfn_to_pfn(buffer_mfn);
+ ring_req->u.rw.seg[i] =
+ (struct blkif_request_segment) {
+ .gref = ref,
+ .first_sect = fsect,
+ .last_sect = lsect };
+ }
+
+ info->ring.req_prod_pvt++;
+
+ /* Keep a private copy so we can reissue requests when recovering. */
+ info->shadow[id].req = *ring_req;
+
+ gnttab_free_grant_references(gref_head);
+
+ return 0;
+}
+
+
+static inline void flush_requests(struct blkfront_info *info)
+{
+ int notify;
+
+ RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&info->ring, notify);
+
+ if (notify)
+ notify_remote_via_irq(info->irq);
+}
+
+/*
+ * do_blkif_request
+ * read a block; request is in a request queue
+ */
+static void do_blkif_request(struct request_queue *rq)
+{
+ struct blkfront_info *info = NULL;
+ struct request *req;
+ int queued;
+
+ pr_debug("Entered do_blkif_request\n");
+
+ queued = 0;
+
+ while ((req = blk_peek_request(rq)) != NULL) {
+ info = req->rq_disk->private_data;
+
+ if (RING_FULL(&info->ring))
+ goto wait;
+
+ blk_start_request(req);
+
+ if (req->cmd_type != REQ_TYPE_FS) {
+ __blk_end_request_all(req, -EIO);
+ continue;
+ }
+
+ pr_debug("do_blk_req %p: cmd %p, sec %lx, "
+ "(%u/%u) buffer:%p [%s]\n",
+ req, req->cmd, (unsigned long)blk_rq_pos(req),
+ blk_rq_cur_sectors(req), blk_rq_sectors(req),
+ req->buffer, rq_data_dir(req) ? "write" : "read");
+
+ if (blkif_queue_request(req)) {
+ blk_requeue_request(rq, req);
+wait:
+ /* Avoid pointless unplugs. */
+ blk_stop_queue(rq);
+ break;
+ }
+
+ queued++;
+ }
+
+ if (queued != 0)
+ flush_requests(info);
+}
+
+static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size)
+{
+ struct request_queue *rq;
+
+ rq = blk_init_queue(do_blkif_request, &blkif_io_lock);
+ if (rq == NULL)
+ return -1;
+
+ queue_flag_set_unlocked(QUEUE_FLAG_VIRT, rq);
+
+ /* Hard sector size and max sectors impersonate the equiv. hardware. */
+ blk_queue_logical_block_size(rq, sector_size);
+ blk_queue_max_hw_sectors(rq, 512);
+
+ /* Each segment in a request is up to an aligned page in size. */
+ blk_queue_segment_boundary(rq, PAGE_SIZE - 1);
+ blk_queue_max_segment_size(rq, PAGE_SIZE);
+
+ /* Ensure a merged request will fit in a single I/O ring slot. */
+ blk_queue_max_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST);
+
+ /* Make sure buffer addresses are sector-aligned. */
+ blk_queue_dma_alignment(rq, 511);
+
+ /* Make sure we don't use bounce buffers. */
+ blk_queue_bounce_limit(rq, BLK_BOUNCE_ANY);
+
+ gd->queue = rq;
+
+ return 0;
+}
+
+
+static void xlvbd_flush(struct blkfront_info *info)
+{
+ blk_queue_flush(info->rq, info->feature_flush);
+ printk(KERN_INFO "blkfront: %s: %s: %s\n",
+ info->gd->disk_name,
+ info->flush_op == BLKIF_OP_WRITE_BARRIER ?
+ "barrier" : (info->flush_op == BLKIF_OP_FLUSH_DISKCACHE ?
+ "flush diskcache" : "barrier or flush"),
+ info->feature_flush ? "enabled" : "disabled");
+}
+
+static int xen_translate_vdev(int vdevice, int *minor, unsigned int *offset)
+{
+ int major;
+ major = BLKIF_MAJOR(vdevice);
+ *minor = BLKIF_MINOR(vdevice);
+ switch (major) {
+ case XEN_IDE0_MAJOR:
+ *offset = (*minor / 64) + EMULATED_HD_DISK_NAME_OFFSET;
+ *minor = ((*minor / 64) * PARTS_PER_DISK) +
+ EMULATED_HD_DISK_MINOR_OFFSET;
+ break;
+ case XEN_IDE1_MAJOR:
+ *offset = (*minor / 64) + 2 + EMULATED_HD_DISK_NAME_OFFSET;
+ *minor = (((*minor / 64) + 2) * PARTS_PER_DISK) +
+ EMULATED_HD_DISK_MINOR_OFFSET;
+ break;
+ case XEN_SCSI_DISK0_MAJOR:
+ *offset = (*minor / PARTS_PER_DISK) + EMULATED_SD_DISK_NAME_OFFSET;
+ *minor = *minor + EMULATED_SD_DISK_MINOR_OFFSET;
+ break;
+ case XEN_SCSI_DISK1_MAJOR:
+ case XEN_SCSI_DISK2_MAJOR:
+ case XEN_SCSI_DISK3_MAJOR:
+ case XEN_SCSI_DISK4_MAJOR:
+ case XEN_SCSI_DISK5_MAJOR:
+ case XEN_SCSI_DISK6_MAJOR:
+ case XEN_SCSI_DISK7_MAJOR:
+ *offset = (*minor / PARTS_PER_DISK) +
+ ((major - XEN_SCSI_DISK1_MAJOR + 1) * 16) +
+ EMULATED_SD_DISK_NAME_OFFSET;
+ *minor = *minor +
+ ((major - XEN_SCSI_DISK1_MAJOR + 1) * 16 * PARTS_PER_DISK) +
+ EMULATED_SD_DISK_MINOR_OFFSET;
+ break;
+ case XEN_SCSI_DISK8_MAJOR:
+ case XEN_SCSI_DISK9_MAJOR:
+ case XEN_SCSI_DISK10_MAJOR:
+ case XEN_SCSI_DISK11_MAJOR:
+ case XEN_SCSI_DISK12_MAJOR:
+ case XEN_SCSI_DISK13_MAJOR:
+ case XEN_SCSI_DISK14_MAJOR:
+ case XEN_SCSI_DISK15_MAJOR:
+ *offset = (*minor / PARTS_PER_DISK) +
+ ((major - XEN_SCSI_DISK8_MAJOR + 8) * 16) +
+ EMULATED_SD_DISK_NAME_OFFSET;
+ *minor = *minor +
+ ((major - XEN_SCSI_DISK8_MAJOR + 8) * 16 * PARTS_PER_DISK) +
+ EMULATED_SD_DISK_MINOR_OFFSET;
+ break;
+ case XENVBD_MAJOR:
+ *offset = *minor / PARTS_PER_DISK;
+ break;
+ default:
+ printk(KERN_WARNING "blkfront: your disk configuration is "
+ "incorrect, please use an xvd device instead\n");
+ return -ENODEV;
+ }
+ return 0;
+}
+
+static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
+ struct blkfront_info *info,
+ u16 vdisk_info, u16 sector_size)
+{
+ struct gendisk *gd;
+ int nr_minors = 1;
+ int err;
+ unsigned int offset;
+ int minor;
+ int nr_parts;
+
+ BUG_ON(info->gd != NULL);
+ BUG_ON(info->rq != NULL);
+
+ if ((info->vdevice>>EXT_SHIFT) > 1) {
+ /* this is above the extended range; something is wrong */
+ printk(KERN_WARNING "blkfront: vdevice 0x%x is above the extended range; ignoring\n", info->vdevice);
+ return -ENODEV;
+ }
+
+ if (!VDEV_IS_EXTENDED(info->vdevice)) {
+ err = xen_translate_vdev(info->vdevice, &minor, &offset);
+ if (err)
+ return err;
+ nr_parts = PARTS_PER_DISK;
+ } else {
+ minor = BLKIF_MINOR_EXT(info->vdevice);
+ nr_parts = PARTS_PER_EXT_DISK;
+ offset = minor / nr_parts;
+ if (xen_hvm_domain() && offset < EMULATED_HD_DISK_NAME_OFFSET + 4)
+ printk(KERN_WARNING "blkfront: vdevice 0x%x might conflict with "
+ "emulated IDE disks,\n\t choose an xvd device name"
+ "from xvde on\n", info->vdevice);
+ }
+ err = -ENODEV;
+
+ if ((minor % nr_parts) == 0)
+ nr_minors = nr_parts;
+
+ err = xlbd_reserve_minors(minor, nr_minors);
+ if (err)
+ goto out;
+ err = -ENODEV;
+
+ gd = alloc_disk(nr_minors);
+ if (gd == NULL)
+ goto release;
+
+ if (nr_minors > 1) {
+ if (offset < 26)
+ sprintf(gd->disk_name, "%s%c", DEV_NAME, 'a' + offset);
+ else
+ sprintf(gd->disk_name, "%s%c%c", DEV_NAME,
+ 'a' + ((offset / 26)-1), 'a' + (offset % 26));
+ } else {
+ if (offset < 26)
+ sprintf(gd->disk_name, "%s%c%d", DEV_NAME,
+ 'a' + offset,
+ minor & (nr_parts - 1));
+ else
+ sprintf(gd->disk_name, "%s%c%c%d", DEV_NAME,
+ 'a' + ((offset / 26) - 1),
+ 'a' + (offset % 26),
+ minor & (nr_parts - 1));
+ }
+
+ gd->major = XENVBD_MAJOR;
+ gd->first_minor = minor;
+ gd->fops = &xlvbd_block_fops;
+ gd->private_data = info;
+ gd->driverfs_dev = &(info->xbdev->dev);
+ set_capacity(gd, capacity);
+
+ if (xlvbd_init_blk_queue(gd, sector_size)) {
+ del_gendisk(gd);
+ goto release;
+ }
+
+ info->rq = gd->queue;
+ info->gd = gd;
+
+ xlvbd_flush(info);
+
+ if (vdisk_info & VDISK_READONLY)
+ set_disk_ro(gd, 1);
+
+ if (vdisk_info & VDISK_REMOVABLE)
+ gd->flags |= GENHD_FL_REMOVABLE;
+
+ if (vdisk_info & VDISK_CDROM)
+ gd->flags |= GENHD_FL_CD;
+
+ return 0;
+
+ release:
+ xlbd_release_minors(minor, nr_minors);
+ out:
+ return err;
+}
+
+static void xlvbd_release_gendisk(struct blkfront_info *info)
+{
+ unsigned int minor, nr_minors;
+ unsigned long flags;
+
+ if (info->rq == NULL)
+ return;
+
+ spin_lock_irqsave(&blkif_io_lock, flags);
+
+ /* No more blkif_request(). */
+ blk_stop_queue(info->rq);
+
+ /* No more gnttab callback work. */
+ gnttab_cancel_free_callback(&info->callback);
+ spin_unlock_irqrestore(&blkif_io_lock, flags);
+
+ /* Flush gnttab callback work. Must be done with no locks held. */
+ flush_work_sync(&info->work);
+
+ del_gendisk(info->gd);
+
+ minor = info->gd->first_minor;
+ nr_minors = info->gd->minors;
+ xlbd_release_minors(minor, nr_minors);
+
+ blk_cleanup_queue(info->rq);
+ info->rq = NULL;
+
+ put_disk(info->gd);
+ info->gd = NULL;
+}
+
+static void kick_pending_request_queues(struct blkfront_info *info)
+{
+ if (!RING_FULL(&info->ring)) {
+ /* Re-enable calldowns. */
+ blk_start_queue(info->rq);
+ /* Kick things off immediately. */
+ do_blkif_request(info->rq);
+ }
+}
+
+static void blkif_restart_queue(struct work_struct *work)
+{
+ struct blkfront_info *info = container_of(work, struct blkfront_info, work);
+
+ spin_lock_irq(&blkif_io_lock);
+ if (info->connected == BLKIF_STATE_CONNECTED)
+ kick_pending_request_queues(info);
+ spin_unlock_irq(&blkif_io_lock);
+}
+
+static void blkif_free(struct blkfront_info *info, int suspend)
+{
+ /* Prevent new requests being issued until we fix things up. */
+ spin_lock_irq(&blkif_io_lock);
+ info->connected = suspend ?
+ BLKIF_STATE_SUSPENDED : BLKIF_STATE_DISCONNECTED;
+ /* No more blkif_request(). */
+ if (info->rq)
+ blk_stop_queue(info->rq);
+ /* No more gnttab callback work. */
+ gnttab_cancel_free_callback(&info->callback);
+ spin_unlock_irq(&blkif_io_lock);
+
+ /* Flush gnttab callback work. Must be done with no locks held. */
+ flush_work_sync(&info->work);
+
+ /* Free resources associated with old device channel. */
+ if (info->ring_ref != GRANT_INVALID_REF) {
+ gnttab_end_foreign_access(info->ring_ref, 0,
+ (unsigned long)info->ring.sring);
+ info->ring_ref = GRANT_INVALID_REF;
+ info->ring.sring = NULL;
+ }
+ if (info->irq)
+ unbind_from_irqhandler(info->irq, info);
+ info->evtchn = info->irq = 0;
+
+}
+
+static void blkif_completion(struct blk_shadow *s)
+{
+ int i;
+ for (i = 0; i < s->req.nr_segments; i++)
+ gnttab_end_foreign_access(s->req.u.rw.seg[i].gref, 0, 0UL);
+}
+
+static irqreturn_t blkif_interrupt(int irq, void *dev_id)
+{
+ struct request *req;
+ struct blkif_response *bret;
+ RING_IDX i, rp;
+ unsigned long flags;
+ struct blkfront_info *info = (struct blkfront_info *)dev_id;
+ int error;
+
+ spin_lock_irqsave(&blkif_io_lock, flags);
+
+ if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) {
+ spin_unlock_irqrestore(&blkif_io_lock, flags);
+ return IRQ_HANDLED;
+ }
+
+ again:
+ rp = info->ring.sring->rsp_prod;
+ rmb(); /* Ensure we see queued responses up to 'rp'. */
+
+ for (i = info->ring.rsp_cons; i != rp; i++) {
+ unsigned long id;
+
+ bret = RING_GET_RESPONSE(&info->ring, i);
+ id = bret->id;
+ req = info->shadow[id].request;
+
+ blkif_completion(&info->shadow[id]);
+
+ add_id_to_freelist(info, id);
+
+ error = (bret->status == BLKIF_RSP_OKAY) ? 0 : -EIO;
+ switch (bret->operation) {
+ case BLKIF_OP_FLUSH_DISKCACHE:
+ case BLKIF_OP_WRITE_BARRIER:
+ if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) {
+ printk(KERN_WARNING "blkfront: %s: write %s op failed\n",
+ info->flush_op == BLKIF_OP_WRITE_BARRIER ?
+ "barrier" : "flush disk cache",
+ info->gd->disk_name);
+ error = -EOPNOTSUPP;
+ }
+ if (unlikely(bret->status == BLKIF_RSP_ERROR &&
+ info->shadow[id].req.nr_segments == 0)) {
+ printk(KERN_WARNING "blkfront: %s: empty write %s op failed\n",
+ info->flush_op == BLKIF_OP_WRITE_BARRIER ?
+ "barrier" : "flush disk cache",
+ info->gd->disk_name);
+ error = -EOPNOTSUPP;
+ }
+ if (unlikely(error)) {
+ if (error == -EOPNOTSUPP)
+ error = 0;
+ info->feature_flush = 0;
+ info->flush_op = 0;
+ xlvbd_flush(info);
+ }
+ /* fall through */
+ case BLKIF_OP_READ:
+ case BLKIF_OP_WRITE:
+ if (unlikely(bret->status != BLKIF_RSP_OKAY))
+ dev_dbg(&info->xbdev->dev, "Bad return from blkdev data "
+ "request: %x\n", bret->status);
+
+ __blk_end_request_all(req, error);
+ break;
+ default:
+ BUG();
+ }
+ }
+
+ info->ring.rsp_cons = i;
+
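+	/*
+	 * If requests are still outstanding, re-check for responses that may
+	 * have raced in; otherwise tell the backend which response should
+	 * trigger the next notification.
+	 */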
+ if (i != info->ring.req_prod_pvt) {
+ int more_to_do;
+ RING_FINAL_CHECK_FOR_RESPONSES(&info->ring, more_to_do);
+ if (more_to_do)
+ goto again;
+ } else
+ info->ring.sring->rsp_event = i + 1;
+
+ kick_pending_request_queues(info);
+
+ spin_unlock_irqrestore(&blkif_io_lock, flags);
+
+ return IRQ_HANDLED;
+}
+
+
+static int setup_blkring(struct xenbus_device *dev,
+ struct blkfront_info *info)
+{
+ struct blkif_sring *sring;
+ int err;
+
+ info->ring_ref = GRANT_INVALID_REF;
+
+ sring = (struct blkif_sring *)__get_free_page(GFP_NOIO | __GFP_HIGH);
+ if (!sring) {
+ xenbus_dev_fatal(dev, -ENOMEM, "allocating shared ring");
+ return -ENOMEM;
+ }
+ SHARED_RING_INIT(sring);
+ FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE);
+
+ sg_init_table(info->sg, BLKIF_MAX_SEGMENTS_PER_REQUEST);
+
+ err = xenbus_grant_ring(dev, virt_to_mfn(info->ring.sring));
+ if (err < 0) {
+ free_page((unsigned long)sring);
+ info->ring.sring = NULL;
+ goto fail;
+ }
+ info->ring_ref = err;
+
+ err = xenbus_alloc_evtchn(dev, &info->evtchn);
+ if (err)
+ goto fail;
+
+ err = bind_evtchn_to_irqhandler(info->evtchn,
+ blkif_interrupt,
+ IRQF_SAMPLE_RANDOM, "blkif", info);
+ if (err <= 0) {
+ xenbus_dev_fatal(dev, err,
+ "bind_evtchn_to_irqhandler failed");
+ goto fail;
+ }
+ info->irq = err;
+
+ return 0;
+fail:
+ blkif_free(info, 0);
+ return err;
+}
+
+
+/* Common code used when first setting up, and when resuming. */
+static int talk_to_blkback(struct xenbus_device *dev,
+ struct blkfront_info *info)
+{
+ const char *message = NULL;
+ struct xenbus_transaction xbt;
+ int err;
+
+ /* Create shared ring, alloc event channel. */
+ err = setup_blkring(dev, info);
+ if (err)
+ goto out;
+
+again:
+ err = xenbus_transaction_start(&xbt);
+ if (err) {
+ xenbus_dev_fatal(dev, err, "starting transaction");
+ goto destroy_blkring;
+ }
+
+ err = xenbus_printf(xbt, dev->nodename,
+ "ring-ref", "%u", info->ring_ref);
+ if (err) {
+ message = "writing ring-ref";
+ goto abort_transaction;
+ }
+ err = xenbus_printf(xbt, dev->nodename,
+ "event-channel", "%u", info->evtchn);
+ if (err) {
+ message = "writing event-channel";
+ goto abort_transaction;
+ }
+ err = xenbus_printf(xbt, dev->nodename, "protocol", "%s",
+ XEN_IO_PROTO_ABI_NATIVE);
+ if (err) {
+ message = "writing protocol";
+ goto abort_transaction;
+ }
+
+ err = xenbus_transaction_end(xbt, 0);
+ if (err) {
+ if (err == -EAGAIN)
+ goto again;
+ xenbus_dev_fatal(dev, err, "completing transaction");
+ goto destroy_blkring;
+ }
+
+ xenbus_switch_state(dev, XenbusStateInitialised);
+
+ return 0;
+
+ abort_transaction:
+ xenbus_transaction_end(xbt, 1);
+ if (message)
+ xenbus_dev_fatal(dev, err, "%s", message);
+ destroy_blkring:
+ blkif_free(info, 0);
+ out:
+ return err;
+}
+
+/**
+ * Entry point to this code when a new device is created. Allocate the basic
+ * structures and the ring buffer for communication with the backend, and
+ * inform the backend of the appropriate details for those. Switch to
+ * Initialised state.
+ */
+static int blkfront_probe(struct xenbus_device *dev,
+ const struct xenbus_device_id *id)
+{
+ int err, vdevice, i;
+ struct blkfront_info *info;
+
+ /* FIXME: Use dynamic device id if this is not set. */
+ err = xenbus_scanf(XBT_NIL, dev->nodename,
+ "virtual-device", "%i", &vdevice);
+ if (err != 1) {
+ /* go looking in the extended area instead */
+ err = xenbus_scanf(XBT_NIL, dev->nodename, "virtual-device-ext",
+ "%i", &vdevice);
+ if (err != 1) {
+ xenbus_dev_fatal(dev, err, "reading virtual-device");
+ return err;
+ }
+ }
+
+ if (xen_hvm_domain()) {
+ char *type;
+ int len;
+ /* no unplug has been done: do not hook devices != xen vbds */
+ if (xen_platform_pci_unplug & XEN_UNPLUG_UNNECESSARY) {
+ int major;
+
+ if (!VDEV_IS_EXTENDED(vdevice))
+ major = BLKIF_MAJOR(vdevice);
+ else
+ major = XENVBD_MAJOR;
+
+ if (major != XENVBD_MAJOR) {
+ printk(KERN_INFO
+ "%s: HVM does not support vbd %d as xen block device\n",
+					 __func__, vdevice);
+ return -ENODEV;
+ }
+ }
+ /* do not create a PV cdrom device if we are an HVM guest */
+ type = xenbus_read(XBT_NIL, dev->nodename, "device-type", &len);
+ if (IS_ERR(type))
+ return -ENODEV;
+ if (strncmp(type, "cdrom", 5) == 0) {
+ kfree(type);
+ return -ENODEV;
+ }
+ kfree(type);
+ }
+ info = kzalloc(sizeof(*info), GFP_KERNEL);
+ if (!info) {
+ xenbus_dev_fatal(dev, -ENOMEM, "allocating info structure");
+ return -ENOMEM;
+ }
+
+ mutex_init(&info->mutex);
+ info->xbdev = dev;
+ info->vdevice = vdevice;
+ info->connected = BLKIF_STATE_DISCONNECTED;
+ INIT_WORK(&info->work, blkif_restart_queue);
+
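+	/* Initialise the shadow free list: each entry links to the next,
+	 * and the last entry is terminated with an out-of-range id. */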
+ for (i = 0; i < BLK_RING_SIZE; i++)
+ info->shadow[i].req.id = i+1;
+ info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff;
+
+ /* Front end dir is a number, which is used as the id. */
+ info->handle = simple_strtoul(strrchr(dev->nodename, '/')+1, NULL, 0);
+ dev_set_drvdata(&dev->dev, info);
+
+ err = talk_to_blkback(dev, info);
+ if (err) {
+ kfree(info);
+ dev_set_drvdata(&dev->dev, NULL);
+ return err;
+ }
+
+ return 0;
+}
+
+
+static int blkif_recover(struct blkfront_info *info)
+{
+ int i;
+ struct blkif_request *req;
+ struct blk_shadow *copy;
+ int j;
+
+ /* Stage 1: Make a safe copy of the shadow state. */
+ copy = kmalloc(sizeof(info->shadow),
+ GFP_NOIO | __GFP_REPEAT | __GFP_HIGH);
+ if (!copy)
+ return -ENOMEM;
+ memcpy(copy, info->shadow, sizeof(info->shadow));
+
+ /* Stage 2: Set up free list. */
+ memset(&info->shadow, 0, sizeof(info->shadow));
+ for (i = 0; i < BLK_RING_SIZE; i++)
+ info->shadow[i].req.id = i+1;
+ info->shadow_free = info->ring.req_prod_pvt;
+ info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff;
+
+ /* Stage 3: Find pending requests and requeue them. */
+ for (i = 0; i < BLK_RING_SIZE; i++) {
+ /* Not in use? */
+ if (!copy[i].request)
+ continue;
+
+ /* Grab a request slot and copy shadow state into it. */
+ req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt);
+ *req = copy[i].req;
+
+ /* We get a new request id, and must reset the shadow state. */
+ req->id = get_id_from_freelist(info);
+ memcpy(&info->shadow[req->id], &copy[i], sizeof(copy[i]));
+
+ /* Rewrite any grant references invalidated by susp/resume. */
+ for (j = 0; j < req->nr_segments; j++)
+ gnttab_grant_foreign_access_ref(
+ req->u.rw.seg[j].gref,
+ info->xbdev->otherend_id,
+ pfn_to_mfn(info->shadow[req->id].frame[j]),
+ rq_data_dir(info->shadow[req->id].request));
+ info->shadow[req->id].req = *req;
+
+ info->ring.req_prod_pvt++;
+ }
+
+ kfree(copy);
+
+ xenbus_switch_state(info->xbdev, XenbusStateConnected);
+
+ spin_lock_irq(&blkif_io_lock);
+
+ /* Now safe for us to use the shared ring */
+ info->connected = BLKIF_STATE_CONNECTED;
+
+ /* Send off requeued requests */
+ flush_requests(info);
+
+ /* Kick any other new requests queued since we resumed */
+ kick_pending_request_queues(info);
+
+ spin_unlock_irq(&blkif_io_lock);
+
+ return 0;
+}
+
+/**
+ * We are reconnecting to the backend, due to a suspend/resume, or a backend
+ * driver restart. We tear down our blkif structure and recreate it, but
+ * leave the device-layer structures intact so that this is transparent to the
+ * rest of the kernel.
+ */
+static int blkfront_resume(struct xenbus_device *dev)
+{
+ struct blkfront_info *info = dev_get_drvdata(&dev->dev);
+ int err;
+
+ dev_dbg(&dev->dev, "blkfront_resume: %s\n", dev->nodename);
+
+ blkif_free(info, info->connected == BLKIF_STATE_CONNECTED);
+
+ err = talk_to_blkback(dev, info);
+ if (info->connected == BLKIF_STATE_SUSPENDED && !err)
+ err = blkif_recover(info);
+
+ return err;
+}
+
+static void
+blkfront_closing(struct blkfront_info *info)
+{
+ struct xenbus_device *xbdev = info->xbdev;
+ struct block_device *bdev = NULL;
+
+ mutex_lock(&info->mutex);
+
+ if (xbdev->state == XenbusStateClosing) {
+ mutex_unlock(&info->mutex);
+ return;
+ }
+
+ if (info->gd)
+ bdev = bdget_disk(info->gd, 0);
+
+ mutex_unlock(&info->mutex);
+
+ if (!bdev) {
+ xenbus_frontend_closed(xbdev);
+ return;
+ }
+
+ mutex_lock(&bdev->bd_mutex);
+
+ if (bdev->bd_openers) {
+ xenbus_dev_error(xbdev, -EBUSY,
+ "Device in use; refusing to close");
+ xenbus_switch_state(xbdev, XenbusStateClosing);
+ } else {
+ xlvbd_release_gendisk(info);
+ xenbus_frontend_closed(xbdev);
+ }
+
+ mutex_unlock(&bdev->bd_mutex);
+ bdput(bdev);
+}
+
+/*
+ * Invoked when the backend is finally 'ready' (and has provided the
+ * details about the physical device - #sectors, size, etc).
+ */
+static void blkfront_connect(struct blkfront_info *info)
+{
+ unsigned long long sectors;
+ unsigned long sector_size;
+ unsigned int binfo;
+ int err;
+ int barrier, flush;
+
+ switch (info->connected) {
+ case BLKIF_STATE_CONNECTED:
+ /*
+ * Potentially, the back-end may be signalling
+ * a capacity change; update the capacity.
+ */
+ err = xenbus_scanf(XBT_NIL, info->xbdev->otherend,
+ "sectors", "%Lu", &sectors);
+ if (XENBUS_EXIST_ERR(err))
+ return;
+ printk(KERN_INFO "Setting capacity to %Lu\n",
+ sectors);
+ set_capacity(info->gd, sectors);
+ revalidate_disk(info->gd);
+
+ /* fall through */
+ case BLKIF_STATE_SUSPENDED:
+ return;
+
+ default:
+ break;
+ }
+
+ dev_dbg(&info->xbdev->dev, "%s:%s.\n",
+ __func__, info->xbdev->otherend);
+
+ err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
+ "sectors", "%llu", &sectors,
+ "info", "%u", &binfo,
+ "sector-size", "%lu", &sector_size,
+ NULL);
+ if (err) {
+ xenbus_dev_fatal(info->xbdev, err,
+ "reading backend fields at %s",
+ info->xbdev->otherend);
+ return;
+ }
+
+ info->feature_flush = 0;
+ info->flush_op = 0;
+
+ err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
+ "feature-barrier", "%d", &barrier,
+ NULL);
+
+ /*
+ * If there's no "feature-barrier" defined, then it means
+ * we're dealing with a very old backend which writes
+ * synchronously; nothing to do.
+ *
+	 * If barriers are supported, we implement flush using a write barrier.
+ */
+ if (!err && barrier) {
+ info->feature_flush = REQ_FLUSH | REQ_FUA;
+ info->flush_op = BLKIF_OP_WRITE_BARRIER;
+ }
+ /*
+	 * And if "feature-flush-cache" is advertised, use it in
+	 * preference to barriers.
+ */
+ err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
+ "feature-flush-cache", "%d", &flush,
+ NULL);
+
+ if (!err && flush) {
+ info->feature_flush = REQ_FLUSH;
+ info->flush_op = BLKIF_OP_FLUSH_DISKCACHE;
+ }
+
+ err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size);
+ if (err) {
+ xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s",
+ info->xbdev->otherend);
+ return;
+ }
+
+ xenbus_switch_state(info->xbdev, XenbusStateConnected);
+
+ /* Kick pending requests. */
+ spin_lock_irq(&blkif_io_lock);
+ info->connected = BLKIF_STATE_CONNECTED;
+ kick_pending_request_queues(info);
+ spin_unlock_irq(&blkif_io_lock);
+
+ add_disk(info->gd);
+
+ info->is_ready = 1;
+}
+
+/**
+ * Callback received when the backend's state changes.
+ */
+static void blkback_changed(struct xenbus_device *dev,
+ enum xenbus_state backend_state)
+{
+ struct blkfront_info *info = dev_get_drvdata(&dev->dev);
+
+ dev_dbg(&dev->dev, "blkfront:blkback_changed to state %d.\n", backend_state);
+
+ switch (backend_state) {
+ case XenbusStateInitialising:
+ case XenbusStateInitWait:
+ case XenbusStateInitialised:
+ case XenbusStateReconfiguring:
+ case XenbusStateReconfigured:
+ case XenbusStateUnknown:
+ case XenbusStateClosed:
+ break;
+
+ case XenbusStateConnected:
+ blkfront_connect(info);
+ break;
+
+ case XenbusStateClosing:
+ blkfront_closing(info);
+ break;
+ }
+}
+
+static int blkfront_remove(struct xenbus_device *xbdev)
+{
+ struct blkfront_info *info = dev_get_drvdata(&xbdev->dev);
+ struct block_device *bdev = NULL;
+ struct gendisk *disk;
+
+ dev_dbg(&xbdev->dev, "%s removed", xbdev->nodename);
+
+ blkif_free(info, 0);
+
+ mutex_lock(&info->mutex);
+
+ disk = info->gd;
+ if (disk)
+ bdev = bdget_disk(disk, 0);
+
+ info->xbdev = NULL;
+ mutex_unlock(&info->mutex);
+
+ if (!bdev) {
+ kfree(info);
+ return 0;
+ }
+
+ /*
+ * The xbdev was removed before we reached the Closed
+ * state. See if it's safe to remove the disk. If the bdev
+ * isn't closed yet, we let release take care of it.
+ */
+
+ mutex_lock(&bdev->bd_mutex);
+ info = disk->private_data;
+
+ dev_warn(disk_to_dev(disk),
+ "%s was hot-unplugged, %d stale handles\n",
+ xbdev->nodename, bdev->bd_openers);
+
+ if (info && !bdev->bd_openers) {
+ xlvbd_release_gendisk(info);
+ disk->private_data = NULL;
+ kfree(info);
+ }
+
+ mutex_unlock(&bdev->bd_mutex);
+ bdput(bdev);
+
+ return 0;
+}
+
+static int blkfront_is_ready(struct xenbus_device *dev)
+{
+ struct blkfront_info *info = dev_get_drvdata(&dev->dev);
+
+ return info->is_ready && info->xbdev;
+}
+
+static int blkif_open(struct block_device *bdev, fmode_t mode)
+{
+ struct gendisk *disk = bdev->bd_disk;
+ struct blkfront_info *info;
+ int err = 0;
+
+ mutex_lock(&blkfront_mutex);
+
+ info = disk->private_data;
+ if (!info) {
+ /* xbdev gone */
+ err = -ERESTARTSYS;
+ goto out;
+ }
+
+ mutex_lock(&info->mutex);
+
+ if (!info->gd)
+ /* xbdev is closed */
+ err = -ERESTARTSYS;
+
+ mutex_unlock(&info->mutex);
+
+out:
+ mutex_unlock(&blkfront_mutex);
+ return err;
+}
+
+static int blkif_release(struct gendisk *disk, fmode_t mode)
+{
+ struct blkfront_info *info = disk->private_data;
+ struct block_device *bdev;
+ struct xenbus_device *xbdev;
+
+ mutex_lock(&blkfront_mutex);
+
+ bdev = bdget_disk(disk, 0);
+ bdput(bdev);
+
+ if (bdev->bd_openers)
+ goto out;
+
+ /*
+ * Check if we have been instructed to close. We will have
+ * deferred this request, because the bdev was still open.
+ */
+
+ mutex_lock(&info->mutex);
+ xbdev = info->xbdev;
+
+ if (xbdev && xbdev->state == XenbusStateClosing) {
+ /* pending switch to state closed */
+ dev_info(disk_to_dev(bdev->bd_disk), "releasing disk\n");
+ xlvbd_release_gendisk(info);
+ xenbus_frontend_closed(info->xbdev);
+ }
+
+ mutex_unlock(&info->mutex);
+
+ if (!xbdev) {
+ /* sudden device removal */
+ dev_info(disk_to_dev(bdev->bd_disk), "releasing disk\n");
+ xlvbd_release_gendisk(info);
+ disk->private_data = NULL;
+ kfree(info);
+ }
+
+out:
+ mutex_unlock(&blkfront_mutex);
+ return 0;
+}
+
+static const struct block_device_operations xlvbd_block_fops =
+{
+ .owner = THIS_MODULE,
+ .open = blkif_open,
+ .release = blkif_release,
+ .getgeo = blkif_getgeo,
+ .ioctl = blkif_ioctl,
+};
+
+
+static const struct xenbus_device_id blkfront_ids[] = {
+ { "vbd" },
+ { "" }
+};
+
+static struct xenbus_driver blkfront = {
+ .name = "vbd",
+ .owner = THIS_MODULE,
+ .ids = blkfront_ids,
+ .probe = blkfront_probe,
+ .remove = blkfront_remove,
+ .resume = blkfront_resume,
+ .otherend_changed = blkback_changed,
+ .is_ready = blkfront_is_ready,
+};
+
+static int __init xlblk_init(void)
+{
+ if (!xen_domain())
+ return -ENODEV;
+
+ if (register_blkdev(XENVBD_MAJOR, DEV_NAME)) {
+ printk(KERN_WARNING "xen_blk: can't get major %d with name %s\n",
+ XENVBD_MAJOR, DEV_NAME);
+ return -ENODEV;
+ }
+
+ return xenbus_register_frontend(&blkfront);
+}
+module_init(xlblk_init);
+
+
+static void __exit xlblk_exit(void)
+{
+ return xenbus_unregister_driver(&blkfront);
+}
+module_exit(xlblk_exit);
+
+MODULE_DESCRIPTION("Xen virtual block device frontend");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_BLOCKDEV_MAJOR(XENVBD_MAJOR);
+MODULE_ALIAS("xen:vbd");
+MODULE_ALIAS("xenblk");
diff --git a/drivers/block/xsysace.c b/drivers/block/xsysace.c
new file mode 100644
index 00000000..6c7fd7db
--- /dev/null
+++ b/drivers/block/xsysace.c
@@ -0,0 +1,1315 @@
+/*
+ * Xilinx SystemACE device driver
+ *
+ * Copyright 2007 Secret Lab Technologies Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+
+/*
+ * The SystemACE chip is designed to configure FPGAs by loading an FPGA
+ * bitstream from a file on a CF card and squirting it into FPGAs connected
+ * to the SystemACE JTAG chain. It also has the advantage of providing an
+ * MPU interface which can be used to control the FPGA configuration process
+ * and to use the attached CF card for general purpose storage.
+ *
+ * This driver is a block device driver for the SystemACE.
+ *
+ * Initialization:
+ * The driver registers itself as a platform_device driver at module
+ * load time. The platform bus will take care of calling the
+ * ace_probe() method for all SystemACE instances in the system. Any
+ * number of SystemACE instances are supported. ace_probe() calls
+ *    ace_setup() which initializes all data structures, reads the CF
+ * id structure and registers the device.
+ *
+ * Processing:
+ * Just about all of the heavy lifting in this driver is performed by
+ * a Finite State Machine (FSM). The driver needs to wait on a number
+ * of events; some raised by interrupts, some which need to be polled
+ *    for.  Describing all of the behaviour in an FSM seems to be the
+ * easiest way to keep the complexity low and make it easy to
+ * understand what the driver is doing. If the block ops or the
+ * request function need to interact with the hardware, then they
+ *    simply need to flag the request and kick off FSM processing.
+ *
+ * The FSM itself is atomic-safe code which can be run from any
+ * context. The general process flow is:
+ * 1. obtain the ace->lock spinlock.
+ * 2. loop on ace_fsm_dostate() until the ace->fsm_continue flag is
+ * cleared.
+ * 3. release the lock.
+ *
+ * Individual states do not sleep in any way. If a condition needs to
+ *    be waited for then the state must clear the fsm_continue flag and
+ * either schedule the FSM to be run again at a later time, or expect
+ * an interrupt to call the FSM when the desired condition is met.
+ *
+ * In normal operation, the FSM is processed at interrupt context
+ * either when the driver's tasklet is scheduled, or when an irq is
+ * raised by the hardware. The tasklet can be scheduled at any time.
+ * The request method in particular schedules the tasklet when a new
+ * request has been indicated by the block layer. Once started, the
+ *    FSM proceeds as far as it can with the request until it needs to
+ *    wait on a hardware event.  At this point, it must yield execution.
+ *
+ * A state has two options when yielding execution:
+ * 1. ace_fsm_yield()
+ *       - Call if the state needs to poll for an event.
+ * - clears the fsm_continue flag to exit the processing loop
+ * - reschedules the tasklet to run again as soon as possible
+ * 2. ace_fsm_yieldirq()
+ * - Call if an irq is expected from the HW
+ * - clears the fsm_continue flag to exit the processing loop
+ * - does not reschedule the tasklet so the FSM will not be processed
+ * again until an irq is received.
+ * After calling a yield function, the state must return control back
+ * to the FSM main loop.
+ *
+ * Additionally, the driver maintains a kernel timer which can process
+ * the FSM. If the FSM gets stalled, typically due to a missed
+ * interrupt, then the kernel timer will expire and the driver can
+ * continue where it left off.
+ *
+ * To Do:
+ * - Add FPGA configuration control interface.
+ * - Request major number from lanana
+ */
+
+#undef DEBUG
+
+#include <linux/module.h>
+#include <linux/ctype.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/delay.h>
+#include <linux/slab.h>
+#include <linux/blkdev.h>
+#include <linux/mutex.h>
+#include <linux/ata.h>
+#include <linux/hdreg.h>
+#include <linux/platform_device.h>
+#if defined(CONFIG_OF)
+#include <linux/of_address.h>
+#include <linux/of_device.h>
+#include <linux/of_platform.h>
+#endif
+
+MODULE_AUTHOR("Grant Likely <grant.likely@secretlab.ca>");
+MODULE_DESCRIPTION("Xilinx SystemACE device driver");
+MODULE_LICENSE("GPL");
+
+/* SystemACE register definitions */
+#define ACE_BUSMODE (0x00)
+
+#define ACE_STATUS (0x04)
+#define ACE_STATUS_CFGLOCK (0x00000001)
+#define ACE_STATUS_MPULOCK (0x00000002)
+#define ACE_STATUS_CFGERROR (0x00000004) /* config controller error */
+#define ACE_STATUS_CFCERROR (0x00000008) /* CF controller error */
+#define ACE_STATUS_CFDETECT (0x00000010)
+#define ACE_STATUS_DATABUFRDY (0x00000020)
+#define ACE_STATUS_DATABUFMODE (0x00000040)
+#define ACE_STATUS_CFGDONE (0x00000080)
+#define ACE_STATUS_RDYFORCFCMD (0x00000100)
+#define ACE_STATUS_CFGMODEPIN (0x00000200)
+#define ACE_STATUS_CFGADDR_MASK (0x0000e000)
+#define ACE_STATUS_CFBSY (0x00020000)
+#define ACE_STATUS_CFRDY (0x00040000)
+#define ACE_STATUS_CFDWF (0x00080000)
+#define ACE_STATUS_CFDSC (0x00100000)
+#define ACE_STATUS_CFDRQ (0x00200000)
+#define ACE_STATUS_CFCORR (0x00400000)
+#define ACE_STATUS_CFERR (0x00800000)
+
+#define ACE_ERROR (0x08)
+#define ACE_CFGLBA (0x0c)
+#define ACE_MPULBA (0x10)
+
+#define ACE_SECCNTCMD (0x14)
+#define ACE_SECCNTCMD_RESET (0x0100)
+#define ACE_SECCNTCMD_IDENTIFY (0x0200)
+#define ACE_SECCNTCMD_READ_DATA (0x0300)
+#define ACE_SECCNTCMD_WRITE_DATA (0x0400)
+#define ACE_SECCNTCMD_ABORT (0x0600)
+
+#define ACE_VERSION (0x16)
+#define ACE_VERSION_REVISION_MASK (0x00FF)
+#define ACE_VERSION_MINOR_MASK (0x0F00)
+#define ACE_VERSION_MAJOR_MASK (0xF000)
+
+#define ACE_CTRL (0x18)
+#define ACE_CTRL_FORCELOCKREQ (0x0001)
+#define ACE_CTRL_LOCKREQ (0x0002)
+#define ACE_CTRL_FORCECFGADDR (0x0004)
+#define ACE_CTRL_FORCECFGMODE (0x0008)
+#define ACE_CTRL_CFGMODE (0x0010)
+#define ACE_CTRL_CFGSTART (0x0020)
+#define ACE_CTRL_CFGSEL (0x0040)
+#define ACE_CTRL_CFGRESET (0x0080)
+#define ACE_CTRL_DATABUFRDYIRQ (0x0100)
+#define ACE_CTRL_ERRORIRQ (0x0200)
+#define ACE_CTRL_CFGDONEIRQ (0x0400)
+#define ACE_CTRL_RESETIRQ (0x0800)
+#define ACE_CTRL_CFGPROG (0x1000)
+#define ACE_CTRL_CFGADDR_MASK (0xe000)
+
+#define ACE_FATSTAT (0x1c)
+
+#define ACE_NUM_MINORS 16
+#define ACE_SECTOR_SIZE (512)
+#define ACE_FIFO_SIZE (32)
+#define ACE_BUF_PER_SECTOR (ACE_SECTOR_SIZE / ACE_FIFO_SIZE)
+
+#define ACE_BUS_WIDTH_8 0
+#define ACE_BUS_WIDTH_16 1
+
+struct ace_reg_ops;
+
+struct ace_device {
+ /* driver state data */
+ int id;
+ int media_change;
+ int users;
+ struct list_head list;
+
+ /* finite state machine data */
+ struct tasklet_struct fsm_tasklet;
+ uint fsm_task; /* Current activity (ACE_TASK_*) */
+ uint fsm_state; /* Current state (ACE_FSM_STATE_*) */
+ uint fsm_continue_flag; /* cleared to exit FSM mainloop */
+ uint fsm_iter_num;
+ struct timer_list stall_timer;
+
+	/* Transfer state/result, used for both id and block requests */
+ struct request *req; /* request being processed */
+ void *data_ptr; /* pointer to I/O buffer */
+ int data_count; /* number of buffers remaining */
+ int data_result; /* Result of transfer; 0 := success */
+
+ int id_req_count; /* count of id requests */
+ int id_result;
+ struct completion id_completion; /* used when id req finishes */
+ int in_irq;
+
+ /* Details of hardware device */
+ resource_size_t physaddr;
+ void __iomem *baseaddr;
+ int irq;
+ int bus_width; /* 0 := 8 bit; 1 := 16 bit */
+ struct ace_reg_ops *reg_ops;
+ int lock_count;
+
+ /* Block device data structures */
+ spinlock_t lock;
+ struct device *dev;
+ struct request_queue *queue;
+ struct gendisk *gd;
+
+ /* Inserted CF card parameters */
+ u16 cf_id[ATA_ID_WORDS];
+};
+
+static DEFINE_MUTEX(xsysace_mutex);
+static int ace_major;
+
+/* ---------------------------------------------------------------------
+ * Low level register access
+ */
+
+struct ace_reg_ops {
+ u16(*in) (struct ace_device * ace, int reg);
+ void (*out) (struct ace_device * ace, int reg, u16 val);
+ void (*datain) (struct ace_device * ace);
+ void (*dataout) (struct ace_device * ace);
+};
+
+/* 8 Bit bus width */
+static u16 ace_in_8(struct ace_device *ace, int reg)
+{
+ void __iomem *r = ace->baseaddr + reg;
+ return in_8(r) | (in_8(r + 1) << 8);
+}
+
+static void ace_out_8(struct ace_device *ace, int reg, u16 val)
+{
+ void __iomem *r = ace->baseaddr + reg;
+ out_8(r, val);
+ out_8(r + 1, val >> 8);
+}
+
+static void ace_datain_8(struct ace_device *ace)
+{
+ void __iomem *r = ace->baseaddr + 0x40;
+ u8 *dst = ace->data_ptr;
+ int i = ACE_FIFO_SIZE;
+ while (i--)
+ *dst++ = in_8(r++);
+ ace->data_ptr = dst;
+}
+
+static void ace_dataout_8(struct ace_device *ace)
+{
+ void __iomem *r = ace->baseaddr + 0x40;
+ u8 *src = ace->data_ptr;
+ int i = ACE_FIFO_SIZE;
+ while (i--)
+ out_8(r++, *src++);
+ ace->data_ptr = src;
+}
+
+static struct ace_reg_ops ace_reg_8_ops = {
+ .in = ace_in_8,
+ .out = ace_out_8,
+ .datain = ace_datain_8,
+ .dataout = ace_dataout_8,
+};
+
+/* 16 bit big endian bus attachment */
+static u16 ace_in_be16(struct ace_device *ace, int reg)
+{
+ return in_be16(ace->baseaddr + reg);
+}
+
+static void ace_out_be16(struct ace_device *ace, int reg, u16 val)
+{
+ out_be16(ace->baseaddr + reg, val);
+}
+
+static void ace_datain_be16(struct ace_device *ace)
+{
+ int i = ACE_FIFO_SIZE / 2;
+ u16 *dst = ace->data_ptr;
+ while (i--)
+ *dst++ = in_le16(ace->baseaddr + 0x40);
+ ace->data_ptr = dst;
+}
+
+static void ace_dataout_be16(struct ace_device *ace)
+{
+ int i = ACE_FIFO_SIZE / 2;
+ u16 *src = ace->data_ptr;
+ while (i--)
+ out_le16(ace->baseaddr + 0x40, *src++);
+ ace->data_ptr = src;
+}
+
+/* 16 bit little endian bus attachment */
+static u16 ace_in_le16(struct ace_device *ace, int reg)
+{
+ return in_le16(ace->baseaddr + reg);
+}
+
+static void ace_out_le16(struct ace_device *ace, int reg, u16 val)
+{
+ out_le16(ace->baseaddr + reg, val);
+}
+
+static void ace_datain_le16(struct ace_device *ace)
+{
+ int i = ACE_FIFO_SIZE / 2;
+ u16 *dst = ace->data_ptr;
+ while (i--)
+ *dst++ = in_be16(ace->baseaddr + 0x40);
+ ace->data_ptr = dst;
+}
+
+static void ace_dataout_le16(struct ace_device *ace)
+{
+ int i = ACE_FIFO_SIZE / 2;
+ u16 *src = ace->data_ptr;
+ while (i--)
+ out_be16(ace->baseaddr + 0x40, *src++);
+ ace->data_ptr = src;
+}
+
+static struct ace_reg_ops ace_reg_be16_ops = {
+ .in = ace_in_be16,
+ .out = ace_out_be16,
+ .datain = ace_datain_be16,
+ .dataout = ace_dataout_be16,
+};
+
+static struct ace_reg_ops ace_reg_le16_ops = {
+ .in = ace_in_le16,
+ .out = ace_out_le16,
+ .datain = ace_datain_le16,
+ .dataout = ace_dataout_le16,
+};
+
+static inline u16 ace_in(struct ace_device *ace, int reg)
+{
+ return ace->reg_ops->in(ace, reg);
+}
+
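+/* 32-bit registers are accessed as two consecutive 16-bit halves, low half first */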
+static inline u32 ace_in32(struct ace_device *ace, int reg)
+{
+ return ace_in(ace, reg) | (ace_in(ace, reg + 2) << 16);
+}
+
+static inline void ace_out(struct ace_device *ace, int reg, u16 val)
+{
+ ace->reg_ops->out(ace, reg, val);
+}
+
+static inline void ace_out32(struct ace_device *ace, int reg, u32 val)
+{
+ ace_out(ace, reg, val);
+ ace_out(ace, reg + 2, val >> 16);
+}
+
+/* ---------------------------------------------------------------------
+ * Debug support functions
+ */
+
+#if defined(DEBUG)
+static void ace_dump_mem(void *base, int len)
+{
+ const char *ptr = base;
+ int i, j;
+
+ for (i = 0; i < len; i += 16) {
+ printk(KERN_INFO "%.8x:", i);
+ for (j = 0; j < 16; j++) {
+ if (!(j % 4))
+ printk(" ");
+ printk("%.2x", ptr[i + j]);
+ }
+ printk(" ");
+ for (j = 0; j < 16; j++)
+ printk("%c", isprint(ptr[i + j]) ? ptr[i + j] : '.');
+ printk("\n");
+ }
+}
+#else
+static inline void ace_dump_mem(void *base, int len)
+{
+}
+#endif
+
+static void ace_dump_regs(struct ace_device *ace)
+{
+ dev_info(ace->dev,
+ " ctrl: %.8x seccnt/cmd: %.4x ver:%.4x\n"
+ " status:%.8x mpu_lba:%.8x busmode:%4x\n"
+ " error: %.8x cfg_lba:%.8x fatstat:%.4x\n",
+ ace_in32(ace, ACE_CTRL),
+ ace_in(ace, ACE_SECCNTCMD),
+ ace_in(ace, ACE_VERSION),
+ ace_in32(ace, ACE_STATUS),
+ ace_in32(ace, ACE_MPULBA),
+ ace_in(ace, ACE_BUSMODE),
+ ace_in32(ace, ACE_ERROR),
+ ace_in32(ace, ACE_CFGLBA), ace_in(ace, ACE_FATSTAT));
+}
+
+void ace_fix_driveid(u16 *id)
+{
+#if defined(__BIG_ENDIAN)
+ int i;
+
+ /* All half words have wrong byte order; swap the bytes */
+ for (i = 0; i < ATA_ID_WORDS; i++, id++)
+ *id = le16_to_cpu(*id);
+#endif
+}
+
+/* ---------------------------------------------------------------------
+ * Finite State Machine (FSM) implementation
+ */
+
+/* FSM tasks; used to direct state transitions */
+#define ACE_TASK_IDLE 0
+#define ACE_TASK_IDENTIFY 1
+#define ACE_TASK_READ 2
+#define ACE_TASK_WRITE 3
+#define ACE_FSM_NUM_TASKS 4
+
+/* FSM state definitions */
+#define ACE_FSM_STATE_IDLE 0
+#define ACE_FSM_STATE_REQ_LOCK 1
+#define ACE_FSM_STATE_WAIT_LOCK 2
+#define ACE_FSM_STATE_WAIT_CFREADY 3
+#define ACE_FSM_STATE_IDENTIFY_PREPARE 4
+#define ACE_FSM_STATE_IDENTIFY_TRANSFER 5
+#define ACE_FSM_STATE_IDENTIFY_COMPLETE 6
+#define ACE_FSM_STATE_REQ_PREPARE 7
+#define ACE_FSM_STATE_REQ_TRANSFER 8
+#define ACE_FSM_STATE_REQ_COMPLETE 9
+#define ACE_FSM_STATE_ERROR 10
+#define ACE_FSM_NUM_STATES 11
+
+/* Set flag to exit FSM loop and reschedule tasklet */
+static inline void ace_fsm_yield(struct ace_device *ace)
+{
+ dev_dbg(ace->dev, "ace_fsm_yield()\n");
+ tasklet_schedule(&ace->fsm_tasklet);
+ ace->fsm_continue_flag = 0;
+}
+
+/* Set flag to exit FSM loop and wait for IRQ to reschedule tasklet */
+static inline void ace_fsm_yieldirq(struct ace_device *ace)
+{
+ dev_dbg(ace->dev, "ace_fsm_yieldirq()\n");
+
+ if (ace->irq == NO_IRQ)
+ /* No IRQ assigned, so need to poll */
+ tasklet_schedule(&ace->fsm_tasklet);
+ ace->fsm_continue_flag = 0;
+}
+
+/* Get the next read/write request; ending requests that we don't handle */
+struct request *ace_get_next_request(struct request_queue * q)
+{
+ struct request *req;
+
+ while ((req = blk_peek_request(q)) != NULL) {
+ if (req->cmd_type == REQ_TYPE_FS)
+ break;
+ blk_start_request(req);
+ __blk_end_request_all(req, -EIO);
+ }
+ return req;
+}
+
+static void ace_fsm_dostate(struct ace_device *ace)
+{
+ struct request *req;
+ u32 status;
+ u16 val;
+ int count;
+
+#if defined(DEBUG)
+ dev_dbg(ace->dev, "fsm_state=%i, id_req_count=%i\n",
+ ace->fsm_state, ace->id_req_count);
+#endif
+
+ /* Verify that there is actually a CF in the slot. If not, then
+	 * bail back to the idle state and wake up all the waiters */
+ status = ace_in32(ace, ACE_STATUS);
+ if ((status & ACE_STATUS_CFDETECT) == 0) {
+ ace->fsm_state = ACE_FSM_STATE_IDLE;
+ ace->media_change = 1;
+ set_capacity(ace->gd, 0);
+ dev_info(ace->dev, "No CF in slot\n");
+
+ /* Drop all in-flight and pending requests */
+ if (ace->req) {
+ __blk_end_request_all(ace->req, -EIO);
+ ace->req = NULL;
+ }
+ while ((req = blk_fetch_request(ace->queue)) != NULL)
+ __blk_end_request_all(req, -EIO);
+
+ /* Drop back to IDLE state and notify waiters */
+ ace->fsm_state = ACE_FSM_STATE_IDLE;
+ ace->id_result = -EIO;
+ while (ace->id_req_count) {
+ complete(&ace->id_completion);
+ ace->id_req_count--;
+ }
+ }
+
+ switch (ace->fsm_state) {
+ case ACE_FSM_STATE_IDLE:
+ /* See if there is anything to do */
+ if (ace->id_req_count || ace_get_next_request(ace->queue)) {
+ ace->fsm_iter_num++;
+ ace->fsm_state = ACE_FSM_STATE_REQ_LOCK;
+ mod_timer(&ace->stall_timer, jiffies + HZ);
+ if (!timer_pending(&ace->stall_timer))
+ add_timer(&ace->stall_timer);
+ break;
+ }
+ del_timer(&ace->stall_timer);
+ ace->fsm_continue_flag = 0;
+ break;
+
+ case ACE_FSM_STATE_REQ_LOCK:
+ if (ace_in(ace, ACE_STATUS) & ACE_STATUS_MPULOCK) {
+ /* Already have the lock, jump to next state */
+ ace->fsm_state = ACE_FSM_STATE_WAIT_CFREADY;
+ break;
+ }
+
+ /* Request the lock */
+ val = ace_in(ace, ACE_CTRL);
+ ace_out(ace, ACE_CTRL, val | ACE_CTRL_LOCKREQ);
+ ace->fsm_state = ACE_FSM_STATE_WAIT_LOCK;
+ break;
+
+ case ACE_FSM_STATE_WAIT_LOCK:
+ if (ace_in(ace, ACE_STATUS) & ACE_STATUS_MPULOCK) {
+ /* got the lock; move to next state */
+ ace->fsm_state = ACE_FSM_STATE_WAIT_CFREADY;
+ break;
+ }
+
+ /* wait a bit for the lock */
+ ace_fsm_yield(ace);
+ break;
+
+ case ACE_FSM_STATE_WAIT_CFREADY:
+ status = ace_in32(ace, ACE_STATUS);
+ if (!(status & ACE_STATUS_RDYFORCFCMD) ||
+ (status & ACE_STATUS_CFBSY)) {
+ /* CF card isn't ready; it needs to be polled */
+ ace_fsm_yield(ace);
+ break;
+ }
+
+ /* Device is ready for command; determine what to do next */
+ if (ace->id_req_count)
+ ace->fsm_state = ACE_FSM_STATE_IDENTIFY_PREPARE;
+ else
+ ace->fsm_state = ACE_FSM_STATE_REQ_PREPARE;
+ break;
+
+ case ACE_FSM_STATE_IDENTIFY_PREPARE:
+ /* Send identify command */
+ ace->fsm_task = ACE_TASK_IDENTIFY;
+ ace->data_ptr = ace->cf_id;
+ ace->data_count = ACE_BUF_PER_SECTOR;
+ ace_out(ace, ACE_SECCNTCMD, ACE_SECCNTCMD_IDENTIFY);
+
+ /* As per datasheet, put config controller in reset */
+ val = ace_in(ace, ACE_CTRL);
+ ace_out(ace, ACE_CTRL, val | ACE_CTRL_CFGRESET);
+
+ /* irq handler takes over from this point; wait for the
+ * transfer to complete */
+ ace->fsm_state = ACE_FSM_STATE_IDENTIFY_TRANSFER;
+ ace_fsm_yieldirq(ace);
+ break;
+
+ case ACE_FSM_STATE_IDENTIFY_TRANSFER:
+ /* Check that the sysace is ready to receive data */
+ status = ace_in32(ace, ACE_STATUS);
+ if (status & ACE_STATUS_CFBSY) {
+ dev_dbg(ace->dev, "CFBSY set; t=%i iter=%i dc=%i\n",
+ ace->fsm_task, ace->fsm_iter_num,
+ ace->data_count);
+ ace_fsm_yield(ace);
+ break;
+ }
+ if (!(status & ACE_STATUS_DATABUFRDY)) {
+ ace_fsm_yield(ace);
+ break;
+ }
+
+ /* Transfer the next buffer */
+ ace->reg_ops->datain(ace);
+ ace->data_count--;
+
+		/* If there are still buffers to be transferred, jump out here */
+ if (ace->data_count != 0) {
+ ace_fsm_yieldirq(ace);
+ break;
+ }
+
+ /* transfer finished; kick state machine */
+ dev_dbg(ace->dev, "identify finished\n");
+ ace->fsm_state = ACE_FSM_STATE_IDENTIFY_COMPLETE;
+ break;
+
+ case ACE_FSM_STATE_IDENTIFY_COMPLETE:
+ ace_fix_driveid(ace->cf_id);
+ ace_dump_mem(ace->cf_id, 512); /* Debug: Dump out disk ID */
+
+ if (ace->data_result) {
+ /* Error occurred, disable the disk */
+ ace->media_change = 1;
+ set_capacity(ace->gd, 0);
+ dev_err(ace->dev, "error fetching CF id (%i)\n",
+ ace->data_result);
+ } else {
+ ace->media_change = 0;
+
+ /* Record disk parameters */
+ set_capacity(ace->gd,
+ ata_id_u32(ace->cf_id, ATA_ID_LBA_CAPACITY));
+ dev_info(ace->dev, "capacity: %i sectors\n",
+ ata_id_u32(ace->cf_id, ATA_ID_LBA_CAPACITY));
+ }
+
+ /* We're done, drop to IDLE state and notify waiters */
+ ace->fsm_state = ACE_FSM_STATE_IDLE;
+ ace->id_result = ace->data_result;
+ while (ace->id_req_count) {
+ complete(&ace->id_completion);
+ ace->id_req_count--;
+ }
+ break;
+
+ case ACE_FSM_STATE_REQ_PREPARE:
+ req = ace_get_next_request(ace->queue);
+ if (!req) {
+ ace->fsm_state = ACE_FSM_STATE_IDLE;
+ break;
+ }
+ blk_start_request(req);
+
+ /* Okay, it's a data request, set it up for transfer */
+ dev_dbg(ace->dev,
+ "request: sec=%llx hcnt=%x, ccnt=%x, dir=%i\n",
+ (unsigned long long)blk_rq_pos(req),
+ blk_rq_sectors(req), blk_rq_cur_sectors(req),
+ rq_data_dir(req));
+
+ ace->req = req;
+ ace->data_ptr = req->buffer;
+ ace->data_count = blk_rq_cur_sectors(req) * ACE_BUF_PER_SECTOR;
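+		/* Program the 28-bit LBA of the first sector to transfer */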
+ ace_out32(ace, ACE_MPULBA, blk_rq_pos(req) & 0x0FFFFFFF);
+
+ count = blk_rq_sectors(req);
+ if (rq_data_dir(req)) {
+ /* Kick off write request */
+ dev_dbg(ace->dev, "write data\n");
+ ace->fsm_task = ACE_TASK_WRITE;
+ ace_out(ace, ACE_SECCNTCMD,
+ count | ACE_SECCNTCMD_WRITE_DATA);
+ } else {
+ /* Kick off read request */
+ dev_dbg(ace->dev, "read data\n");
+ ace->fsm_task = ACE_TASK_READ;
+ ace_out(ace, ACE_SECCNTCMD,
+ count | ACE_SECCNTCMD_READ_DATA);
+ }
+
+ /* As per datasheet, put config controller in reset */
+ val = ace_in(ace, ACE_CTRL);
+ ace_out(ace, ACE_CTRL, val | ACE_CTRL_CFGRESET);
+
+ /* Move to the transfer state. The systemace will raise
+ * an interrupt once there is something to do
+ */
+ ace->fsm_state = ACE_FSM_STATE_REQ_TRANSFER;
+ if (ace->fsm_task == ACE_TASK_READ)
+ ace_fsm_yieldirq(ace); /* wait for data ready */
+ break;
+
+ case ACE_FSM_STATE_REQ_TRANSFER:
+ /* Check that the sysace is ready to receive data */
+ status = ace_in32(ace, ACE_STATUS);
+ if (status & ACE_STATUS_CFBSY) {
+ dev_dbg(ace->dev,
+ "CFBSY set; t=%i iter=%i c=%i dc=%i irq=%i\n",
+ ace->fsm_task, ace->fsm_iter_num,
+ blk_rq_cur_sectors(ace->req) * 16,
+ ace->data_count, ace->in_irq);
+ ace_fsm_yield(ace); /* need to poll CFBSY bit */
+ break;
+ }
+ if (!(status & ACE_STATUS_DATABUFRDY)) {
+ dev_dbg(ace->dev,
+ "DATABUF not set; t=%i iter=%i c=%i dc=%i irq=%i\n",
+ ace->fsm_task, ace->fsm_iter_num,
+ blk_rq_cur_sectors(ace->req) * 16,
+ ace->data_count, ace->in_irq);
+ ace_fsm_yieldirq(ace);
+ break;
+ }
+
+ /* Transfer the next buffer */
+ if (ace->fsm_task == ACE_TASK_WRITE)
+ ace->reg_ops->dataout(ace);
+ else
+ ace->reg_ops->datain(ace);
+ ace->data_count--;
+
+		/* If there are still buffers to be transferred, jump out here */
+ if (ace->data_count != 0) {
+ ace_fsm_yieldirq(ace);
+ break;
+ }
+
+ /* bio finished; is there another one? */
+ if (__blk_end_request_cur(ace->req, 0)) {
+ /* dev_dbg(ace->dev, "next block; h=%u c=%u\n",
+ * blk_rq_sectors(ace->req),
+ * blk_rq_cur_sectors(ace->req));
+ */
+ ace->data_ptr = ace->req->buffer;
+ ace->data_count = blk_rq_cur_sectors(ace->req) * 16;
+ ace_fsm_yieldirq(ace);
+ break;
+ }
+
+ ace->fsm_state = ACE_FSM_STATE_REQ_COMPLETE;
+ break;
+
+ case ACE_FSM_STATE_REQ_COMPLETE:
+ ace->req = NULL;
+
+ /* Finished request; go to idle state */
+ ace->fsm_state = ACE_FSM_STATE_IDLE;
+ break;
+
+ default:
+ ace->fsm_state = ACE_FSM_STATE_IDLE;
+ break;
+ }
+}
+
+static void ace_fsm_tasklet(unsigned long data)
+{
+ struct ace_device *ace = (void *)data;
+ unsigned long flags;
+
+ spin_lock_irqsave(&ace->lock, flags);
+
+ /* Loop over state machine until told to stop */
+ ace->fsm_continue_flag = 1;
+ while (ace->fsm_continue_flag)
+ ace_fsm_dostate(ace);
+
+ spin_unlock_irqrestore(&ace->lock, flags);
+}
+
+static void ace_stall_timer(unsigned long data)
+{
+ struct ace_device *ace = (void *)data;
+ unsigned long flags;
+
+ dev_warn(ace->dev,
+ "kicking stalled fsm; state=%i task=%i iter=%i dc=%i\n",
+ ace->fsm_state, ace->fsm_task, ace->fsm_iter_num,
+ ace->data_count);
+ spin_lock_irqsave(&ace->lock, flags);
+
+ /* Rearm the stall timer *before* entering FSM (which may then
+ * delete the timer) */
+ mod_timer(&ace->stall_timer, jiffies + HZ);
+
+ /* Loop over state machine until told to stop */
+ ace->fsm_continue_flag = 1;
+ while (ace->fsm_continue_flag)
+ ace_fsm_dostate(ace);
+
+ spin_unlock_irqrestore(&ace->lock, flags);
+}
+
+/* ---------------------------------------------------------------------
+ * Interrupt handling routines
+ */
+static int ace_interrupt_checkstate(struct ace_device *ace)
+{
+ u32 sreg = ace_in32(ace, ACE_STATUS);
+ u16 creg = ace_in(ace, ACE_CTRL);
+
+ /* Check for error occurrence */
+ if ((sreg & (ACE_STATUS_CFGERROR | ACE_STATUS_CFCERROR)) &&
+ (creg & ACE_CTRL_ERRORIRQ)) {
+ dev_err(ace->dev, "transfer failure\n");
+ ace_dump_regs(ace);
+ return -EIO;
+ }
+
+ return 0;
+}
+
+static irqreturn_t ace_interrupt(int irq, void *dev_id)
+{
+ u16 creg;
+ struct ace_device *ace = dev_id;
+
+ /* be safe and get the lock */
+ spin_lock(&ace->lock);
+ ace->in_irq = 1;
+
+ /* clear the interrupt */
+ creg = ace_in(ace, ACE_CTRL);
+ ace_out(ace, ACE_CTRL, creg | ACE_CTRL_RESETIRQ);
+ ace_out(ace, ACE_CTRL, creg);
+
+ /* check for IO failures */
+ if (ace_interrupt_checkstate(ace))
+ ace->data_result = -EIO;
+
+ if (ace->fsm_task == 0) {
+ dev_err(ace->dev,
+ "spurious irq; stat=%.8x ctrl=%.8x cmd=%.4x\n",
+ ace_in32(ace, ACE_STATUS), ace_in32(ace, ACE_CTRL),
+ ace_in(ace, ACE_SECCNTCMD));
+ dev_err(ace->dev, "fsm_task=%i fsm_state=%i data_count=%i\n",
+ ace->fsm_task, ace->fsm_state, ace->data_count);
+ }
+
+ /* Loop over state machine until told to stop */
+ ace->fsm_continue_flag = 1;
+ while (ace->fsm_continue_flag)
+ ace_fsm_dostate(ace);
+
+ /* done with interrupt; drop the lock */
+ ace->in_irq = 0;
+ spin_unlock(&ace->lock);
+
+ return IRQ_HANDLED;
+}
+
+/* ---------------------------------------------------------------------
+ * Block ops
+ */
+static void ace_request(struct request_queue * q)
+{
+ struct request *req;
+ struct ace_device *ace;
+
+ req = ace_get_next_request(q);
+
+ if (req) {
+ ace = req->rq_disk->private_data;
+ tasklet_schedule(&ace->fsm_tasklet);
+ }
+}
+
+static unsigned int ace_check_events(struct gendisk *gd, unsigned int clearing)
+{
+ struct ace_device *ace = gd->private_data;
+ dev_dbg(ace->dev, "ace_check_events(): %i\n", ace->media_change);
+
+ return ace->media_change ? DISK_EVENT_MEDIA_CHANGE : 0;
+}
+
+static int ace_revalidate_disk(struct gendisk *gd)
+{
+ struct ace_device *ace = gd->private_data;
+ unsigned long flags;
+
+ dev_dbg(ace->dev, "ace_revalidate_disk()\n");
+
+ if (ace->media_change) {
+ dev_dbg(ace->dev, "requesting cf id and scheduling tasklet\n");
+
+ spin_lock_irqsave(&ace->lock, flags);
+ ace->id_req_count++;
+ spin_unlock_irqrestore(&ace->lock, flags);
+
+ tasklet_schedule(&ace->fsm_tasklet);
+ wait_for_completion(&ace->id_completion);
+ }
+
+ dev_dbg(ace->dev, "revalidate complete\n");
+ return ace->id_result;
+}
+
+static int ace_open(struct block_device *bdev, fmode_t mode)
+{
+ struct ace_device *ace = bdev->bd_disk->private_data;
+ unsigned long flags;
+
+ dev_dbg(ace->dev, "ace_open() users=%i\n", ace->users + 1);
+
+ mutex_lock(&xsysace_mutex);
+ spin_lock_irqsave(&ace->lock, flags);
+ ace->users++;
+ spin_unlock_irqrestore(&ace->lock, flags);
+
+ check_disk_change(bdev);
+ mutex_unlock(&xsysace_mutex);
+
+ return 0;
+}
+
+static int ace_release(struct gendisk *disk, fmode_t mode)
+{
+ struct ace_device *ace = disk->private_data;
+ unsigned long flags;
+ u16 val;
+
+ dev_dbg(ace->dev, "ace_release() users=%i\n", ace->users - 1);
+
+ mutex_lock(&xsysace_mutex);
+ spin_lock_irqsave(&ace->lock, flags);
+ ace->users--;
+ if (ace->users == 0) {
+ val = ace_in(ace, ACE_CTRL);
+ ace_out(ace, ACE_CTRL, val & ~ACE_CTRL_LOCKREQ);
+ }
+ spin_unlock_irqrestore(&ace->lock, flags);
+ mutex_unlock(&xsysace_mutex);
+ return 0;
+}
+
+static int ace_getgeo(struct block_device *bdev, struct hd_geometry *geo)
+{
+ struct ace_device *ace = bdev->bd_disk->private_data;
+ u16 *cf_id = ace->cf_id;
+
+ dev_dbg(ace->dev, "ace_getgeo()\n");
+
+ geo->heads = cf_id[ATA_ID_HEADS];
+ geo->sectors = cf_id[ATA_ID_SECTORS];
+ geo->cylinders = cf_id[ATA_ID_CYLS];
+
+ return 0;
+}
+
+static const struct block_device_operations ace_fops = {
+ .owner = THIS_MODULE,
+ .open = ace_open,
+ .release = ace_release,
+ .check_events = ace_check_events,
+ .revalidate_disk = ace_revalidate_disk,
+ .getgeo = ace_getgeo,
+};
+
+/* --------------------------------------------------------------------
+ * SystemACE device setup/teardown code
+ */
+static int __devinit ace_setup(struct ace_device *ace)
+{
+ u16 version;
+ u16 val;
+ int rc;
+
+ dev_dbg(ace->dev, "ace_setup(ace=0x%p)\n", ace);
+ dev_dbg(ace->dev, "physaddr=0x%llx irq=%i\n",
+ (unsigned long long)ace->physaddr, ace->irq);
+
+ spin_lock_init(&ace->lock);
+ init_completion(&ace->id_completion);
+
+ /*
+ * Map the device
+ */
+ ace->baseaddr = ioremap(ace->physaddr, 0x80);
+ if (!ace->baseaddr)
+ goto err_ioremap;
+
+ /*
+ * Initialize the state machine tasklet and stall timer
+ */
+ tasklet_init(&ace->fsm_tasklet, ace_fsm_tasklet, (unsigned long)ace);
+ setup_timer(&ace->stall_timer, ace_stall_timer, (unsigned long)ace);
+
+ /*
+ * Initialize the request queue
+ */
+ ace->queue = blk_init_queue(ace_request, &ace->lock);
+ if (ace->queue == NULL)
+ goto err_blk_initq;
+ blk_queue_logical_block_size(ace->queue, 512);
+
+ /*
+ * Allocate and initialize GD structure
+ */
+ ace->gd = alloc_disk(ACE_NUM_MINORS);
+ if (!ace->gd)
+ goto err_alloc_disk;
+
+ ace->gd->major = ace_major;
+ ace->gd->first_minor = ace->id * ACE_NUM_MINORS;
+ ace->gd->fops = &ace_fops;
+ ace->gd->queue = ace->queue;
+ ace->gd->private_data = ace;
+ snprintf(ace->gd->disk_name, 32, "xs%c", ace->id + 'a');
+
+ /* set bus width */
+ if (ace->bus_width == ACE_BUS_WIDTH_16) {
+		/* 0x0101 should work regardless of endianness */
+ ace_out_le16(ace, ACE_BUSMODE, 0x0101);
+
+		/* read it back to determine endianness */
+ if (ace_in_le16(ace, ACE_BUSMODE) == 0x0001)
+ ace->reg_ops = &ace_reg_le16_ops;
+ else
+ ace->reg_ops = &ace_reg_be16_ops;
+ } else {
+ ace_out_8(ace, ACE_BUSMODE, 0x00);
+ ace->reg_ops = &ace_reg_8_ops;
+ }
+
+ /* Make sure version register is sane */
+ version = ace_in(ace, ACE_VERSION);
+ if ((version == 0) || (version == 0xFFFF))
+ goto err_read;
+
+ /* Put sysace in a sane state by clearing most control reg bits */
+ ace_out(ace, ACE_CTRL, ACE_CTRL_FORCECFGMODE |
+ ACE_CTRL_DATABUFRDYIRQ | ACE_CTRL_ERRORIRQ);
+
+ /* Now we can hook up the irq handler */
+ if (ace->irq != NO_IRQ) {
+ rc = request_irq(ace->irq, ace_interrupt, 0, "systemace", ace);
+ if (rc) {
+ /* Failure - fall back to polled mode */
+ dev_err(ace->dev, "request_irq failed\n");
+ ace->irq = NO_IRQ;
+ }
+ }
+
+ /* Enable interrupts */
+ val = ace_in(ace, ACE_CTRL);
+ val |= ACE_CTRL_DATABUFRDYIRQ | ACE_CTRL_ERRORIRQ;
+ ace_out(ace, ACE_CTRL, val);
+
+ /* Print the identification */
+ dev_info(ace->dev, "Xilinx SystemACE revision %i.%i.%i\n",
+ (version >> 12) & 0xf, (version >> 8) & 0x0f, version & 0xff);
+ dev_dbg(ace->dev, "physaddr 0x%llx, mapped to 0x%p, irq=%i\n",
+ (unsigned long long) ace->physaddr, ace->baseaddr, ace->irq);
+
+ ace->media_change = 1;
+ ace_revalidate_disk(ace->gd);
+
+ /* Make the sysace device 'live' */
+ add_disk(ace->gd);
+
+ return 0;
+
+err_read:
+ put_disk(ace->gd);
+err_alloc_disk:
+ blk_cleanup_queue(ace->queue);
+err_blk_initq:
+ iounmap(ace->baseaddr);
+err_ioremap:
+ dev_info(ace->dev, "xsysace: error initializing device at 0x%llx\n",
+ (unsigned long long) ace->physaddr);
+ return -ENOMEM;
+}
+
+static void __devexit ace_teardown(struct ace_device *ace)
+{
+ if (ace->gd) {
+ del_gendisk(ace->gd);
+ put_disk(ace->gd);
+ }
+
+ if (ace->queue)
+ blk_cleanup_queue(ace->queue);
+
+ tasklet_kill(&ace->fsm_tasklet);
+
+ if (ace->irq != NO_IRQ)
+ free_irq(ace->irq, ace);
+
+ iounmap(ace->baseaddr);
+}
+
+static int __devinit
+ace_alloc(struct device *dev, int id, resource_size_t physaddr,
+ int irq, int bus_width)
+{
+ struct ace_device *ace;
+ int rc;
+ dev_dbg(dev, "ace_alloc(%p)\n", dev);
+
+ if (!physaddr) {
+ rc = -ENODEV;
+ goto err_noreg;
+ }
+
+ /* Allocate and initialize the ace device structure */
+ ace = kzalloc(sizeof(struct ace_device), GFP_KERNEL);
+ if (!ace) {
+ rc = -ENOMEM;
+ goto err_alloc;
+ }
+
+ ace->dev = dev;
+ ace->id = id;
+ ace->physaddr = physaddr;
+ ace->irq = irq;
+ ace->bus_width = bus_width;
+
+ /* Call the setup code */
+ rc = ace_setup(ace);
+ if (rc)
+ goto err_setup;
+
+ dev_set_drvdata(dev, ace);
+ return 0;
+
+err_setup:
+ dev_set_drvdata(dev, NULL);
+ kfree(ace);
+err_alloc:
+err_noreg:
+ dev_err(dev, "could not initialize device, err=%i\n", rc);
+ return rc;
+}
+
+static void __devexit ace_free(struct device *dev)
+{
+ struct ace_device *ace = dev_get_drvdata(dev);
+ dev_dbg(dev, "ace_free(%p)\n", dev);
+
+ if (ace) {
+ ace_teardown(ace);
+ dev_set_drvdata(dev, NULL);
+ kfree(ace);
+ }
+}
+
+/* ---------------------------------------------------------------------
+ * Platform Bus Support
+ */
+
+static int __devinit ace_probe(struct platform_device *dev)
+{
+ resource_size_t physaddr = 0;
+ int bus_width = ACE_BUS_WIDTH_16; /* FIXME: should not be hard coded */
+ int id = dev->id;
+ int irq = NO_IRQ;
+ int i;
+
+ dev_dbg(&dev->dev, "ace_probe(%p)\n", dev);
+
+ for (i = 0; i < dev->num_resources; i++) {
+ if (dev->resource[i].flags & IORESOURCE_MEM)
+ physaddr = dev->resource[i].start;
+ if (dev->resource[i].flags & IORESOURCE_IRQ)
+ irq = dev->resource[i].start;
+ }
+
+ /* Call the bus-independent setup code */
+ return ace_alloc(&dev->dev, id, physaddr, irq, bus_width);
+}
+
+/*
+ * Platform bus remove() method
+ */
+static int __devexit ace_remove(struct platform_device *dev)
+{
+ ace_free(&dev->dev);
+ return 0;
+}
+
+static struct platform_driver ace_platform_driver = {
+ .probe = ace_probe,
+ .remove = __devexit_p(ace_remove),
+ .driver = {
+ .owner = THIS_MODULE,
+ .name = "xsysace",
+ },
+};
+
+/* ---------------------------------------------------------------------
+ * OF_Platform Bus Support
+ */
+
+#if defined(CONFIG_OF)
+static int __devinit ace_of_probe(struct platform_device *op)
+{
+ struct resource res;
+ resource_size_t physaddr;
+ const u32 *id;
+ int irq, bus_width, rc;
+
+ /* device id */
+ id = of_get_property(op->dev.of_node, "port-number", NULL);
+
+ /* physaddr */
+ rc = of_address_to_resource(op->dev.of_node, 0, &res);
+ if (rc) {
+ dev_err(&op->dev, "invalid address\n");
+ return rc;
+ }
+ physaddr = res.start;
+
+ /* irq */
+ irq = irq_of_parse_and_map(op->dev.of_node, 0);
+
+ /* bus width */
+ bus_width = ACE_BUS_WIDTH_16;
+ if (of_find_property(op->dev.of_node, "8-bit", NULL))
+ bus_width = ACE_BUS_WIDTH_8;
+
+ /* Call the bus-independent setup code */
+ return ace_alloc(&op->dev, id ? be32_to_cpup(id) : 0,
+ physaddr, irq, bus_width);
+}
+
+static int __devexit ace_of_remove(struct platform_device *op)
+{
+ ace_free(&op->dev);
+ return 0;
+}
+
+/* Match table for of_platform binding */
+static const struct of_device_id ace_of_match[] __devinitconst = {
+ { .compatible = "xlnx,opb-sysace-1.00.b", },
+ { .compatible = "xlnx,opb-sysace-1.00.c", },
+ { .compatible = "xlnx,xps-sysace-1.00.a", },
+ { .compatible = "xlnx,sysace", },
+ {},
+};
+MODULE_DEVICE_TABLE(of, ace_of_match);
+
+static struct platform_driver ace_of_driver = {
+ .probe = ace_of_probe,
+ .remove = __devexit_p(ace_of_remove),
+ .driver = {
+ .name = "xsysace",
+ .owner = THIS_MODULE,
+ .of_match_table = ace_of_match,
+ },
+};
+
+/* Registration helpers to keep the number of #ifdefs to a minimum */
+static inline int __init ace_of_register(void)
+{
+ pr_debug("xsysace: registering OF binding\n");
+ return platform_driver_register(&ace_of_driver);
+}
+
+static inline void __exit ace_of_unregister(void)
+{
+ platform_driver_unregister(&ace_of_driver);
+}
+#else /* CONFIG_OF */
+/* CONFIG_OF not enabled; do-nothing helpers */
+static inline int __init ace_of_register(void) { return 0; }
+static inline void __exit ace_of_unregister(void) { }
+#endif /* CONFIG_OF */
+
+/* ---------------------------------------------------------------------
+ * Module init/exit routines
+ */
+static int __init ace_init(void)
+{
+ int rc;
+
+ ace_major = register_blkdev(ace_major, "xsysace");
+ if (ace_major <= 0) {
+ rc = -ENOMEM;
+ goto err_blk;
+ }
+
+ rc = ace_of_register();
+ if (rc)
+ goto err_of;
+
+ pr_debug("xsysace: registering platform binding\n");
+ rc = platform_driver_register(&ace_platform_driver);
+ if (rc)
+ goto err_plat;
+
+ pr_info("Xilinx SystemACE device driver, major=%i\n", ace_major);
+ return 0;
+
+err_plat:
+ ace_of_unregister();
+err_of:
+ unregister_blkdev(ace_major, "xsysace");
+err_blk:
+ printk(KERN_ERR "xsysace: registration failed; err=%i\n", rc);
+ return rc;
+}
+
+static void __exit ace_exit(void)
+{
+ pr_debug("Unregistering Xilinx SystemACE driver\n");
+ platform_driver_unregister(&ace_platform_driver);
+ ace_of_unregister();
+ unregister_blkdev(ace_major, "xsysace");
+}
+
+module_init(ace_init);
+module_exit(ace_exit);
diff --git a/drivers/block/z2ram.c b/drivers/block/z2ram.c
new file mode 100644
index 00000000..a22e3f89
--- /dev/null
+++ b/drivers/block/z2ram.c
@@ -0,0 +1,421 @@
+/*
+** z2ram - Amiga pseudo-driver to access 16bit-RAM in ZorroII space
+** as a block device, to be used as a RAM disk or swap space
+**
+** Copyright (C) 1994 by Ingo Wilken (Ingo.Wilken@informatik.uni-oldenburg.de)
+**
+** ++Geert: support for zorro_unused_z2ram, better range checking
+** ++roman: translate accesses via an array
+** ++Milan: support for ChipRAM usage
+** ++yambo: converted to 2.0 kernel
+** ++yambo: modularized and support added for 3 minor devices including:
+** MAJOR MINOR DESCRIPTION
+** ----- ----- ----------------------------------------------
+** 37 0 Use Zorro II and Chip ram
+** 37 1 Use only Zorro II ram
+** 37 2 Use only Chip ram
+** 37 4-7 Use memory list entry 1-4 (first is 0)
+** ++jskov: support for 1-4th memory list entry.
+**
+** Permission to use, copy, modify, and distribute this software and its
+** documentation for any purpose and without fee is hereby granted, provided
+** that the above copyright notice appear in all copies and that both that
+** copyright notice and this permission notice appear in supporting
+** documentation. This software is provided "as is" without express or
+** implied warranty.
+*/
+
+#define DEVICE_NAME "Z2RAM"
+
+#include <linux/major.h>
+#include <linux/vmalloc.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/blkdev.h>
+#include <linux/bitops.h>
+#include <linux/mutex.h>
+#include <linux/slab.h>
+
+#include <asm/setup.h>
+#include <asm/amigahw.h>
+#include <asm/pgtable.h>
+
+#include <linux/zorro.h>
+
+
+extern int m68k_realnum_memory;
+extern struct mem_info m68k_memory[NUM_MEMINFO];
+
+#define Z2MINOR_COMBINED (0)
+#define Z2MINOR_Z2ONLY (1)
+#define Z2MINOR_CHIPONLY (2)
+#define Z2MINOR_MEMLIST1 (4)
+#define Z2MINOR_MEMLIST2 (5)
+#define Z2MINOR_MEMLIST3 (6)
+#define Z2MINOR_MEMLIST4 (7)
+#define Z2MINOR_COUNT (8) /* Move this down when adding a new minor */
+
+#define Z2RAM_CHUNK1024 ( Z2RAM_CHUNKSIZE >> 10 )
+
+static DEFINE_MUTEX(z2ram_mutex);
+static u_long *z2ram_map = NULL;
+static u_long z2ram_size = 0;
+static int z2_count = 0;
+static int chip_count = 0;
+static int list_count = 0;
+static int current_device = -1;
+
+static DEFINE_SPINLOCK(z2ram_lock);
+
+static struct gendisk *z2ram_gendisk;
+
+static void do_z2_request(struct request_queue *q)
+{
+ struct request *req;
+
+ req = blk_fetch_request(q);
+ while (req) {
+ unsigned long start = blk_rq_pos(req) << 9;
+ unsigned long len = blk_rq_cur_bytes(req);
+ int err = 0;
+
+ if (start + len > z2ram_size) {
+ pr_err(DEVICE_NAME ": bad access: block=%llu, "
+ "count=%u\n",
+ (unsigned long long)blk_rq_pos(req),
+ blk_rq_cur_sectors(req));
+ err = -EIO;
+ goto done;
+ }
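+		/* Copy chunk by chunk, translating each offset through z2ram_map */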
+ while (len) {
+ unsigned long addr = start & Z2RAM_CHUNKMASK;
+ unsigned long size = Z2RAM_CHUNKSIZE - addr;
+ if (len < size)
+ size = len;
+ addr += z2ram_map[ start >> Z2RAM_CHUNKSHIFT ];
+ if (rq_data_dir(req) == READ)
+ memcpy(req->buffer, (char *)addr, size);
+ else
+ memcpy((char *)addr, req->buffer, size);
+ start += size;
+ len -= size;
+ }
+ done:
+ if (!__blk_end_request_cur(req, err))
+ req = blk_fetch_request(q);
+ }
+}
+
+static void
+get_z2ram( void )
+{
+ int i;
+
+ for ( i = 0; i < Z2RAM_SIZE / Z2RAM_CHUNKSIZE; i++ )
+ {
+ if ( test_bit( i, zorro_unused_z2ram ) )
+ {
+ z2_count++;
+ z2ram_map[ z2ram_size++ ] =
+ ZTWO_VADDR( Z2RAM_START ) + ( i << Z2RAM_CHUNKSHIFT );
+ clear_bit( i, zorro_unused_z2ram );
+ }
+ }
+
+ return;
+}
+
+static void
+get_chipram( void )
+{
+
+ while ( amiga_chip_avail() > ( Z2RAM_CHUNKSIZE * 4 ) )
+ {
+ chip_count++;
+ z2ram_map[ z2ram_size ] =
+ (u_long)amiga_chip_alloc( Z2RAM_CHUNKSIZE, "z2ram" );
+
+ if ( z2ram_map[ z2ram_size ] == 0 )
+ {
+ break;
+ }
+
+ z2ram_size++;
+ }
+
+ return;
+}
+
+static int z2_open(struct block_device *bdev, fmode_t mode)
+{
+ int device;
+ int max_z2_map = ( Z2RAM_SIZE / Z2RAM_CHUNKSIZE ) *
+ sizeof( z2ram_map[0] );
+ int max_chip_map = ( amiga_chip_size / Z2RAM_CHUNKSIZE ) *
+ sizeof( z2ram_map[0] );
+ int rc = -ENOMEM;
+
+ device = MINOR(bdev->bd_dev);
+
+ mutex_lock(&z2ram_mutex);
+ if ( current_device != -1 && current_device != device )
+ {
+ rc = -EBUSY;
+ goto err_out;
+ }
+
+ if ( current_device == -1 )
+ {
+ z2_count = 0;
+ chip_count = 0;
+ list_count = 0;
+ z2ram_size = 0;
+
+ /* Use a specific list entry. */
+ if (device >= Z2MINOR_MEMLIST1 && device <= Z2MINOR_MEMLIST4) {
+ int index = device - Z2MINOR_MEMLIST1 + 1;
+ unsigned long size, paddr, vaddr;
+
+ if (index >= m68k_realnum_memory) {
+ printk( KERN_ERR DEVICE_NAME
+ ": no such entry in z2ram_map\n" );
+ goto err_out;
+ }
+
+ paddr = m68k_memory[index].addr;
+ size = m68k_memory[index].size & ~(Z2RAM_CHUNKSIZE-1);
+
+#ifdef __powerpc__
+ /* FIXME: ioremap doesn't build correct memory tables. */
+ {
+ vfree(vmalloc (size));
+ }
+
+ vaddr = (unsigned long) __ioremap (paddr, size,
+ _PAGE_WRITETHRU);
+
+#else
+ vaddr = (unsigned long)z_remap_nocache_nonser(paddr, size);
+#endif
+ z2ram_map =
+ kmalloc((size/Z2RAM_CHUNKSIZE)*sizeof(z2ram_map[0]),
+ GFP_KERNEL);
+ if ( z2ram_map == NULL )
+ {
+ printk( KERN_ERR DEVICE_NAME
+ ": cannot get mem for z2ram_map\n" );
+ goto err_out;
+ }
+
+ while (size) {
+ z2ram_map[ z2ram_size++ ] = vaddr;
+ size -= Z2RAM_CHUNKSIZE;
+ vaddr += Z2RAM_CHUNKSIZE;
+ list_count++;
+ }
+
+ if ( z2ram_size != 0 )
+ printk( KERN_INFO DEVICE_NAME
+ ": using %iK List Entry %d Memory\n",
+ list_count * Z2RAM_CHUNK1024, index );
+ } else
+
+ switch ( device )
+ {
+ case Z2MINOR_COMBINED:
+
+ z2ram_map = kmalloc( max_z2_map + max_chip_map, GFP_KERNEL );
+ if ( z2ram_map == NULL )
+ {
+ printk( KERN_ERR DEVICE_NAME
+ ": cannot get mem for z2ram_map\n" );
+ goto err_out;
+ }
+
+ get_z2ram();
+ get_chipram();
+
+ if ( z2ram_size != 0 )
+ printk( KERN_INFO DEVICE_NAME
+ ": using %iK Zorro II RAM and %iK Chip RAM (Total %dK)\n",
+ z2_count * Z2RAM_CHUNK1024,
+ chip_count * Z2RAM_CHUNK1024,
+ ( z2_count + chip_count ) * Z2RAM_CHUNK1024 );
+
+ break;
+
+ case Z2MINOR_Z2ONLY:
+ z2ram_map = kmalloc( max_z2_map, GFP_KERNEL );
+ if ( z2ram_map == NULL )
+ {
+ printk( KERN_ERR DEVICE_NAME
+ ": cannot get mem for z2ram_map\n" );
+ goto err_out;
+ }
+
+ get_z2ram();
+
+ if ( z2ram_size != 0 )
+ printk( KERN_INFO DEVICE_NAME
+ ": using %iK of Zorro II RAM\n",
+ z2_count * Z2RAM_CHUNK1024 );
+
+ break;
+
+ case Z2MINOR_CHIPONLY:
+ z2ram_map = kmalloc( max_chip_map, GFP_KERNEL );
+ if ( z2ram_map == NULL )
+ {
+ printk( KERN_ERR DEVICE_NAME
+ ": cannot get mem for z2ram_map\n" );
+ goto err_out;
+ }
+
+ get_chipram();
+
+ if ( z2ram_size != 0 )
+ printk( KERN_INFO DEVICE_NAME
+ ": using %iK Chip RAM\n",
+ chip_count * Z2RAM_CHUNK1024 );
+
+ break;
+
+ default:
+ rc = -ENODEV;
+ goto err_out;
+
+ break;
+ }
+
+ if ( z2ram_size == 0 )
+ {
+ printk( KERN_NOTICE DEVICE_NAME
+ ": no unused ZII/Chip RAM found\n" );
+ goto err_out_kfree;
+ }
+
+ current_device = device;
+ z2ram_size <<= Z2RAM_CHUNKSHIFT;
+ set_capacity(z2ram_gendisk, z2ram_size >> 9);
+ }
+
+ mutex_unlock(&z2ram_mutex);
+ return 0;
+
+err_out_kfree:
+ kfree(z2ram_map);
+err_out:
+ mutex_unlock(&z2ram_mutex);
+ return rc;
+}
+
+static int
+z2_release(struct gendisk *disk, fmode_t mode)
+{
+ mutex_lock(&z2ram_mutex);
+ if ( current_device == -1 ) {
+ mutex_unlock(&z2ram_mutex);
+ return 0;
+ }
+ mutex_unlock(&z2ram_mutex);
+ /*
+ * FIXME: unmap memory
+ */
+
+ return 0;
+}
+
+static const struct block_device_operations z2_fops =
+{
+ .owner = THIS_MODULE,
+ .open = z2_open,
+ .release = z2_release,
+};
+
+static struct kobject *z2_find(dev_t dev, int *part, void *data)
+{
+ *part = 0;
+ return get_disk(z2ram_gendisk);
+}
+
+static struct request_queue *z2_queue;
+
+static int __init
+z2_init(void)
+{
+ int ret;
+
+ if (!MACH_IS_AMIGA)
+ return -ENODEV;
+
+ ret = -EBUSY;
+ if (register_blkdev(Z2RAM_MAJOR, DEVICE_NAME))
+ goto err;
+
+ ret = -ENOMEM;
+ z2ram_gendisk = alloc_disk(1);
+ if (!z2ram_gendisk)
+ goto out_disk;
+
+ z2_queue = blk_init_queue(do_z2_request, &z2ram_lock);
+ if (!z2_queue)
+ goto out_queue;
+
+ z2ram_gendisk->major = Z2RAM_MAJOR;
+ z2ram_gendisk->first_minor = 0;
+ z2ram_gendisk->fops = &z2_fops;
+ sprintf(z2ram_gendisk->disk_name, "z2ram");
+
+ z2ram_gendisk->queue = z2_queue;
+ add_disk(z2ram_gendisk);
+ blk_register_region(MKDEV(Z2RAM_MAJOR, 0), Z2MINOR_COUNT, THIS_MODULE,
+ z2_find, NULL, NULL);
+
+ return 0;
+
+out_queue:
+ put_disk(z2ram_gendisk);
+out_disk:
+ unregister_blkdev(Z2RAM_MAJOR, DEVICE_NAME);
+err:
+ return ret;
+}
+
+static void __exit z2_exit(void)
+{
+ int i, j;
+ blk_unregister_region(MKDEV(Z2RAM_MAJOR, 0), Z2MINOR_COUNT);
+ unregister_blkdev(Z2RAM_MAJOR, DEVICE_NAME);
+ del_gendisk(z2ram_gendisk);
+ put_disk(z2ram_gendisk);
+ blk_cleanup_queue(z2_queue);
+
+ if ( current_device != -1 )
+ {
+ i = 0;
+
+ for ( j = 0 ; j < z2_count; j++ )
+ {
+ set_bit( i++, zorro_unused_z2ram );
+ }
+
+ for ( j = 0 ; j < chip_count; j++ )
+ {
+ if ( z2ram_map[ i ] )
+ {
+ amiga_chip_free( (void *) z2ram_map[ i++ ] );
+ }
+ }
+
+ if ( z2ram_map != NULL )
+ {
+ kfree( z2ram_map );
+ }
+ }
+
+ return;
+}
+
+module_init(z2_init);
+module_exit(z2_exit);
+MODULE_LICENSE("GPL");