aboutsummaryrefslogtreecommitdiffstats
path: root/tools/python
diff options
context:
space:
mode:
author Shriram Rajagopalan <rshriram@cs.ubc.ca> 2011-05-26 15:03:39 +0100
committer Shriram Rajagopalan <rshriram@cs.ubc.ca> 2011-05-26 15:03:39 +0100
commit b3f1e4ac567c2a03979a10991dd301d6c80b96e9 (patch)
tree 2ba8ec2a407eb4d80c3e3a997219e0368ad7b8f5 /tools/python
parent eadf449efd7518d8f5789c90aaa93c487b88fb98 (diff)
download xen-b3f1e4ac567c2a03979a10991dd301d6c80b96e9.tar.gz
xen-b3f1e4ac567c2a03979a10991dd301d6c80b96e9.tar.bz2
xen-b3f1e4ac567c2a03979a10991dd301d6c80b96e9.zip
tools: remus: support DRBD disk backends
DRBD disk backends can be used instead of tapdisk backends for Remus. This requires a Remus-style disk replication protocol (asynchronous replication with output buffering at the backup) that is not available in standard DRBD code. A modified version that supports this new replication protocol is available from git://aramis.nss.cs.ubc.ca/drbd-8.3-remus. Use of DRBD disk backends provides a means for efficient resynchronization of data after the crashed machine comes back online. Since DRBD allows for online resynchronization, a DRBD-backed Remus VM does not have to be stopped or shut down while the disks are resynchronizing. Once resynchronization is complete, Remus can be started at will. Signed-off-by: Shriram Rajagopalan <rshriram@cs.ubc.ca> Committed-by: Ian Jackson <ian.jackson@eu.citrix.com>
Diffstat (limited to 'tools/python')
-rw-r--r-- tools/python/xen/remus/device.py 67
1 files changed, 54 insertions, 13 deletions
diff --git a/tools/python/xen/remus/device.py b/tools/python/xen/remus/device.py
index 0f923222d4..9e4dafa9df 100644
--- a/tools/python/xen/remus/device.py
+++ b/tools/python/xen/remus/device.py
@@ -2,7 +2,7 @@
#
# Coordinates with devices at suspend, resume, and commit hooks
-import os, re
+import os, re, fcntl
import netlink, qdisc, util
@@ -30,22 +30,51 @@ class ReplicatedDisk(CheckpointedDevice):
is paused between epochs.
"""
FIFODIR = '/var/run/tap'
+ SEND_CHECKPOINT = 20
+ WAIT_CHECKPOINT_ACK = 30
def __init__(self, disk):
# look up disk, make sure it is tap:buffer, and set up socket
# to request commits.
self.ctlfd = None
+ self.msgfd = None
+ self.is_drbd = False
+ self.ackwait = False
- if not disk.uname.startswith('tap:remus:') and not disk.uname.startswith('tap:tapdisk:remus:'):
+ if disk.uname.startswith('tap:remus:') or disk.uname.startswith('tap:tapdisk:remus:'):
+ fifo = re.match("tap:.*(remus.*)\|", disk.uname).group(1).replace(':', '_')
+ absfifo = os.path.join(self.FIFODIR, fifo)
+ absmsgfifo = absfifo + '.msg'
+
+ self.installed = False
+ self.ctlfd = open(absfifo, 'w+b')
+ self.msgfd = open(absmsgfifo, 'r+b')
+ elif disk.uname.startswith('drbd:'):
+ #get the drbd device associated with this resource
+ drbdres = re.match("drbd:(.*)", disk.uname).group(1)
+ drbddev = util.runcmd("drbdadm sh-dev %s" % drbdres).rstrip()
+
+ #check for remus supported drbd installation
+ rconf = util.runcmd("drbdsetup %s show" % drbddev)
+ if rconf.find('protocol D;') == -1:
+ raise ReplicatedDiskException('Remus support for DRBD disks requires the '
+ 'resources to operate in protocol D. Please make '
+ 'sure that you have installed the remus supported DRBD '
+ 'version from git://aramis.nss.cs.ubc.ca/drbd-8.3-remus '
+ 'and enabled protocol D in the resource config')
+
+ #check if resource is in connected state
+ cstate = util.runcmd("drbdadm cstate %s" % drbdres).rstrip()
+ if cstate != 'Connected':
+ raise ReplicatedDiskException('DRBD resource %s is not in connected state!'
+ % drbdres)
+
+ #open a handle to the resource so that we could issue chkpt ioctls
+ self.ctlfd = open(drbddev, 'r')
+ self.is_drbd = True
+ else:
raise ReplicatedDiskException('Disk is not replicated: %s' %
str(disk))
- fifo = re.match("tap:.*(remus.*)\|", disk.uname).group(1).replace(':', '_')
- absfifo = os.path.join(self.FIFODIR, fifo)
- absmsgfifo = absfifo + '.msg'
-
- self.installed = False
- self.ctlfd = open(absfifo, 'w+b')
- self.msgfd = open(absmsgfifo, 'r+b')
def __del__(self):
self.uninstall()
@@ -56,12 +85,24 @@ class ReplicatedDisk(CheckpointedDevice):
self.ctlfd = None
def postsuspend(self):
- os.write(self.ctlfd.fileno(), 'flush')
+ if not self.is_drbd:
+ os.write(self.ctlfd.fileno(), 'flush')
+ elif not self.ackwait:
+ if (fcntl.ioctl(self.ctlfd.fileno(), self.SEND_CHECKPOINT, 0) > 0):
+ self.ackwait = False
+ else:
+ self.ackwait = True
+
+ def preresume(self):
+ if self.is_drbd and self.ackwait:
+ fcntl.ioctl(self.ctlfd.fileno(), self.WAIT_CHECKPOINT_ACK, 0)
+ self.ackwait = False
def commit(self):
- msg = os.read(self.msgfd.fileno(), 4)
- if msg != 'done':
- print 'Unknown message: %s' % msg
+ if not self.is_drbd:
+ msg = os.read(self.msgfd.fileno(), 4)
+ if msg != 'done':
+ print 'Unknown message: %s' % msg
### Network