aboutsummaryrefslogtreecommitdiffstats
path: root/techlibs/coolrunner2/synth_coolrunner2.cc
blob: a746ac222d0868df09c3bc677303a295aa33575f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
/*
 *  yosys -- Yosys Open SYnthesis Suite
 *
 *  Copyright (C) 2017 Robert Ou <rqou@robertou.com>
 *
 *  Permission to use, copy, modify, and/or distribute this software for any
 *  purpose with or without fee is hereby granted, provided that the above
 *  copyright notice and this permission notice appear in all copies.
 *
 *  THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 *  WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 *  MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 *  ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 *  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 *  ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 *  OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 *
 */

#include "kernel/register.h"
#include "kernel/celltypes.h"
#include "kernel/rtlil.h"
#include "kernel/log.h"

USING_YOSYS_NAMESPACE
PRIVATE_NAMESPACE_BEGIN

struct SynthCoolrunner2Pass : public ScriptPass
{
	SynthCoolrunner2Pass() : ScriptPass("synth_coolrunner2", "synthesis for Xilinx Coolrunner-II CPLDs") { }

	void help() override
	{
		//   |---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|
		log("\n");
		log("    synth_coolrunner2 [options]\n");
		log("\n");
		log("This command runs synthesis for Coolrunner-II CPLDs. This work is experimental.\n");
		log("It is intended to be used with https://github.com/azonenberg/openfpga as the\n");
		log("place-and-route.\n");
		log("\n");
		log("    -top <module>\n");
		log("        use the specified module as top module (default='top')\n");
		log("\n");
		log("    -json <file>\n");
		log("        write the design to the specified JSON file. writing of an output file\n");
		log("        is omitted if this parameter is not specified.\n");
		log("\n");
		log("    -run <from_label>:<to_label>\n");
		log("        only run the commands between the labels (see below). an empty\n");
		log("        from label is synonymous to 'begin', and empty to label is\n");
		log("        synonymous to the end of the command list.\n");
		log("\n");
		log("    -noflatten\n");
		log("        do not flatten design before synthesis\n");
		log("\n");
		log("    -retime\n");
		log("        run 'abc' with '-dff -D 1' options\n");
		log("\n");
		log("\n");
		log("The following commands are executed by this synthesis command:\n");
		help_script();
		log("\n");
	}

	string top_opt, json_file;
	bool flatten, retime;

	void clear_flags() override
	{
		top_opt = "-auto-top";
		json_file = "";
		flatten = true;
		retime = false;
	}

	void execute(std::vector<std::string> args, RTLIL::Design *design) override
	{
		string run_from, run_to;
		clear_flags();

		size_t argidx;
		for (argidx = 1; argidx < args.size(); argidx++)
		{
			if (args[argidx] == "-top" && argidx+1 < args.size()) {
				top_opt = "-top " + args[++argidx];
				continue;
			}
			if (args[argidx] == "-json" && argidx+1 < args.size()) {
				json_file = args[++argidx];
				continue;
			}
			if (args[argidx] == "-run" && argidx+1 < args.size()) {
				size_t pos = args[argidx+1].find(':');
				if (pos == std::string::npos)
					break;
				run_from = args[++argidx].substr(0, pos);
				run_to = args[argidx].substr(pos+1);
				continue;
			}
			if (args[argidx] == "-noflatten") {
				flatten = false;
				continue;
			}
			if (args[argidx] == "-retime") {
				retime = true;
				continue;
			}
			break;
		}
		extra_args(args, argidx, design);

		if (!design->full_selection())
			log_cmd_error("This command only operates on fully selected designs!\n");

		log_header(design, "Executing SYNTH_COOLRUNNER2 pass.\n");
		log_push();

		run_script(design, run_from, run_to);

		log_pop();
	}

	void script() override
	{
		if (check_label("begin"))
		{
			run("read_verilog -lib +/coolrunner2/cells_sim.v");
			run(stringf("hierarchy -check %s", help_mode ? "-top <top>" : top_opt.c_str()));
		}

		if (flatten && check_label("flatten", "(unless -noflatten)"))
		{
			run("proc");
			run("flatten");
			run("tribuf -logic");
		}

		if (check_label("coarse"))
		{
			run("synth -run coarse");
		}

		if (check_label("fine"))
		{
			run("extract_counter -dir up -allow_arst no");
			run("techmap -map +/coolrunner2/cells_counter_map.v");
			run("clean");
			run("opt -fast -full");
			run("techmap -map +/techmap.v -map +/coolrunner2/cells_latch.v");
			run("opt -fast");
			run("dfflibmap -prepare -liberty +/coolrunner2/xc2_dff.lib");
		}

		if (check_label("map_tff"))
		{
			// This is quite hacky. By telling abc that it can only use AND and XOR gates, abc will try and use XOR
			// gates "whenever possible." This will hopefully cause toggle flip-flop structures to turn into an XOR
			// connected to a D flip-flop. We then match on these and convert them into XC2 TFF cells.
			run("abc -g AND,XOR");
			run("clean");
			run("extract -map +/coolrunner2/tff_extract.v");
		}

		if (check_label("map_pla"))
		{
			run("abc -sop -I 40 -P 56" + string(retime ? " -dff -D 1" : ""));
			run("clean");
		}

		if (check_label("map_cells"))
		{
			run("dfflibmap -liberty +/coolrunner2/xc2_dff.lib");
			run("dffinit -ff FDCP Q INIT");
			run("dffinit -ff FDCP_N Q INIT");
			run("dffinit -ff FTCP Q INIT");
			run("dffinit -ff FTCP_N Q INIT");
			run("dffinit -ff LDCP Q INIT");
			run("dffinit -ff LDCP_N Q INIT");
			run("coolrunner2_sop");
			run("clean");
			run("iopadmap -bits -inpad IBUF O:I -outpad IOBUFE I:IO -inoutpad IOBUFE O:IO -toutpad IOBUFE E:I:IO -tinoutpad IOBUFE E:O:I:IO");
			run("attrmvcp -attr src -attr LOC t:IOBUFE n:*");
			run("attrmvcp -attr src -attr LOC -driven t:IBUF n:*");
			run("coolrunner2_fixup");
			run("splitnets");
			run("clean");
		}

		if (check_label("check"))
		{
			run("hierarchy -check");
			run("stat");
			run("check -noinit");
			run("blackbox =A:whitebox");
		}

		if (check_label("json"))
		{
			if (!json_file.empty() || help_mode)
				run(stringf("write_json %s", help_mode ? "<file-name>" : json_file.c_str()));
		}
	}
} SynthCoolrunner2Pass;

PRIVATE_NAMESPACE_END
d='n1097' href='#n1097'>1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491
/* block-qcow.c
 *
 * Asynchronous Qemu copy-on-write disk implementation.
 * Code based on the Qemu implementation
 * (see copyright notice below)
 *
 * (c) 2006 Andrew Warfield and Julian Chesterfield
 *
 */

/*
 * Block driver for the QCOW format
 * 
 * Copyright (c) 2004 Fabrice Bellard
 * 
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files(the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 */

#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/statvfs.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
#include <linux/fs.h>
#include <string.h>
#include <zlib.h>
#include <inttypes.h>
#include <libaio.h>
#include <openssl/md5.h>
#include "bswap.h"
#include "aes.h"
#include "tapdisk.h"

#if 1
#define ASSERT(_p) \
    if ( !(_p) ) { DPRINTF("Assertion '%s' failed, line %d, file %s", #_p , \
    __LINE__, __FILE__); *(int*)0=0; }
#else
#define ASSERT(_p) ((void)0)
#endif

#define ROUNDUP(l, s) \
({ \
    (uint64_t)( \
        (l + (s - 1)) - ((l + (s - 1)) % s)); \
})

/******AIO DEFINES******/
#define REQUEST_ASYNC_FD 1
#define MAX_QCOW_IDS  0xFFFF
#define MAX_AIO_REQS (MAX_REQUESTS * MAX_SEGMENTS_PER_REQ)

struct pending_aio {
        td_callback_t cb;
        int id;
        void *private;
	int nb_sectors;
	char *buf;
	uint64_t sector;
	int qcow_idx;
};

#define IOCB_IDX(_s, _io) ((_io) - (_s)->iocb_list)

#define ZERO_TEST(_b) (_b | 0x00)

/**************************************************************/
/* QEMU COW block driver with compression and encryption support */

#define QCOW_MAGIC (('Q' << 24) | ('F' << 16) | ('I' << 8) | 0xfb)
#define XEN_MAGIC  (('X' << 24) | ('E' << 16) | ('N' << 8) | 0xfb)
#define QCOW_VERSION 1

#define QCOW_CRYPT_NONE 0x00
#define QCOW_CRYPT_AES  0x01

#define QCOW_OFLAG_COMPRESSED (1LL << 63)
#define SPARSE_FILE 0x01

#ifndef O_BINARY
#define O_BINARY 0
#endif

typedef struct QCowHeader {
	uint32_t magic;
	uint32_t version;
	uint64_t backing_file_offset;
	uint32_t backing_file_size;
	uint32_t mtime;
	uint64_t size; /* in bytes */
	uint8_t cluster_bits;
	uint8_t l2_bits;
	uint32_t crypt_method;
	uint64_t l1_table_offset;
} QCowHeader;

/*Extended header for Xen enhancements*/
typedef struct QCowHeader_ext {
        uint32_t xmagic;
        uint32_t cksum;
        uint32_t min_cluster_alloc;
        uint32_t flags;
} QCowHeader_ext;

#define L2_CACHE_SIZE 16  /*Fixed allocation in Qemu*/

struct tdqcow_state {
        int fd;                        /*Main Qcow file descriptor */
	uint64_t fd_end;               /*Store a local record of file length */
	int bfd;                       /*Backing file descriptor*/
	char *name;                    /*Record of the filename*/
	int poll_pipe[2];              /*dummy fd for polling on */
	int encrypted;                 /*File contents are encrypted or plain*/
	int cluster_bits;              /*Determines length of cluster as 
					*indicated by file hdr*/
	int cluster_size;              /*Length of cluster*/
	int cluster_sectors;           /*Number of sectors per cluster*/
	int cluster_alloc;             /*Blktap fix for allocating full 
					*extents*/
	int min_cluster_alloc;         /*Blktap historical extent alloc*/
	int sparse;                    /*Indicates whether to preserve sparseness*/
	int l2_bits;                   /*Size of L2 table entry*/
	int l2_size;                   /*Full table size*/
	int l1_size;                   /*L1 table size*/
	uint64_t cluster_offset_mask;    
	uint64_t l1_table_offset;      /*L1 table offset from beginning of 
					*file*/
	uint64_t *l1_table;            /*L1 table entries*/
	uint64_t *l2_cache;            /*We maintain a cache of size 
					*L2_CACHE_SIZE of most read entries*/
	uint64_t l2_cache_offsets[L2_CACHE_SIZE];     /*L2 cache entries*/
	uint32_t l2_cache_counts[L2_CACHE_SIZE];      /*Cache access record*/
	uint8_t *cluster_cache;          
	uint8_t *cluster_data;
	uint8_t *sector_lock;          /*Locking bitmap for AIO reads/writes*/
	uint64_t cluster_cache_offset; /**/
	uint32_t crypt_method;         /*current crypt method, 0 if no 
					*key yet */
	uint32_t crypt_method_header;  /**/
	AES_KEY aes_encrypt_key;       /*AES key*/
	AES_KEY aes_decrypt_key;       /*AES key*/
        /* libaio state */
        io_context_t       aio_ctx;
	int		   nr_reqs [MAX_QCOW_IDS];
        struct iocb        iocb_list  [MAX_AIO_REQS];
        struct iocb       *iocb_free  [MAX_AIO_REQS];
        struct pending_aio pending_aio[MAX_AIO_REQS];
        int                iocb_free_count;
        struct iocb       *iocb_queue[MAX_AIO_REQS];
        int                iocb_queued;
        int                poll_fd;      /* NB: we require aio_poll support */
        struct io_event    aio_events[MAX_AIO_REQS];
};

static int decompress_cluster(struct tdqcow_state *s, uint64_t cluster_offset);

static int init_aio_state(struct td_state *bs)
{
        int i;
	struct tdqcow_state *s = (struct tdqcow_state *)bs->private;
        long     ioidx;

        /*Initialize Locking bitmap*/
	s->sector_lock = calloc(1, bs->size);
	
	if (!s->sector_lock) {
		DPRINTF("Failed to allocate sector lock\n");
		goto fail;
	}

        /* Initialize AIO */
        s->iocb_free_count = MAX_AIO_REQS;
        s->iocb_queued     = 0;

        /*Signal kernel to create Poll FD for Asyc completion events*/
        s->aio_ctx = (io_context_t) REQUEST_ASYNC_FD;   
        s->poll_fd = io_setup(MAX_AIO_REQS, &s->aio_ctx);

	if (s->poll_fd < 0) {
                if (s->poll_fd == -EAGAIN) {
                        DPRINTF("Couldn't setup AIO context.  If you are "
                                "trying to concurrently use a large number "
                                "of blktap-based disks, you may need to "
                                "increase the system-wide aio request limit. "
                                "(e.g. 'echo echo 1048576 > /proc/sys/fs/"
                                "aio-max-nr')\n");
                } else {
                        DPRINTF("Couldn't get fd for AIO poll support.  This "
                                "is probably because your kernel does not "
                                "have the aio-poll patch applied.\n");
                }
		goto fail;
	}

        for (i=0;i<MAX_AIO_REQS;i++)
                s->iocb_free[i] = &s->iocb_list[i];
	for (i=0;i<MAX_QCOW_IDS;i++)
		s->nr_reqs[i] = 0;
        DPRINTF("AIO state initialised\n");

        return 0;

 fail:
	return -1;
}

/*
 *Test if block is zero. 
 * Return: 
 *       1 for TRUE
 *       0 for FALSE
 */
static inline int IS_ZERO(char *buf, int len)
{
	int i;

	for (i = 0; i < len; i++) {
		/*if not zero, return false*/
		if (ZERO_TEST(*(buf + i))) return 0; 
	}
	return 1;
}

static uint32_t gen_cksum(char *ptr, int len)
{
	unsigned char *md;
	uint32_t ret;

	md = malloc(MD5_DIGEST_LENGTH);

	if(!md) return 0;

	if (MD5((unsigned char *)ptr, len, md) != md) return 0;

	memcpy(&ret, md, sizeof(uint32_t));
	free(md);
	return ret;
}

static int get_filesize(char *filename, uint64_t *size, struct stat *st)
{
	int blockfd;

	/*Set to the backing file size*/
	if(S_ISBLK(st->st_mode)) {
		blockfd = open(filename, O_RDONLY);
		if (blockfd < 0)
			return -1;
		if (ioctl(blockfd,BLKGETSIZE,size)!=0) {
			printf("Unable to get Block device size\n");
			close(blockfd);
			return -1;
		}
		close(blockfd);
	} else *size = (st->st_size >> SECTOR_SHIFT);	
	return 0;
}

static int qcow_set_key(struct td_state *bs, const char *key)
{
	struct tdqcow_state *s = (struct tdqcow_state *)bs->private;
	uint8_t keybuf[16];
	int len, i;
	
	memset(keybuf, 0, 16);
	len = strlen(key);
	if (len > 16)
		len = 16;
	/* XXX: we could compress the chars to 7 bits to increase
	   entropy */
	for (i = 0; i < len; i++) {
		keybuf[i] = key[i];
	}
	s->crypt_method = s->crypt_method_header;
	
	if (AES_set_encrypt_key(keybuf, 128, &s->aes_encrypt_key) != 0)
		return -1;
	if (AES_set_decrypt_key(keybuf, 128, &s->aes_decrypt_key) != 0)
		return -1;
#if 0
	/* test */
	{
		uint8_t in[16];
		uint8_t out[16];
		uint8_t tmp[16];
		for (i=0; i<16; i++)
			in[i] = i;
		AES_encrypt(in, tmp, &s->aes_encrypt_key);
		AES_decrypt(tmp, out, &s->aes_decrypt_key);
		for (i = 0; i < 16; i++)
			DPRINTF(" %02x", tmp[i]);
		DPRINTF("\n");
		for (i = 0; i < 16; i++)
			DPRINTF(" %02x", out[i]);
		DPRINTF("\n");
	}
#endif
	return 0;
}

static int async_read(struct tdqcow_state *s, int fd, int size, 
		     uint64_t offset,
		     char *buf, td_callback_t cb,
		     int id, uint64_t sector, int qcow_idx, void *private)
{
        struct   iocb *io;
        struct   pending_aio *pio;
	long     ioidx;

        io = s->iocb_free[--s->iocb_free_count];

        ioidx = IOCB_IDX(s, io);
        pio = &s->pending_aio[ioidx];
        pio->cb = cb;
        pio->id = id;
        pio->private = private;
	pio->nb_sectors = size/512;
	pio->buf = buf;
	pio->sector = sector;
	pio->qcow_idx = qcow_idx;

        io_prep_pread(io, fd, buf, size, offset);
        io->data = (void *)ioidx;

        s->iocb_queue[s->iocb_queued++] = io;

        return 1;
}

static int async_write(struct tdqcow_state *s, int fd, int size, 
		     uint64_t offset,
		     char *buf, td_callback_t cb,
		      int id, uint64_t sector, int qcow_idx, void *private)
{
        struct   iocb *io;
        struct   pending_aio *pio;
	long     ioidx;

        io = s->iocb_free[--s->iocb_free_count];

        ioidx = IOCB_IDX(s, io);
        pio = &s->pending_aio[ioidx];
        pio->cb = cb;
        pio->id = id;
        pio->private = private;
	pio->nb_sectors = size/512;
	pio->buf = buf;
	pio->sector = sector;
	pio->qcow_idx = qcow_idx;

        io_prep_pwrite(io, fd, buf, size, offset);
        io->data = (void *)ioidx;

        s->iocb_queue[s->iocb_queued++] = io;

        return 1;
}

/*TODO: Fix sector span!*/
static int aio_can_lock(struct tdqcow_state *s, uint64_t sector)
{
	return (s->sector_lock[sector] ? 0 : 1);
}

static int aio_lock(struct tdqcow_state *s, uint64_t sector)
{
	return ++s->sector_lock[sector];
}

static void aio_unlock(struct tdqcow_state *s, uint64_t sector)
{
	if (!s->sector_lock[sector]) return;

	--s->sector_lock[sector];
	return;
}

/*TODO - Use a freelist*/
static int get_free_idx(struct tdqcow_state *s)
{
	int i;
	
	for(i = 0; i < MAX_QCOW_IDS; i++) {
		if(s->nr_reqs[i] == 0) return i;
	}
	return -1;
}

/* 
 * The crypt function is compatible with the linux cryptoloop
 * algorithm for < 4 GB images. NOTE: out_buf == in_buf is
 * supported .
 */
static void encrypt_sectors(struct tdqcow_state *s, int64_t sector_num,
                            uint8_t *out_buf, const uint8_t *in_buf,
                            int nb_sectors, int enc,
                            const AES_KEY *key)
{
	union {
		uint64_t ll[2];
		uint8_t b[16];
	} ivec;
	int i;
	
	for (i = 0; i < nb_sectors; i++) {
		ivec.ll[0] = cpu_to_le64(sector_num);
		ivec.ll[1] = 0;
		AES_cbc_encrypt(in_buf, out_buf, 512, key, 
				ivec.b, enc);
		sector_num++;
		in_buf += 512;
		out_buf += 512;
	}
}

static int qtruncate(int fd, off_t length, int sparse)
{
	int ret, i; 
	int current = 0, rem = 0;
	int sectors = (length + DEFAULT_SECTOR_SIZE - 1)/DEFAULT_SECTOR_SIZE;
	struct stat st;
	char buf[DEFAULT_SECTOR_SIZE];

	/* If length is greater than the current file len
	 * we synchronously write zeroes to the end of the 
	 * file, otherwise we truncate the length down
	 */
	memset(buf, 0x00, DEFAULT_SECTOR_SIZE);
	ret = fstat(fd, &st);
	if (ret == -1)
		return -1;
	if (S_ISBLK(st.st_mode))
		return 0;

	current = (st.st_size + DEFAULT_SECTOR_SIZE - 1)/DEFAULT_SECTOR_SIZE;
	rem = st.st_size % DEFAULT_SECTOR_SIZE;

	/* If we are extending this file, we write zeros to the end --
	 * this tries to ensure that the extents allocated wind up being
	 * contiguous on disk.
	 */
	if(st.st_size < sectors * DEFAULT_SECTOR_SIZE) {
		/*We are extending the file*/
		if (lseek(fd, 0, SEEK_END)==-1) {
			fprintf(stderr, 
				"Lseek EOF failed (%d), internal error\n",
				errno);
			return -1;
		}
		if (rem) {
			ret = write(fd, buf, rem);
			if (ret != rem)
				return -1;
		}
		for (i = current; i < sectors; i++ ) {
			ret = write(fd, buf, DEFAULT_SECTOR_SIZE);
			if (ret != DEFAULT_SECTOR_SIZE)
				return -1;
		}
		
	} else if(sparse && (st.st_size > sectors * DEFAULT_SECTOR_SIZE))
		if (ftruncate(fd, sectors * DEFAULT_SECTOR_SIZE)==-1) {
			fprintf(stderr,
				"Ftruncate failed (%d), internal error\n",
                                errno);
			return -1;
		}
	return 0;
}


/* 'allocate' is:
 *
 * 0 to not allocate.
 *
 * 1 to allocate a normal cluster (for sector indexes 'n_start' to
 * 'n_end')
 *
 * 2 to allocate a compressed cluster of size
 * 'compressed_size'. 'compressed_size' must be > 0 and <
 * cluster_size 
 *
 * return 0 if not allocated.
 */
static uint64_t get_cluster_offset(struct td_state *bs,
                                   uint64_t offset, int allocate,
                                   int compressed_size,
                                   int n_start, int n_end)
{
	struct tdqcow_state *s = (struct tdqcow_state *)bs->private;
	int min_index, i, j, l1_index, l2_index, l2_sector, l1_sector;
	char *tmp_ptr, *tmp_ptr2, *l2_ptr, *l1_ptr;
	uint64_t l2_offset, *l2_table, cluster_offset, tmp;
	uint32_t min_count;
	int new_l2_table;

	/*Check L1 table for the extent offset*/
	l1_index = offset >> (s->l2_bits + s->cluster_bits);
	l2_offset = s->l1_table[l1_index];
	new_l2_table = 0;
	if (!l2_offset) {
		if (!allocate)
			return 0;
		/* 
		 * allocating a new l2 entry + extent 
		 * at the end of the file, we must also
		 * update the L1 entry safely.
		 */
		l2_offset = s->fd_end;

		/* round to cluster size */
		l2_offset = (l2_offset + s->cluster_size - 1) 
			& ~(s->cluster_size - 1);

		/* update the L1 entry */
		s->l1_table[l1_index] = l2_offset;
		tmp = cpu_to_be64(l2_offset);
		
		/*Truncate file for L2 table 
		 *(initialised to zero in case we crash)*/
		if (qtruncate(s->fd, 
			      l2_offset + (s->l2_size * sizeof(uint64_t)),
			      s->sparse) != 0) {
			DPRINTF("ERROR truncating file\n");
			return 0;
		}
		s->fd_end = l2_offset + (s->l2_size * sizeof(uint64_t));

		/*Update the L1 table entry on disk
                 * (for O_DIRECT we write 4KByte blocks)*/
		l1_sector = (l1_index * sizeof(uint64_t)) >> 12;
		l1_ptr = (char *)s->l1_table + (l1_sector << 12);

		if (posix_memalign((void **)&tmp_ptr, 4096, 4096) != 0) {
			DPRINTF("ERROR allocating memory for L1 table\n");
		}
		memcpy(tmp_ptr, l1_ptr, 4096);

		/*
		 * Issue non-asynchronous L1 write.
		 * For safety, we must ensure that
		 * entry is written before blocks.
		 */
		lseek(s->fd, s->l1_table_offset + (l1_sector << 12), SEEK_SET);
		if (write(s->fd, tmp_ptr, 4096) != 4096)
		 	return 0;
		free(tmp_ptr);

		new_l2_table = 1;
		goto cache_miss;
	} else if (s->min_cluster_alloc == s->l2_size) {
		/*Fast-track the request*/
		cluster_offset = l2_offset + (s->l2_size * sizeof(uint64_t));
		l2_index = (offset >> s->cluster_bits) & (s->l2_size - 1);
		return cluster_offset + (l2_index * s->cluster_size);
	}

	/*Check to see if L2 entry is already cached*/
	for (i = 0; i < L2_CACHE_SIZE; i++) {
		if (l2_offset == s->l2_cache_offsets[i]) {
			/* increment the hit count */
			if (++s->l2_cache_counts[i] == 0xffffffff) {
				for (j = 0; j < L2_CACHE_SIZE; j++) {
					s->l2_cache_counts[j] >>= 1;
				}
			}
			l2_table = s->l2_cache + (i << s->l2_bits);
			goto found;
		}
	}

cache_miss:
	/* not found: load a new entry in the least used one */
	min_index = 0;
	min_count = 0xffffffff;
	for (i = 0; i < L2_CACHE_SIZE; i++) {
		if (s->l2_cache_counts[i] < min_count) {
			min_count = s->l2_cache_counts[i];
			min_index = i;
		}
	}
	l2_table = s->l2_cache + (min_index << s->l2_bits);

	/*If extent pre-allocated, read table from disk, 
	 *otherwise write new table to disk*/
	if (new_l2_table) {
		/*Should we allocate the whole extent? Adjustable parameter.*/
		if (s->cluster_alloc == s->l2_size) {
			cluster_offset = l2_offset + 
				(s->l2_size * sizeof(uint64_t));
			cluster_offset = (cluster_offset + s->cluster_size - 1)
				& ~(s->cluster_size - 1);
			if (qtruncate(s->fd, cluster_offset + 
				  (s->cluster_size * s->l2_size), 
				      s->sparse) != 0) {
				DPRINTF("ERROR truncating file\n");
				return 0;
			}
			s->fd_end = cluster_offset + 
				(s->cluster_size * s->l2_size);
			for (i = 0; i < s->l2_size; i++) {
				l2_table[i] = cpu_to_be64(cluster_offset + 
							  (i*s->cluster_size));
			}  
		} else memset(l2_table, 0, s->l2_size * sizeof(uint64_t));

		lseek(s->fd, l2_offset, SEEK_SET);
		if (write(s->fd, l2_table, s->l2_size * sizeof(uint64_t)) !=
		   s->l2_size * sizeof(uint64_t))
			return 0;
	} else {
		lseek(s->fd, l2_offset, SEEK_SET);
		if (read(s->fd, l2_table, s->l2_size * sizeof(uint64_t)) != 
		    s->l2_size * sizeof(uint64_t))
			return 0;
	}
	
	/*Update the cache entries*/ 
	s->l2_cache_offsets[min_index] = l2_offset;
	s->l2_cache_counts[min_index] = 1;

found:
	/*The extent is split into 's->l2_size' blocks of 
	 *size 's->cluster_size'*/
	l2_index = (offset >> s->cluster_bits) & (s->l2_size - 1);
	cluster_offset = be64_to_cpu(l2_table[l2_index]);

	if (!cluster_offset || 
	    ((cluster_offset & QCOW_OFLAG_COMPRESSED) && allocate == 1) ) {
		if (!allocate)
			return 0;
		
		if ((cluster_offset & QCOW_OFLAG_COMPRESSED) &&
		    (n_end - n_start) < s->cluster_sectors) {
			/* cluster is already allocated but compressed, we must
			   decompress it in the case it is not completely
			   overwritten */
			if (decompress_cluster(s, cluster_offset) < 0)
				return 0;
			cluster_offset = lseek(s->fd, s->fd_end, SEEK_SET);
			cluster_offset = (cluster_offset + s->cluster_size - 1)
				& ~(s->cluster_size - 1);
			/* write the cluster content - not asynchronous */
			lseek(s->fd, cluster_offset, SEEK_SET);
			if (write(s->fd, s->cluster_cache, s->cluster_size) != 
			    s->cluster_size)
			    return -1;
		} else {
			/* allocate a new cluster */
			cluster_offset = lseek(s->fd, s->fd_end, SEEK_SET);
			if (allocate == 1) {
				/* round to cluster size */
				cluster_offset = 
					(cluster_offset + s->cluster_size - 1) 
					& ~(s->cluster_size - 1);
				if (qtruncate(s->fd, cluster_offset + 
					      s->cluster_size, s->sparse)!=0) {
					DPRINTF("ERROR truncating file\n");
					return 0;
				}
				s->fd_end = (cluster_offset + s->cluster_size);
				/* if encrypted, we must initialize the cluster
				   content which won't be written */
				if (s->crypt_method && 
				    (n_end - n_start) < s->cluster_sectors) {
					uint64_t start_sect;
					start_sect = (offset & 
						      ~(s->cluster_size - 1)) 
							      >> 9;
					memset(s->cluster_data + 512, 
					       0xaa, 512);
					for (i = 0; i < s->cluster_sectors;i++)
					{
						if (i < n_start || i >= n_end) 
						{
							encrypt_sectors(s, start_sect + i, 
									s->cluster_data, 
									s->cluster_data + 512, 1, 1,
									&s->aes_encrypt_key);
							lseek(s->fd, cluster_offset + i * 512, SEEK_SET);
							if (write(s->fd, s->cluster_data, 512) != 512)
								return -1;
						}
					}
				}
			} else {
				cluster_offset |= QCOW_OFLAG_COMPRESSED | 
					(uint64_t)compressed_size 
						<< (63 - s->cluster_bits);
			}
		}
		/* update L2 table */
		tmp = cpu_to_be64(cluster_offset);
		l2_table[l2_index] = tmp;

		/*For IO_DIRECT we write 4KByte blocks*/
		l2_sector = (l2_index * sizeof(uint64_t)) >> 12;
		l2_ptr = (char *)l2_table + (l2_sector << 12);
		
		if (posix_memalign((void **)&tmp_ptr2, 4096, 4096) != 0) {
			DPRINTF("ERROR allocating memory for L1 table\n");
		}
		memcpy(tmp_ptr2, l2_ptr, 4096);
		lseek(s->fd, l2_offset + (l2_sector << 12), SEEK_SET);
		write(s->fd, tmp_ptr2, 4096);
		free(tmp_ptr2);
	}
	return cluster_offset;
}

static void init_cluster_cache(struct td_state *bs)
{
	struct tdqcow_state *s = (struct tdqcow_state *)bs->private;
	uint32_t count = 0;
	int i, cluster_entries;

	cluster_entries = s->cluster_size / 512;
	DPRINTF("Initialising Cluster cache, %d sectors per cluster (%d cluster size)\n",
		cluster_entries, s->cluster_size);

	for (i = 0; i < bs->size; i += cluster_entries) {
		if (get_cluster_offset(bs, i << 9, 0, 0, 0, 1)) count++;
		if (count >= L2_CACHE_SIZE) return;
	}
	DPRINTF("Finished cluster initialisation, added %d entries\n", count);
	return;
}

static int qcow_is_allocated(struct td_state *bs, int64_t sector_num, 
                             int nb_sectors, int *pnum)
{
	struct tdqcow_state *s = (struct tdqcow_state *)bs->private;

	int index_in_cluster, n;
	uint64_t cluster_offset;

	cluster_offset = get_cluster_offset(bs, sector_num << 9, 0, 0, 0, 0);
	index_in_cluster = sector_num & (s->cluster_sectors - 1);
	n = s->cluster_sectors - index_in_cluster;
	if (n > nb_sectors)
		n = nb_sectors;
	*pnum = n;
	return (cluster_offset != 0);
}

static int decompress_buffer(uint8_t *out_buf, int out_buf_size,
                             const uint8_t *buf, int buf_size)
{
	z_stream strm1, *strm = &strm1;
	int ret, out_len;
	
	memset(strm, 0, sizeof(*strm));
	
	strm->next_in = (uint8_t *)buf;
	strm->avail_in = buf_size;
	strm->next_out = out_buf;
	strm->avail_out = out_buf_size;
	
	ret = inflateInit2(strm, -12);
	if (ret != Z_OK)
		return -1;
	ret = inflate(strm, Z_FINISH);
	out_len = strm->next_out - out_buf;
	if ( (ret != Z_STREAM_END && ret != Z_BUF_ERROR) ||
	    (out_len != out_buf_size) ) {
		inflateEnd(strm);
		return -1;
	}
	inflateEnd(strm);
	return 0;
}
                              
static int decompress_cluster(struct tdqcow_state *s, uint64_t cluster_offset)
{
	int ret, csize;
	uint64_t coffset;

	coffset = cluster_offset & s->cluster_offset_mask;
	if (s->cluster_cache_offset != coffset) {
		csize = cluster_offset >> (63 - s->cluster_bits);
		csize &= (s->cluster_size - 1);
		lseek(s->fd, coffset, SEEK_SET);
		ret = read(s->fd, s->cluster_data, csize);
		if (ret != csize) 
			return -1;
		if (decompress_buffer(s->cluster_cache, s->cluster_size,
				      s->cluster_data, csize) < 0) {
			return -1;
		}
		s->cluster_cache_offset = coffset;
	}
	return 0;
}

/* Open the disk file and initialize qcow state. */
int tdqcow_open (struct td_state *bs, const char *name)
{
	int fd, len, i, shift, ret, size, l1_table_size;
	struct tdqcow_state *s = (struct tdqcow_state *)bs->private;
	char *buf;
	QCowHeader *header;
	QCowHeader_ext *exthdr;
	uint32_t cksum;
	uint64_t final_cluster = 0;

 	DPRINTF("QCOW: Opening %s\n",name);
	/* set up a pipe so that we can hand back a poll fd that won't fire.*/
	ret = pipe(s->poll_pipe);
	if (ret != 0)
		return (0 - errno);

	fd = open(name, O_RDWR | O_DIRECT | O_LARGEFILE);
	if (fd < 0) {
		DPRINTF("Unable to open %s (%d)\n",name,0 - errno);
		return -1;
	}

	s->fd = fd;
	asprintf(&s->name,"%s", name);

	ASSERT(sizeof(header) < 512);

	ret = posix_memalign((void **)&buf, 512, 512);
	if (ret != 0) goto fail;

	if (read(fd, buf, 512) != 512)
		goto fail;

	header = (QCowHeader *)buf;
	be32_to_cpus(&header->magic);
	be32_to_cpus(&header->version);
	be64_to_cpus(&header->backing_file_offset);
	be32_to_cpus(&header->backing_file_size);
	be32_to_cpus(&header->mtime);
	be64_to_cpus(&header->size);
	be32_to_cpus(&header->crypt_method);
	be64_to_cpus(&header->l1_table_offset);

	if (header->magic != QCOW_MAGIC || header->version > QCOW_VERSION)
		goto fail;
	if (header->size <= 1 || header->cluster_bits < 9)
		goto fail;
	if (header->crypt_method > QCOW_CRYPT_AES)
		goto fail;
	s->crypt_method_header = header->crypt_method;
	if (s->crypt_method_header)
		s->encrypted = 1;
	s->cluster_bits = header->cluster_bits;
	s->cluster_size = 1 << s->cluster_bits;
	s->cluster_sectors = 1 << (s->cluster_bits - 9);
	s->l2_bits = header->l2_bits;
	s->l2_size = 1 << s->l2_bits;
	s->cluster_alloc = s->l2_size;
	bs->size = header->size / 512;
	s->cluster_offset_mask = (1LL << (63 - s->cluster_bits)) - 1;
	
	/* read the level 1 table */
	shift = s->cluster_bits + s->l2_bits;
	s->l1_size = (header->size + (1LL << shift) - 1) >> shift;
	
	s->l1_table_offset = header->l1_table_offset;

	/*allocate a 4Kbyte multiple of memory*/
	l1_table_size = s->l1_size * sizeof(uint64_t);
	if (l1_table_size % 4096 > 0) {
		l1_table_size = ((l1_table_size >> 12) + 1) << 12;
	}
	ret = posix_memalign((void **)&s->l1_table, 4096, l1_table_size);
	if (ret != 0) goto fail;

	memset(s->l1_table, 0x00, l1_table_size);

	DPRINTF("L1 Table offset detected: %llu, size %d (%d)\n",
		(long long)s->l1_table_offset,
		(int) (s->l1_size * sizeof(uint64_t)), 
		l1_table_size);

	lseek(fd, s->l1_table_offset, SEEK_SET);
	if (read(fd, s->l1_table, l1_table_size) != l1_table_size)
		goto fail;

	for(i = 0;i < s->l1_size; i++) {
		//be64_to_cpus(&s->l1_table[i]);
		//DPRINTF("L1[%d] => %llu\n", i, s->l1_table[i]);
		if (s->l1_table[i] > final_cluster)
			final_cluster = s->l1_table[i];
	}

	/* alloc L2 cache */
	size = s->l2_size * L2_CACHE_SIZE * sizeof(uint64_t);
	ret = posix_memalign((void **)&s->l2_cache, 4096, size);
	if(ret != 0) goto fail;

	size = s->cluster_size;
	ret = posix_memalign((void **)&s->cluster_cache, 4096, size);
	if(ret != 0) goto fail;

	ret = posix_memalign((void **)&s->cluster_data, 4096, size);
	if(ret != 0) goto fail;
	s->cluster_cache_offset = -1;

	/* read the backing file name */
	s->bfd = -1;
	if (header->backing_file_offset != 0) {
		DPRINTF("Reading backing file data\n");
		len = header->backing_file_size;
		if (len > 1023)
			len = 1023;

                /*TODO - Fix read size for O_DIRECT and use original fd!*/
		fd = open(name, O_RDONLY | O_LARGEFILE);

		lseek(fd, header->backing_file_offset, SEEK_SET);
		if (read(fd, bs->backing_file, len) != len)
			goto fail;
		bs->backing_file[len] = '\0';
		close(fd);
		/***********************************/

		/*Open backing file*/
		fd = open(bs->backing_file, O_RDONLY | O_DIRECT | O_LARGEFILE);
		if (fd < 0) {
			DPRINTF("Unable to open backing file: %s\n",
				bs->backing_file);
			goto fail;
		}
		s->bfd = fd;
		s->cluster_alloc = 1; /*Cannot use pre-alloc*/
	}

        bs->sector_size = 512;
        bs->info = 0;
	
	/*Detect min_cluster_alloc*/
	s->min_cluster_alloc = 1; /*Default*/
	if (s->bfd == -1 && (s->l1_table_offset % 4096 == 0) ) {
		/*We test to see if the xen magic # exists*/
		exthdr = (QCowHeader_ext *)(buf + sizeof(QCowHeader));
		be32_to_cpus(&exthdr->xmagic);
		if(exthdr->xmagic != XEN_MAGIC) 
			goto end_xenhdr;

		/*Finally check the L1 table cksum*/
		be32_to_cpus(&exthdr->cksum);
		cksum = gen_cksum((char *)s->l1_table, 
				  s->l1_size * sizeof(uint64_t));
		if(exthdr->cksum != cksum)
			goto end_xenhdr;
			
		be32_to_cpus(&exthdr->min_cluster_alloc);
		be32_to_cpus(&exthdr->flags);
		s->sparse = (exthdr->flags & SPARSE_FILE);
		s->min_cluster_alloc = exthdr->min_cluster_alloc; 
	}

 end_xenhdr:
	if (init_aio_state(bs)!=0) {
		DPRINTF("Unable to initialise AIO state\n");
		goto fail;
	}
	s->fd_end = (final_cluster == 0 ? (s->l1_table_offset + l1_table_size) : 
				(final_cluster + s->cluster_size));

	return 0;
	
fail:
	DPRINTF("QCOW Open failed\n");
	free(s->l1_table);
	free(s->l2_cache);
	free(s->cluster_cache);
	free(s->cluster_data);
	close(fd);
	return -1;
}

 int tdqcow_queue_read(struct td_state *bs, uint64_t sector,
			       int nb_sectors, char *buf, td_callback_t cb,
			       int id, void *private)
{
	struct tdqcow_state *s = (struct tdqcow_state *)bs->private;
	int ret = 0, index_in_cluster, n, i, qcow_idx, asubmit = 0;
	uint64_t cluster_offset;

	/*Check we can get a lock*/
	for (i = 0; i < nb_sectors; i++)
		if (!aio_can_lock(s, sector + i)) {
			DPRINTF("AIO_CAN_LOCK failed [%llu]\n", 
				(long long) sector + i);
			return -EBUSY;
		}
	
	/*We store a local record of the request*/
	qcow_idx = get_free_idx(s);
	while (nb_sectors > 0) {
		cluster_offset = 
			get_cluster_offset(bs, sector << 9, 0, 0, 0, 0);
		index_in_cluster = sector & (s->cluster_sectors - 1);
		n = s->cluster_sectors - index_in_cluster;
		if (n > nb_sectors)
			n = nb_sectors;

		if (s->iocb_free_count == 0 || !aio_lock(s, sector)) {
			DPRINTF("AIO_LOCK or iocb_free_count (%d) failed" 
				"[%llu]\n", s->iocb_free_count, 
				(long long) sector);
			return -ENOMEM;
		}
		
		if (!cluster_offset && (s->bfd > 0)) {
			s->nr_reqs[qcow_idx]++;
			asubmit += async_read(s, s->bfd, n * 512, sector << 9,