aboutsummaryrefslogtreecommitdiffstats
path: root/xen/common/lz4/defs.h
diff options
context:
space:
mode:
authorKyungsik Lee <kyungsik.lee@lge.com>2013-10-07 09:40:35 +0200
committerJan Beulich <jbeulich@suse.com>2013-10-07 09:40:35 +0200
commite850049bbc831bfe40b87eaea673675d8a829e74 (patch)
tree39585eab8bdef2dc999b2837efd404a231c9e2d3 /xen/common/lz4/defs.h
parent8e0da8c07f4f80e14314977a11f738bd74a5b62b (diff)
downloadxen-e850049bbc831bfe40b87eaea673675d8a829e74.tar.gz
xen-e850049bbc831bfe40b87eaea673675d8a829e74.tar.bz2
xen-e850049bbc831bfe40b87eaea673675d8a829e74.zip
xen: add LZ4 decompression support
Add support for LZ4 decompression in Xen. LZ4 Decompression APIs for Xen are based on LZ4 implementation by Yann Collet. Benchmark Results(PATCH v3) Compiler: Linaro ARM gcc 4.6.2 1. ARMv7, 1.5GHz based board Kernel: linux 3.4 Uncompressed Kernel Size: 14MB Compressed Size Decompression Speed LZO 6.7MB 20.1MB/s, 25.2MB/s(UA) LZ4 7.3MB 29.1MB/s, 45.6MB/s(UA) 2. ARMv7, 1.7GHz based board Kernel: linux 3.7 Uncompressed Kernel Size: 14MB Compressed Size Decompression Speed LZO 6.0MB 34.1MB/s, 52.2MB/s(UA) LZ4 6.5MB 86.7MB/s - UA: Unaligned memory Access support - Latest patch set for LZO applied This patch set is for adding support for LZ4-compressed Kernel. LZ4 is a very fast lossless compression algorithm and it also features an extremely fast decoder [1]. But we have five of decompressors already and one question which does arise, however, is that of where do we stop adding new ones? This issue had been discussed and came to the conclusion [2]. Russell King said that we should have: - one decompressor which is the fastest - one decompressor for the highest compression ratio - one popular decompressor (eg conventional gzip) If we have a replacement one for one of these, then it should do exactly that: replace it. The benchmark shows that an 8% increase in image size vs a 66% increase in decompression speed compared to LZO(which has been known as the fastest decompressor in the Kernel). Therefore the "fast but may not be small" compression title has clearly been taken by LZ4 [3]. [1] http://code.google.com/p/lz4/ [2] http://thread.gmane.org/gmane.linux.kbuild.devel/9157 [3] http://thread.gmane.org/gmane.linux.kbuild.devel/9347 LZ4 homepage: http://fastcompression.blogspot.com/p/lz4.html LZ4 source repository: http://code.google.com/p/lz4/ Signed-off-by: Kyungsik Lee <kyungsik.lee@lge.com> Signed-off-by: Yann Collet <yann.collet.73@gmail.com> Signed-off-by: Jan Beulich <jbeulich@suse.com> Acked-by: Keir Fraser <keir@xen.org>
Diffstat (limited to 'xen/common/lz4/defs.h')
-rw-r--r--xen/common/lz4/defs.h184
1 files changed, 184 insertions, 0 deletions
diff --git a/xen/common/lz4/defs.h b/xen/common/lz4/defs.h
new file mode 100644
index 0000000000..f46df0884e
--- /dev/null
+++ b/xen/common/lz4/defs.h
@@ -0,0 +1,184 @@
+/*
+ * lz4defs.h -- architecture specific defines
+ *
+ * Copyright (C) 2013, LG Electronics, Kyungsik Lee <kyungsik.lee@lge.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifdef __XEN__
+#include <asm/byteorder.h>
+#endif
+
+#ifdef __LITTLE_ENDIAN
+static inline u16 INIT get_unaligned_le16(const void *p)
+{
+ return le16_to_cpup(p);
+}
+
+static inline u32 INIT get_unaligned_le32(const void *p)
+{
+ return le32_to_cpup(p);
+}
+#else
+#include <asm/unaligned.h>
+
+static inline u16 INIT get_unaligned_le16(const void *p)
+{
+ return le16_to_cpu(__get_unaligned(p, 2));
+}
+
+static inline u32 INIT get_unaligned_le32(void *p)
+{
+ return le32_to_cpu(__get_unaligned(p, 4));
+}
+#endif
+
+/*
+ * Detects 64 bits mode
+ */
+#if (defined(__x86_64__) || defined(__x86_64) || defined(__amd64__) \
+ || defined(__ppc64__) || defined(__LP64__))
+#define LZ4_ARCH64 1
+#else
+#define LZ4_ARCH64 0
+#endif
+
+/*
+ * Architecture-specific macros
+ */
+#define BYTE u8
+typedef struct _U16_S { u16 v; } U16_S;
+typedef struct _U32_S { u32 v; } U32_S;
+typedef struct _U64_S { u64 v; } U64_S;
+#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) \
+ || defined(CONFIG_ARM) && __LINUX_ARM_ARCH__ >= 6 \
+ && defined(ARM_EFFICIENT_UNALIGNED_ACCESS)
+
+#define A16(x) (((U16_S *)(x))->v)
+#define A32(x) (((U32_S *)(x))->v)
+#define A64(x) (((U64_S *)(x))->v)
+
+#define PUT4(s, d) (A32(d) = A32(s))
+#define PUT8(s, d) (A64(d) = A64(s))
+#define LZ4_WRITE_LITTLEENDIAN_16(p, v) \
+ do { \
+ A16(p) = v; \
+ p += 2; \
+ } while (0)
+#else /* CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS */
+
+#define A64(x) get_unaligned((u64 *)&(((U16_S *)(x))->v))
+#define A32(x) get_unaligned((u32 *)&(((U16_S *)(x))->v))
+#define A16(x) get_unaligned((u16 *)&(((U16_S *)(x))->v))
+
+#define PUT4(s, d) \
+ put_unaligned(get_unaligned((const u32 *) s), (u32 *) d)
+#define PUT8(s, d) \
+ put_unaligned(get_unaligned((const u64 *) s), (u64 *) d)
+
+#define LZ4_WRITE_LITTLEENDIAN_16(p, v) \
+ do { \
+ put_unaligned(v, (u16 *)(p)); \
+ p += 2; \
+ } while (0)
+#endif
+
+#define COPYLENGTH 8
+#define ML_BITS 4
+#define ML_MASK ((1U << ML_BITS) - 1)
+#define RUN_BITS (8 - ML_BITS)
+#define RUN_MASK ((1U << RUN_BITS) - 1)
+#define MEMORY_USAGE 14
+#define MINMATCH 4
+#define SKIPSTRENGTH 6
+#define LASTLITERALS 5
+#define MFLIMIT (COPYLENGTH + MINMATCH)
+#define MINLENGTH (MFLIMIT + 1)
+#define MAXD_LOG 16
+#define MAXD (1 << MAXD_LOG)
+#define MAXD_MASK (u32)(MAXD - 1)
+#define MAX_DISTANCE (MAXD - 1)
+#define HASH_LOG (MAXD_LOG - 1)
+#define HASHTABLESIZE (1 << HASH_LOG)
+#define MAX_NB_ATTEMPTS 256
+#define OPTIMAL_ML (int)((ML_MASK-1)+MINMATCH)
+#define LZ4_64KLIMIT ((1<<16) + (MFLIMIT - 1))
+#define HASHLOG64K ((MEMORY_USAGE - 2) + 1)
+#define HASH64KTABLESIZE (1U << HASHLOG64K)
+#define LZ4_HASH_VALUE(p) (((A32(p)) * 2654435761U) >> \
+ ((MINMATCH * 8) - (MEMORY_USAGE-2)))
+#define LZ4_HASH64K_VALUE(p) (((A32(p)) * 2654435761U) >> \
+ ((MINMATCH * 8) - HASHLOG64K))
+#define HASH_VALUE(p) (((A32(p)) * 2654435761U) >> \
+ ((MINMATCH * 8) - HASH_LOG))
+
+#if LZ4_ARCH64/* 64-bit */
+#define STEPSIZE 8
+
+#define LZ4_COPYSTEP(s, d) \
+ do { \
+ PUT8(s, d); \
+ d += 8; \
+ s += 8; \
+ } while (0)
+
+#define LZ4_COPYPACKET(s, d) LZ4_COPYSTEP(s, d)
+
+#define LZ4_SECURECOPY(s, d, e) \
+ do { \
+ if (d < e) { \
+ LZ4_WILDCOPY(s, d, e); \
+ } \
+ } while (0)
+#define HTYPE u32
+
+#ifdef __BIG_ENDIAN
+#define LZ4_NBCOMMONBYTES(val) (__builtin_clzll(val) >> 3)
+#else
+#define LZ4_NBCOMMONBYTES(val) (__builtin_ctzll(val) >> 3)
+#endif
+
+#else /* 32-bit */
+#define STEPSIZE 4
+
+#define LZ4_COPYSTEP(s, d) \
+ do { \
+ PUT4(s, d); \
+ d += 4; \
+ s += 4; \
+ } while (0)
+
+#define LZ4_COPYPACKET(s, d) \
+ do { \
+ LZ4_COPYSTEP(s, d); \
+ LZ4_COPYSTEP(s, d); \
+ } while (0)
+
+#define LZ4_SECURECOPY LZ4_WILDCOPY
+#define HTYPE const u8*
+
+#ifdef __BIG_ENDIAN
+#define LZ4_NBCOMMONBYTES(val) (__builtin_clz(val) >> 3)
+#else
+#define LZ4_NBCOMMONBYTES(val) (__builtin_ctz(val) >> 3)
+#endif
+
+#endif
+
+#define LZ4_READ_LITTLEENDIAN_16(d, s, p) \
+ (d = s - get_unaligned_le16(p))
+
+#define LZ4_WILDCOPY(s, d, e) \
+ do { \
+ LZ4_COPYPACKET(s, d); \
+ } while (d < e)
+
+#define LZ4_BLINDCOPY(s, d, l) \
+ do { \
+ u8 *e = (d) + l; \
+ LZ4_WILDCOPY(s, d, e); \
+ d = e; \
+ } while (0)