aboutsummaryrefslogtreecommitdiffstats
path: root/OpenKeychain/src/main/java/org/sufficientlysecure/keychain/util
diff options
context:
space:
mode:
author: Vincent Breitmoser <valodim@mugenguild.com>, 2016-02-23 14:14:12 +0100
committer: Vincent Breitmoser <valodim@mugenguild.com>, 2016-02-23 14:14:12 +0100
commit: b9abf43153271e5929fc2e8415d7930c6ab3c708 (patch)
tree: 3d9072839389eafc50e7f308bc0be491eff8d893 /OpenKeychain/src/main/java/org/sufficientlysecure/keychain/util
parent: a0c90f0ad57b66d6e7e0957526748b2e4a239063 (diff)
download: open-keychain-b9abf43153271e5929fc2e8415d7930c6ab3c708.tar.gz
open-keychain-b9abf43153271e5929fc2e8415d7930c6ab3c708.tar.bz2
open-keychain-b9abf43153271e5929fc2e8415d7930c6ab3c708.zip
move CharsetVerifier to utils package
Diffstat (limited to 'OpenKeychain/src/main/java/org/sufficientlysecure/keychain/util')
-rw-r--r-- OpenKeychain/src/main/java/org/sufficientlysecure/keychain/util/CharsetVerifier.java | 142
1 files changed, 142 insertions, 0 deletions
diff --git a/OpenKeychain/src/main/java/org/sufficientlysecure/keychain/util/CharsetVerifier.java b/OpenKeychain/src/main/java/org/sufficientlysecure/keychain/util/CharsetVerifier.java
new file mode 100644
index 000000000..c1d11cc26
--- /dev/null
+++ b/OpenKeychain/src/main/java/org/sufficientlysecure/keychain/util/CharsetVerifier.java
@@ -0,0 +1,142 @@
+package org.sufficientlysecure.keychain.util;
+
+
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CoderResult;
+import java.nio.charset.CodingErrorAction;
+
+import android.content.ClipDescription;
+import android.support.annotation.NonNull;
+import android.support.annotation.Nullable;
+
+/**
+ * Guesses whether a stream of data is text encoded in a given charset.
+ *
+ * <p>An object of this class must be initialized with a byte[] buffer. The caller
+ * fills that buffer with data and processes it with {@link #readBytesFromBuffer};
+ * this can be done any number of times. Once all data has been read, a final
+ * status can be read using the getter methods. Reading the status ends the input,
+ * feeding more data afterwards is an error.
+ *
+ * <p>No data is actually converted, the decoder output is thrown away. Only the
+ * presence of decoding errors is recorded.
+ */
+public class CharsetVerifier {
+
+    /** Upper bound for the bytes of an unfinished character kept between two reads. */
+    private static final int MAX_PENDING_BYTES = 16;
+
+    private final ByteBuffer bufWrap;
+    private final CharBuffer dummyOutput;
+    /** Trailing bytes the decoder has not consumed yet; kept in "write mode" between calls. */
+    private final ByteBuffer pendingBytes;
+
+    private final CharsetDecoder charsetDecoder;
+
+    private boolean isFinished;
+    private boolean isFaulty;
+    private boolean isGuessed;
+    private boolean isPossibleTextMimeType;
+    private boolean isTextMimeType;
+    private String charset;
+
+    /**
+     * Creates a verifier which inspects data passed through the given buffer.
+     *
+     * @param buf the buffer which the caller fills before each call to
+     *         {@link #readBytesFromBuffer}; it is wrapped, not copied
+     * @param mimeType the mime type of the data. Only "text/*",
+     *         "application/octet-stream" and "application/x-download" are
+     *         considered possible text, for all other types no decoding takes place
+     * @param charset the declared charset of the data. If this is null or
+     *         "us-ascii", "utf-8" is used instead and the charset counts as guessed
+     * @throws IllegalArgumentException if the charset name is illegal or the
+     *         charset is not supported (see {@link Charset#forName})
+     */
+    public CharsetVerifier(@NonNull byte[] buf, String mimeType, @Nullable String charset) {
+
+        isPossibleTextMimeType = ClipDescription.compareMimeTypes(mimeType, "application/octet-stream")
+                || ClipDescription.compareMimeTypes(mimeType, "application/x-download")
+                || ClipDescription.compareMimeTypes(mimeType, "text/*");
+        if (!isPossibleTextMimeType) {
+            // nothing will ever be decoded, all read calls turn into no-ops
+            charsetDecoder = null;
+            bufWrap = null;
+            dummyOutput = null;
+            pendingBytes = null;
+            return;
+        }
+        isTextMimeType = ClipDescription.compareMimeTypes(mimeType, "text/*");
+
+        bufWrap = ByteBuffer.wrap(buf);
+        dummyOutput = CharBuffer.allocate(buf.length);
+        pendingBytes = ByteBuffer.allocate(MAX_PENDING_BYTES);
+
+        // the charset defaults to us-ascii, but we want to default to utf-8
+        if (charset == null || "us-ascii".equals(charset)) {
+            charset = "utf-8";
+            isGuessed = true;
+        } else {
+            isGuessed = false;
+        }
+        this.charset = charset;
+
+        // REPORT makes the decoder return an error result instead of silently
+        // replacing or skipping bytes which are invalid in this charset
+        charsetDecoder = Charset.forName(charset).newDecoder();
+        charsetDecoder.onMalformedInput(CodingErrorAction.REPORT);
+        charsetDecoder.onUnmappableCharacter(CodingErrorAction.REPORT);
+        charsetDecoder.reset();
+    }
+
+    /**
+     * Checks the next chunk of data, which the caller has placed in the buffer
+     * given to the constructor.
+     *
+     * <p>This is a no-op if the mime type ruled out text, or if a decoding error
+     * was found in earlier data.
+     *
+     * @param pos index of the first byte of the chunk inside the buffer
+     * @param len number of bytes in the chunk
+     * @throws IllegalStateException if the status has already been read
+     * @throws IllegalArgumentException if the chunk does not lie inside the buffer
+     */
+    public void readBytesFromBuffer(int pos, int len) {
+        if (isFinished) {
+            throw new IllegalStateException("cannot write again after reading charset status!");
+        }
+        if (isFaulty || bufWrap == null) {
+            return;
+        }
+        // clear() lifts the limit left over from the previous call, so that any
+        // window [pos, pos + len) inside the buffer can be selected
+        bufWrap.clear();
+        bufWrap.limit(pos + len);
+        bufWrap.position(pos);
+
+        // A character may have been split across the chunk boundary: its first
+        // bytes were left unconsumed by the previous call. Complete it byte by
+        // byte with the start of this chunk before decoding the rest.
+        while (pendingBytes.position() > 0 && pendingBytes.hasRemaining() && bufWrap.hasRemaining()) {
+            pendingBytes.put(bufWrap.get());
+            pendingBytes.flip();
+            decode(pendingBytes, false);
+            pendingBytes.compact();
+            if (isFaulty) {
+                return;
+            }
+        }
+        if (!bufWrap.hasRemaining()) {
+            return;
+        }
+        // no-op, unless the pending bytes could not be completed within
+        // MAX_PENDING_BYTES, in which case they are given up on
+        pendingBytes.clear();
+
+        decode(bufWrap, false);
+        // the decoder does not promise to remember input it has not consumed,
+        // so an unfinished trailing character is carried over to the next call
+        if (!isFaulty && bufWrap.remaining() <= pendingBytes.remaining()) {
+            pendingBytes.put(bufWrap);
+        }
+    }
+
+    /** Runs the decoder over the given input, recording whether it reported an error. */
+    private void decode(ByteBuffer input, boolean endOfInput) {
+        dummyOutput.clear();
+        CoderResult result = charsetDecoder.decode(input, dummyOutput, endOfInput);
+        if (result.isError()) {
+            isFaulty = true;
+        }
+    }
+
+    /** Signals the end of input to the decoder, the first time a status is read. */
+    private void finishIfNecessary() {
+        if (isFinished || isFaulty || bufWrap == null) {
+            return;
+        }
+        isFinished = true;
+        // a character which is still unfinished at the very end of the data is
+        // reported as malformed by the decoder
+        pendingBytes.flip();
+        decode(pendingBytes, true);
+    }
+
+    /** Returns true if the data could not be decoded using the charset. */
+    public boolean isCharsetFaulty() {
+        finishIfNecessary();
+        return isFaulty;
+    }
+
+    /** Returns true if no charset was declared, so that utf-8 was assumed. */
+    public boolean isCharsetGuessed() {
+        finishIfNecessary();
+        return isGuessed;
+    }
+
+    /**
+     * Returns the charset of the data, or null if the mime type ruled out text
+     * or if the charset was only guessed and the guess turned out wrong.
+     */
+    public String getCharset() {
+        finishIfNecessary();
+        if (!isPossibleTextMimeType || (isGuessed && isFaulty)) {
+            return null;
+        }
+        return charset;
+    }
+
+    /**
+     * Returns the charset which was used for decoding, even if decoding failed.
+     * This is null if the mime type ruled out text. Unlike the other getters,
+     * this one does not end the input.
+     */
+    public String getMaybeFaultyCharset() {
+        return charset;
+    }
+
+    /** Returns true if the data which was read is definitely binary.
+     *
+     * This can happen when either the supplied mimeType indicated a non-ambiguous
+     * binary data type, or if we guessed a charset but got errors while decoding.
+     * Data with a "text/*" mime type is never considered binary.
+     */
+    public boolean isDefinitelyBinary() {
+        finishIfNecessary();
+        return !isTextMimeType && (!isPossibleTextMimeType || (isGuessed && isFaulty));
+    }
+
+    /** Returns true iff the data which was read is probably (or
+     * definitely) text.
+     *
+     * This is always the exact opposite of {@link #isDefinitelyBinary}. Note the
+     * corner case where the charset was provided by the data (so is not guessed)
+     * but is still faulty: such data is treated as text, not as binary.
+     */
+    public boolean isProbablyText() {
+        finishIfNecessary();
+        return isTextMimeType || (isPossibleTextMimeType && (!isGuessed || !isFaulty));
+    }
+}