From b9abf43153271e5929fc2e8415d7930c6ab3c708 Mon Sep 17 00:00:00 2001 From: Vincent Breitmoser Date: Tue, 23 Feb 2016 14:14:12 +0100 Subject: move CharsetVerifier to utils package --- .../keychain/util/CharsetVerifier.java | 142 +++++++++++++++++++++ 1 file changed, 142 insertions(+) create mode 100644 OpenKeychain/src/main/java/org/sufficientlysecure/keychain/util/CharsetVerifier.java (limited to 'OpenKeychain/src/main/java/org/sufficientlysecure/keychain/util') diff --git a/OpenKeychain/src/main/java/org/sufficientlysecure/keychain/util/CharsetVerifier.java b/OpenKeychain/src/main/java/org/sufficientlysecure/keychain/util/CharsetVerifier.java new file mode 100644 index 000000000..c1d11cc26 --- /dev/null +++ b/OpenKeychain/src/main/java/org/sufficientlysecure/keychain/util/CharsetVerifier.java @@ -0,0 +1,142 @@ +package org.sufficientlysecure.keychain.util; + + +import java.nio.ByteBuffer; +import java.nio.CharBuffer; +import java.nio.charset.Charset; +import java.nio.charset.CharsetDecoder; +import java.nio.charset.CoderResult; +import java.nio.charset.CodingErrorAction; + +import android.content.ClipDescription; +import android.support.annotation.NonNull; +import android.support.annotation.Nullable; + +/** This class can be used to guess whether a stream of data is encoded in a given + * charset or not. + * + * An object of this class must be initialized with a byte[] buffer, which should + * be filled with data, then processed with {@link #readBytesFromBuffer}. This can + * be done any number of times. Once all data has been read, a final status can be + * read using the getter methods. + */ +public class CharsetVerifier { + + private final ByteBuffer bufWrap; + private final CharBuffer dummyOutput; + + private final CharsetDecoder charsetDecoder; + + private boolean isFinished; + private boolean isFaulty; + private boolean isGuessed; + private boolean isPossibleTextMimeType; + private boolean isTextMimeType; + private String charset; + + public CharsetVerifier(@NonNull byte[] buf, String mimeType, @Nullable String charset) { + + isPossibleTextMimeType = ClipDescription.compareMimeTypes(mimeType, "application/octet-stream") + || ClipDescription.compareMimeTypes(mimeType, "application/x-download") + || ClipDescription.compareMimeTypes(mimeType, "text/*"); + if (!isPossibleTextMimeType) { + charsetDecoder = null; + bufWrap = null; + dummyOutput = null; + return; + } + isTextMimeType = ClipDescription.compareMimeTypes(mimeType, "text/*"); + + bufWrap = ByteBuffer.wrap(buf); + dummyOutput = CharBuffer.allocate(buf.length); + + // the charset defaults to us-ascii, but we want to default to utf-8 + if (charset == null || "us-ascii".equals(charset)) { + charset = "utf-8"; + isGuessed = true; + } else { + isGuessed = false; + } + this.charset = charset; + + charsetDecoder = Charset.forName(charset).newDecoder(); + charsetDecoder.onMalformedInput(CodingErrorAction.REPORT); + charsetDecoder.onUnmappableCharacter(CodingErrorAction.REPORT); + charsetDecoder.reset(); + } + + public void readBytesFromBuffer(int pos, int len) { + if (isFinished) { + throw new IllegalStateException("cannot write again after reading charset status!"); + } + if (isFaulty || bufWrap == null) { + return; + } + bufWrap.rewind(); + bufWrap.position(pos); + bufWrap.limit(len); + dummyOutput.rewind(); + CoderResult result = charsetDecoder.decode(bufWrap, dummyOutput, false); + if (result.isError()) { + isFaulty = true; + } + } + + private void finishIfNecessary() { + if (isFinished || isFaulty || bufWrap == null) { + return; + } + isFinished = true; + bufWrap.rewind(); + bufWrap.limit(0); + dummyOutput.rewind(); + CoderResult result = charsetDecoder.decode(bufWrap, dummyOutput, true); + if (result.isError()) { + isFaulty = true; + } + } + + public boolean isCharsetFaulty() { + finishIfNecessary(); + return isFaulty; + } + + public boolean isCharsetGuessed() { + finishIfNecessary(); + return isGuessed; + } + + public String getCharset() { + finishIfNecessary(); + if (!isPossibleTextMimeType || (isGuessed && isFaulty)) { + return null; + } + return charset; + } + + public String getMaybeFaultyCharset() { + return charset; + } + + /** Returns true if the data which was read is definitely binary. + * + * This can happen when either the supplied mimeType indicated a non-ambiguous + * binary data type, or if we guessed a charset but got errors while decoding. + */ + public boolean isDefinitelyBinary() { + finishIfNecessary(); + return !isTextMimeType && (!isPossibleTextMimeType || (isGuessed && isFaulty)); + } + + /** Returns true iff the data which was read is probably (or + * definitely) text. + * + * The corner case where isDefinitelyBinary returns false but isProbablyText + * returns true is where the charset was provided by the data (so is not + * guessed) but is still faulty. + */ + public boolean isProbablyText() { + finishIfNecessary(); + return isTextMimeType || isPossibleTextMimeType && (!isGuessed || !isFaulty); + } +} -- cgit v1.2.3