// Source path: root/OpenKeychain/src/main/java/org/sufficientlysecure/keychain/util/CharsetVerifier.java
// Source blob: c03decc8946417025de142dfc381cbf96a62df47 (plain)
package org.sufficientlysecure.keychain.util;


import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CoderResult;
import java.nio.charset.CodingErrorAction;

import android.content.ClipDescription;
import android.support.annotation.NonNull;
import android.support.annotation.Nullable;

/** This class can be used to guess whether a stream of data is encoded in a given
 * charset or not.
 *
 * An object of this class must be initialized with a byte[] buffer, which should
 * be filled with data, then processed with {@link #readBytesFromBuffer}. This can
 * be done any number of times. Once all data has been read, a final status can be
 * read using the getter methods.
 *
 * Bytes that form an incomplete multi-byte sequence at the end of one chunk are
 * kept and decoded together with the following chunk, so a character which is
 * split across two reads is not mistaken for malformed input. Whatever is still
 * incomplete once the final status is requested counts as a decoding error.
 *
 * This class performs no synchronization of its own.
 */
public class CharsetVerifier {

    /** Lower bound for the scratch output size, so that the decoder can always make
     * progress (a single code point may need two UTF-16 chars) even if the supplied
     * byte buffer is tiny. */
    private static final int MIN_OUTPUT_CHARS = 16;
    private static final byte[] NO_PENDING_BYTES = new byte[0];

    private final ByteBuffer bufWrap;
    /** Scratch space for decoded chars. Its content is never read. */
    private final CharBuffer dummyOutput;

    private final CharsetDecoder charsetDecoder;

    private final boolean isPossibleTextMimeType;
    private final boolean isTextMimeType;
    private final String mimeType;

    private boolean isFinished;
    private boolean isFaulty;
    private boolean isGuessed;
    private String charset;
    /** Undecoded tail of the previous chunk (an incomplete multi-byte sequence). */
    private byte[] pendingBytes = NO_PENDING_BYTES;

    /** Creates a verifier which checks data against the given (or a guessed) charset.
     *
     * If the mime type is neither text/* nor one of the ambiguous binary types
     * (application/octet-stream, application/x-download), no decoding is set up and
     * all calls to {@link #readBytesFromBuffer} are no-ops.
     *
     * @param buf the buffer which will hold the data for each call to
     *            {@link #readBytesFromBuffer}
     * @param mimeType the mime type supplied with the data
     * @param charset the charset supplied with the data. If this is null or
     *            "us-ascii" (in any letter case), utf-8 is guessed instead.
     * @throws IllegalArgumentException if the charset name is illegal or not supported,
     *            as thrown by {@link Charset#forName}
     */
    public CharsetVerifier(@NonNull byte[] buf, @NonNull String mimeType, @Nullable String charset) {

        this.mimeType = mimeType;
        isTextMimeType = ClipDescription.compareMimeTypes(mimeType, "text/*");
        isPossibleTextMimeType = isTextMimeType
                || ClipDescription.compareMimeTypes(mimeType, "application/octet-stream")
                || ClipDescription.compareMimeTypes(mimeType, "application/x-download");
        if (!isPossibleTextMimeType) {
            charsetDecoder = null;
            bufWrap = null;
            dummyOutput = null;
            return;
        }

        bufWrap = ByteBuffer.wrap(buf);
        dummyOutput = CharBuffer.allocate(Math.max(buf.length, MIN_OUTPUT_CHARS));

        // the charset defaults to us-ascii, but we want to default to utf-8.
        // charset names are case-insensitive, so "US-ASCII" must be matched as well
        if (charset == null || "us-ascii".equalsIgnoreCase(charset)) {
            charset = "utf-8";
            isGuessed = true;
        } else {
            isGuessed = false;
        }
        this.charset = charset;

        charsetDecoder = Charset.forName(charset).newDecoder();
        charsetDecoder.onMalformedInput(CodingErrorAction.REPORT);
        charsetDecoder.onUnmappableCharacter(CodingErrorAction.REPORT);
        charsetDecoder.reset();
    }

    /** Checks the bytes buf[pos] to buf[pos + len - 1] of the buffer passed to the
     * constructor against the charset.
     *
     * This is a no-op if an error was found earlier, or if the mime type ruled out
     * text data to begin with.
     *
     * @param pos offset of the first byte to check
     * @param len number of bytes to check
     * @throws IllegalStateException if a status getter which finishes the decoding
     *            was already called
     * @throws IllegalArgumentException if pos and len do not describe a range inside
     *            the buffer
     */
    public void readBytesFromBuffer(int pos, int len) {
        if (isFinished) {
            throw new IllegalStateException("cannot write again after reading charset status!");
        }
        if (isFaulty || bufWrap == null) {
            return;
        }

        // clear() resets the limit to the capacity. The limit must be set before the
        // position, because a position beyond the current limit is rejected.
        bufWrap.clear();
        bufWrap.limit(pos + len);
        bufWrap.position(pos);

        ByteBuffer input = bufWrap;
        if (pendingBytes.length > 0) {
            // prepend the incomplete sequence left over from the previous chunk
            input = ByteBuffer.allocate(pendingBytes.length + len);
            input.put(pendingBytes);
            input.put(bufWrap);
            input.flip();
        }

        if (!decodeAndDiscard(input, false)) {
            isFaulty = true;
            pendingBytes = NO_PENDING_BYTES;
            return;
        }

        // the decoder leaves an incomplete trailing sequence unread, and expects to
        // see those bytes again together with the next input
        if (input.hasRemaining()) {
            pendingBytes = new byte[input.remaining()];
            input.get(pendingBytes);
        } else {
            pendingBytes = NO_PENDING_BYTES;
        }
    }

    /** Signals end of input to the decoder, unless this already happened, an error
     * was found earlier, or no decoding takes place at all. Bytes still pending
     * from the last chunk are an incomplete sequence, and reported as an error. */
    private void finishIfNecessary() {
        if (isFinished || isFaulty || bufWrap == null) {
            return;
        }
        isFinished = true;
        if (!decodeAndDiscard(ByteBuffer.wrap(pendingBytes), true)) {
            isFaulty = true;
        }
        pendingBytes = NO_PENDING_BYTES;
    }

    /** Decodes as much of the input as possible, throwing away the decoded chars.
     *
     * @return false if the decoder reported malformed or unmappable input
     */
    private boolean decodeAndDiscard(ByteBuffer input, boolean endOfInput) {
        CoderResult result;
        do {
            // the output is of no interest, so an overflow just means "go on"
            dummyOutput.clear();
            result = charsetDecoder.decode(input, dummyOutput, endOfInput);
        } while (result.isOverflow());
        return !result.isError();
    }

    /** Returns the supplied mime type, or "text/plain" if the supplied type was an
     * ambiguous binary one but the data is probably text. */
    public String getGuessedMimeType() {
        if (isTextMimeType) {
            return mimeType;
        }
        if (isProbablyText()) {
            return "text/plain";
        }
        return mimeType;
    }

    /** Returns true if a decoding error was found in the data which was read. */
    public boolean isCharsetFaulty() {
        finishIfNecessary();
        return isFaulty;
    }

    /** Returns true if the charset was not supplied (or supplied as us-ascii), so
     * that utf-8 was assumed. */
    public boolean isCharsetGuessed() {
        finishIfNecessary();
        return isGuessed;
    }

    /** Returns the charset of the data, or null if the mime type ruled out text
     * data, or if the charset was guessed and turned out to be faulty. */
    public String getCharset() {
        finishIfNecessary();
        if (!isPossibleTextMimeType || (isGuessed && isFaulty)) {
            return null;
        }
        return charset;
    }

    /** Returns the charset which the data was checked against, no matter if errors
     * were found. This is null if the mime type ruled out text data. */
    public String getMaybeFaultyCharset() {
        return charset;
    }

    /** Returns true if the data which was read is definitely binary.
     *
     * This can happen when either the supplied mimeType indicated a non-ambiguous
     * binary data type, or if we guessed a charset but got errors while decoding.
     */
    public boolean isDefinitelyBinary() {
        finishIfNecessary();
        return !isTextMimeType && (!isPossibleTextMimeType || (isGuessed && isFaulty));
    }

    /** Returns true iff the data which was read is probably (or
     * definitely) text.
     *
     * This is the exact negation of {@link #isDefinitelyBinary}. In particular,
     * data of an ambiguous mime type whose charset was provided with the data (so
     * is not guessed) is reported as text even if decoding errors were found.
     */
    public boolean isProbablyText() {
        finishIfNecessary();
        return isTextMimeType || (isPossibleTextMimeType && (!isGuessed || !isFaulty));
    }
}