From bdae99c0847556dd8103f172fc1836eb83ae4c4a Mon Sep 17 00:00:00 2001 From: Vincent Breitmoser Date: Fri, 12 Feb 2016 17:08:09 +0100 Subject: mime: try to decode with given charset or utf-8 while file is read --- .../keychain/operations/InputDataOperation.java | 86 ++++++++++++- .../operations/results/OperationResult.java | 4 + OpenKeychain/src/main/res/values/strings.xml | 4 + .../keychain/pgp/InputDataOperationTest.java | 141 ++++++++++++++++++++- 4 files changed, 225 insertions(+), 10 deletions(-) (limited to 'OpenKeychain/src') diff --git a/OpenKeychain/src/main/java/org/sufficientlysecure/keychain/operations/InputDataOperation.java b/OpenKeychain/src/main/java/org/sufficientlysecure/keychain/operations/InputDataOperation.java index bb8d6ad73..80f9d6368 100644 --- a/OpenKeychain/src/main/java/org/sufficientlysecure/keychain/operations/InputDataOperation.java +++ b/OpenKeychain/src/main/java/org/sufficientlysecure/keychain/operations/InputDataOperation.java @@ -23,6 +23,13 @@ import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; +import java.nio.ByteBuffer; +import java.nio.CharBuffer; +import java.nio.charset.Charset; +import java.nio.charset.CharsetDecoder; +import java.nio.charset.CoderResult; +import java.nio.charset.CodingErrorAction; +import java.nio.charset.UnsupportedCharsetException; import java.util.ArrayList; import android.content.ClipDescription; @@ -67,10 +74,15 @@ import org.sufficientlysecure.keychain.service.input.CryptoInputParcel; */ public class InputDataOperation extends BaseOperation { - final private byte[] buf = new byte[256]; + private final byte[] buf = new byte[256]; + private final ByteBuffer bufWrap; + private final CharBuffer dummyOutput; public InputDataOperation(Context context, ProviderHelper providerHelper, Progressable progressable) { super(context, providerHelper, progressable); + + bufWrap = ByteBuffer.wrap(buf); + dummyOutput = CharBuffer.allocate(256); } Uri mSignedDataUri; @@ -326,20 +338,82 @@ public class InputDataOperation extends BaseOperation { throw new IOException("Error getting file for writing!"); } + boolean isPossibleTextMimeType = ClipDescription.compareMimeTypes(mimeType, "application/octet-stream") + || ClipDescription.compareMimeTypes(mimeType, "application/x-download") + || ClipDescription.compareMimeTypes(mimeType, "text/*"); + + // If this data looks like text, we pipe the incoming data into a charset + // decoder, to see if the data is legal for the assumed charset. + String charset; + boolean charsetIsFaulty; + boolean charsetIsGuessed; + CharsetDecoder charsetDecoder = null; + if (isPossibleTextMimeType) { + charset = bd.getCharset(); + // the charset defaults to us-ascii, but we want to default to utf-8 + if (charset == null || "us-ascii".equals(charset)) { + charset = "utf-8"; + charsetIsGuessed = true; + } else { + charsetIsGuessed = false; + } + + try { + charsetDecoder = Charset.forName(charset).newDecoder(); + charsetDecoder.onMalformedInput(CodingErrorAction.REPORT); + charsetDecoder.onUnmappableCharacter(CodingErrorAction.REPORT); + charsetDecoder.reset(); + charsetIsFaulty = false; + } catch (UnsupportedCharsetException e) { + charsetIsFaulty = true; + } + } else { + charsetIsFaulty = true; + charsetIsGuessed = false; + charset = null; + } + int totalLength = 0; do { totalLength += len; out.write(buf, 0, len); + + if (isPossibleTextMimeType && !charsetIsFaulty) { + bufWrap.rewind(); + bufWrap.limit(len); + dummyOutput.rewind(); + CoderResult result = charsetDecoder.decode(bufWrap, dummyOutput, false); + if (result.isError()) { + charsetIsFaulty = true; + } + } } while ((len = is.read(buf)) > 0); - log.add(LogType.MSG_DATA_MIME_LENGTH, 3, Long.toString(totalLength)); + if (!charsetIsFaulty) { + bufWrap.rewind(); + bufWrap.limit(0); + dummyOutput.rewind(); + CoderResult result = charsetDecoder.decode(bufWrap, dummyOutput, true); + if (result.isError()) { + charsetIsFaulty = true; + } + } - String charset = bd.getCharset(); - // the charset defaults to us-ascii, but we want to default to utf-8 - if ("us-ascii".equals(charset)) { - charset = "utf-8"; + if (isPossibleTextMimeType) { + if (charsetIsFaulty && charsetIsGuessed) { + log.add(LogType.MSG_DATA_MIME_CHARSET_UNKNOWN, 3, charset); + charset = null; + } else if (charsetIsFaulty) { + log.add(LogType.MSG_DATA_MIME_CHARSET_FAULTY, 3, charset); + } else if (charsetIsGuessed) { + log.add(LogType.MSG_DATA_MIME_CHARSET_GUESS, 3, charset); + } else { + log.add(LogType.MSG_DATA_MIME_CHARSET, 3, charset); + } } + log.add(LogType.MSG_DATA_MIME_LENGTH, 3, Long.toString(totalLength)); + OpenPgpMetadata metadata = new OpenPgpMetadata(mFilename, mimeType, 0L, totalLength, charset); out.close(); diff --git a/OpenKeychain/src/main/java/org/sufficientlysecure/keychain/operations/results/OperationResult.java b/OpenKeychain/src/main/java/org/sufficientlysecure/keychain/operations/results/OperationResult.java index f9c2db8e8..ec2fddbd0 100644 --- a/OpenKeychain/src/main/java/org/sufficientlysecure/keychain/operations/results/OperationResult.java +++ b/OpenKeychain/src/main/java/org/sufficientlysecure/keychain/operations/results/OperationResult.java @@ -860,6 +860,10 @@ public abstract class OperationResult implements Parcelable { MSG_DATA_MIME_FROM_EXTENSION (LogLevel.DEBUG, R.string.msg_data_mime_from_extension), MSG_DATA_MIME_FILENAME (LogLevel.DEBUG, R.string.msg_data_mime_filename), MSG_DATA_MIME_LENGTH (LogLevel.DEBUG, R.string.msg_data_mime_length), + MSG_DATA_MIME_CHARSET (LogLevel.DEBUG, R.string.msg_data_mime_charset), + MSG_DATA_MIME_CHARSET_FAULTY (LogLevel.WARN, R.string.msg_data_mime_charset_faulty), + MSG_DATA_MIME_CHARSET_GUESS (LogLevel.DEBUG, R.string.msg_data_mime_charset_guess), + MSG_DATA_MIME_CHARSET_UNKNOWN (LogLevel.DEBUG, R.string.msg_data_mime_charset_unknown), MSG_DATA_MIME (LogLevel.DEBUG, R.string.msg_data_mime), MSG_DATA_MIME_OK (LogLevel.INFO, R.string.msg_data_mime_ok), MSG_DATA_MIME_NONE (LogLevel.DEBUG, R.string.msg_data_mime_none), diff --git a/OpenKeychain/src/main/res/values/strings.xml b/OpenKeychain/src/main/res/values/strings.xml index f32eb0269..7d66c06a1 100644 --- a/OpenKeychain/src/main/res/values/strings.xml +++ b/OpenKeychain/src/main/res/values/strings.xml @@ -1401,6 +1401,10 @@ "Filename: '%s'" "Guessing MIME type from extension" "Content-Length: %s" + "Charset is '%s'" + "Charset is '%s', but decoding failed!" + "Charset appears to be '%s'" + "Charset is unknown, or data is not text." "Parsing MIME data structure" "Finished parsing" "No MIME structure found" diff --git a/OpenKeychain/src/test/java/org/sufficientlysecure/keychain/pgp/InputDataOperationTest.java b/OpenKeychain/src/test/java/org/sufficientlysecure/keychain/pgp/InputDataOperationTest.java index 4277649fb..0464bf508 100644 --- a/OpenKeychain/src/test/java/org/sufficientlysecure/keychain/pgp/InputDataOperationTest.java +++ b/OpenKeychain/src/test/java/org/sufficientlysecure/keychain/pgp/InputDataOperationTest.java @@ -20,6 +20,7 @@ package org.sufficientlysecure.keychain.pgp; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; +import java.io.FileNotFoundException; import java.io.PrintStream; import java.security.Security; import java.util.ArrayList; @@ -30,18 +31,20 @@ import android.content.ContentValues; import android.net.Uri; import junit.framework.Assert; +import org.bouncycastle.jce.provider.BouncyCastleProvider; import org.junit.Before; import org.junit.BeforeClass; import org.junit.Test; import org.junit.runner.RunWith; +import org.openintents.openpgp.OpenPgpMetadata; import org.robolectric.RobolectricGradleTestRunner; import org.robolectric.RuntimeEnvironment; import org.robolectric.annotation.Config; import org.robolectric.shadows.ShadowLog; -import org.bouncycastle.jce.provider.BouncyCastleProvider; import org.sufficientlysecure.keychain.WorkaroundBuildConfig; import org.sufficientlysecure.keychain.operations.InputDataOperation; import org.sufficientlysecure.keychain.operations.results.InputDataResult; +import org.sufficientlysecure.keychain.operations.results.OperationResult.LogType; import org.sufficientlysecure.keychain.provider.ProviderHelper; import org.sufficientlysecure.keychain.provider.TemporaryFileProvider; import org.sufficientlysecure.keychain.service.InputDataParcel; @@ -80,20 +83,20 @@ public class InputDataOperationTest { } @Test - public void testMimeDecoding () throws Exception { + public void testMimeDecoding() throws Exception { String mimeMail = "Content-Type: multipart/mixed; boundary=\"=-26BafqxfXmhVNMbYdoIi\"\n" + "\n" + "--=-26BafqxfXmhVNMbYdoIi\n" + - "Content-Type: text/plain\n" + + "Content-Type: text/plain; charset=utf-8\n" + "Content-Transfer-Encoding: quoted-printable\n" + "Content-Disposition: attachment; filename=data.txt\n" + "\n" + "message part 1\n" + "\n" + "--=-26BafqxfXmhVNMbYdoIi\n" + - "Content-Type: text/testvalue\n" + + "Content-Type: text/testvalue; charset=iso-8859-1\n" + "Content-Description: Dummy content description\n" + "\n" + "message part 2.1\n" + @@ -156,7 +159,137 @@ public class InputDataOperationTest { Assert.assertEquals("second part must have expected content", "message part 2.1\nmessage part 2.2\n", new String(outStream2.toByteArray())); + OpenPgpMetadata metadata = result.mMetadata.get(0); + Assert.assertEquals("text/plain", metadata.getMimeType()); + Assert.assertEquals("utf-8", metadata.getCharset()); + + metadata = result.mMetadata.get(1); + Assert.assertEquals("text/testvalue", metadata.getMimeType()); + Assert.assertEquals("iso-8859-1", metadata.getCharset()); + } + + @Test + public void testMimeDecodingExplicitFaultyCharset() throws Exception { + + String mimeContent = "Content-Type: text/plain; charset=utf-8\n" + + "\n" + + "message with binary data in it\n"; + + byte[] data = mimeContent.getBytes(); + data[60] = (byte) 0xc3; + data[61] = (byte) 0x28; + + InputDataResult result = runSimpleDataInputOperation(data); + + // must be successful, no verification, have two output URIs + Assert.assertTrue(result.success()); + Assert.assertNull(result.mDecryptVerifyResult); + + OpenPgpMetadata metadata = result.mMetadata.get(0); + Assert.assertEquals("text/plain", metadata.getMimeType()); + + Assert.assertEquals("charset should be set since it was explicitly specified", + "utf-8", metadata.getCharset()); + Assert.assertTrue("faulty charset should have been detected", + result.getLog().containsType(LogType.MSG_DATA_MIME_CHARSET_FAULTY)); + } + + @Test + public void testMimeDecodingImplicitFaultyCharset() throws Exception { + + String mimeContent = "Content-Type: text/plain\n" + + "\n" + + "message with binary data in it\n"; + + byte[] data = mimeContent.getBytes(); + data[45] = (byte) 0xc3; + data[46] = (byte) 0x28; + + InputDataResult result = runSimpleDataInputOperation(data); + + // must be successful, no verification, have two output URIs + Assert.assertTrue(result.success()); + Assert.assertNull(result.mDecryptVerifyResult); + + OpenPgpMetadata metadata = result.mMetadata.get(0); + Assert.assertEquals("text/plain", metadata.getMimeType()); + + Assert.assertNull("charset was bad so it should not be set", metadata.getCharset()); + Assert.assertTrue("faulty charset should have been detected", + result.getLog().containsType(LogType.MSG_DATA_MIME_CHARSET_UNKNOWN)); + } + + @Test + public void testMimeDecodingImplicitGuessedCharset() throws Exception { + + String mimeContent = "Content-Type: text/plain\n" + + "\n" + + "proper, utf-8 encoded message ☭\n"; + + InputDataResult result = runSimpleDataInputOperation(mimeContent.getBytes()); + + // must be successful, no verification, have two output URIs + Assert.assertTrue(result.success()); + Assert.assertNull(result.mDecryptVerifyResult); + + OpenPgpMetadata metadata = result.mMetadata.get(0); + Assert.assertEquals("text/plain", metadata.getMimeType()); + Assert.assertEquals("charset should be set since it was guessed and not faulty", + "utf-8", metadata.getCharset()); + Assert.assertTrue("charset should have been guessed", + result.getLog().containsType(LogType.MSG_DATA_MIME_CHARSET_GUESS)); + } + + @Test + public void testMimeDecodingOctetStreamGuessedCharset() throws Exception { + + String mimeContent = "Content-Type: application/octet-stream\n" + + "\n" + + "proper, utf-8 encoded message ☭\n"; + + InputDataResult result = runSimpleDataInputOperation(mimeContent.getBytes()); + + // must be successful, no verification, have two output URIs + Assert.assertTrue(result.success()); + Assert.assertNull(result.mDecryptVerifyResult); + + OpenPgpMetadata metadata = result.mMetadata.get(0); + Assert.assertEquals("application/octet-stream", metadata.getMimeType()); + + Assert.assertEquals("charset should be set since it was guessed and not faulty", + "utf-8", metadata.getCharset()); + Assert.assertTrue("charset should have been guessed", + result.getLog().containsType(LogType.MSG_DATA_MIME_CHARSET_GUESS)); + } + + private InputDataResult runSimpleDataInputOperation(byte[] mimeContentBytes) throws FileNotFoundException { + ByteArrayOutputStream outStream1 = new ByteArrayOutputStream(); + ByteArrayOutputStream outStream2 = new ByteArrayOutputStream(); + ContentResolver mockResolver = mock(ContentResolver.class); + + // fake openOutputStream first and second + when(mockResolver.openOutputStream(any(Uri.class), eq("w"))) + .thenReturn(outStream1, outStream2); + + // fake openInputStream + Uri fakeInputUri = Uri.parse("content://fake/1"); + when(mockResolver.openInputStream(fakeInputUri)).thenReturn( + new ByteArrayInputStream(mimeContentBytes)); + + Uri fakeOutputUri1 = Uri.parse("content://fake/out/1"); + when(mockResolver.insert(eq(TemporaryFileProvider.CONTENT_URI), any(ContentValues.class))) + .thenReturn(fakeOutputUri1); + + // application which returns mockresolver + Application spyApplication = spy(RuntimeEnvironment.application); + when(spyApplication.getContentResolver()).thenReturn(mockResolver); + + InputDataOperation op = new InputDataOperation(spyApplication, + new ProviderHelper(RuntimeEnvironment.application), null); + + InputDataParcel input = new InputDataParcel(fakeInputUri, null); + return op.execute(input, new CryptoInputParcel()); } } -- cgit v1.2.3