From 8714a5eac42449dc3584a6bc14a7404c6a4686b5 Mon Sep 17 00:00:00 2001 From: Vincent Breitmoser Date: Tue, 23 Feb 2016 14:15:47 +0100 Subject: add test for CharsetVerifier --- .../keychain/util/CharsetVerifierTest.java | 141 +++++++++++++++++++++ 1 file changed, 141 insertions(+) create mode 100644 OpenKeychain/src/test/java/org/sufficientlysecure/keychain/util/CharsetVerifierTest.java (limited to 'OpenKeychain/src/test') diff --git a/OpenKeychain/src/test/java/org/sufficientlysecure/keychain/util/CharsetVerifierTest.java b/OpenKeychain/src/test/java/org/sufficientlysecure/keychain/util/CharsetVerifierTest.java new file mode 100644 index 000000000..8ad6a021f --- /dev/null +++ b/OpenKeychain/src/test/java/org/sufficientlysecure/keychain/util/CharsetVerifierTest.java @@ -0,0 +1,141 @@ +package org.sufficientlysecure.keychain.util; + + +import org.junit.Test; +import org.junit.runner.RunWith; +import org.robolectric.RobolectricGradleTestRunner; +import org.robolectric.annotation.Config; +import org.sufficientlysecure.keychain.WorkaroundBuildConfig; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; + +@RunWith(RobolectricGradleTestRunner.class) +@Config(constants = WorkaroundBuildConfig.class, sdk = 21, manifest = "src/main/AndroidManifest.xml") +public class CharsetVerifierTest { + + @Test + public void testTypeImagePngAlwaysBinary() throws Exception { + byte[] bytes = "bla bluh ☭".getBytes("utf-8"); + + CharsetVerifier charsetVerifier = new CharsetVerifier(bytes, "image/png", null); + charsetVerifier.readBytesFromBuffer(0, bytes.length); + + assertTrue("image/png should be marked as definitely binary", charsetVerifier.isDefinitelyBinary()); + assertFalse("image/png should never be marked as, even if it is", charsetVerifier.isProbablyText()); + assertNull("charset should be null", charsetVerifier.getCharset()); + } + + @Test + public void testUtf8SpecifiedButFaulty() throws Exception { + byte[] bytes = "bla bluh ☭".getBytes("utf-8"); + bytes[4] = (byte) 0xc3; + bytes[5] = (byte) 0x28; + + CharsetVerifier charsetVerifier = new CharsetVerifier(bytes, "text/plain", "utf-8"); + charsetVerifier.readBytesFromBuffer(0, bytes.length); + + assertFalse("text/plain should not be marked as binary, even if it is", charsetVerifier.isDefinitelyBinary()); + assertTrue("text/plain should be marked as text, even if it isn't valid", charsetVerifier.isProbablyText()); + assertTrue("encoding contained illegal chars, so it should be marked as faulty", charsetVerifier.isCharsetFaulty()); + assertFalse("charset was specified and should not be marked as guessed", charsetVerifier.isCharsetGuessed()); + assertEquals("charset should be utf-8 since it was given explicitly", "utf-8", charsetVerifier.getCharset()); + assertEquals("charset should be utf-8 since it was given explicitly", "utf-8", charsetVerifier.getMaybeFaultyCharset()); + } + + @Test + public void testUtf8GuessedAndFaulty() throws Exception { + byte[] bytes = "bla bluh ☭".getBytes("utf-8"); + bytes[4] = (byte) 0xc3; + bytes[5] = (byte) 0x28; + + CharsetVerifier charsetVerifier = new CharsetVerifier(bytes, "text/plain", null); + charsetVerifier.readBytesFromBuffer(0, bytes.length); + + assertFalse("text/plain should not be marked as binary, even if it is", charsetVerifier.isDefinitelyBinary()); + assertTrue("text/plain should be marked as text, even if it isn't valid", charsetVerifier.isProbablyText()); + assertTrue("encoding contained illegal chars, so it should be marked as faulty", charsetVerifier.isCharsetFaulty()); + assertTrue("charset was guessed and should be marked as such", charsetVerifier.isCharsetGuessed()); + assertNull("charset should be null since the guess was faulty", charsetVerifier.getCharset()); + assertEquals("maybe-faulty charset should be utf-8", "utf-8", charsetVerifier.getMaybeFaultyCharset()); + } + + @Test + public void testGuessedEncoding() throws Exception { + byte[] bytes = "bla bluh ☭".getBytes("utf-8"); + + CharsetVerifier charsetVerifier = new CharsetVerifier(bytes, "application/octet-stream", null); + charsetVerifier.readBytesFromBuffer(0, bytes.length); + + assertFalse("application/octet-stream with text content is not definitely binary", charsetVerifier.isDefinitelyBinary()); + assertTrue("application/octet-stream with text content should be probably text", charsetVerifier.isProbablyText()); + assertFalse("detected charset should not be faulty", charsetVerifier.isCharsetFaulty()); + assertTrue("charset was guessed and should be marked as such", charsetVerifier.isCharsetGuessed()); + assertEquals("guessed charset is utf-8", "utf-8", charsetVerifier.getCharset()); + } + + @Test + public void testWindows1252Faulty() throws Exception { + byte[] bytes = "bla bluh ☭".getBytes("windows-1252"); + bytes[2] = (byte) 0x9d; + + CharsetVerifier charsetVerifier = new CharsetVerifier(bytes, "text/plain", "windows-1252"); + charsetVerifier.readBytesFromBuffer(0, bytes.length); + + assertFalse("text/plain is never definitely binary", charsetVerifier.isDefinitelyBinary()); + assertTrue("text/plain is always probably text", charsetVerifier.isProbablyText()); + assertTrue("charset contained faulty characters", charsetVerifier.isCharsetFaulty()); + assertFalse("charset was not guessed", charsetVerifier.isCharsetGuessed()); + assertEquals("charset is returned correctly", "windows-1252", charsetVerifier.getCharset()); + } + + @Test + public void testWindows1252Good() throws Exception { + byte[] bytes = "bla bluh ☭".getBytes("windows-1252"); + // this is ‡ in windows-1252 + bytes[2] = (byte) 0x87; + + CharsetVerifier charsetVerifier = new CharsetVerifier(bytes, "text/plain", "windows-1252"); + charsetVerifier.readBytesFromBuffer(0, bytes.length); + + assertFalse("text/plain is never definitely binary", charsetVerifier.isDefinitelyBinary()); + assertTrue("text/plain is always probably text", charsetVerifier.isProbablyText()); + assertFalse("charset contained no faulty characters", charsetVerifier.isCharsetFaulty()); + assertFalse("charset was not guessed", charsetVerifier.isCharsetGuessed()); + assertEquals("charset is returned correctly", "windows-1252", charsetVerifier.getCharset()); + } + + @Test(expected = IllegalStateException.class) + public void testReadAfterGetterShouldCrash() throws Exception { + byte[] bytes = "bla bluh ☭".getBytes("utf-8"); + + CharsetVerifier charsetVerifier = new CharsetVerifier(bytes, "text/plain", null); + charsetVerifier.readBytesFromBuffer(0, bytes.length); + charsetVerifier.isCharsetFaulty(); + + charsetVerifier.readBytesFromBuffer(0, bytes.length); + } + + + @Test + public void testStaggeredInput() throws Exception { + byte[] bytes = "bla bluh ☭".getBytes("utf-8"); + bytes[4] = (byte) 0xc3; + bytes[5] = (byte) 0x28; + + CharsetVerifier charsetVerifier = new CharsetVerifier(bytes, "text/plain", null); + for (int i = 0; i < bytes.length; i++) { + charsetVerifier.readBytesFromBuffer(i, i+1); + } + + assertFalse("text/plain should not be marked as binary, even if it is", charsetVerifier.isDefinitelyBinary()); + assertTrue("text/plain should be marked as text, even if it isn't valid", charsetVerifier.isProbablyText()); + assertTrue("encoding contained illegal chars, so it should be marked as faulty", charsetVerifier.isCharsetFaulty()); + assertTrue("charset was guessed and should be marked as such", charsetVerifier.isCharsetGuessed()); + assertNull("charset should be null since the guess was faulty", charsetVerifier.getCharset()); + assertEquals("maybe-faulty charset should be utf-8", "utf-8", charsetVerifier.getMaybeFaultyCharset()); + } + +} -- cgit v1.2.3