Merge pull request #1727 from open-keychain/mime-fallback

try to guess if data is text or not
author: Vincent <valodim@mugenguild.com> 2016-02-23 16:01:13 +0100
committer: Vincent <valodim@mugenguild.com> 2016-02-23 16:01:13 +0100
commit: fd24acbf0e54be2be222ca107d97c63f308a1d4a (patch)
tree: df87be1e599f5beb146f31e84deb74a815bdab4c /OpenKeychain/src/main/java/org/sufficientlysecure/keychain/util
parent: c1d268a5ecbb49a56561bd66c9b2967e282b9071 (diff)
parent: 03e695c6509ee6ada0ad6a0a21181277ba298c34 (diff)
download: open-keychain-fd24acbf0e54be2be222ca107d97c63f308a1d4a.tar.gz
open-keychain-fd24acbf0e54be2be222ca107d97c63f308a1d4a.tar.bz2
open-keychain-fd24acbf0e54be2be222ca107d97c63f308a1d4a.zip
1 files changed, 154 insertions, 0 deletions
diff --git a/OpenKeychain/src/main/java/org/sufficientlysecure/keychain/util/CharsetVerifier.java b/OpenKeychain/src/main/java/org/sufficientlysecure/keychain/util/CharsetVerifier.java
new file mode 100644
index 000000000..c03decc89
--- /dev/null
+++ b/OpenKeychain/src/main/java/org/sufficientlysecure/keychain/util/CharsetVerifier.java
@@ -0,0 +1,154 @@
+package org.sufficientlysecure.keychain.util;
+
+
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CoderResult;
+import java.nio.charset.CodingErrorAction;
+
+import android.content.ClipDescription;
+import android.support.annotation.NonNull;
+import android.support.annotation.Nullable;
+
+/**
+ * Guesses whether a stream of data is text encoded in a given charset.
+ *
+ * An object of this class must be initialized with a byte[] buffer. The caller
+ * repeatedly fills that buffer with data and then calls
+ * {@link #readBytesFromBuffer} for the region which was filled. This can be done
+ * any number of times. Once all data has been read, a final status can be read
+ * using the getter methods. After a getter has finalized the status, no more
+ * data may be fed in.
+ *
+ * Bytes of an incomplete multi-byte sequence at the end of one chunk are carried
+ * over to the next call, so characters may be split across chunks at arbitrary
+ * positions.
+ */
+public class CharsetVerifier {
+
+    /** Lower bound for the scratch output size, so the decoder can always make progress. */
+    private static final int MIN_OUTPUT_CHARS = 16;
+    private static final byte[] NO_BYTES = new byte[0];
+
+    private final ByteBuffer bufWrap;
+    private final CharBuffer dummyOutput;
+
+    private final CharsetDecoder charsetDecoder;
+
+    /** Undecoded tail of the previous chunk (an incomplete multi-byte sequence), if any. */
+    private byte[] pendingBytes = NO_BYTES;
+
+    private boolean isFinished;
+    private boolean isFaulty;
+    private boolean isGuessed;
+    private boolean isPossibleTextMimeType;
+    private boolean isTextMimeType;
+    private String charset;
+    private String mimeType;
+
+    /**
+     * @param buf the buffer which the caller fills with data before each call to
+     *            {@link #readBytesFromBuffer}
+     * @param mimeType the mime type declared for the data
+     * @param charset the charset declared for the data, or null if unknown. A null
+     *            or us-ascii charset is replaced by a utf-8 guess. A charset name
+     *            which is malformed or not supported is reported as faulty.
+     */
+    public CharsetVerifier(@NonNull  byte[] buf, @NonNull String mimeType, @Nullable String charset) {
+
+        this.mimeType = mimeType;
+        isTextMimeType = ClipDescription.compareMimeTypes(mimeType, "text/*");
+        isPossibleTextMimeType = isTextMimeType
+                || ClipDescription.compareMimeTypes(mimeType, "application/octet-stream")
+                || ClipDescription.compareMimeTypes(mimeType, "application/x-download");
+        if (!isPossibleTextMimeType) {
+            // unambiguously binary mime type, there is nothing to verify
+            charsetDecoder = null;
+            bufWrap = null;
+            dummyOutput = null;
+            return;
+        }
+
+        bufWrap = ByteBuffer.wrap(buf);
+        dummyOutput = CharBuffer.allocate(Math.max(buf.length, MIN_OUTPUT_CHARS));
+
+        // the charset defaults to us-ascii, but we want to default to utf-8.
+        // charset names are case-insensitive, so compare accordingly.
+        if (charset == null || "us-ascii".equalsIgnoreCase(charset)) {
+            charset = "utf-8";
+            isGuessed = true;
+        } else {
+            isGuessed = false;
+        }
+        this.charset = charset;
+
+        CharsetDecoder decoder;
+        try {
+            decoder = Charset.forName(charset).newDecoder();
+            decoder.onMalformedInput(CodingErrorAction.REPORT);
+            decoder.onUnmappableCharacter(CodingErrorAction.REPORT);
+            decoder.reset();
+        } catch (IllegalArgumentException e) {
+            // IllegalCharsetNameException or UnsupportedCharsetException: the name
+            // comes with the data, so treat it as a faulty charset rather than crash.
+            // All decoding paths return early once isFaulty is set.
+            decoder = null;
+            isFaulty = true;
+        }
+        charsetDecoder = decoder;
+    }
+
+    /**
+     * Checks a region of the buffer passed to the constructor.
+     *
+     * @param pos offset of the first byte to check
+     * @param len number of bytes to check, starting at pos
+     * @throws IllegalStateException if the status was already finalized by one of
+     *             the getter methods
+     * @throws IllegalArgumentException if the region is not within the buffer
+     */
+    public void readBytesFromBuffer(int pos, int len) {
+        if (isFinished) {
+            throw new IllegalStateException("cannot write again after reading charset status!");
+        }
+        if (isFaulty || bufWrap == null) {
+            return;
+        }
+
+        // clear() resets the limit to the capacity, so the window below is never
+        // validated against the limit left behind by a previous call
+        bufWrap.clear();
+        bufWrap.limit(pos + len);
+        bufWrap.position(pos);
+
+        ByteBuffer input = bufWrap;
+        if (pendingBytes.length > 0) {
+            // prepend the incomplete sequence left over from the previous chunk
+            input = ByteBuffer.allocate(pendingBytes.length + len);
+            input.put(pendingBytes).put(bufWrap);
+            input.flip();
+        }
+
+        decodeAll(input, false);
+
+        if (!isFaulty && input.hasRemaining()) {
+            // the decoder leaves an incomplete trailing sequence unconsumed, it
+            // must be preserved for the next invocation
+            pendingBytes = new byte[input.remaining()];
+            input.get(pendingBytes);
+        } else {
+            pendingBytes = NO_BYTES;
+        }
+    }
+
+    /** Finalizes the decoding operation, unless this was done before or an error occurred. */
+    private void finishIfNecessary() {
+        if (isFinished || isFaulty || bufWrap == null) {
+            return;
+        }
+        isFinished = true;
+        // with endOfInput set, a leftover incomplete sequence is reported as malformed
+        decodeAll(ByteBuffer.wrap(pendingBytes), true);
+        pendingBytes = NO_BYTES;
+    }
+
+    /** Decodes input into the scratch output, and sets isFaulty on a decoding error. */
+    private void decodeAll(ByteBuffer input, boolean endOfInput) {
+        CoderResult result;
+        do {
+            // the decoded characters are not needed, the output is simply discarded
+            dummyOutput.clear();
+            result = charsetDecoder.decode(input, dummyOutput, endOfInput);
+        } while (result.isOverflow());
+        if (result.isError()) {
+            isFaulty = true;
+        }
+    }
+
+    /** Returns the supplied mime type, or "text/plain" if it was not a text type
+     * but the data is probably text.
+     */
+    public String getGuessedMimeType() {
+        if (isTextMimeType) {
+            return mimeType;
+        }
+        if (isProbablyText()) {
+            return "text/plain";
+        }
+        return mimeType;
+    }
+
+    /** Returns true if the data could not be decoded using the charset. */
+    public boolean isCharsetFaulty() {
+        finishIfNecessary();
+        return isFaulty;
+    }
+
+    /** Returns true if no charset was supplied (or it was us-ascii), and utf-8 was
+     * assumed instead.
+     */
+    public boolean isCharsetGuessed() {
+        finishIfNecessary();
+        return isGuessed;
+    }
+
+    /** Returns the charset of the data, or null if the mime type is not a possible
+     * text type, or if the charset was guessed and turned out faulty.
+     */
+    public String getCharset() {
+        finishIfNecessary();
+        if (!isPossibleTextMimeType || (isGuessed && isFaulty)) {
+            return null;
+        }
+        return charset;
+    }
+
+    /** Returns the charset which was used for verification, regardless of whether
+     * it is faulty. This is null if the mime type is not a possible text type.
+     */
+    public String getMaybeFaultyCharset() {
+        return charset;
+    }
+
+    /** Returns true if the data which was read is definitely binary.
+     *
+     * This can happen when either the supplied mimeType indicated a non-ambiguous
+     * binary data type, or if we guessed a charset but got errors while decoding.
+     */
+    public boolean isDefinitelyBinary() {
+        finishIfNecessary();
+        return !isTextMimeType && (!isPossibleTextMimeType || (isGuessed && isFaulty));
+    }
+
+    /** Returns true iff the data which was read is probably (or
+     * definitely) text.
+     *
+     * Note that this also returns true if the charset was provided with the data
+     * (so is not guessed) but turned out faulty. Use {@link #isCharsetFaulty} to
+     * detect that case.
+     */
+    public boolean isProbablyText() {
+        finishIfNecessary();
+        return isTextMimeType || isPossibleTextMimeType && (!isGuessed || !isFaulty);
+    }
+}
author	Vincent <valodim@mugenguild.com>	2016-02-23 16:01:13 +0100
committer	Vincent <valodim@mugenguild.com>	2016-02-23 16:01:13 +0100
commit	fd24acbf0e54be2be222ca107d97c63f308a1d4a (patch)
tree	df87be1e599f5beb146f31e84deb74a815bdab4c /OpenKeychain/src/main/java/org/sufficientlysecure/keychain/util
parent	c1d268a5ecbb49a56561bd66c9b2967e282b9071 (diff)
parent	03e695c6509ee6ada0ad6a0a21181277ba298c34 (diff)
download	open-keychain-fd24acbf0e54be2be222ca107d97c63f308a1d4a.tar.gz open-keychain-fd24acbf0e54be2be222ca107d97c63f308a1d4a.tar.bz2 open-keychain-fd24acbf0e54be2be222ca107d97c63f308a1d4a.zip