diff options
author | Vincent <valodim@mugenguild.com> | 2016-02-23 16:01:13 +0100 |
---|---|---|
committer | Vincent <valodim@mugenguild.com> | 2016-02-23 16:01:13 +0100 |
commit | fd24acbf0e54be2be222ca107d97c63f308a1d4a (patch) | |
tree | df87be1e599f5beb146f31e84deb74a815bdab4c /OpenKeychain/src/main/java/org/sufficientlysecure/keychain/util | |
parent | c1d268a5ecbb49a56561bd66c9b2967e282b9071 (diff) | |
parent | 03e695c6509ee6ada0ad6a0a21181277ba298c34 (diff) | |
download | open-keychain-fd24acbf0e54be2be222ca107d97c63f308a1d4a.tar.gz open-keychain-fd24acbf0e54be2be222ca107d97c63f308a1d4a.tar.bz2 open-keychain-fd24acbf0e54be2be222ca107d97c63f308a1d4a.zip |
Merge pull request #1727 from open-keychain/mime-fallback
try to guess if data is text or not
Diffstat (limited to 'OpenKeychain/src/main/java/org/sufficientlysecure/keychain/util')
-rw-r--r-- | OpenKeychain/src/main/java/org/sufficientlysecure/keychain/util/CharsetVerifier.java | 154 |
1 files changed, 154 insertions, 0 deletions
diff --git a/OpenKeychain/src/main/java/org/sufficientlysecure/keychain/util/CharsetVerifier.java b/OpenKeychain/src/main/java/org/sufficientlysecure/keychain/util/CharsetVerifier.java new file mode 100644 index 000000000..c03decc89 --- /dev/null +++ b/OpenKeychain/src/main/java/org/sufficientlysecure/keychain/util/CharsetVerifier.java @@ -0,0 +1,154 @@ +package org.sufficientlysecure.keychain.util; + + +import java.nio.ByteBuffer; +import java.nio.CharBuffer; +import java.nio.charset.Charset; +import java.nio.charset.CharsetDecoder; +import java.nio.charset.CoderResult; +import java.nio.charset.CodingErrorAction; + +import android.content.ClipDescription; +import android.support.annotation.NonNull; +import android.support.annotation.Nullable; + +/** This class can be used to guess whether a stream of data is encoded in a given + * charset or not. + * + * An object of this class must be initialized with a byte[] buffer, which should + * be filled with data, then processed with {@link #readBytesFromBuffer}. This can + * be done any number of times. Once all data has been read, a final status can be + * read using the getter methods. + */ +public class CharsetVerifier { + + private final ByteBuffer bufWrap; + private final CharBuffer dummyOutput; + + private final CharsetDecoder charsetDecoder; + + private boolean isFinished; + private boolean isFaulty; + private boolean isGuessed; + private boolean isPossibleTextMimeType; + private boolean isTextMimeType; + private String charset; + private String mimeType; + + public CharsetVerifier(@NonNull byte[] buf, @NonNull String mimeType, @Nullable String charset) { + + this.mimeType = mimeType; + isTextMimeType = ClipDescription.compareMimeTypes(mimeType, "text/*"); + isPossibleTextMimeType = isTextMimeType + || ClipDescription.compareMimeTypes(mimeType, "application/octet-stream") + || ClipDescription.compareMimeTypes(mimeType, "application/x-download"); + if (!isPossibleTextMimeType) { + charsetDecoder = null; + bufWrap = null; + dummyOutput = null; + return; + } + + bufWrap = ByteBuffer.wrap(buf); + dummyOutput = CharBuffer.allocate(buf.length); + + // the charset defaults to us-ascii, but we want to default to utf-8 + if (charset == null || "us-ascii".equals(charset)) { + charset = "utf-8"; + isGuessed = true; + } else { + isGuessed = false; + } + this.charset = charset; + + charsetDecoder = Charset.forName(charset).newDecoder(); + charsetDecoder.onMalformedInput(CodingErrorAction.REPORT); + charsetDecoder.onUnmappableCharacter(CodingErrorAction.REPORT); + charsetDecoder.reset(); + } + + public void readBytesFromBuffer(int pos, int len) { + if (isFinished) { + throw new IllegalStateException("cannot write again after reading charset status!"); + } + if (isFaulty || bufWrap == null) { + return; + } + bufWrap.rewind(); + bufWrap.position(pos); + bufWrap.limit(len); + dummyOutput.rewind(); + CoderResult result = charsetDecoder.decode(bufWrap, dummyOutput, false); + if (result.isError()) { + isFaulty = true; + } + } + + private void finishIfNecessary() { + if (isFinished || isFaulty || bufWrap == null) { + return; + } + isFinished = true; + bufWrap.rewind(); + bufWrap.limit(0); + dummyOutput.rewind(); + CoderResult result = charsetDecoder.decode(bufWrap, dummyOutput, true); + if (result.isError()) { + isFaulty = true; + } + } + + public String getGuessedMimeType() { + if (isTextMimeType) { + return mimeType; + } + if (isProbablyText()) { + return "text/plain"; + } + return mimeType; + } + + public boolean isCharsetFaulty() { + finishIfNecessary(); + return isFaulty; + } + + public boolean isCharsetGuessed() { + finishIfNecessary(); + return isGuessed; + } + + public String getCharset() { + finishIfNecessary(); + if (!isPossibleTextMimeType || (isGuessed && isFaulty)) { + return null; + } + return charset; + } + + public String getMaybeFaultyCharset() { + return charset; + } + + /** Returns true if the data which was read is definitely binary. + * + * This can happen when either the supplied mimeType indicated a non-ambiguous + * binary data type, or if we guessed a charset but got errors while decoding. + */ + public boolean isDefinitelyBinary() { + finishIfNecessary(); + return !isTextMimeType && (!isPossibleTextMimeType || (isGuessed && isFaulty)); + } + + /** Returns true iff the data which was read is probably (or + * definitely) text. + * + * The corner case where isDefinitelyBinary returns false but isProbablyText + * returns true is where the charset was provided by the data (so is not + * guessed) but is still faulty. + */ + public boolean isProbablyText() { + finishIfNecessary(); + return isTextMimeType || isPossibleTextMimeType && (!isGuessed || !isFaulty); + } +} |