summaryrefslogtreecommitdiffstats
path: root/src/misc/bzlib
diff options
context:
space:
mode:
authorAlan Mishchenko <alanmi@berkeley.edu>2008-07-01 08:01:00 -0700
committerAlan Mishchenko <alanmi@berkeley.edu>2008-07-01 08:01:00 -0700
commit4a9789e58d27ecaba541ba3fcb0565a334dcd54b (patch)
tree1784dcf05dd78b0acddb7d52764f1e3fd6ef2d49 /src/misc/bzlib
parentd0341836ddb38ccc087bdac3df4e8b2ff7fe7a8f (diff)
downloadabc-4a9789e58d27ecaba541ba3fcb0565a334dcd54b.tar.gz
abc-4a9789e58d27ecaba541ba3fcb0565a334dcd54b.tar.bz2
abc-4a9789e58d27ecaba541ba3fcb0565a334dcd54b.zip
Version abc80701
Diffstat (limited to 'src/misc/bzlib')
-rw-r--r--src/misc/bzlib/CHANGES319
-rw-r--r--src/misc/bzlib/LICENSE42
-rw-r--r--src/misc/bzlib/blocksort.c1094
-rw-r--r--src/misc/bzlib/bzip2.txt391
-rw-r--r--src/misc/bzlib/bzlib.c1571
-rw-r--r--src/misc/bzlib/bzlib.h286
-rw-r--r--src/misc/bzlib/bzlib_private.h509
-rw-r--r--src/misc/bzlib/compress.c672
-rw-r--r--src/misc/bzlib/crctable.c104
-rw-r--r--src/misc/bzlib/decompress.c626
-rw-r--r--src/misc/bzlib/huffman.c205
-rw-r--r--src/misc/bzlib/link.txt2
-rw-r--r--src/misc/bzlib/manual.html2540
-rw-r--r--src/misc/bzlib/manual.pdfbin0 -> 288134 bytes
-rw-r--r--src/misc/bzlib/randtable.c84
15 files changed, 8445 insertions, 0 deletions
diff --git a/src/misc/bzlib/CHANGES b/src/misc/bzlib/CHANGES
new file mode 100644
index 00000000..6e4f65e2
--- /dev/null
+++ b/src/misc/bzlib/CHANGES
@@ -0,0 +1,319 @@
+ ------------------------------------------------------------------
+ This file is part of bzip2/libbzip2, a program and library for
+ lossless, block-sorting data compression.
+
+ bzip2/libbzip2 version 1.0.5 of 10 December 2007
+ Copyright (C) 1996-2007 Julian Seward <jseward@bzip.org>
+
+ Please read the WARNING, DISCLAIMER and PATENTS sections in the
+ README file.
+
+ This program is released under the terms of the license contained
+ in the file LICENSE.
+ ------------------------------------------------------------------
+
+
+0.9.0
+~~~~~
+First version.
+
+
+0.9.0a
+~~~~~~
+Removed 'ranlib' from Makefile, since most modern Unix-es
+don't need it, or even know about it.
+
+
+0.9.0b
+~~~~~~
+Fixed a problem with error reporting in bzip2.c. This does not affect
+the library in any way. Problem is: versions 0.9.0 and 0.9.0a (of the
+program proper) compress and decompress correctly, but give misleading
+error messages (internal panics) when an I/O error occurs, instead of
+reporting the problem correctly. This shouldn't give any data loss
+(as far as I can see), but is confusing.
+
+Made the inline declarations disappear for non-GCC compilers.
+
+
+0.9.0c
+~~~~~~
+Fixed some problems in the library pertaining to some boundary cases.
+This makes the library behave more correctly in those situations. The
+fixes apply only to features (calls and parameters) not used by
+bzip2.c, so the non-fixedness of them in previous versions has no
+effect on reliability of bzip2.c.
+
+In bzlib.c:
+ * made zero-length BZ_FLUSH work correctly in bzCompress().
+ * fixed bzWrite/bzRead to ignore zero-length requests.
+ * fixed bzread to correctly handle read requests after EOF.
+ * wrong parameter order in call to bzDecompressInit in
+ bzBuffToBuffDecompress. Fixed.
+
+In compress.c:
+ * changed setting of nGroups in sendMTFValues() so as to
+ do a bit better on small files. This _does_ affect
+ bzip2.c.
+
+
+0.9.5a
+~~~~~~
+Major change: add a fallback sorting algorithm (blocksort.c)
+to give reasonable behaviour even for very repetitive inputs.
+Nuked --repetitive-best and --repetitive-fast since they are
+no longer useful.
+
+Minor changes: mostly a whole bunch of small changes/
+bugfixes in the driver (bzip2.c). Changes pertaining to the
+user interface are:
+
+ allow decompression of symlink'd files to stdout
+ decompress/test files even without .bz2 extension
+ give more accurate error messages for I/O errors
+ when compressing/decompressing to stdout, don't catch control-C
+ read flags from BZIP2 and BZIP environment variables
+ decline to break hard links to a file unless forced with -f
+ allow -c flag even with no filenames
+ preserve file ownerships as far as possible
+ make -s -1 give the expected block size (100k)
+ add a flag -q --quiet to suppress nonessential warnings
+ stop decoding flags after --, so files beginning in - can be handled
+ resolved inconsistent naming: bzcat or bz2cat ?
+ bzip2 --help now returns 0
+
+Programming-level changes are:
+
+ fixed syntax error in GET_LL4 for Borland C++ 5.02
+ let bzBuffToBuffDecompress return BZ_DATA_ERROR{_MAGIC}
+ fix overshoot of mode-string end in bzopen_or_bzdopen
+ wrapped bzlib.h in #ifdef __cplusplus ... extern "C" { ... }
+ close file handles under all error conditions
+ added minor mods so it compiles with DJGPP out of the box
+ fixed Makefile so it doesn't give problems with BSD make
+ fix uninitialised memory reads in dlltest.c
+
+0.9.5b
+~~~~~~
+Open stdin/stdout in binary mode for DJGPP.
+
+0.9.5c
+~~~~~~
+Changed BZ_N_OVERSHOOT to be ... + 2 instead of ... + 1. The + 1
+version could cause the sorted order to be wrong in some extremely
+obscure cases. Also changed setting of quadrant in blocksort.c.
+
+0.9.5d
+~~~~~~
+The only functional change is to make bzlibVersion() in the library
+return the correct string. This has no effect whatsoever on the
+functioning of the bzip2 program or library. Added a couple of casts
+so the library compiles without warnings at level 3 in MS Visual
+Studio 6.0. Included a Y2K statement in the file Y2K_INFO. All other
+changes are minor documentation changes.
+
+1.0
+~~~
+Several minor bugfixes and enhancements:
+
+* Large file support. The library uses 64-bit counters to
+ count the volume of data passing through it. bzip2.c
+ is now compiled with -D_FILE_OFFSET_BITS=64 to get large
+ file support from the C library. -v correctly prints out
+ file sizes greater than 4 gigabytes. All these changes have
+ been made without assuming a 64-bit platform or a C compiler
+ which supports 64-bit ints, so, except for the C library
+ aspect, they are fully portable.
+
+* Decompression robustness. The library/program should be
+ robust to any corruption of compressed data, detecting and
+ handling _all_ corruption, instead of merely relying on
+ the CRCs. What this means is that the program should
+ never crash, given corrupted data, and the library should
+ always return BZ_DATA_ERROR.
+
+* Fixed an obscure race-condition bug only ever observed on
+ Solaris, in which, if you were very unlucky and issued
+ control-C at exactly the wrong time, both input and output
+ files would be deleted.
+
+* Don't run out of file handles on test/decompression when
+ large numbers of files have invalid magic numbers.
+
+* Avoid library namespace pollution. Prefix all exported
+ symbols with BZ2_.
+
+* Minor sorting enhancements from my DCC2000 paper.
+
+* Advance the version number to 1.0, so as to counteract the
+ (false-in-this-case) impression some people have that programs
+ with version numbers less than 1.0 are in some way, experimental,
+ pre-release versions.
+
+* Create an initial Makefile-libbz2_so to build a shared library.
+ Yes, I know I should really use libtool et al ...
+
+* Make the program exit with 2 instead of 0 when decompression
+ fails due to a bad magic number (ie, an invalid bzip2 header).
+ Also exit with 1 (as the manual claims :-) whenever a diagnostic
+ message would have been printed AND the corresponding operation
+ is aborted, for example
+ bzip2: Output file xx already exists.
+ When a diagnostic message is printed but the operation is not
+ aborted, for example
+ bzip2: Can't guess original name for wurble -- using wurble.out
+ then the exit value 0 is returned, unless some other problem is
+ also detected.
+
+ I think it corresponds more closely to what the manual claims now.
+
+
+1.0.1
+~~~~~
+* Modified dlltest.c so it uses the new BZ2_ naming scheme.
+* Modified makefile-msc to fix minor build probs on Win2k.
+* Updated README.COMPILATION.PROBLEMS.
+
+There are no functionality changes or bug fixes relative to version
+1.0.0. This is just a documentation update + a fix for minor Win32
+build problems. For almost everyone, upgrading from 1.0.0 to 1.0.1 is
+utterly pointless. Don't bother.
+
+
+1.0.2
+~~~~~
+A bug fix release, addressing various minor issues which have appeared
+in the 18 or so months since 1.0.1 was released. Most of the fixes
+are to do with file-handling or documentation bugs. To the best of my
+knowledge, there have been no data-loss-causing bugs reported in the
+compression/decompression engine of 1.0.0 or 1.0.1.
+
+Note that this release does not improve the rather crude build system
+for Unix platforms. The general plan here is to autoconfiscate/
+libtoolise 1.0.2 soon after release, and release the result as 1.1.0
+or perhaps 1.2.0. That, however, is still just a plan at this point.
+
+Here are the changes in 1.0.2. Bug-reporters and/or patch-senders in
+parentheses.
+
+* Fix an infinite segfault loop in 1.0.1 when a directory is
+ encountered in -f (force) mode.
+ (Trond Eivind Glomsrod, Nicholas Nethercote, Volker Schmidt)
+
+* Avoid double fclose() of output file on certain I/O error paths.
+ (Solar Designer)
+
+* Don't fail with internal error 1007 when fed a long stream (> 48MB)
+ of byte 251. Also print useful message suggesting that 1007s may be
+ caused by bad memory.
+ (noticed by Juan Pedro Vallejo, fixed by me)
+
+* Fix uninitialised variable silly bug in demo prog dlltest.c.
+ (Jorj Bauer)
+
+* Remove 512-MB limitation on recovered file size for bzip2recover
+ on selected platforms which support 64-bit ints. At the moment
+ all GCC supported platforms, and Win32.
+ (me, Alson van der Meulen)
+
+* Hard-code header byte values, to give correct operation on platforms
+ using EBCDIC as their native character set (IBM's OS/390).
+ (Leland Lucius)
+
+* Copy file access times correctly.
+ (Marty Leisner)
+
+* Add distclean and check targets to Makefile.
+ (Michael Carmack)
+
+* Parameterise use of ar and ranlib in Makefile. Also add $(LDFLAGS).
+ (Rich Ireland, Bo Thorsen)
+
+* Pass -p (create parent dirs as needed) to mkdir during make install.
+ (Jeremy Fusco)
+
+* Dereference symlinks when copying file permissions in -f mode.
+ (Volker Schmidt)
+
+* Majorly simplify implementation of uInt64_qrm10.
+ (Bo Lindbergh)
+
+* Check the input file still exists before deleting the output one,
+ when aborting in cleanUpAndFail().
+ (Joerg Prante, Robert Linden, Matthias Krings)
+
+Also a bunch of patches courtesy of Philippe Troin, the Debian maintainer
+of bzip2:
+
+* Wrapper scripts (with manpages): bzdiff, bzgrep, bzmore.
+
+* Spelling changes and minor enhancements in bzip2.1.
+
+* Avoid race condition between creating the output file and setting its
+ interim permissions safely, by using fopen_output_safely().
+ No changes to bzip2recover since there is no issue with file
+ permissions there.
+
+* do not print senseless report with -v when compressing an empty
+ file.
+
+* bzcat -f works on non-bzip2 files.
+
+* do not try to escape shell meta-characters on unix (the shell takes
+ care of these).
+
+* added --fast and --best aliases for -1 -9 for gzip compatibility.
+
+
+1.0.3 (15 Feb 05)
+~~~~~~~~~~~~~~~~~
+Fixes some minor bugs since the last version, 1.0.2.
+
+* Further robustification against corrupted compressed data.
+ There are currently no known bitstreams which can cause the
+ decompressor to crash, loop or access memory which does not
+ belong to it. If you are using bzip2 or the library to
+ decompress bitstreams from untrusted sources, an upgrade
+ to 1.0.3 is recommended. This fixes CAN-2005-1260.
+
+* The documentation has been converted to XML, from which html
+ and pdf can be derived.
+
+* Various minor bugs in the documentation have been fixed.
+
+* Fixes for various compilation warnings with newer versions of
+ gcc, and on 64-bit platforms.
+
+* The BZ_NO_STDIO cpp symbol was not properly observed in 1.0.2.
+ This has been fixed.
+
+
+1.0.4 (20 Dec 06)
+~~~~~~~~~~~~~~~~~
+Fixes some minor bugs since the last version, 1.0.3.
+
+* Fix file permissions race problem (CAN-2005-0953).
+
+* Avoid possible segfault in BZ2_bzclose. From Coverity's NetBSD
+ scan.
+
+* 'const'/prototype cleanups in the C code.
+
+* Change default install location to /usr/local, and handle multiple
+ 'make install's without error.
+
+* Sanitise file names more carefully in bzgrep. Fixes CAN-2005-0758
+ to the extent that applies to bzgrep.
+
+* Use 'mktemp' rather than 'tempfile' in bzdiff.
+
+* Tighten up a couple of assertions in blocksort.c following automated
+ analysis.
+
+* Fix minor doc/comment bugs.
+
+
+1.0.5 (10 Dec 07)
+~~~~~~~~~~~~~~~~~
+Security fix only. Fixes CERT-FI 20469 as it applies to bzip2.
+
diff --git a/src/misc/bzlib/LICENSE b/src/misc/bzlib/LICENSE
new file mode 100644
index 00000000..f420cffb
--- /dev/null
+++ b/src/misc/bzlib/LICENSE
@@ -0,0 +1,42 @@
+
+--------------------------------------------------------------------------
+
+This program, "bzip2", the associated library "libbzip2", and all
+documentation, are copyright (C) 1996-2007 Julian R Seward. All
+rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+
+1. Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+2. The origin of this software must not be misrepresented; you must
+ not claim that you wrote the original software. If you use this
+ software in a product, an acknowledgment in the product
+ documentation would be appreciated but is not required.
+
+3. Altered source versions must be plainly marked as such, and must
+ not be misrepresented as being the original software.
+
+4. The name of the author may not be used to endorse or promote
+ products derived from this software without specific prior written
+ permission.
+
+THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
+OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
+GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+Julian Seward, jseward@bzip.org
+bzip2/libbzip2 version 1.0.5 of 10 December 2007
+
+--------------------------------------------------------------------------
diff --git a/src/misc/bzlib/blocksort.c b/src/misc/bzlib/blocksort.c
new file mode 100644
index 00000000..95adb5ef
--- /dev/null
+++ b/src/misc/bzlib/blocksort.c
@@ -0,0 +1,1094 @@
+
+/*-------------------------------------------------------------*/
+/*--- Block sorting machinery ---*/
+/*--- blocksort.c ---*/
+/*-------------------------------------------------------------*/
+
+/* ------------------------------------------------------------------
+ This file is part of bzip2/libbzip2, a program and library for
+ lossless, block-sorting data compression.
+
+ bzip2/libbzip2 version 1.0.5 of 10 December 2007
+ Copyright (C) 1996-2007 Julian Seward <jseward@bzip.org>
+
+ Please read the WARNING, DISCLAIMER and PATENTS sections in the
+ README file.
+
+ This program is released under the terms of the license contained
+ in the file LICENSE.
+ ------------------------------------------------------------------ */
+
+
+#include "bzlib_private.h"
+
+/*---------------------------------------------*/
+/*--- Fallback O(N log(N)^2) sorting ---*/
+/*--- algorithm, for repetitive blocks ---*/
+/*---------------------------------------------*/
+
+/*---------------------------------------------*/
+static
+__inline__
+void fallbackSimpleSort ( UInt32* fmap, /* index array; fmap[lo..hi] is reordered in place */
+ UInt32* eclass, /* sort keys: entry v of fmap is ranked by eclass[v] */
+ Int32 lo, /* first position of the range (inclusive) */
+ Int32 hi ) /* last position of the range (inclusive) */
+{ /* Insertion-sorts fmap[lo..hi] into ascending eclass[] order: a stride-4 pass, then a stride-1 pass. */
+ Int32 i, j, tmp;
+ UInt32 ec_tmp;
+
+ if (lo == hi) return; /* one element: nothing to sort */
+
+ if (hi - lo > 3) { /* coarse pass, only when the range holds more than 4 entries */
+ for ( i = hi-4; i >= lo; i-- ) {
+ tmp = fmap[i];
+ ec_tmp = eclass[tmp];
+ for ( j = i+4; j <= hi && ec_tmp > eclass[fmap[j]]; j += 4 ) /* shift smaller keys 4 slots down */
+ fmap[j-4] = fmap[j];
+ fmap[j-4] = tmp; /* drop the saved entry into the gap */
+ }
+ }
+
+ for ( i = hi-1; i >= lo; i-- ) { /* final pass: ordinary insertion sort, right to left */
+ tmp = fmap[i];
+ ec_tmp = eclass[tmp];
+ for ( j = i+1; j <= hi && ec_tmp > eclass[fmap[j]]; j++ ) /* shift smaller keys 1 slot down */
+ fmap[j-1] = fmap[j];
+ fmap[j-1] = tmp;
+ }
+}
+
+
+/*---------------------------------------------*/
+#define fswap(zz1, zz2) /* exchange two lvalues through an Int32 temporary */ \
+ { Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; }
+
+#define fvswap(zzp1, zzp2, zzn) /* swap zzn consecutive fmap[] entries starting at zzp1 and zzp2 */ \
+{ \
+ Int32 yyp1 = (zzp1); \
+ Int32 yyp2 = (zzp2); \
+ Int32 yyn = (zzn); \
+ while (yyn > 0) { \
+ fswap(fmap[yyp1], fmap[yyp2]); \
+ yyp1++; yyp2++; yyn--; \
+ } \
+}
+
+
+#define fmin(a,b) (((a) < (b)) ? (a) : (b)) /* whole expansion parenthesized so it is safe inside larger expressions */
+
+#define fpush(lz,hz) { stackLo[sp] = lz; /* push range [lz, hz] on the explicit stack */ \
+ stackHi[sp] = hz; \
+ sp++; }
+
+#define fpop(lz,hz) { sp--; /* pop the most recently pushed range into lz, hz */ \
+ lz = stackLo[sp]; \
+ hz = stackHi[sp]; }
+
+#define FALLBACK_QSORT_SMALL_THRESH 10 /* ranges with hi - lo below this go to fallbackSimpleSort */
+#define FALLBACK_QSORT_STACK_SIZE 100 /* capacity of the stackLo[] / stackHi[] arrays */
+
+
+static
+void fallbackQSort3 ( UInt32* fmap, /* index array; fmap[loSt..hiSt] is reordered in place */
+ UInt32* eclass, /* sort keys: entry v of fmap is ranked by eclass[v] */
+ Int32 loSt, /* first position of the range to sort (inclusive) */
+ Int32 hiSt ) /* last position of the range to sort (inclusive) */
+{ /* Iterative 3-way quicksort of fmap[loSt..hiSt] by eclass[], using an explicit stack of pending ranges. */
+ Int32 unLo, unHi, ltLo, gtHi, n, m;
+ Int32 sp, lo, hi;
+ UInt32 med, r, r3;
+ Int32 stackLo[FALLBACK_QSORT_STACK_SIZE];
+ Int32 stackHi[FALLBACK_QSORT_STACK_SIZE];
+
+ r = 0; /* state of the pseudo-random pivot selector */
+
+ sp = 0;
+ fpush ( loSt, hiSt );
+
+ while (sp > 0) {
+
+ AssertH ( sp < FALLBACK_QSORT_STACK_SIZE - 1, 1004 ); /* one pop and up to two pushes follow */
+
+ fpop ( lo, hi );
+ if (hi - lo < FALLBACK_QSORT_SMALL_THRESH) { /* small range: finish it with insertion sort */
+ fallbackSimpleSort ( fmap, eclass, lo, hi );
+ continue;
+ }
+
+ /* Random partitioning. Median of 3 sometimes fails to
+ avoid bad cases. Median of 9 seems to help but
+ looks rather expensive. This too seems to work but
+ is cheaper. Guidance for the magic constants
+ 7621 and 32768 is taken from Sedgewick's algorithms
+ book, chapter 35.
+ */
+ r = ((r * 7621) + 1) % 32768;
+ r3 = r % 3; /* picks the first, middle or last entry as pivot */
+ if (r3 == 0) med = eclass[fmap[lo]]; else
+ if (r3 == 1) med = eclass[fmap[(lo+hi)>>1]]; else
+ med = eclass[fmap[hi]];
+
+ unLo = ltLo = lo; /* [lo, ltLo) collects keys equal to med on the left */
+ unHi = gtHi = hi; /* (gtHi, hi] collects keys equal to med on the right */
+
+ while (1) {
+ while (1) { /* scan upward until a key greater than med is found */
+ if (unLo > unHi) break;
+ n = (Int32)eclass[fmap[unLo]] - (Int32)med;
+ if (n == 0) {
+ fswap(fmap[unLo], fmap[ltLo]);
+ ltLo++; unLo++;
+ continue;
+ };
+ if (n > 0) break;
+ unLo++;
+ }
+ while (1) { /* scan downward until a key less than med is found */
+ if (unLo > unHi) break;
+ n = (Int32)eclass[fmap[unHi]] - (Int32)med;
+ if (n == 0) {
+ fswap(fmap[unHi], fmap[gtHi]);
+ gtHi--; unHi--;
+ continue;
+ };
+ if (n < 0) break;
+ unHi--;
+ }
+ if (unLo > unHi) break;
+ fswap(fmap[unLo], fmap[unHi]); unLo++; unHi--; /* exchange the out-of-place pair and keep scanning */
+ }
+
+ AssertD ( unHi == unLo-1, "fallbackQSort3(2)" );
+
+ if (gtHi < ltLo) continue; /* every key equalled med: nothing left to order in this range */
+
+ n = fmin(ltLo-lo, unLo-ltLo); fvswap(lo, unLo-n, n); /* move the left-hand equal keys into the middle */
+ m = fmin(hi-gtHi, gtHi-unHi); fvswap(unLo, hi-m+1, m); /* move the right-hand equal keys into the middle */
+
+ n = lo + unLo - ltLo - 1; /* last position of the less-than part */
+ m = hi - (gtHi - unHi) + 1; /* first position of the greater-than part */
+
+ if (n - lo > hi - m) { /* push the larger part first so the smaller one is popped next */
+ fpush ( lo, n );
+ fpush ( m, hi );
+ } else {
+ fpush ( m, hi );
+ fpush ( lo, n );
+ }
+ }
+}
+
+#undef fmin
+#undef fpush
+#undef fpop
+#undef fswap
+#undef fvswap
+#undef FALLBACK_QSORT_SMALL_THRESH
+#undef FALLBACK_QSORT_STACK_SIZE
+
+
+/*---------------------------------------------*/
+/* Pre:
+ nblock > 0
+ eclass exists for [0 .. nblock-1]
+ ((UChar*)eclass) [0 .. nblock-1] holds block
+ fmap exists for [0 .. nblock-1]
+
+ Post:
+ ((UChar*)eclass) [0 .. nblock-1] holds block
+ All other areas of eclass destroyed
+ fmap [0 .. nblock-1] holds sorted order
+ bhtab [ 0 .. 2+(nblock/32) ] destroyed
+*/
+
+#define SET_BH(zz) bhtab[(zz) >> 5] |= ((UInt32)1 << ((zz) & 31)) /* set bit zz; unsigned 1 avoids signed-overflow UB at bit 31 */
+#define CLEAR_BH(zz) bhtab[(zz) >> 5] &= ~((UInt32)1 << ((zz) & 31)) /* clear bit zz */
+#define ISSET_BH(zz) (bhtab[(zz) >> 5] & ((UInt32)1 << ((zz) & 31))) /* nonzero iff bit zz is set */
+#define WORD_BH(zz) bhtab[(zz) >> 5] /* the whole 32-bit word that holds bit zz */
+#define UNALIGNED_BH(zz) ((zz) & 0x01f) /* nonzero iff zz is not a multiple of 32 */
+
+static
+void fallbackSort ( UInt32* fmap, /* out: fmap[0..nblock-1] receives the sorted order */
+ UInt32* eclass, /* in: its first nblock bytes hold the block; also used as key workspace */
+ UInt32* bhtab, /* workspace: bit vector of bucket-header flags */
+ Int32 nblock, /* number of bytes in the block */
+ Int32 verb ) /* verbosity; progress is printed when >= 4 */
+{ /* Sorts the block by bucket refinement, doubling the compared depth H on every round. */
+ Int32 ftab[257];
+ Int32 ftabCopy[256];
+ Int32 H, i, j, k, l, r, cc, cc1;
+ Int32 nNotDone;
+ Int32 nBhtab;
+ UChar* eclass8 = (UChar*)eclass; /* byte view of eclass: the block itself */
+
+ /*--
+ Initial 1-char radix sort to generate
+ initial fmap and initial BH bits.
+ --*/
+ if (verb >= 4)
+ VPrintf0 ( " bucket sorting ...\n" );
+ for (i = 0; i < 257; i++) ftab[i] = 0;
+ for (i = 0; i < nblock; i++) ftab[eclass8[i]]++; /* count occurrences of each byte value */
+ for (i = 0; i < 256; i++) ftabCopy[i] = ftab[i]; /* keep the raw counts for the reconstruction at the end */
+ for (i = 1; i < 257; i++) ftab[i] += ftab[i-1]; /* prefix sums: ftab[j] is now the end of bucket j */
+
+ for (i = 0; i < nblock; i++) { /* place each position in its byte's bucket, filling from the end */
+ j = eclass8[i];
+ k = ftab[j] - 1;
+ ftab[j] = k;
+ fmap[k] = i;
+ }
+
+ nBhtab = 2 + (nblock / 32);
+ for (i = 0; i < nBhtab; i++) bhtab[i] = 0;
+ for (i = 0; i < 256; i++) SET_BH(ftab[i]); /* ftab[i] is now the start of bucket i: flag it as a header */
+
+ /*--
+ Inductively refine the buckets. Kind-of an
+ "exponential radix sort" (!), inspired by the
+ Manber-Myers suffix array construction algorithm.
+ --*/
+
+ /*-- set sentinel bits for block-end detection --*/
+ for (i = 0; i < 32; i++) {
+ SET_BH(nblock + 2*i);
+ CLEAR_BH(nblock + 2*i + 1);
+ }
+
+ /*-- the log(N) loop --*/
+ H = 1;
+ while (1) {
+
+ if (verb >= 4)
+ VPrintf1 ( " depth %6d has ", H );
+
+ j = 0;
+ for (i = 0; i < nblock; i++) {
+ if (ISSET_BH(i)) j = i; /* j tracks the start of the bucket containing slot i */
+ k = fmap[i] - H; if (k < 0) k += nblock; /* position H bytes earlier, wrapping around the block */
+ eclass[k] = j; /* key of position k = bucket of the position H bytes after it */
+ }
+
+ nNotDone = 0; /* counts entries that still sit in multi-element buckets */
+ r = -1;
+ while (1) {
+
+ /*-- find the next non-singleton bucket --*/
+ k = r + 1;
+ while (ISSET_BH(k) && UNALIGNED_BH(k)) k++; /* skip set bits one at a time up to a word boundary */
+ if (ISSET_BH(k)) {
+ while (WORD_BH(k) == 0xffffffff) k += 32; /* then skip all-ones words 32 bits at a time */
+ while (ISSET_BH(k)) k++;
+ }
+ l = k - 1; /* last header bit before a clear bit: start of the bucket */
+ if (l >= nblock) break;
+ while (!ISSET_BH(k) && UNALIGNED_BH(k)) k++; /* same scan, now over clear bits */
+ if (!ISSET_BH(k)) {
+ while (WORD_BH(k) == 0x00000000) k += 32;
+ while (!ISSET_BH(k)) k++;
+ }
+ r = k - 1; /* slot just before the next header: end of the bucket */
+ if (r >= nblock) break;
+
+ /*-- now [l, r] bracket current bucket --*/
+ if (r > l) {
+ nNotDone += (r - l + 1);
+ fallbackQSort3 ( fmap, eclass, l, r );
+
+ /*-- scan bucket and generate header bits-- */
+ cc = -1;
+ for (i = l; i <= r; i++) {
+ cc1 = eclass[fmap[i]];
+ if (cc != cc1) { SET_BH(i); cc = cc1; }; /* key changed: a new sub-bucket starts at slot i */
+ }
+ }
+ }
+
+ if (verb >= 4)
+ VPrintf1 ( "%6d unresolved strings\n", nNotDone );
+
+ H *= 2;
+ if (H > nblock || nNotDone == 0) break; /* stop once depth exceeds the block or no multi-element bucket was found */
+ }
+
+ /*--
+ Reconstruct the original block in
+ eclass8 [0 .. nblock-1], since the
+ previous phase destroyed it.
+ --*/
+ if (verb >= 4)
+ VPrintf0 ( " reconstructing block ...\n" );
+ j = 0;
+ for (i = 0; i < nblock; i++) {
+ while (ftabCopy[j] == 0) j++; /* advance to the next byte value that still has occurrences left */
+ ftabCopy[j]--;
+ eclass8[fmap[i]] = (UChar)j; /* sorted slot i starts with byte j */
+ }
+ AssertH ( j < 256, 1005 );
+}
+
+#undef SET_BH
+#undef CLEAR_BH
+#undef ISSET_BH
+#undef WORD_BH
+#undef UNALIGNED_BH
+
+
+/*---------------------------------------------*/
+/*--- The main, O(N^2 log(N)) sorting ---*/
+/*--- algorithm. Faster for "normal" ---*/
+/*--- non-repetitive blocks. ---*/
+/*---------------------------------------------*/
+
+/*---------------------------------------------*/
+static
+__inline__
+Bool mainGtU ( UInt32 i1, /* start offset of the first string in block[] */
+ UInt32 i2, /* start offset of the second string in block[] */
+ UChar* block, /* bytes being compared */
+ UInt16* quadrant, /* secondary keys, consulted after the first 12 bytes */
+ UInt32 nblock, /* block length; offsets wrap modulo this in the loop */
+ Int32* budget ) /* work counter, decremented once per 8-byte loop round */
+{ /* Returns nonzero iff the string at i1 compares greater than the string at i2. */
+ Int32 k;
+ UChar c1, c2;
+ UInt16 s1, s2;
+
+ AssertD ( i1 != i2, "mainGtU" );
+ /* 1 */
+ c1 = block[i1]; c2 = block[i2]; /* first 12 positions: bytes only, no wrap check (NOTE(review): presumably relies on the overshoot copy past nblock - confirm) */
+ if (c1 != c2) return (c1 > c2);
+ i1++; i2++;
+ /* 2 */
+ c1 = block[i1]; c2 = block[i2];
+ if (c1 != c2) return (c1 > c2);
+ i1++; i2++;
+ /* 3 */
+ c1 = block[i1]; c2 = block[i2];
+ if (c1 != c2) return (c1 > c2);
+ i1++; i2++;
+ /* 4 */
+ c1 = block[i1]; c2 = block[i2];
+ if (c1 != c2) return (c1 > c2);
+ i1++; i2++;
+ /* 5 */
+ c1 = block[i1]; c2 = block[i2];
+ if (c1 != c2) return (c1 > c2);
+ i1++; i2++;
+ /* 6 */
+ c1 = block[i1]; c2 = block[i2];
+ if (c1 != c2) return (c1 > c2);
+ i1++; i2++;
+ /* 7 */
+ c1 = block[i1]; c2 = block[i2];
+ if (c1 != c2) return (c1 > c2);
+ i1++; i2++;
+ /* 8 */
+ c1 = block[i1]; c2 = block[i2];
+ if (c1 != c2) return (c1 > c2);
+ i1++; i2++;
+ /* 9 */
+ c1 = block[i1]; c2 = block[i2];
+ if (c1 != c2) return (c1 > c2);
+ i1++; i2++;
+ /* 10 */
+ c1 = block[i1]; c2 = block[i2];
+ if (c1 != c2) return (c1 > c2);
+ i1++; i2++;
+ /* 11 */
+ c1 = block[i1]; c2 = block[i2];
+ if (c1 != c2) return (c1 > c2);
+ i1++; i2++;
+ /* 12 */
+ c1 = block[i1]; c2 = block[i2];
+ if (c1 != c2) return (c1 > c2);
+ i1++; i2++;
+
+ k = nblock + 8; /* remaining positions to examine, consumed 8 per loop round */
+
+ do {
+ /* 1 */
+ c1 = block[i1]; c2 = block[i2]; /* from here on each position compares the byte, then the quadrant value */
+ if (c1 != c2) return (c1 > c2);
+ s1 = quadrant[i1]; s2 = quadrant[i2];
+ if (s1 != s2) return (s1 > s2);
+ i1++; i2++;
+ /* 2 */
+ c1 = block[i1]; c2 = block[i2];
+ if (c1 != c2) return (c1 > c2);
+ s1 = quadrant[i1]; s2 = quadrant[i2];
+ if (s1 != s2) return (s1 > s2);
+ i1++; i2++;
+ /* 3 */
+ c1 = block[i1]; c2 = block[i2];
+ if (c1 != c2) return (c1 > c2);
+ s1 = quadrant[i1]; s2 = quadrant[i2];
+ if (s1 != s2) return (s1 > s2);
+ i1++; i2++;
+ /* 4 */
+ c1 = block[i1]; c2 = block[i2];
+ if (c1 != c2) return (c1 > c2);
+ s1 = quadrant[i1]; s2 = quadrant[i2];
+ if (s1 != s2) return (s1 > s2);
+ i1++; i2++;
+ /* 5 */
+ c1 = block[i1]; c2 = block[i2];
+ if (c1 != c2) return (c1 > c2);
+ s1 = quadrant[i1]; s2 = quadrant[i2];
+ if (s1 != s2) return (s1 > s2);
+ i1++; i2++;
+ /* 6 */
+ c1 = block[i1]; c2 = block[i2];
+ if (c1 != c2) return (c1 > c2);
+ s1 = quadrant[i1]; s2 = quadrant[i2];
+ if (s1 != s2) return (s1 > s2);
+ i1++; i2++;
+ /* 7 */
+ c1 = block[i1]; c2 = block[i2];
+ if (c1 != c2) return (c1 > c2);
+ s1 = quadrant[i1]; s2 = quadrant[i2];
+ if (s1 != s2) return (s1 > s2);
+ i1++; i2++;
+ /* 8 */
+ c1 = block[i1]; c2 = block[i2];
+ if (c1 != c2) return (c1 > c2);
+ s1 = quadrant[i1]; s2 = quadrant[i2];
+ if (s1 != s2) return (s1 > s2);
+ i1++; i2++;
+
+ if (i1 >= nblock) i1 -= nblock; /* wrap both offsets back into the block once per round */
+ if (i2 >= nblock) i2 -= nblock;
+
+ k -= 8;
+ (*budget)--; /* charge one unit of work for this round */
+ }
+ while (k >= 0);
+
+ return False; /* no difference found within the examined span */
+}
+
+
+/*---------------------------------------------*/
+/*--
+ Knuth's increments seem to work better
+ than Incerpi-Sedgewick here. Possibly
+ because the number of elems to sort is
+ usually small, typically <= 20.
+--*/
+static
+Int32 incs[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280, /* gap sequence h -> 3h+1 used by mainSimpleSort */
+ 9841, 29524, 88573, 265720,
+ 797161, 2391484 };
+
+static
+void mainSimpleSort ( UInt32* ptr, /* index array; ptr[lo..hi] is reordered in place */
+ UChar* block, /* passed through to mainGtU */
+ UInt16* quadrant, /* passed through to mainGtU */
+ Int32 nblock, /* passed through to mainGtU */
+ Int32 lo, /* first position of the range (inclusive) */
+ Int32 hi, /* last position of the range (inclusive) */
+ Int32 d, /* offset added to every ptr[] entry before comparing */
+ Int32* budget ) /* work counter; sorting is abandoned once it goes negative */
+{ /* Shell sort of ptr[lo..hi] using mainGtU as the comparison, with the inner step unrolled three times. */
+ Int32 i, j, h, bigN, hp;
+ UInt32 v;
+
+ bigN = hi - lo + 1;
+ if (bigN < 2) return; /* zero or one entry: already sorted */
+
+ hp = 0;
+ while (incs[hp] < bigN) hp++;
+ hp--; /* incs[hp] is now the largest gap smaller than bigN */
+
+ for (; hp >= 0; hp--) {
+ h = incs[hp];
+
+ i = lo + h;
+ while (True) {
+
+ /*-- copy 1 --*/
+ if (i > hi) break;
+ v = ptr[i];
+ j = i;
+ while ( mainGtU (
+ ptr[j-h]+d, v+d, block, quadrant, nblock, budget
+ ) ) {
+ ptr[j] = ptr[j-h];
+ j = j - h;
+ if (j <= (lo + h - 1)) break; /* no entry h slots further down inside the range */
+ }
+ ptr[j] = v;
+ i++;
+
+ /*-- copy 2 --*/
+ if (i > hi) break;
+ v = ptr[i];
+ j = i;
+ while ( mainGtU (
+ ptr[j-h]+d, v+d, block, quadrant, nblock, budget
+ ) ) {
+ ptr[j] = ptr[j-h];
+ j = j - h;
+ if (j <= (lo + h - 1)) break;
+ }
+ ptr[j] = v;
+ i++;
+
+ /*-- copy 3 --*/
+ if (i > hi) break;
+ v = ptr[i];
+ j = i;
+ while ( mainGtU (
+ ptr[j-h]+d, v+d, block, quadrant, nblock, budget
+ ) ) {
+ ptr[j] = ptr[j-h];
+ j = j - h;
+ if (j <= (lo + h - 1)) break;
+ }
+ ptr[j] = v;
+ i++;
+
+ if (*budget < 0) return; /* budget is checked once per three insertions */
+ }
+ }
+}
+
+
+/*---------------------------------------------*/
+/*--
+ The following is an implementation of
+ an elegant 3-way quicksort for strings,
+ described in a paper "Fast Algorithms for
+ Sorting and Searching Strings", by Robert
+ Sedgewick and Jon L. Bentley.
+--*/
+
+#define mswap(zz1, zz2) /* exchange two lvalues through an Int32 temporary */ \
+ { Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; }
+
+#define mvswap(zzp1, zzp2, zzn) /* swap zzn consecutive ptr[] entries starting at zzp1 and zzp2 */ \
+{ \
+ Int32 yyp1 = (zzp1); \
+ Int32 yyp2 = (zzp2); \
+ Int32 yyn = (zzn); \
+ while (yyn > 0) { \
+ mswap(ptr[yyp1], ptr[yyp2]); \
+ yyp1++; yyp2++; yyn--; \
+ } \
+}
+
+static
+__inline__
+UChar mmed3 ( UChar a, UChar b, UChar c ) /* returns the median of the three byte values */
+{
+ UChar t;
+ if (a > b) { t = a; a = b; b = t; }; /* order the first pair so that a <= b */
+ if (b > c) { /* c is below b, so the median is the larger of a and c */
+ b = c;
+ if (a > b) b = a;
+ }
+ return b;
+}
+
+#define mmin(a,b) (((a) < (b)) ? (a) : (b)) /* whole expansion parenthesized so it is safe inside larger expressions */
+
+#define mpush(lz,hz,dz) { stackLo[sp] = lz; /* push range [lz, hz] with depth dz on the explicit stack */ \
+ stackHi[sp] = hz; \
+ stackD [sp] = dz; \
+ sp++; }
+
+#define mpop(lz,hz,dz) { sp--; /* pop the most recently pushed range and depth */ \
+ lz = stackLo[sp]; \
+ hz = stackHi[sp]; \
+ dz = stackD [sp]; }
+
+
+#define mnextsize(az) (nextHi[az]-nextLo[az]) /* hi - lo of candidate subrange az */
+
+#define mnextswap(az,bz) /* exchange candidate subranges az and bz (lo, hi and depth) */ \
+ { Int32 tz; \
+ tz = nextLo[az]; nextLo[az] = nextLo[bz]; nextLo[bz] = tz; \
+ tz = nextHi[az]; nextHi[az] = nextHi[bz]; nextHi[bz] = tz; \
+ tz = nextD [az]; nextD [az] = nextD [bz]; nextD [bz] = tz; }
+
+
+#define MAIN_QSORT_SMALL_THRESH 20 /* ranges with hi - lo below this go to mainSimpleSort */
+#define MAIN_QSORT_DEPTH_THRESH (BZ_N_RADIX + BZ_N_QSORT) /* depths above this also go to mainSimpleSort */
+#define MAIN_QSORT_STACK_SIZE 100 /* capacity of the stackLo[] / stackHi[] / stackD[] arrays */
+
+static
+void mainQSort3 ( UInt32* ptr, /* index array; ptr[loSt..hiSt] is reordered in place */
+ UChar* block, /* bytes the entries of ptr[] point into */
+ UInt16* quadrant, /* passed through to mainSimpleSort */
+ Int32 nblock, /* passed through to mainSimpleSort */
+ Int32 loSt, /* first position of the range to sort (inclusive) */
+ Int32 hiSt, /* last position of the range to sort (inclusive) */
+ Int32 dSt, /* byte offset (depth) at which comparison starts */
+ Int32* budget ) /* work counter; the sort returns early once it goes negative */
+{ /* Iterative 3-way string quicksort on the byte at depth d, using an explicit stack of (lo, hi, d). */
+ Int32 unLo, unHi, ltLo, gtHi, n, m, med;
+ Int32 sp, lo, hi, d;
+
+ Int32 stackLo[MAIN_QSORT_STACK_SIZE];
+ Int32 stackHi[MAIN_QSORT_STACK_SIZE];
+ Int32 stackD [MAIN_QSORT_STACK_SIZE];
+
+ Int32 nextLo[3];
+ Int32 nextHi[3];
+ Int32 nextD [3];
+
+ sp = 0;
+ mpush ( loSt, hiSt, dSt );
+
+ while (sp > 0) {
+
+ AssertH ( sp < MAIN_QSORT_STACK_SIZE - 2, 1001 ); /* one pop and up to three pushes follow */
+
+ mpop ( lo, hi, d );
+ if (hi - lo < MAIN_QSORT_SMALL_THRESH ||
+ d > MAIN_QSORT_DEPTH_THRESH) { /* small or deep range: hand over to the Shell sort */
+ mainSimpleSort ( ptr, block, quadrant, nblock, lo, hi, d, budget );
+ if (*budget < 0) return;
+ continue;
+ }
+
+ med = (Int32) /* pivot: median of the depth-d bytes at the first, last and middle entries */
+ mmed3 ( block[ptr[ lo ]+d],
+ block[ptr[ hi ]+d],
+ block[ptr[ (lo+hi)>>1 ]+d] );
+
+ unLo = ltLo = lo; /* [lo, ltLo) collects bytes equal to med on the left */
+ unHi = gtHi = hi; /* (gtHi, hi] collects bytes equal to med on the right */
+
+ while (True) {
+ while (True) { /* scan upward until a byte greater than med is found */
+ if (unLo > unHi) break;
+ n = ((Int32)block[ptr[unLo]+d]) - med;
+ if (n == 0) {
+ mswap(ptr[unLo], ptr[ltLo]);
+ ltLo++; unLo++; continue;
+ };
+ if (n > 0) break;
+ unLo++;
+ }
+ while (True) { /* scan downward until a byte less than med is found */
+ if (unLo > unHi) break;
+ n = ((Int32)block[ptr[unHi]+d]) - med;
+ if (n == 0) {
+ mswap(ptr[unHi], ptr[gtHi]);
+ gtHi--; unHi--; continue;
+ };
+ if (n < 0) break;
+ unHi--;
+ }
+ if (unLo > unHi) break;
+ mswap(ptr[unLo], ptr[unHi]); unLo++; unHi--; /* exchange the out-of-place pair and keep scanning */
+ }
+
+ AssertD ( unHi == unLo-1, "mainQSort3(2)" );
+
+ if (gtHi < ltLo) { /* every byte at depth d equalled med: retry the same range one byte deeper */
+ mpush(lo, hi, d+1 );
+ continue;
+ }
+
+ n = mmin(ltLo-lo, unLo-ltLo); mvswap(lo, unLo-n, n); /* move the left-hand equal entries into the middle */
+ m = mmin(hi-gtHi, gtHi-unHi); mvswap(unLo, hi-m+1, m); /* move the right-hand equal entries into the middle */
+
+ n = lo + unLo - ltLo - 1; /* last position of the less-than part */
+ m = hi - (gtHi - unHi) + 1; /* first position of the greater-than part */
+
+ nextLo[0] = lo; nextHi[0] = n; nextD[0] = d; /* less-than part, same depth */
+ nextLo[1] = m; nextHi[1] = hi; nextD[1] = d; /* greater-than part, same depth */
+ nextLo[2] = n+1; nextHi[2] = m-1; nextD[2] = d+1; /* equal part, one byte deeper */
+
+ if (mnextsize(0) < mnextsize(1)) mnextswap(0,1); /* three compare-swaps order the parts largest first */
+ if (mnextsize(1) < mnextsize(2)) mnextswap(1,2);
+ if (mnextsize(0) < mnextsize(1)) mnextswap(0,1);
+
+ AssertD (mnextsize(0) >= mnextsize(1), "mainQSort3(8)" );
+ AssertD (mnextsize(1) >= mnextsize(2), "mainQSort3(9)" );
+
+ mpush (nextLo[0], nextHi[0], nextD[0]); /* largest pushed first, so the smallest is popped next */
+ mpush (nextLo[1], nextHi[1], nextD[1]);
+ mpush (nextLo[2], nextHi[2], nextD[2]);
+ }
+}
+
+#undef mswap
+#undef mvswap
+#undef mpush
+#undef mpop
+#undef mmin
+#undef mnextsize
+#undef mnextswap
+#undef MAIN_QSORT_SMALL_THRESH
+#undef MAIN_QSORT_DEPTH_THRESH
+#undef MAIN_QSORT_STACK_SIZE
+
+
+/*---------------------------------------------*/
+/* Pre:
+ nblock > N_OVERSHOOT
+ block32 exists for [0 .. nblock-1 +N_OVERSHOOT]
+ ((UChar*)block32) [0 .. nblock-1] holds block
+ ptr exists for [0 .. nblock-1]
+
+ Post:
+ ((UChar*)block32) [0 .. nblock-1] holds block
+ All other areas of block32 destroyed
+ ftab [0 .. 65536 ] destroyed
+ ptr [0 .. nblock-1] holds sorted order
+ if (*budget < 0), sorting was abandoned
+*/
+
+#define BIGFREQ(b) (ftab[((b)+1) << 8] - ftab[(b) << 8]) /* span of ftab[] between first bytes b and b+1: size of big bucket b */
+#define SETMASK (1 << 21) /* bit 21 used as a flag bit */
+#define CLEARMASK (~(SETMASK)) /* every bit except bit 21 */
+
+static
+void mainSort ( UInt32* ptr,
+ UChar* block,
+ UInt16* quadrant,
+ UInt32* ftab,
+ Int32 nblock,
+ Int32 verb,
+ Int32* budget )
+{
+ Int32 i, j, k, ss, sb;
+ Int32 runningOrder[256];
+ Bool bigDone[256];
+ Int32 copyStart[256];
+ Int32 copyEnd [256];
+ UChar c1;
+ Int32 numQSorted;
+ UInt16 s;
+ if (verb >= 4) VPrintf0 ( " main sort initialise ...\n" );
+
+ /*-- set up the 2-byte frequency table --*/
+ for (i = 65536; i >= 0; i--) ftab[i] = 0;
+
+ j = block[0] << 8;
+ i = nblock-1;
+ for (; i >= 3; i -= 4) {
+ quadrant[i] = 0;
+ j = (j >> 8) | ( ((UInt16)block[i]) << 8);
+ ftab[j]++;
+ quadrant[i-1] = 0;
+ j = (j >> 8) | ( ((UInt16)block[i-1]) << 8);
+ ftab[j]++;
+ quadrant[i-2] = 0;
+ j = (j >> 8) | ( ((UInt16)block[i-2]) << 8);
+ ftab[j]++;
+ quadrant[i-3] = 0;
+ j = (j >> 8) | ( ((UInt16)block[i-3]) << 8);
+ ftab[j]++;
+ }
+ for (; i >= 0; i--) {
+ quadrant[i] = 0;
+ j = (j >> 8) | ( ((UInt16)block[i]) << 8);
+ ftab[j]++;
+ }
+
+ /*-- (emphasises close relationship of block & quadrant) --*/
+ for (i = 0; i < BZ_N_OVERSHOOT; i++) {
+ block [nblock+i] = block[i];
+ quadrant[nblock+i] = 0;
+ }
+
+ if (verb >= 4) VPrintf0 ( " bucket sorting ...\n" );
+
+ /*-- Complete the initial radix sort --*/
+ for (i = 1; i <= 65536; i++) ftab[i] += ftab[i-1];
+
+ s = block[0] << 8;
+ i = nblock-1;
+ for (; i >= 3; i -= 4) {
+ s = (s >> 8) | (block[i] << 8);
+ j = ftab[s] -1;
+ ftab[s] = j;
+ ptr[j] = i;
+ s = (s >> 8) | (block[i-1] << 8);
+ j = ftab[s] -1;
+ ftab[s] = j;
+ ptr[j] = i-1;
+ s = (s >> 8) | (block[i-2] << 8);
+ j = ftab[s] -1;
+ ftab[s] = j;
+ ptr[j] = i-2;
+ s = (s >> 8) | (block[i-3] << 8);
+ j = ftab[s] -1;
+ ftab[s] = j;
+ ptr[j] = i-3;
+ }
+ for (; i >= 0; i--) {
+ s = (s >> 8) | (block[i] << 8);
+ j = ftab[s] -1;
+ ftab[s] = j;
+ ptr[j] = i;
+ }
+
+ /*--
+ Now ftab contains the first loc of every small bucket.
+ Calculate the running order, from smallest to largest
+ big bucket.
+ --*/
+ for (i = 0; i <= 255; i++) {
+ bigDone [i] = False;
+ runningOrder[i] = i;
+ }
+
+ {
+ Int32 vv;
+ Int32 h = 1;
+ do h = 3 * h + 1; while (h <= 256);
+ do {
+ h = h / 3;
+ for (i = h; i <= 255; i++) {
+ vv = runningOrder[i];
+ j = i;
+ while ( BIGFREQ(runningOrder[j-h]) > BIGFREQ(vv) ) {
+ runningOrder[j] = runningOrder[j-h];
+ j = j - h;
+ if (j <= (h - 1)) goto zero;
+ }
+ zero:
+ runningOrder[j] = vv;
+ }
+ } while (h != 1);
+ }
+
+ /*--
+ The main sorting loop.
+ --*/
+
+ numQSorted = 0;
+
+ for (i = 0; i <= 255; i++) {
+
+ /*--
+ Process big buckets, starting with the least full.
+ Basically this is a 3-step process in which we call
+ mainQSort3 to sort the small buckets [ss, j], but
+ also make a big effort to avoid the calls if we can.
+ --*/
+ ss = runningOrder[i];
+
+ /*--
+ Step 1:
+ Complete the big bucket [ss] by quicksorting
+ any unsorted small buckets [ss, j], for j != ss.
+ Hopefully previous pointer-scanning phases have already
+ completed many of the small buckets [ss, j], so
+ we don't have to sort them at all.
+ --*/
+ for (j = 0; j <= 255; j++) {
+ if (j != ss) {
+ sb = (ss << 8) + j;
+ if ( ! (ftab[sb] & SETMASK) ) {
+ Int32 lo = ftab[sb] & CLEARMASK;
+ Int32 hi = (ftab[sb+1] & CLEARMASK) - 1;
+ if (hi > lo) {
+ if (verb >= 4)
+ VPrintf4 ( " qsort [0x%x, 0x%x] "
+ "done %d this %d\n",
+ ss, j, numQSorted, hi - lo + 1 );
+ mainQSort3 (
+ ptr, block, quadrant, nblock,
+ lo, hi, BZ_N_RADIX, budget
+ );
+ numQSorted += (hi - lo + 1);
+ if (*budget < 0) return;
+ }
+ }
+ ftab[sb] |= SETMASK;
+ }
+ }
+
+ AssertH ( !bigDone[ss], 1006 );
+
+ /*--
+ Step 2:
+ Now scan this big bucket [ss] so as to synthesise the
+ sorted order for small buckets [t, ss] for all t,
+ including, magically, the bucket [ss,ss] too.
+ This will avoid doing Real Work in subsequent Step 1's.
+ --*/
+ {
+ for (j = 0; j <= 255; j++) {
+ copyStart[j] = ftab[(j << 8) + ss] & CLEARMASK;
+ copyEnd [j] = (ftab[(j << 8) + ss + 1] & CLEARMASK) - 1;
+ }
+ for (j = ftab[ss << 8] & CLEARMASK; j < copyStart[ss]; j++) {
+ k = ptr[j]-1; if (k < 0) k += nblock;
+ c1 = block[k];
+ if (!bigDone[c1])
+ ptr[ copyStart[c1]++ ] = k;
+ }
+ for (j = (ftab[(ss+1) << 8] & CLEARMASK) - 1; j > copyEnd[ss]; j--) {
+ k = ptr[j]-1; if (k < 0) k += nblock;
+ c1 = block[k];
+ if (!bigDone[c1])
+ ptr[ copyEnd[c1]-- ] = k;
+ }
+ }
+
+ AssertH ( (copyStart[ss]-1 == copyEnd[ss])
+ ||
+ /* Extremely rare case missing in bzip2-1.0.0 and 1.0.1.
+ Necessity for this case is demonstrated by compressing
+ a sequence of approximately 48.5 million of character
+ 251; 1.0.0/1.0.1 will then die here. */
+ (copyStart[ss] == 0 && copyEnd[ss] == nblock-1),
+ 1007 )
+
+ for (j = 0; j <= 255; j++) ftab[(j << 8) + ss] |= SETMASK;
+
+ /*--
+ Step 3:
+ The [ss] big bucket is now done. Record this fact,
+ and update the quadrant descriptors. Remember to
+ update quadrants in the overshoot area too, if
+ necessary. The "if (i < 255)" test merely skips
+ this updating for the last bucket processed, since
+ updating for the last bucket is pointless.
+
+ The quadrant array provides a way to incrementally
+ cache sort orderings, as they appear, so as to
+ make subsequent comparisons in fullGtU() complete
+ faster. For repetitive blocks this makes a big
+ difference (but not big enough to be able to avoid
+ the fallback sorting mechanism, exponential radix sort).
+
+ The precise meaning is: at all times:
+
+ for 0 <= i < nblock and 0 <= j <= nblock
+
+ if block[i] != block[j],
+
+ then the relative values of quadrant[i] and
+ quadrant[j] are meaningless.
+
+ else {
+ if quadrant[i] < quadrant[j]
+ then the string starting at i lexicographically
+ precedes the string starting at j
+
+ else if quadrant[i] > quadrant[j]
+ then the string starting at j lexicographically
+ precedes the string starting at i
+
+ else
+ the relative ordering of the strings starting
+ at i and j has not yet been determined.
+ }
+ --*/
+ bigDone[ss] = True;
+
+ if (i < 255) {
+ Int32 bbStart = ftab[ss << 8] & CLEARMASK;
+ Int32 bbSize = (ftab[(ss+1) << 8] & CLEARMASK) - bbStart;
+ Int32 shifts = 0;
+
+ while ((bbSize >> shifts) > 65534) shifts++;
+
+ for (j = bbSize-1; j >= 0; j--) {
+ Int32 a2update = ptr[bbStart + j];
+ UInt16 qVal = (UInt16)(j >> shifts);
+ quadrant[a2update] = qVal;
+ if (a2update < BZ_N_OVERSHOOT)
+ quadrant[a2update + nblock] = qVal;
+ }
+ AssertH ( ((bbSize-1) >> shifts) <= 65535, 1002 );
+ }
+
+ }
+
+ if (verb >= 4)
+ VPrintf3 ( " %d pointers, %d sorted, %d scanned\n",
+ nblock, numQSorted, nblock - numQSorted );
+}
+
+#undef BIGFREQ
+#undef SETMASK
+#undef CLEARMASK
+
+
+/*---------------------------------------------*/
+/* BZ2_blockSort: entry point of the block sorter.
+
+   On entry (nblock > 0):
+      s->arr2 has room for [0 .. nblock-1 +N_OVERSHOOT] and, viewed
+      as UChar, holds the block in [0 .. nblock-1]
+      s->arr1 has room for [0 .. nblock-1]
+
+   On exit:
+      the block bytes in arr2 are intact; every other part of arr2
+      and ftab [0 .. 65536] is scratch
+      arr1 [0 .. nblock-1] holds the sorted order
+      s->origPtr is the index of the entry of that order equal to 0
+
+   Blocks shorter than 10000 bytes go straight to fallbackSort.
+   Longer blocks try mainSort with a work budget derived from
+   s->workFactor and switch to fallbackSort if the budget runs out.
+*/
+void BZ2_blockSort ( EState* s )
+{
+   UInt32* const order        = s->ptr;
+   const Int32   nblock       = s->nblock;
+   const Int32   verb         = s->verbosity;
+   Int32         needFallback = (nblock < 10000);
+   Int32         pos;
+
+   if (!needFallback) {
+      UInt16* quadrant;
+      Int32   budget;
+      Int32   budgetInit;
+      Int32   wfact   = s->workFactor;
+      Int32   qOffset = nblock + BZ_N_OVERSHOOT;
+
+      /* quadrant sits right after the block and its overshoot area,
+         at an even byte offset. This assumes &(block[0]) is at least
+         2-byte aligned, which should hold because block is really
+         the first section of arr2. */
+      qOffset += (qOffset & 1);
+      quadrant = (UInt16*)(&(s->block[qOffset]));
+
+      /* (wfact-1) / 3 puts the default-factor-30 transition point
+         at very roughly the same place as with v0.1 and v0.9.0.
+         The compressed stream is the same whichever sort is used. */
+      if (wfact < 1) wfact = 1;
+      else if (wfact > 100) wfact = 100;
+      budgetInit = nblock * ((wfact-1) / 3);
+      budget     = budgetInit;
+
+      mainSort ( order, s->block, quadrant, s->ftab, nblock, verb, &budget );
+      if (verb >= 3)
+         VPrintf3 ( " %d work, %d block, ratio %5.2f\n",
+                    budgetInit - budget,
+                    nblock,
+                    (float)(budgetInit - budget) /
+                    (float)(nblock==0 ? 1 : nblock) );
+      if (budget < 0) {
+         if (verb >= 2)
+            VPrintf0 ( " too repetitive; using fallback"
+                       " sorting algorithm\n" );
+         needFallback = 1;
+      }
+   }
+
+   if (needFallback)
+      fallbackSort ( s->arr1, s->arr2, s->ftab, nblock, verb );
+
+   /* locate the entry of the sorted order that refers to position 0 */
+   pos = 0;
+   while (pos < nblock && order[pos] != 0) pos++;
+   s->origPtr = (pos < nblock) ? pos : -1;
+
+   AssertH( s->origPtr != -1, 1003 );
+}
+
+
+/*-------------------------------------------------------------*/
+/*--- end blocksort.c ---*/
+/*-------------------------------------------------------------*/
diff --git a/src/misc/bzlib/bzip2.txt b/src/misc/bzlib/bzip2.txt
new file mode 100644
index 00000000..4fb9c743
--- /dev/null
+++ b/src/misc/bzlib/bzip2.txt
@@ -0,0 +1,391 @@
+
+NAME
+ bzip2, bunzip2 - a block-sorting file compressor, v1.0.4
+ bzcat - decompresses files to stdout
+ bzip2recover - recovers data from damaged bzip2 files
+
+
+SYNOPSIS
+ bzip2 [ -cdfkqstvzVL123456789 ] [ filenames ... ]
+ bunzip2 [ -fkvsVL ] [ filenames ... ]
+ bzcat [ -s ] [ filenames ... ]
+ bzip2recover filename
+
+
+DESCRIPTION
+ bzip2 compresses files using the Burrows-Wheeler block
+ sorting text compression algorithm, and Huffman coding.
+ Compression is generally considerably better than that
+ achieved by more conventional LZ77/LZ78-based compressors,
+ and approaches the performance of the PPM family of sta-
+ tistical compressors.
+
+ The command-line options are deliberately very similar to
+ those of GNU gzip, but they are not identical.
+
+ bzip2 expects a list of file names to accompany the com-
+ mand-line flags. Each file is replaced by a compressed
+ version of itself, with the name "original_name.bz2".
+ Each compressed file has the same modification date, per-
+ missions, and, when possible, ownership as the correspond-
+ ing original, so that these properties can be correctly
+ restored at decompression time. File name handling is
+ naive in the sense that there is no mechanism for preserv-
+ ing original file names, permissions, ownerships or dates
+ in filesystems which lack these concepts, or have serious
+ file name length restrictions, such as MS-DOS.
+
+ bzip2 and bunzip2 will by default not overwrite existing
+ files. If you want this to happen, specify the -f flag.
+
+ If no file names are specified, bzip2 compresses from
+ standard input to standard output. In this case, bzip2
+ will decline to write compressed output to a terminal, as
+ this would be entirely incomprehensible and therefore
+ pointless.
+
+ bunzip2 (or bzip2 -d) decompresses all specified files.
+ Files which were not created by bzip2 will be detected and
+ ignored, and a warning issued. bzip2 attempts to guess
+ the filename for the decompressed file from that of the
+ compressed file as follows:
+
+ filename.bz2 becomes filename
+ filename.bz becomes filename
+ filename.tbz2 becomes filename.tar
+ filename.tbz becomes filename.tar
+ anyothername becomes anyothername.out
+
+ If the file does not end in one of the recognised endings,
+ .bz2, .bz, .tbz2 or .tbz, bzip2 complains that it cannot
+ guess the name of the original file, and uses the original
+ name with .out appended.
+
+ As with compression, supplying no filenames causes decom-
+ pression from standard input to standard output.
+
+ bunzip2 will correctly decompress a file which is the con-
+ catenation of two or more compressed files. The result is
+ the concatenation of the corresponding uncompressed files.
+ Integrity testing (-t) of concatenated compressed files is
+ also supported.
+
+ You can also compress or decompress files to the standard
+ output by giving the -c flag. Multiple files may be com-
+ pressed and decompressed like this. The resulting outputs
+ are fed sequentially to stdout. Compression of multiple
+ files in this manner generates a stream containing multi-
+ ple compressed file representations. Such a stream can be
+ decompressed correctly only by bzip2 version 0.9.0 or
+ later. Earlier versions of bzip2 will stop after decom-
+ pressing the first file in the stream.
+
+ bzcat (or bzip2 -dc) decompresses all specified files to
+ the standard output.
+
+ bzip2 will read arguments from the environment variables
+ BZIP2 and BZIP, in that order, and will process them
+ before any arguments read from the command line. This
+ gives a convenient way to supply default arguments.
+
+ Compression is always performed, even if the compressed
+ file is slightly larger than the original. Files of less
+ than about one hundred bytes tend to get larger, since the
+ compression mechanism has a constant overhead in the
+ region of 50 bytes. Random data (including the output of
+ most file compressors) is coded at about 8.05 bits per
+ byte, giving an expansion of around 0.5%.
+
+ As a self-check for your protection, bzip2 uses 32-bit
+ CRCs to make sure that the decompressed version of a file
+ is identical to the original. This guards against corrup-
+ tion of the compressed data, and against undetected bugs
+ in bzip2 (hopefully very unlikely). The chance of data
+ corruption going undetected is microscopic, about one
+ chance in four billion for each file processed. Be aware,
+ though, that the check occurs upon decompression, so it
+ can only tell you that something is wrong. It can't help
+ you recover the original uncompressed data. You can use
+ bzip2recover to try to recover data from damaged files.
+
+ Return values: 0 for a normal exit, 1 for environmental
+ problems (file not found, invalid flags, I/O errors, &c),
+ 2 to indicate a corrupt compressed file, 3 for an internal
+ consistency error (eg, bug) which caused bzip2 to panic.
+
+
+OPTIONS
+ -c --stdout
+ Compress or decompress to standard output.
+
+ -d --decompress
+ Force decompression. bzip2, bunzip2 and bzcat are
+ really the same program, and the decision about
+ what actions to take is done on the basis of which
+ name is used. This flag overrides that mechanism,
+ and forces bzip2 to decompress.
+
+ -z --compress
+ The complement to -d: forces compression,
+ regardless of the invocation name.
+
+ -t --test
+ Check integrity of the specified file(s), but don't
+ decompress them. This really performs a trial
+ decompression and throws away the result.
+
+ -f --force
+ Force overwrite of output files. Normally, bzip2
+ will not overwrite existing output files. Also
+ forces bzip2 to break hard links to files, which it
+ otherwise wouldn't do.
+
+ bzip2 normally declines to decompress files which
+ don't have the correct magic header bytes. If
+ forced (-f), however, it will pass such files
+ through unmodified. This is how GNU gzip behaves.
+
+ -k --keep
+ Keep (don't delete) input files during compression
+ or decompression.
+
+ -s --small
+ Reduce memory usage, for compression, decompression
+ and testing. Files are decompressed and tested
+ using a modified algorithm which only requires 2.5
+ bytes per block byte. This means any file can be
+ decompressed in 2300k of memory, albeit at about
+ half the normal speed.
+
+ During compression, -s selects a block size of
+ 200k, which limits memory use to around the same
+ figure, at the expense of your compression ratio.
+ In short, if your machine is low on memory (8
+ megabytes or less), use -s for everything. See
+ MEMORY MANAGEMENT below.
+
+ -q --quiet
+ Suppress non-essential warning messages. Messages
+ pertaining to I/O errors and other critical events
+ will not be suppressed.
+
+ -v --verbose
+ Verbose mode -- show the compression ratio for each
+ file processed. Further -v's increase the ver-
+ bosity level, spewing out lots of information which
+ is primarily of interest for diagnostic purposes.
+
+ -L --license -V --version
+ Display the software version, license terms and
+ conditions.
+
+ -1 (or --fast) to -9 (or --best)
+ Set the block size to 100 k, 200 k .. 900 k when
+ compressing. Has no effect when decompressing.
+ See MEMORY MANAGEMENT below. The --fast and --best
+ aliases are primarily for GNU gzip compatibility.
+ In particular, --fast doesn't make things signifi-
+ cantly faster. And --best merely selects the
+ default behaviour.
+
+ -- Treats all subsequent arguments as file names, even
+ if they start with a dash. This is so you can han-
+ dle files with names beginning with a dash, for
+ example: bzip2 -- -myfilename.
+
+ --repetitive-fast --repetitive-best
+ These flags are redundant in versions 0.9.5 and
+ above. They provided some coarse control over the
+ behaviour of the sorting algorithm in earlier ver-
+ sions, which was sometimes useful. 0.9.5 and above
+ have an improved algorithm which renders these
+ flags irrelevant.
+
+
+MEMORY MANAGEMENT
+ bzip2 compresses large files in blocks. The block size
+ affects both the compression ratio achieved, and the
+ amount of memory needed for compression and decompression.
+ The flags -1 through -9 specify the block size to be
+ 100,000 bytes through 900,000 bytes (the default) respec-
+ tively. At decompression time, the block size used for
+ compression is read from the header of the compressed
+ file, and bunzip2 then allocates itself just enough memory
+ to decompress the file. Since block sizes are stored in
+ compressed files, it follows that the flags -1 to -9 are
+ irrelevant to and so ignored during decompression.
+
+ Compression and decompression requirements, in bytes, can
+ be estimated as:
+
+ Compression: 400k + ( 8 x block size )
+
+ Decompression: 100k + ( 4 x block size ), or
+ 100k + ( 2.5 x block size )
+
+ Larger block sizes give rapidly diminishing marginal
+ returns. Most of the compression comes from the first two
+ or three hundred k of block size, a fact worth bearing in
+ mind when using bzip2 on small machines. It is also
+ important to appreciate that the decompression memory
+ requirement is set at compression time by the choice of
+ block size.
+
+ For files compressed with the default 900k block size,
+ bunzip2 will require about 3700 kbytes to decompress. To
+ support decompression of any file on a 4 megabyte machine,
+ bunzip2 has an option to decompress using approximately
+ half this amount of memory, about 2300 kbytes. Decompres-
+ sion speed is also halved, so you should use this option
+ only where necessary. The relevant flag is -s.
+
+ In general, try and use the largest block size memory con-
+ straints allow, since that maximises the compression
+ achieved. Compression and decompression speed are virtu-
+ ally unaffected by block size.
+
+ Another significant point applies to files which fit in a
+ single block -- that means most files you'd encounter
+ using a large block size. The amount of real memory
+ touched is proportional to the size of the file, since the
+ file is smaller than a block. For example, compressing a
+ file 20,000 bytes long with the flag -9 will cause the
+ compressor to allocate around 7600k of memory, but only
+ touch 400k + 20000 * 8 = 560 kbytes of it. Similarly, the
+ decompressor will allocate 3700k but only touch 100k +
+ 20000 * 4 = 180 kbytes.
+
+ Here is a table which summarises the maximum memory usage
+ for different block sizes. Also recorded is the total
+ compressed size for 14 files of the Calgary Text Compres-
+ sion Corpus totalling 3,141,622 bytes. This column gives
+ some feel for how compression varies with block size.
+ These figures tend to understate the advantage of larger
+ block sizes for larger files, since the Corpus is domi-
+ nated by smaller files.
+
+ Compress Decompress Decompress Corpus
+ Flag usage usage -s usage Size
+
+ -1 1200k 500k 350k 914704
+ -2 2000k 900k 600k 877703
+ -3 2800k 1300k 850k 860338
+ -4 3600k 1700k 1100k 846899
+ -5 4400k 2100k 1350k 845160
+ -6 5200k 2500k 1600k 838626
+ -7 6000k 2900k 1850k 834096
+ -8 6800k 3300k 2100k 828642
+ -9 7600k 3700k 2350k 828642
+
+
+RECOVERING DATA FROM DAMAGED FILES
+ bzip2 compresses files in blocks, usually 900kbytes long.
+ Each block is handled independently. If a media or trans-
+ mission error causes a multi-block .bz2 file to become
+ damaged, it may be possible to recover data from the
+ undamaged blocks in the file.
+
+ The compressed representation of each block is delimited
+ by a 48-bit pattern, which makes it possible to find the
+ block boundaries with reasonable certainty. Each block
+ also carries its own 32-bit CRC, so damaged blocks can be
+ distinguished from undamaged ones.
+
+ bzip2recover is a simple program whose purpose is to
+ search for blocks in .bz2 files, and write each block out
+ into its own .bz2 file. You can then use bzip2 -t to test
+ the integrity of the resulting files, and decompress those
+ which are undamaged.
+
+ bzip2recover takes a single argument, the name of the dam-
+ aged file, and writes a number of files
+ "rec00001file.bz2", "rec00002file.bz2", etc, containing
+ the extracted blocks. The output filenames are
+ designed so that the use of wildcards in subsequent pro-
+ cessing -- for example, "bzip2 -dc rec*file.bz2 > recov-
+ ered_data" -- processes the files in the correct order.
+
+ bzip2recover should be of most use dealing with large .bz2
+ files, as these will contain many blocks. It is clearly
+ futile to use it on damaged single-block files, since a
+ damaged block cannot be recovered. If you wish to min-
+ imise any potential data loss through media or transmis-
+ sion errors, you might consider compressing with a smaller
+ block size.
+
+
+PERFORMANCE NOTES
+ The sorting phase of compression gathers together similar
+ strings in the file. Because of this, files containing
+ very long runs of repeated symbols, like "aabaabaabaab
+ ..." (repeated several hundred times) may compress more
+ slowly than normal. Versions 0.9.5 and above fare much
+ better than previous versions in this respect. The ratio
+ between worst-case and average-case compression time is in
+ the region of 10:1. For previous versions, this figure
+ was more like 100:1. You can use the -vvvv option to mon-
+ itor progress in great detail, if you want.
+
+ Decompression speed is unaffected by these phenomena.
+
+ bzip2 usually allocates several megabytes of memory to
+ operate in, and then charges all over it in a fairly ran-
+ dom fashion. This means that performance, both for com-
+ pressing and decompressing, is largely determined by the
+ speed at which your machine can service cache misses.
+ Because of this, small changes to the code to reduce the
+ miss rate have been observed to give disproportionately
+ large performance improvements. I imagine bzip2 will per-
+ form best on machines with very large caches.
+
+
+CAVEATS
+ I/O error messages are not as helpful as they could be.
+ bzip2 tries hard to detect I/O errors and exit cleanly,
+ but the details of what the problem is sometimes seem
+ rather misleading.
+
+ This manual page pertains to version 1.0.4 of bzip2. Com-
+ pressed data created by this version is entirely forwards
+ and backwards compatible with the previous public
+ releases, versions 0.1pl2, 0.9.0, 0.9.5, 1.0.0, 1.0.1,
+ 1.0.2 and 1.0.3, but with the following exception: 0.9.0
+ and above can correctly decompress multiple concatenated
+ compressed files. 0.1pl2 cannot do this; it will stop
+ after decompressing just the first file in the stream.
+
+ bzip2recover versions prior to 1.0.2 used 32-bit integers
+ to represent bit positions in compressed files, so they
+ could not handle compressed files more than 512 megabytes
+ long. Versions 1.0.2 and above use 64-bit ints on some
+ platforms which support them (GNU supported targets, and
+ Windows). To establish whether or not bzip2recover was
+ built with such a limitation, run it without arguments.
+ In any event you can build yourself an unlimited version
+ if you can recompile it with MaybeUInt64 set to be an
+ unsigned 64-bit integer.
+
+
+AUTHOR
+ Julian Seward, jseward@bzip.org.
+
+ http://www.bzip.org
+
+ The ideas embodied in bzip2 are due to (at least) the fol-
+ lowing people: Michael Burrows and David Wheeler (for the
+ block sorting transformation), David Wheeler (again, for
+ the Huffman coder), Peter Fenwick (for the structured cod-
+ ing model in the original bzip, and many refinements), and
+ Alistair Moffat, Radford Neal and Ian Witten (for the
+ arithmetic coder in the original bzip). I am much
+ indebted for their help, support and advice. See the man-
+ ual in the source distribution for pointers to sources of
+ documentation. Christian von Roques encouraged me to look
+ for faster sorting algorithms, so as to speed up compres-
+ sion. Bela Lubkin encouraged me to improve the worst-case
+ compression performance. Donna Robinson XMLised the docu-
+ mentation. The bz* scripts are derived from those of GNU
+ gzip. Many people sent patches, helped with portability
+ problems, lent machines, gave advice and were generally
+ helpful.
+
diff --git a/src/misc/bzlib/bzlib.c b/src/misc/bzlib/bzlib.c
new file mode 100644
index 00000000..9d040682
--- /dev/null
+++ b/src/misc/bzlib/bzlib.c
@@ -0,0 +1,1571 @@
+
+/*-------------------------------------------------------------*/
+/*--- Library top-level functions. ---*/
+/*--- bzlib.c ---*/
+/*-------------------------------------------------------------*/
+
+/* ------------------------------------------------------------------
+ This file is part of bzip2/libbzip2, a program and library for
+ lossless, block-sorting data compression.
+
+ bzip2/libbzip2 version 1.0.5 of 10 December 2007
+ Copyright (C) 1996-2007 Julian Seward <jseward@bzip.org>
+
+ Please read the WARNING, DISCLAIMER and PATENTS sections in the
+ README file.
+
+ This program is released under the terms of the license contained
+ in the file LICENSE.
+ ------------------------------------------------------------------ */
+
+/* CHANGES
+ 0.9.0 -- original version.
+ 0.9.0a/b -- no changes in this file.
+ 0.9.0c -- made zero-length BZ_FLUSH work correctly in bzCompress().
+ fixed bzWrite/bzRead to ignore zero-length requests.
+ fixed bzread to correctly handle read requests after EOF.
+ wrong parameter order in call to bzDecompressInit in
+ bzBuffToBuffDecompress. Fixed.
+*/
+
+#include "bzlib_private.h"
+
+/*---------------------------------------------------*/
+/*--- Compression stuff ---*/
+/*---------------------------------------------------*/
+
+
+/*---------------------------------------------------
+ BZ2_bz__AssertH__fail: reports a failed internal assertion.
+ Prints the error number errcode and the string returned by
+ BZ2_bzlibVersion() to stderr, adds an extra note when errcode
+ is 1007, and then ends the process with exit status 3.
+ This function does not return. It is compiled only when
+ BZ_NO_STDIO is not defined.
+ ---------------------------------------------------*/
+#ifndef BZ_NO_STDIO
+void BZ2_bz__AssertH__fail ( int errcode )
+{
+ fprintf(stderr,
+ "\n\nbzip2/libbzip2: internal error number %d.\n"
+ "This is a bug in bzip2/libbzip2, %s.\n"
+ "Please report it to me at: jseward@bzip.org. If this happened\n"
+ "when you were using some program which uses libbzip2 as a\n"
+ "component, you should also report this bug to the author(s)\n"
+ "of that program. Please make an effort to report this bug;\n"
+ "timely and accurate bug reports eventually lead to higher\n"
+ "quality software. Thanks. Julian Seward, 10 December 2007.\n\n",
+ errcode,
+ BZ2_bzlibVersion()
+ );
+
+ if (errcode == 1007) { /* 1007 gets extra advice: the message below attributes it commonly to faulty memory or other hardware */
+ fprintf(stderr,
+ "\n*** A special note about internal error number 1007 ***\n"
+ "\n"
+ "Experience suggests that a common cause of i.e. 1007\n"
+ "is unreliable memory or other hardware. The 1007 assertion\n"
+ "just happens to cross-check the results of huge numbers of\n"
+ "memory reads/writes, and so acts (unintendedly) as a stress\n"
+ "test of your memory system.\n"
+ "\n"
+ "I suggest the following: try compressing the file again,\n"
+ "possibly monitoring progress in detail with the -vv flag.\n"
+ "\n"
+ "* If the error cannot be reproduced, and/or happens at different\n"
+ " points in compression, you may have a flaky memory system.\n"
+ " Try a memory-test program. I have used Memtest86\n"
+ " (www.memtest86.com). At the time of writing it is free (GPLd).\n"
+ " Memtest86 tests memory much more thorougly than your BIOSs\n"
+ " power-on test, and may find failures that the BIOS doesn't.\n"
+ "\n"
+ "* If the error can be repeatably reproduced, this is a bug in\n"
+ " bzip2, and I would very much like to hear about it. Please\n"
+ " let me know, and, ideally, save a copy of the file causing the\n"
+ " problem -- without which I will be unable to investigate it.\n"
+ "\n"
+ );
+ }
+
+ exit(3);
+}
+#endif
+
+
+/*---------------------------------------------------*/
+/* Returns 1 when this build's int, short and char are 4, 2 and 1
+   bytes wide respectively, and 0 otherwise. */
+static
+int bz_config_ok ( void )
+{
+   return (sizeof(int)   == 4 &&
+           sizeof(short) == 2 &&
+           sizeof(char)  == 1) ? 1 : 0;
+}
+
+
+/*---------------------------------------------------*/
+/* Default allocator, installed when the caller leaves
+   strm->bzalloc NULL. Returns malloc'd storage for items * size
+   bytes, or NULL on failure.
+
+   opaque is unused. Negative counts, and products that do not
+   fit in size_t, yield NULL instead of reaching malloc with a
+   wrapped-around size; the caller visible here reports NULL as
+   BZ_MEM_ERROR. */
+static
+void* default_bzalloc ( void* opaque, Int32 items, Int32 size )
+{
+   size_t nItems;
+   size_t itemSize;
+
+   (void)opaque;
+
+   if (items < 0 || size < 0) return NULL;
+   nItems   = (size_t)items;
+   itemSize = (size_t)size;
+
+   /* do the multiply in size_t and refuse it if it would wrap */
+   if (itemSize != 0 && nItems > ((size_t)-1) / itemSize) return NULL;
+
+   return malloc ( nItems * itemSize );
+}
+
+/* Default deallocator, installed when the caller leaves
+   strm->bzfree NULL. Releases addr with free(); opaque is unused.
+   No NULL test is needed because free(NULL) is defined to do
+   nothing. */
+static
+void default_bzfree ( void* opaque, void* addr )
+{
+   (void)opaque;
+   free ( addr );
+}
+
+
+/*---------------------------------------------------*/
+/* Resets the per-block part of the compressor state: empties the
+   block and the output counters, re-initialises the block CRC,
+   clears all 256 inUse flags and advances the block number. */
+static
+void prepare_new_block ( EState* s )
+{
+   Int32 sym = 256;
+
+   s->blockNo++;
+
+   while (sym > 0) {
+      sym--;
+      s->inUse[sym] = False;
+   }
+
+   BZ_INITIALISE_CRC ( s->blockCRC );
+   s->state_out_pos = 0;
+   s->numZ          = 0;
+   s->nblock        = 0;
+}
+
+
+/*---------------------------------------------------*/
+/* Puts the run-length state into its "no pending run" form.
+   256 lies outside the byte range; the other run-length routines
+   in this file test state_in_ch < 256 to see whether a character
+   is pending. */
+static
+void init_RL ( EState* s )
+{
+   s->state_in_len = 0;
+   s->state_in_ch  = 256;
+}
+
+
+/* Returns True when the run-length state holds no pending run,
+   i.e. unless state_in_ch is a real byte value (< 256) and
+   state_in_len is positive. */
+static
+Bool isempty_RL ( EState* s )
+{
+   if (s->state_in_ch >= 256) return True;
+   if (s->state_in_len <= 0)  return True;
+   return False;
+}
+
+
+/*---------------------------------------------------*/
+/* BZ2_bzCompressInit: allocates and initialises the compressor
+   state for strm.
+
+   strm           stream to set up; NULL bzalloc / bzfree members
+                  are replaced by the malloc/free based defaults
+   blockSize100k  block size in units of 100000 bytes, 1 .. 9
+   verbosity      stored in the state unchanged
+   workFactor     0 .. 250; 0 selects the default of 30
+
+   Returns BZ_CONFIG_ERROR if bz_config_ok() fails, BZ_PARAM_ERROR
+   for a NULL strm or an out-of-range argument, BZ_MEM_ERROR if any
+   allocation fails (everything already allocated is released),
+   and BZ_OK otherwise. */
+int BZ_API(BZ2_bzCompressInit)
+                    ( bz_stream* strm,
+                      int        blockSize100k,
+                      int        verbosity,
+                      int        workFactor )
+{
+   EState* s;
+   Int32   blockBytes;
+
+   if (!bz_config_ok()) return BZ_CONFIG_ERROR;
+
+   if (strm == NULL) return BZ_PARAM_ERROR;
+   if (blockSize100k < 1 || blockSize100k > 9) return BZ_PARAM_ERROR;
+   if (workFactor < 0 || workFactor > 250) return BZ_PARAM_ERROR;
+
+   if (workFactor == 0) workFactor = 30;
+   if (strm->bzalloc == NULL) strm->bzalloc = default_bzalloc;
+   if (strm->bzfree == NULL) strm->bzfree = default_bzfree;
+
+   s = BZALLOC( sizeof(EState) );
+   if (s == NULL) return BZ_MEM_ERROR;
+   s->strm = strm;
+
+   /* three work areas: arr1, arr2 (with overshoot room) and ftab */
+   blockBytes = 100000 * blockSize100k;
+   s->arr1 = BZALLOC( blockBytes * sizeof(UInt32) );
+   s->arr2 = BZALLOC( (blockBytes+BZ_N_OVERSHOOT) * sizeof(UInt32) );
+   s->ftab = BZALLOC( 65537 * sizeof(UInt32) );
+
+   if (s->arr1 == NULL || s->arr2 == NULL || s->ftab == NULL) {
+      /* partial failure: give back whatever was obtained */
+      if (s->arr1 != NULL) BZFREE(s->arr1);
+      if (s->arr2 != NULL) BZFREE(s->arr2);
+      if (s->ftab != NULL) BZFREE(s->ftab);
+      BZFREE(s);
+      return BZ_MEM_ERROR;
+   }
+
+   /* block, mtfv and ptr are views onto the two big arrays */
+   s->block = (UChar*)s->arr2;
+   s->mtfv  = (UInt16*)s->arr1;
+   s->ptr   = (UInt32*)s->arr1;
+   s->zbits = NULL;
+
+   s->workFactor    = workFactor;
+   s->verbosity     = verbosity;
+   s->blockSize100k = blockSize100k;
+   s->nblockMAX     = blockBytes - 19;
+   s->combinedCRC   = 0;
+   s->mode          = BZ_M_RUNNING;
+   s->state         = BZ_S_INPUT;
+   s->blockNo       = 0;
+
+   strm->total_out_hi32 = 0;
+   strm->total_out_lo32 = 0;
+   strm->total_in_hi32  = 0;
+   strm->total_in_lo32  = 0;
+   strm->state          = s;
+
+   init_RL ( s );
+   prepare_new_block ( s );
+   return BZ_OK;
+}
+
+
+/*---------------------------------------------------*/
+/* Appends the pending run (character state_in_ch repeated
+   state_in_len times) to the block. The block CRC is updated once
+   per repeated character and the character is flagged in inUse.
+   Runs of 1, 2 or 3 are stored as that many copies. Any other
+   length is stored as four copies followed by one byte holding
+   the length minus 4, and that byte value is flagged in inUse
+   as well. */
+static
+void add_pair_to_block ( EState* s )
+{
+   Int32 k;
+   Int32 runLen    = s->state_in_len;
+   UChar sym       = (UChar)(s->state_in_ch);
+   Bool  shortRun  = (runLen >= 1 && runLen <= 3) ? True : False;
+   Int32 nLiterals = shortRun ? runLen : 4;
+
+   for (k = runLen; k > 0; k--) {
+      BZ_UPDATE_CRC( s->blockCRC, sym );
+   }
+   s->inUse[s->state_in_ch] = True;
+
+   for (k = 0; k < nLiterals; k++) {
+      s->block[s->nblock] = sym;
+      s->nblock++;
+   }
+
+   if (!shortRun) {
+      /* trailing count byte of the long-run form */
+      s->inUse[runLen-4] = True;
+      s->block[s->nblock] = ((UChar)(runLen-4));
+      s->nblock++;
+   }
+}
+
+
+/*---------------------------------------------------*/
+/* Writes the pending run, if state_in_ch holds a real character
+   (< 256), into the block and then resets the run-length state. */
+static
+void flush_RL ( EState* s )
+{
+   Bool havePending = (s->state_in_ch < 256) ? True : False;
+
+   if (havePending) {
+      add_pair_to_block ( s );
+   }
+   init_RL ( s );
+}
+
+
+/*---------------------------------------------------
+ ADD_CHAR_TO_BLOCK(zs, zchh0): feeds one input character zchh0
+ into the run-length state of zs (an EState*).
+
+ Fast path: the character differs from the pending one and the
+ pending run has length 1. The pending character is written to
+ the block directly (CRC, inUse, block, nblock) and the new
+ character becomes pending; state_in_len is left at 1.
+
+ Otherwise: if the character differs, or the pending run has
+ reached 255, the pending run (if state_in_ch < 256) is emitted
+ through add_pair_to_block and a new run of length 1 is started;
+ if neither holds the pending run just grows by one.
+
+ zs is evaluated several times, so pass a plain variable.
+ ---------------------------------------------------*/
+#define ADD_CHAR_TO_BLOCK(zs,zchh0) \
+{ \
+ UInt32 zchh = (UInt32)(zchh0); \
+ /*-- fast track the common case --*/ \
+ if (zchh != zs->state_in_ch && \
+ zs->state_in_len == 1) { \
+ UChar ch = (UChar)(zs->state_in_ch); \
+ BZ_UPDATE_CRC( zs->blockCRC, ch ); \
+ zs->inUse[zs->state_in_ch] = True; \
+ zs->block[zs->nblock] = (UChar)ch; \
+ zs->nblock++; \
+ zs->state_in_ch = zchh; \
+ } \
+ else \
+ /*-- general, uncommon cases --*/ \
+ if (zchh != zs->state_in_ch || \
+ zs->state_in_len == 255) { \
+ if (zs->state_in_ch < 256) \
+ add_pair_to_block ( zs ); \
+ zs->state_in_ch = zchh; \
+ zs->state_in_len = 1; \
+ } else { \
+ zs->state_in_len++; \
+ } \
+}
+
+
+/*---------------------------------------------------*/
+/*-- Move bytes from the stream's input buffer into the current
+     block through ADD_CHAR_TO_BLOCK.  Stops when the block is
+     full or the input is exhausted; outside BZ_M_RUNNING it also
+     stops once avail_in_expect reaches zero.  Returns True iff
+     at least one byte was consumed. --*/
+static
+Bool copy_input_until_stop ( EState* s )
+{
+   Bool       consumed = False;
+   bz_stream* strm     = s->strm;
+
+   if (s->mode == BZ_M_RUNNING) {
+
+      /*-- common case: no flush/finish byte budget to honour --*/
+      while (s->nblock < s->nblockMAX && strm->avail_in != 0) {
+         consumed = True;
+         ADD_CHAR_TO_BLOCK ( s, (UInt32)(*((UChar*)(strm->next_in))) );
+         strm->next_in++;
+         strm->avail_in--;
+         strm->total_in_lo32++;
+         /*-- carry into the high word when the low word wraps --*/
+         if (strm->total_in_lo32 == 0) strm->total_in_hi32++;
+      }
+
+   } else {
+
+      /*-- flushing/finishing: also bounded by avail_in_expect --*/
+      while (s->nblock < s->nblockMAX &&
+             strm->avail_in != 0 &&
+             s->avail_in_expect != 0) {
+         consumed = True;
+         ADD_CHAR_TO_BLOCK ( s, (UInt32)(*((UChar*)(strm->next_in))) );
+         strm->next_in++;
+         strm->avail_in--;
+         strm->total_in_lo32++;
+         if (strm->total_in_lo32 == 0) strm->total_in_hi32++;
+         s->avail_in_expect--;
+      }
+   }
+   return consumed;
+}
+
+
+/*---------------------------------------------------*/
+/*-- Copy already-compressed bytes from zbits (starting at
+     state_out_pos, up to numZ) into the stream's output buffer
+     until either runs out.  Returns True iff at least one byte
+     was written. --*/
+static
+Bool copy_output_until_stop ( EState* s )
+{
+   Bool       emitted = False;
+   bz_stream* strm    = s->strm;
+
+   while (strm->avail_out != 0 && s->state_out_pos < s->numZ) {
+      emitted = True;
+      *(strm->next_out) = s->zbits[s->state_out_pos];
+      s->state_out_pos++;
+      strm->avail_out--;
+      strm->next_out++;
+      strm->total_out_lo32++;
+      /*-- carry into the high word when the low word wraps --*/
+      if (strm->total_out_lo32 == 0) strm->total_out_hi32++;
+   }
+
+   return emitted;
+}
+
+
+/*---------------------------------------------------
+   handle_compress: run the compressor state machine for strm,
+   alternating between draining compressed bytes (BZ_S_OUTPUT)
+   and absorbing input into the block (BZ_S_INPUT) until neither
+   side can continue.  A block is compressed when it is full, or
+   when a flush/finish request has consumed all the input it
+   expected.  Returns True iff any input was consumed or any
+   output was produced during this call.
+  ---------------------------------------------------*/
+static
+Bool handle_compress ( bz_stream* strm )
+{
+ Bool progress_in = False;
+ Bool progress_out = False;
+ EState* s = strm->state;
+
+ while (True) {
+
+ if (s->state == BZ_S_OUTPUT) {
+ progress_out |= copy_output_until_stop ( s );
+ if (s->state_out_pos < s->numZ) break; /* output buffer full; block not yet drained */
+ if (s->mode == BZ_M_FINISHING &&
+ s->avail_in_expect == 0 &&
+ isempty_RL(s)) break; /* finishing and nothing left to compress */
+ prepare_new_block ( s );
+ s->state = BZ_S_INPUT;
+ if (s->mode == BZ_M_FLUSHING &&
+ s->avail_in_expect == 0 &&
+ isempty_RL(s)) break; /* flush complete; fresh block is ready */
+ }
+
+ if (s->state == BZ_S_INPUT) {
+ progress_in |= copy_input_until_stop ( s );
+ if (s->mode != BZ_M_RUNNING && s->avail_in_expect == 0) {
+ flush_RL ( s ); /* push the pending run before compressing */
+ BZ2_compressBlock ( s, (Bool)(s->mode == BZ_M_FINISHING) );
+ s->state = BZ_S_OUTPUT;
+ }
+ else
+ if (s->nblock >= s->nblockMAX) {
+ BZ2_compressBlock ( s, False );
+ s->state = BZ_S_OUTPUT;
+ }
+ else
+ if (s->strm->avail_in == 0) {
+ break; /* need more input from the caller */
+ }
+ }
+
+ }
+
+ return progress_in || progress_out;
+}
+
+
+/*---------------------------------------------------
+   BZ2_bzCompress: public compression entry point.  Dispatches on
+   the stream's current mode and the requested action (BZ_RUN,
+   BZ_FLUSH or BZ_FINISH).
+   Returns BZ_PARAM_ERROR for a NULL/inconsistent stream, for an
+   unknown action in running mode, or when a BZ_RUN call makes no
+   progress; BZ_SEQUENCE_ERROR when the action does not match the
+   mode in progress, when avail_in changed between calls of a
+   flush/finish, or when the stream is idle; otherwise BZ_RUN_OK,
+   BZ_FLUSH_OK, BZ_FINISH_OK or BZ_STREAM_END.
+  ---------------------------------------------------*/
+int BZ_API(BZ2_bzCompress) ( bz_stream *strm, int action )
+{
+ Bool progress;
+ EState* s;
+ if (strm == NULL) return BZ_PARAM_ERROR;
+ s = strm->state;
+ if (s == NULL) return BZ_PARAM_ERROR;
+ if (s->strm != strm) return BZ_PARAM_ERROR;
+
+ preswitch: /* re-entered after a RUNNING -> FLUSHING/FINISHING transition */
+ switch (s->mode) {
+
+ case BZ_M_IDLE:
+ return BZ_SEQUENCE_ERROR;
+
+ case BZ_M_RUNNING:
+ if (action == BZ_RUN) {
+ progress = handle_compress ( strm );
+ return progress ? BZ_RUN_OK : BZ_PARAM_ERROR;
+ }
+ else
+ if (action == BZ_FLUSH) {
+ s->avail_in_expect = strm->avail_in; /* remember how much input the flush must absorb */
+ s->mode = BZ_M_FLUSHING;
+ goto preswitch;
+ }
+ else
+ if (action == BZ_FINISH) {
+ s->avail_in_expect = strm->avail_in; /* remember how much input the finish must absorb */
+ s->mode = BZ_M_FINISHING;
+ goto preswitch;
+ }
+ else
+ return BZ_PARAM_ERROR;
+
+ case BZ_M_FLUSHING:
+ if (action != BZ_FLUSH) return BZ_SEQUENCE_ERROR;
+ if (s->avail_in_expect != s->strm->avail_in)
+ return BZ_SEQUENCE_ERROR;
+ progress = handle_compress ( strm );
+ if (s->avail_in_expect > 0 || !isempty_RL(s) ||
+ s->state_out_pos < s->numZ) return BZ_FLUSH_OK; /* flush still in progress */
+ s->mode = BZ_M_RUNNING;
+ return BZ_RUN_OK;
+
+ case BZ_M_FINISHING:
+ if (action != BZ_FINISH) return BZ_SEQUENCE_ERROR;
+ if (s->avail_in_expect != s->strm->avail_in)
+ return BZ_SEQUENCE_ERROR;
+ progress = handle_compress ( strm );
+ if (!progress) return BZ_SEQUENCE_ERROR;
+ if (s->avail_in_expect > 0 || !isempty_RL(s) ||
+ s->state_out_pos < s->numZ) return BZ_FINISH_OK; /* more output remains to be collected */
+ s->mode = BZ_M_IDLE;
+ return BZ_STREAM_END;
+ }
+ return BZ_OK; /*--not reached--*/
+}
+
+
+/*---------------------------------------------------*/
+/*-- Release everything owned by a compression stream: the three
+     work arrays (when allocated) and the state record itself.
+     Returns BZ_PARAM_ERROR for a NULL or inconsistent stream,
+     BZ_OK otherwise; strm->state is NULL afterwards. --*/
+int BZ_API(BZ2_bzCompressEnd) ( bz_stream *strm )
+{
+   EState* es;
+
+   if (strm == NULL) return BZ_PARAM_ERROR;
+   es = strm->state;
+   if (es == NULL || es->strm != strm) return BZ_PARAM_ERROR;
+
+   if (es->arr1 != NULL) { BZFREE(es->arr1); }
+   if (es->arr2 != NULL) { BZFREE(es->arr2); }
+   if (es->ftab != NULL) { BZFREE(es->ftab); }
+
+   BZFREE(strm->state);
+   strm->state = NULL;
+   return BZ_OK;
+}
+
+
+/*---------------------------------------------------*/
+/*--- Decompression stuff ---*/
+/*---------------------------------------------------*/
+
+/*---------------------------------------------------*/
+/*-- Prepare strm for decompression.  verbosity must lie in 0..4
+     and small must be 0 or 1, otherwise BZ_PARAM_ERROR.  Missing
+     allocator callbacks are replaced by the defaults.  Returns
+     BZ_CONFIG_ERROR if bz_config_ok() fails, BZ_MEM_ERROR if the
+     state record cannot be allocated, BZ_OK on success. --*/
+int BZ_API(BZ2_bzDecompressInit)
+                     ( bz_stream* strm,
+                       int        verbosity,
+                       int        small )
+{
+   DState* ds;
+
+   if (!bz_config_ok()) return BZ_CONFIG_ERROR;
+
+   if (strm == NULL) return BZ_PARAM_ERROR;
+   if (!(small == 0 || small == 1)) return BZ_PARAM_ERROR;
+   if (verbosity < 0 || verbosity > 4) return BZ_PARAM_ERROR;
+
+   if (strm->bzalloc == NULL) strm->bzalloc = default_bzalloc;
+   if (strm->bzfree == NULL) strm->bzfree = default_bzfree;
+
+   ds = BZALLOC( sizeof(DState) );
+   if (ds == NULL) return BZ_MEM_ERROR;
+
+   /*-- link stream and state to each other --*/
+   ds->strm    = strm;
+   strm->state = ds;
+
+   /*-- byte counters start from zero --*/
+   strm->total_in_lo32  = 0;
+   strm->total_in_hi32  = 0;
+   strm->total_out_lo32 = 0;
+   strm->total_out_hi32 = 0;
+
+   /*-- decoder starts by expecting the first magic byte --*/
+   ds->state                 = BZ_X_MAGIC_1;
+   ds->bsLive                = 0;
+   ds->bsBuff                = 0;
+   ds->calculatedCombinedCRC = 0;
+   ds->currBlockNo           = 0;
+   ds->smallDecompress       = (Bool)small;
+   ds->verbosity             = verbosity;
+
+   /*-- work arrays are not allocated here --*/
+   ds->ll4  = NULL;
+   ds->ll16 = NULL;
+   ds->tt   = NULL;
+
+   return BZ_OK;
+}
+
+
+/*---------------------------------------------------
+   unRLE_obuf_to_output_FAST: undo the initial run-length coding
+   of the current block and write the bytes to the stream's output
+   buffer, updating calculatedBlockCRC for every byte written.
+   Input bytes come from the BZ_GET_FAST / BZ_GET_FAST_C macros.
+   A run is up to three identical bytes stored literally; a fourth
+   identical byte is followed by a count byte, giving a run of
+   (count + 4).  The function stops when the output buffer is full
+   or when nblock_used reaches save_nblock+1, and can be resumed
+   later because all progress is kept in *s.
+   The randomised branch works directly on *s; the other branch
+   works on local copies (c_ and cs_ prefixed) that are written back
+   at return_notr.
+  ---------------------------------------------------*/
+/* Return True iff data corruption is discovered.
+ Returns False if there is no problem.
+*/
+static
+Bool unRLE_obuf_to_output_FAST ( DState* s )
+{
+ UChar k1;
+
+ if (s->blockRandomised) {
+
+ while (True) {
+ /* try to finish existing run */
+ while (True) {
+ if (s->strm->avail_out == 0) return False;
+ if (s->state_out_len == 0) break;
+ *( (UChar*)(s->strm->next_out) ) = s->state_out_ch;
+ BZ_UPDATE_CRC ( s->calculatedBlockCRC, s->state_out_ch );
+ s->state_out_len--;
+ s->strm->next_out++;
+ s->strm->avail_out--;
+ s->strm->total_out_lo32++;
+ if (s->strm->total_out_lo32 == 0) s->strm->total_out_hi32++;
+ }
+
+ /* can a new run be started? */
+ if (s->nblock_used == s->save_nblock+1) return False;
+
+ /* Only caused by corrupt data stream? */
+ if (s->nblock_used > s->save_nblock+1)
+ return True;
+
+ s->state_out_len = 1;
+ s->state_out_ch = s->k0;
+ BZ_GET_FAST(k1); BZ_RAND_UPD_MASK;
+ k1 ^= BZ_RAND_MASK; s->nblock_used++;
+ if (s->nblock_used == s->save_nblock+1) continue;
+ if (k1 != s->k0) { s->k0 = k1; continue; };
+
+ s->state_out_len = 2;
+ BZ_GET_FAST(k1); BZ_RAND_UPD_MASK;
+ k1 ^= BZ_RAND_MASK; s->nblock_used++;
+ if (s->nblock_used == s->save_nblock+1) continue;
+ if (k1 != s->k0) { s->k0 = k1; continue; };
+
+ s->state_out_len = 3;
+ BZ_GET_FAST(k1); BZ_RAND_UPD_MASK;
+ k1 ^= BZ_RAND_MASK; s->nblock_used++;
+ if (s->nblock_used == s->save_nblock+1) continue;
+ if (k1 != s->k0) { s->k0 = k1; continue; };
+
+ BZ_GET_FAST(k1); BZ_RAND_UPD_MASK; /* fourth equal byte seen: next byte is the extra count */
+ k1 ^= BZ_RAND_MASK; s->nblock_used++;
+ s->state_out_len = ((Int32)k1) + 4;
+ BZ_GET_FAST(s->k0); BZ_RAND_UPD_MASK;
+ s->k0 ^= BZ_RAND_MASK; s->nblock_used++;
+ }
+
+ } else {
+
+ /* restore */
+ UInt32 c_calculatedBlockCRC = s->calculatedBlockCRC;
+ UChar c_state_out_ch = s->state_out_ch;
+ Int32 c_state_out_len = s->state_out_len;
+ Int32 c_nblock_used = s->nblock_used;
+ Int32 c_k0 = s->k0;
+ UInt32* c_tt = s->tt;
+ UInt32 c_tPos = s->tPos;
+ char* cs_next_out = s->strm->next_out;
+ unsigned int cs_avail_out = s->strm->avail_out;
+ Int32 ro_blockSize100k = s->blockSize100k;
+ /* end restore */
+
+ UInt32 avail_out_INIT = cs_avail_out; /* used at return_notr to count bytes written */
+ Int32 s_save_nblockPP = s->save_nblock+1;
+ unsigned int total_out_lo32_old;
+
+ while (True) {
+
+ /* try to finish existing run */
+ if (c_state_out_len > 0) {
+ while (True) {
+ if (cs_avail_out == 0) goto return_notr;
+ if (c_state_out_len == 1) break;
+ *( (UChar*)(cs_next_out) ) = c_state_out_ch;
+ BZ_UPDATE_CRC ( c_calculatedBlockCRC, c_state_out_ch );
+ c_state_out_len--;
+ cs_next_out++;
+ cs_avail_out--;
+ }
+ s_state_out_len_eq_one: /* also jumped to directly for runs of length one */
+ {
+ if (cs_avail_out == 0) {
+ c_state_out_len = 1; goto return_notr;
+ };
+ *( (UChar*)(cs_next_out) ) = c_state_out_ch;
+ BZ_UPDATE_CRC ( c_calculatedBlockCRC, c_state_out_ch );
+ cs_next_out++;
+ cs_avail_out--;
+ }
+ }
+ /* Only caused by corrupt data stream? */
+ if (c_nblock_used > s_save_nblockPP)
+ return True; /* NOTE: returns without writing the local copies back to *s */
+
+ /* can a new run be started? */
+ if (c_nblock_used == s_save_nblockPP) {
+ c_state_out_len = 0; goto return_notr;
+ };
+ c_state_out_ch = c_k0;
+ BZ_GET_FAST_C(k1); c_nblock_used++;
+ if (k1 != c_k0) {
+ c_k0 = k1; goto s_state_out_len_eq_one;
+ };
+ if (c_nblock_used == s_save_nblockPP)
+ goto s_state_out_len_eq_one;
+
+ c_state_out_len = 2;
+ BZ_GET_FAST_C(k1); c_nblock_used++;
+ if (c_nblock_used == s_save_nblockPP) continue;
+ if (k1 != c_k0) { c_k0 = k1; continue; };
+
+ c_state_out_len = 3;
+ BZ_GET_FAST_C(k1); c_nblock_used++;
+ if (c_nblock_used == s_save_nblockPP) continue;
+ if (k1 != c_k0) { c_k0 = k1; continue; };
+
+ BZ_GET_FAST_C(k1); c_nblock_used++; /* fourth equal byte seen: next byte is the extra count */
+ c_state_out_len = ((Int32)k1) + 4;
+ BZ_GET_FAST_C(c_k0); c_nblock_used++;
+ }
+
+ return_notr:
+ total_out_lo32_old = s->strm->total_out_lo32;
+ s->strm->total_out_lo32 += (avail_out_INIT - cs_avail_out);
+ if (s->strm->total_out_lo32 < total_out_lo32_old)
+ s->strm->total_out_hi32++; /* low word wrapped: carry into the high word */
+
+ /* save */
+ s->calculatedBlockCRC = c_calculatedBlockCRC;
+ s->state_out_ch = c_state_out_ch;
+ s->state_out_len = c_state_out_len;
+ s->nblock_used = c_nblock_used;
+ s->k0 = c_k0;
+ s->tt = c_tt;
+ s->tPos = c_tPos;
+ s->strm->next_out = cs_next_out;
+ s->strm->avail_out = cs_avail_out;
+ /* end save */
+ }
+ return False;
+}
+
+
+
+/*---------------------------------------------------*/
+/*-- Bisect cftab over the index interval [0,256] and return the
+     lower bound nb once the interval has width one: each step
+     keeps the lower half when indx < cftab[mid] and the upper
+     half otherwise.  Presumably cftab is non-decreasing with 257
+     entries -- confirm against the caller. --*/
+__inline__ Int32 BZ2_indexIntoF ( Int32 indx, Int32 *cftab )
+{
+   Int32 lo = 0;
+   Int32 hi = 256;
+
+   /*-- the initial width is 256, so the loop runs at least once --*/
+   while (hi - lo != 1) {
+      const Int32 mid = (lo + hi) >> 1;
+      if (indx >= cftab[mid]) {
+         lo = mid;
+      } else {
+         hi = mid;
+      }
+   }
+   return lo;
+}
+
+
+/*---------------------------------------------------
+   unRLE_obuf_to_output_SMALL: same run-length decoding as
+   unRLE_obuf_to_output_FAST, but bytes are fetched with the
+   BZ_GET_SMALL macro and both branches operate directly on *s.
+   A run is up to three identical bytes stored literally; a fourth
+   identical byte is followed by a count byte, giving a run of
+   (count + 4).  Stops when the output buffer is full or when
+   nblock_used reaches save_nblock+1.
+  ---------------------------------------------------*/
+/* Return True iff data corruption is discovered.
+ Returns False if there is no problem.
+*/
+static
+Bool unRLE_obuf_to_output_SMALL ( DState* s )
+{
+ UChar k1;
+
+ if (s->blockRandomised) {
+
+ while (True) {
+ /* try to finish existing run */
+ while (True) {
+ if (s->strm->avail_out == 0) return False;
+ if (s->state_out_len == 0) break;
+ *( (UChar*)(s->strm->next_out) ) = s->state_out_ch;
+ BZ_UPDATE_CRC ( s->calculatedBlockCRC, s->state_out_ch );
+ s->state_out_len--;
+ s->strm->next_out++;
+ s->strm->avail_out--;
+ s->strm->total_out_lo32++;
+ if (s->strm->total_out_lo32 == 0) s->strm->total_out_hi32++;
+ }
+
+ /* can a new run be started? */
+ if (s->nblock_used == s->save_nblock+1) return False;
+
+ /* Only caused by corrupt data stream? */
+ if (s->nblock_used > s->save_nblock+1)
+ return True;
+
+ s->state_out_len = 1;
+ s->state_out_ch = s->k0;
+ BZ_GET_SMALL(k1); BZ_RAND_UPD_MASK;
+ k1 ^= BZ_RAND_MASK; s->nblock_used++;
+ if (s->nblock_used == s->save_nblock+1) continue;
+ if (k1 != s->k0) { s->k0 = k1; continue; };
+
+ s->state_out_len = 2;
+ BZ_GET_SMALL(k1); BZ_RAND_UPD_MASK;
+ k1 ^= BZ_RAND_MASK; s->nblock_used++;
+ if (s->nblock_used == s->save_nblock+1) continue;
+ if (k1 != s->k0) { s->k0 = k1; continue; };
+
+ s->state_out_len = 3;
+ BZ_GET_SMALL(k1); BZ_RAND_UPD_MASK;
+ k1 ^= BZ_RAND_MASK; s->nblock_used++;
+ if (s->nblock_used == s->save_nblock+1) continue;
+ if (k1 != s->k0) { s->k0 = k1; continue; };
+
+ BZ_GET_SMALL(k1); BZ_RAND_UPD_MASK; /* fourth equal byte seen: next byte is the extra count */
+ k1 ^= BZ_RAND_MASK; s->nblock_used++;
+ s->state_out_len = ((Int32)k1) + 4;
+ BZ_GET_SMALL(s->k0); BZ_RAND_UPD_MASK;
+ s->k0 ^= BZ_RAND_MASK; s->nblock_used++;
+ }
+
+ } else {
+
+ while (True) {
+ /* try to finish existing run */
+ while (True) {
+ if (s->strm->avail_out == 0) return False;
+ if (s->state_out_len == 0) break;
+ *( (UChar*)(s->strm->next_out) ) = s->state_out_ch;
+ BZ_UPDATE_CRC ( s->calculatedBlockCRC, s->state_out_ch );
+ s->state_out_len--;
+ s->strm->next_out++;
+ s->strm->avail_out--;
+ s->strm->total_out_lo32++;
+ if (s->strm->total_out_lo32 == 0) s->strm->total_out_hi32++;
+ }
+
+ /* can a new run be started? */
+ if (s->nblock_used == s->save_nblock+1) return False;
+
+ /* Only caused by corrupt data stream? */
+ if (s->nblock_used > s->save_nblock+1)
+ return True;
+
+ s->state_out_len = 1;
+ s->state_out_ch = s->k0;
+ BZ_GET_SMALL(k1); s->nblock_used++;
+ if (s->nblock_used == s->save_nblock+1) continue;
+ if (k1 != s->k0) { s->k0 = k1; continue; };
+
+ s->state_out_len = 2;
+ BZ_GET_SMALL(k1); s->nblock_used++;
+ if (s->nblock_used == s->save_nblock+1) continue;
+ if (k1 != s->k0) { s->k0 = k1; continue; };
+
+ s->state_out_len = 3;
+ BZ_GET_SMALL(k1); s->nblock_used++;
+ if (s->nblock_used == s->save_nblock+1) continue;
+ if (k1 != s->k0) { s->k0 = k1; continue; };
+
+ BZ_GET_SMALL(k1); s->nblock_used++; /* fourth equal byte seen: next byte is the extra count */
+ s->state_out_len = ((Int32)k1) + 4;
+ BZ_GET_SMALL(s->k0); s->nblock_used++;
+ }
+
+ }
+}
+
+
+/*---------------------------------------------------
+   BZ2_bzDecompress: public decompression entry point.  Loops
+   between two phases: while in BZ_X_OUTPUT it un-RLEs the decoded
+   block into the caller's buffer and, once the block is fully
+   emitted, checks its CRC and folds it into the combined CRC;
+   otherwise it calls BZ2_decompress to decode more of the stream.
+   Returns BZ_PARAM_ERROR for a NULL/inconsistent stream,
+   BZ_SEQUENCE_ERROR when the state is BZ_X_IDLE, BZ_DATA_ERROR on
+   detected corruption or a CRC mismatch, BZ_OK when the block is
+   not yet fully emitted, BZ_STREAM_END when the stream ended and
+   the combined CRC matches, or whatever BZ2_decompress returned.
+  ---------------------------------------------------*/
+int BZ_API(BZ2_bzDecompress) ( bz_stream *strm )
+{
+ Bool corrupt;
+ DState* s;
+ if (strm == NULL) return BZ_PARAM_ERROR;
+ s = strm->state;
+ if (s == NULL) return BZ_PARAM_ERROR;
+ if (s->strm != strm) return BZ_PARAM_ERROR;
+
+ while (True) {
+ if (s->state == BZ_X_IDLE) return BZ_SEQUENCE_ERROR;
+ if (s->state == BZ_X_OUTPUT) {
+ if (s->smallDecompress)
+ corrupt = unRLE_obuf_to_output_SMALL ( s ); else
+ corrupt = unRLE_obuf_to_output_FAST ( s );
+ if (corrupt) return BZ_DATA_ERROR;
+ if (s->nblock_used == s->save_nblock+1 && s->state_out_len == 0) { /* whole block has been emitted */
+ BZ_FINALISE_CRC ( s->calculatedBlockCRC );
+ if (s->verbosity >= 3)
+ VPrintf2 ( " {0x%08x, 0x%08x}", s->storedBlockCRC,
+ s->calculatedBlockCRC );
+ if (s->verbosity >= 2) VPrintf0 ( "]" );
+ if (s->calculatedBlockCRC != s->storedBlockCRC)
+ return BZ_DATA_ERROR;
+ s->calculatedCombinedCRC
+ = (s->calculatedCombinedCRC << 1) |
+ (s->calculatedCombinedCRC >> 31); /* rotate left by one bit */
+ s->calculatedCombinedCRC ^= s->calculatedBlockCRC;
+ s->state = BZ_X_BLKHDR_1;
+ } else {
+ return BZ_OK; /* block not finished: unRLE stopped early */
+ }
+ }
+ if (s->state >= BZ_X_MAGIC_1) {
+ Int32 r = BZ2_decompress ( s );
+ if (r == BZ_STREAM_END) {
+ if (s->verbosity >= 3)
+ VPrintf2 ( "\n combined CRCs: stored = 0x%08x, computed = 0x%08x",
+ s->storedCombinedCRC, s->calculatedCombinedCRC );
+ if (s->calculatedCombinedCRC != s->storedCombinedCRC)
+ return BZ_DATA_ERROR;
+ return r;
+ }
+ if (s->state != BZ_X_OUTPUT) return r; /* no block ready to emit: report the decoder's status */
+ }
+ }
+
+ AssertH ( 0, 6001 );
+
+ return 0; /*NOTREACHED*/
+}
+
+
+/*---------------------------------------------------*/
+/*-- Release everything owned by a decompression stream: the
+     tt, ll16 and ll4 arrays (when allocated) and the state
+     record itself.  Returns BZ_PARAM_ERROR for a NULL or
+     inconsistent stream, BZ_OK otherwise; strm->state is NULL
+     afterwards. --*/
+int BZ_API(BZ2_bzDecompressEnd) ( bz_stream *strm )
+{
+   DState* ds;
+
+   if (strm == NULL) return BZ_PARAM_ERROR;
+   ds = strm->state;
+   if (ds == NULL || ds->strm != strm) return BZ_PARAM_ERROR;
+
+   if (ds->tt   != NULL) { BZFREE(ds->tt); }
+   if (ds->ll16 != NULL) { BZFREE(ds->ll16); }
+   if (ds->ll4  != NULL) { BZFREE(ds->ll4); }
+
+   BZFREE(strm->state);
+   strm->state = NULL;
+   return BZ_OK;
+}
+
+
+#ifndef BZ_NO_STDIO
+/*---------------------------------------------------*/
+/*--- File I/O stuff ---*/
+/*---------------------------------------------------
+   BZ_SETERR(eee): record error code eee both in the caller's
+   *bzerror and in bzf->lastErr, skipping whichever pointer is
+   NULL.  The macro refers to the identifiers bzerror and bzf by
+   name, so it can only be used in functions that declare both.
+  ---------------------------------------------------*/
+
+#define BZ_SETERR(eee) \
+{ \
+ if (bzerror != NULL) *bzerror = eee; \
+ if (bzf != NULL) bzf->lastErr = eee; \
+}
+
+typedef
+ struct { /* per-handle state behind the opaque BZFILE pointer */
+ FILE* handle; /* underlying stdio stream; not closed by the Read/WriteClose functions */
+ Char buf[BZ_MAX_UNUSED]; /* staging buffer between the stdio stream and strm */
+ Int32 bufN; /* number of bytes placed in buf for the decompressor */
+ Bool writing; /* True for handles from BZ2_bzWriteOpen, False for BZ2_bzReadOpen */
+ bz_stream strm; /* the low-level compression/decompression stream */
+ Int32 lastErr; /* last code stored by BZ_SETERR */
+ Bool initialisedOk; /* set True once the open function has fully succeeded */
+ }
+ bzFile;
+
+
+/*---------------------------------------------*/
+/*-- Peek one character: returns True if fgetc yields EOF,
+     otherwise pushes the character back and returns False. --*/
+static Bool myfeof ( FILE* f )
+{
+   const Int32 peeked = fgetc ( f );
+
+   if (peeked != EOF) {
+      ungetc ( peeked, f );
+      return False;
+   }
+   return True;
+}
+
+
+/*---------------------------------------------------*/
+/*-- Create a write handle that compresses onto the already open
+     stdio stream f.  Accepts blockSize100k in 1..9, workFactor in
+     0..250 (0 selects 30) and verbosity in 0..4.  On failure
+     returns NULL with the reason stored through bzerror:
+     BZ_PARAM_ERROR, BZ_IO_ERROR, BZ_MEM_ERROR, or the code
+     returned by BZ2_bzCompressInit. --*/
+BZFILE* BZ_API(BZ2_bzWriteOpen)
+                    ( int*  bzerror,
+                      FILE* f,
+                      int   blockSize100k,
+                      int   verbosity,
+                      int   workFactor )
+{
+   bzFile* bzf = NULL;   /* name is required by BZ_SETERR */
+   Int32   initStatus;
+
+   BZ_SETERR(BZ_OK);
+
+   if (f == NULL ||
+       blockSize100k < 1 || blockSize100k > 9 ||
+       workFactor < 0 || workFactor > 250 ||
+       verbosity < 0 || verbosity > 4) {
+      BZ_SETERR(BZ_PARAM_ERROR);
+      return NULL;
+   }
+
+   if (ferror(f)) {
+      BZ_SETERR(BZ_IO_ERROR);
+      return NULL;
+   }
+
+   bzf = malloc ( sizeof(bzFile) );
+   if (bzf == NULL) {
+      BZ_SETERR(BZ_MEM_ERROR);
+      return NULL;
+   }
+
+   /*-- bzf now exists, so this also initialises bzf->lastErr --*/
+   BZ_SETERR(BZ_OK);
+   bzf->handle        = f;
+   bzf->writing       = True;
+   bzf->bufN          = 0;
+   bzf->initialisedOk = False;
+   bzf->strm.bzalloc  = NULL;
+   bzf->strm.bzfree   = NULL;
+   bzf->strm.opaque   = NULL;
+
+   if (workFactor == 0) workFactor = 30;
+   initStatus = BZ2_bzCompressInit ( &(bzf->strm), blockSize100k,
+                                     verbosity, workFactor );
+   if (initStatus != BZ_OK) {
+      BZ_SETERR(initStatus);
+      free(bzf);
+      return NULL;
+   }
+
+   bzf->strm.avail_in = 0;
+   bzf->initialisedOk = True;
+   return bzf;
+}
+
+
+
+/*---------------------------------------------------*/
+/*-- Compress len bytes from buf and write whatever compressed
+     data becomes available to the handle's stdio stream.  The
+     outcome is stored through bzerror: BZ_PARAM_ERROR for bad
+     arguments, BZ_SEQUENCE_ERROR for a read handle, BZ_IO_ERROR
+     on a stream error or short write, the BZ2_bzCompress code if
+     it is not BZ_RUN_OK, and BZ_OK once all input is consumed. --*/
+void BZ_API(BZ2_bzWrite)
+             ( int*    bzerror,
+               BZFILE* b,
+               void*   buf,
+               int     len )
+{
+   Int32   nPending, nWritten, status;
+   bzFile* bzf = (bzFile*)b;   /* name is required by BZ_SETERR */
+
+   BZ_SETERR(BZ_OK);
+
+   if (bzf == NULL || buf == NULL || len < 0) {
+      BZ_SETERR(BZ_PARAM_ERROR);
+      return;
+   }
+   if (!(bzf->writing)) {
+      BZ_SETERR(BZ_SEQUENCE_ERROR);
+      return;
+   }
+   if (ferror(bzf->handle)) {
+      BZ_SETERR(BZ_IO_ERROR);
+      return;
+   }
+   if (len == 0) {
+      BZ_SETERR(BZ_OK);
+      return;
+   }
+
+   bzf->strm.avail_in = len;
+   bzf->strm.next_in  = buf;
+
+   do {
+      /*-- hand the whole staging buffer to the compressor --*/
+      bzf->strm.avail_out = BZ_MAX_UNUSED;
+      bzf->strm.next_out  = bzf->buf;
+      status = BZ2_bzCompress ( &(bzf->strm), BZ_RUN );
+      if (status != BZ_RUN_OK) {
+         BZ_SETERR(status);
+         return;
+      }
+
+      /*-- write out whatever it produced --*/
+      if (bzf->strm.avail_out < BZ_MAX_UNUSED) {
+         nPending = BZ_MAX_UNUSED - bzf->strm.avail_out;
+         nWritten = fwrite ( (void*)(bzf->buf), sizeof(UChar),
+                             nPending, bzf->handle );
+         if (nPending != nWritten || ferror(bzf->handle)) {
+            BZ_SETERR(BZ_IO_ERROR);
+            return;
+         }
+      }
+   } while (bzf->strm.avail_in != 0);
+
+   BZ_SETERR(BZ_OK);
+}
+
+
+/*---------------------------------------------------
+   BZ2_bzWriteClose: convenience form of BZ2_bzWriteClose64 that
+   reports only the low 32 bits of the byte counts.  All arguments
+   are forwarded unchanged and NULL is passed for the two
+   high-word outputs.
+  ---------------------------------------------------*/
+void BZ_API(BZ2_bzWriteClose)
+ ( int* bzerror,
+ BZFILE* b,
+ int abandon,
+ unsigned int* nbytes_in,
+ unsigned int* nbytes_out )
+{
+ BZ2_bzWriteClose64 ( bzerror, b, abandon,
+ nbytes_in, NULL, nbytes_out, NULL );
+}
+
+
+void BZ_API(BZ2_bzWriteClose64)
+ ( int* bzerror,
+ BZFILE* b,
+ int abandon,
+ unsigned int* nbytes_in_lo32,
+ unsigned int* nbytes_in_hi32,
+ unsigned int* nbytes_out_lo32,
+ unsigned int* nbytes_out_hi32 )
+{ /* Close a write handle.  Unless abandon is non-zero or lastErr
+     already holds an error, the compressor is run with BZ_FINISH
+     until BZ_STREAM_END and its output is written to the stdio
+     stream, which is then flushed.  On success the 64-bit in/out
+     byte counts are stored through the non-NULL output pointers,
+     the compressor is ended and the handle is freed.
+     NOTE: every error return below leaves the handle allocated;
+     BZ2_bzclose relies on this and calls again with abandon=1.
+     The underlying FILE is never closed here. */
+ Int32 n, n2, ret;
+ bzFile* bzf = (bzFile*)b;
+
+ if (bzf == NULL)
+ { BZ_SETERR(BZ_OK); return; };
+ if (!(bzf->writing))
+ { BZ_SETERR(BZ_SEQUENCE_ERROR); return; };
+ if (ferror(bzf->handle))
+ { BZ_SETERR(BZ_IO_ERROR); return; };
+
+ if (nbytes_in_lo32 != NULL) *nbytes_in_lo32 = 0;
+ if (nbytes_in_hi32 != NULL) *nbytes_in_hi32 = 0;
+ if (nbytes_out_lo32 != NULL) *nbytes_out_lo32 = 0;
+ if (nbytes_out_hi32 != NULL) *nbytes_out_hi32 = 0;
+
+ if ((!abandon) && bzf->lastErr == BZ_OK) {
+ while (True) {
+ bzf->strm.avail_out = BZ_MAX_UNUSED;
+ bzf->strm.next_out = bzf->buf;
+ ret = BZ2_bzCompress ( &(bzf->strm), BZ_FINISH );
+ if (ret != BZ_FINISH_OK && ret != BZ_STREAM_END)
+ { BZ_SETERR(ret); return; };
+
+ if (bzf->strm.avail_out < BZ_MAX_UNUSED) {
+ n = BZ_MAX_UNUSED - bzf->strm.avail_out; /* bytes produced by this call */
+ n2 = fwrite ( (void*)(bzf->buf), sizeof(UChar),
+ n, bzf->handle );
+ if (n != n2 || ferror(bzf->handle))
+ { BZ_SETERR(BZ_IO_ERROR); return; };
+ }
+
+ if (ret == BZ_STREAM_END) break;
+ }
+ }
+
+ if ( !abandon && !ferror ( bzf->handle ) ) {
+ fflush ( bzf->handle );
+ if (ferror(bzf->handle))
+ { BZ_SETERR(BZ_IO_ERROR); return; };
+ }
+
+ if (nbytes_in_lo32 != NULL)
+ *nbytes_in_lo32 = bzf->strm.total_in_lo32;
+ if (nbytes_in_hi32 != NULL)
+ *nbytes_in_hi32 = bzf->strm.total_in_hi32;
+ if (nbytes_out_lo32 != NULL)
+ *nbytes_out_lo32 = bzf->strm.total_out_lo32;
+ if (nbytes_out_hi32 != NULL)
+ *nbytes_out_hi32 = bzf->strm.total_out_hi32;
+
+ BZ_SETERR(BZ_OK); /* must precede free(bzf): the macro writes bzf->lastErr */
+ BZ2_bzCompressEnd ( &(bzf->strm) );
+ free ( bzf );
+}
+
+
+/*---------------------------------------------------*/
+/*-- Create a read handle that decompresses from the already
+     open stdio stream f.  Up to BZ_MAX_UNUSED bytes at unused
+     are copied into the handle's buffer and fed to the
+     decompressor before anything is read from f.  On failure
+     returns NULL with the reason stored through bzerror:
+     BZ_PARAM_ERROR, BZ_IO_ERROR, BZ_MEM_ERROR, or the code
+     returned by BZ2_bzDecompressInit. --*/
+BZFILE* BZ_API(BZ2_bzReadOpen)
+                   ( int*  bzerror,
+                     FILE* f,
+                     int   verbosity,
+                     int   small,
+                     void* unused,
+                     int   nUnused )
+{
+   bzFile* bzf = NULL;   /* name is required by BZ_SETERR */
+   int     initStatus;
+   int     k;
+
+   BZ_SETERR(BZ_OK);
+
+   if (f == NULL ||
+       (small != 0 && small != 1) ||
+       (verbosity < 0 || verbosity > 4) ||
+       (unused == NULL && nUnused != 0) ||
+       (unused != NULL && (nUnused < 0 || nUnused > BZ_MAX_UNUSED))) {
+      BZ_SETERR(BZ_PARAM_ERROR);
+      return NULL;
+   }
+
+   if (ferror(f)) {
+      BZ_SETERR(BZ_IO_ERROR);
+      return NULL;
+   }
+
+   bzf = malloc ( sizeof(bzFile) );
+   if (bzf == NULL) {
+      BZ_SETERR(BZ_MEM_ERROR);
+      return NULL;
+   }
+
+   /*-- bzf now exists, so this also initialises bzf->lastErr --*/
+   BZ_SETERR(BZ_OK);
+
+   bzf->handle        = f;
+   bzf->writing       = False;
+   bzf->bufN          = 0;
+   bzf->initialisedOk = False;
+   bzf->strm.bzalloc  = NULL;
+   bzf->strm.bzfree   = NULL;
+   bzf->strm.opaque   = NULL;
+
+   /*-- seed the buffer with the caller's left-over bytes --*/
+   for (k = 0; k < nUnused; k++) {
+      bzf->buf[bzf->bufN] = ((UChar*)(unused))[k];
+      bzf->bufN++;
+   }
+
+   initStatus = BZ2_bzDecompressInit ( &(bzf->strm), verbosity, small );
+   if (initStatus != BZ_OK) {
+      BZ_SETERR(initStatus);
+      free(bzf);
+      return NULL;
+   }
+
+   bzf->strm.avail_in = bzf->bufN;
+   bzf->strm.next_in  = bzf->buf;
+
+   bzf->initialisedOk = True;
+   return bzf;
+}
+
+
+/*---------------------------------------------------*/
+/*-- Destroy a read handle: end the decompressor if the handle
+     was fully initialised, then free the handle.  A NULL handle
+     is accepted and reports BZ_OK; a write handle reports
+     BZ_SEQUENCE_ERROR and is left untouched.  The underlying
+     FILE is not closed. --*/
+void BZ_API(BZ2_bzReadClose) ( int *bzerror, BZFILE *b )
+{
+   bzFile* bzf = (bzFile*)b;   /* name is required by BZ_SETERR */
+
+   BZ_SETERR(BZ_OK);
+   if (bzf == NULL) {
+      return;
+   }
+
+   if (bzf->writing) {
+      BZ_SETERR(BZ_SEQUENCE_ERROR);
+      return;
+   }
+
+   if (bzf->initialisedOk) {
+      (void)BZ2_bzDecompressEnd ( &(bzf->strm) );
+   }
+   free ( bzf );
+}
+
+
+/*---------------------------------------------------
+   BZ2_bzRead: decompress up to len bytes into buf, refilling the
+   handle's input buffer from its stdio stream as needed.
+   Returns the number of bytes stored in buf: len with BZ_OK when
+   the request was filled, a possibly smaller count with
+   BZ_STREAM_END when the compressed stream ended, and 0 with an
+   error code in every other case (BZ_PARAM_ERROR,
+   BZ_SEQUENCE_ERROR for a write handle, BZ_IO_ERROR,
+   BZ_UNEXPECTED_EOF, or the code from BZ2_bzDecompress).
+  ---------------------------------------------------*/
+int BZ_API(BZ2_bzRead)
+ ( int* bzerror,
+ BZFILE* b,
+ void* buf,
+ int len )
+{
+ Int32 n, ret;
+ bzFile* bzf = (bzFile*)b;
+
+ BZ_SETERR(BZ_OK);
+
+ if (bzf == NULL || buf == NULL || len < 0)
+ { BZ_SETERR(BZ_PARAM_ERROR); return 0; };
+
+ if (bzf->writing)
+ { BZ_SETERR(BZ_SEQUENCE_ERROR); return 0; };
+
+ if (len == 0)
+ { BZ_SETERR(BZ_OK); return 0; };
+
+ bzf->strm.avail_out = len;
+ bzf->strm.next_out = buf;
+
+ while (True) {
+
+ if (ferror(bzf->handle))
+ { BZ_SETERR(BZ_IO_ERROR); return 0; };
+
+ if (bzf->strm.avail_in == 0 && !myfeof(bzf->handle)) { /* input buffer drained and file has more */
+ n = fread ( bzf->buf, sizeof(UChar),
+ BZ_MAX_UNUSED, bzf->handle );
+ if (ferror(bzf->handle))
+ { BZ_SETERR(BZ_IO_ERROR); return 0; };
+ bzf->bufN = n;
+ bzf->strm.avail_in = bzf->bufN;
+ bzf->strm.next_in = bzf->buf;
+ }
+
+ ret = BZ2_bzDecompress ( &(bzf->strm) );
+
+ if (ret != BZ_OK && ret != BZ_STREAM_END)
+ { BZ_SETERR(ret); return 0; };
+
+ if (ret == BZ_OK && myfeof(bzf->handle) &&
+ bzf->strm.avail_in == 0 && bzf->strm.avail_out > 0)
+ { BZ_SETERR(BZ_UNEXPECTED_EOF); return 0; }; /* file ended before the compressed stream did */
+
+ if (ret == BZ_STREAM_END)
+ { BZ_SETERR(BZ_STREAM_END);
+ return len - bzf->strm.avail_out; };
+ if (bzf->strm.avail_out == 0)
+ { BZ_SETERR(BZ_OK); return len; };
+
+ }
+
+ return 0; /*not reached*/
+}
+
+
+/*---------------------------------------------------*/
+/*-- After a read handle has reported BZ_STREAM_END, hand back
+     the input bytes the decompressor did not consume: *unused
+     points at them and *nUnused is their count.  Reports
+     BZ_PARAM_ERROR for NULL arguments and BZ_SEQUENCE_ERROR if
+     the handle's last status is not BZ_STREAM_END. --*/
+void BZ_API(BZ2_bzReadGetUnused)
+                     ( int*    bzerror,
+                       BZFILE* b,
+                       void**  unused,
+                       int*    nUnused )
+{
+   bzFile* bzf = (bzFile*)b;   /* name is required by BZ_SETERR */
+
+   if (bzf == NULL) {
+      BZ_SETERR(BZ_PARAM_ERROR);
+      return;
+   }
+   if (bzf->lastErr != BZ_STREAM_END) {
+      BZ_SETERR(BZ_SEQUENCE_ERROR);
+      return;
+   }
+   if (unused == NULL || nUnused == NULL) {
+      BZ_SETERR(BZ_PARAM_ERROR);
+      return;
+   }
+
+   BZ_SETERR(BZ_OK);
+   *unused  = bzf->strm.next_in;
+   *nUnused = bzf->strm.avail_in;
+}
+#endif
+
+
+/*---------------------------------------------------*/
+/*--- Misc convenience stuff ---*/
+/*---------------------------------------------------*/
+
+/*---------------------------------------------------*/
+/*-- One-shot compression of source[0..sourceLen) into dest.
+     On entry *destLen is the capacity of dest; on BZ_OK it is
+     reduced to the number of bytes produced.  Returns
+     BZ_PARAM_ERROR for bad arguments, the BZ2_bzCompressInit
+     code if initialisation fails, BZ_OUTBUFF_FULL if dest is too
+     small, or any other code returned by BZ2_bzCompress.
+     A workFactor of 0 selects 30. --*/
+int BZ_API(BZ2_bzBuffToBuffCompress)
+                         ( char*         dest,
+                           unsigned int* destLen,
+                           char*         source,
+                           unsigned int  sourceLen,
+                           int           blockSize100k,
+                           int           verbosity,
+                           int           workFactor )
+{
+   bz_stream strm;
+   int       status;
+
+   if (dest == NULL || destLen == NULL ||
+       source == NULL ||
+       blockSize100k < 1 || blockSize100k > 9 ||
+       verbosity < 0 || verbosity > 4 ||
+       workFactor < 0 || workFactor > 250)
+      return BZ_PARAM_ERROR;
+
+   if (workFactor == 0) workFactor = 30;
+
+   strm.bzalloc = NULL;
+   strm.bzfree  = NULL;
+   strm.opaque  = NULL;
+   status = BZ2_bzCompressInit ( &strm, blockSize100k,
+                                 verbosity, workFactor );
+   if (status != BZ_OK) return status;
+
+   strm.next_in   = source;
+   strm.avail_in  = sourceLen;
+   strm.next_out  = dest;
+   strm.avail_out = *destLen;
+
+   status = BZ2_bzCompress ( &strm, BZ_FINISH );
+   if (status == BZ_STREAM_END) {
+      /* normal termination */
+      *destLen -= strm.avail_out;
+      status = BZ_OK;
+   } else if (status == BZ_FINISH_OK) {
+      /* compressor still has output pending: dest was too small */
+      status = BZ_OUTBUFF_FULL;
+   }
+
+   /* the stream is torn down on every path */
+   BZ2_bzCompressEnd ( &strm );
+   return status;
+}
+
+
+/*---------------------------------------------------*/
+/*-- One-shot decompression of source[0..sourceLen) into dest.
+     On entry *destLen is the capacity of dest; on BZ_OK it is
+     reduced to the number of bytes produced.  Returns
+     BZ_PARAM_ERROR for bad arguments, the BZ2_bzDecompressInit
+     code if initialisation fails, BZ_OUTBUFF_FULL if dest filled
+     up before the stream ended, BZ_UNEXPECTED_EOF if the input
+     ran out first, or any other code from BZ2_bzDecompress. --*/
+int BZ_API(BZ2_bzBuffToBuffDecompress)
+                           ( char*         dest,
+                             unsigned int* destLen,
+                             char*         source,
+                             unsigned int  sourceLen,
+                             int           small,
+                             int           verbosity )
+{
+   bz_stream strm;
+   int       status;
+
+   if (dest == NULL || destLen == NULL ||
+       source == NULL ||
+       (small != 0 && small != 1) ||
+       verbosity < 0 || verbosity > 4)
+      return BZ_PARAM_ERROR;
+
+   strm.bzalloc = NULL;
+   strm.bzfree  = NULL;
+   strm.opaque  = NULL;
+   status = BZ2_bzDecompressInit ( &strm, verbosity, small );
+   if (status != BZ_OK) return status;
+
+   strm.next_in   = source;
+   strm.avail_in  = sourceLen;
+   strm.next_out  = dest;
+   strm.avail_out = *destLen;
+
+   status = BZ2_bzDecompress ( &strm );
+   if (status == BZ_STREAM_END) {
+      /* normal termination */
+      *destLen -= strm.avail_out;
+      status = BZ_OK;
+   } else if (status == BZ_OK) {
+      /* stopped early: room left in dest means the input ran out */
+      status = (strm.avail_out > 0) ? BZ_UNEXPECTED_EOF
+                                    : BZ_OUTBUFF_FULL;
+   }
+
+   /* the stream is torn down on every path */
+   BZ2_bzDecompressEnd ( &strm );
+   return status;
+}
+
+
+/*---------------------------------------------------*/
+/*--
+ Code contributed by Yoshioka Tsuneo (tsuneo@rr.iij4u.or.jp)
+ to support better zlib compatibility.
+ This code is not _officially_ part of libbzip2 (yet);
+ I haven't tested it, documented it, or considered the
+ threading-safeness of it.
+ If this code breaks, please contact both Yoshioka and me.
+--*/
+/*---------------------------------------------------*/
+
+/*---------------------------------------------------*/
+/*--
+ Return the library version string (the BZ_VERSION macro),
+ formatted like "0.9.5d, 4-Sept-1999".
+--*/
+const char * BZ_API(BZ2_bzlibVersion)(void)
+{
+ return BZ_VERSION;
+}
+
+
+#ifndef BZ_NO_STDIO
+/*---------------------------------------------------*/
+
+#if defined(_WIN32) || defined(OS2) || defined(MSDOS)
+# include <fcntl.h>
+# include <io.h>
+# define SET_BINARY_MODE(file) setmode(fileno(file),O_BINARY)
+#else
+# define SET_BINARY_MODE(file)
+#endif
+/*-- Shared implementation of BZ2_bzopen and BZ2_bzdopen.
+     mode is scanned character by character: 'r' selects reading,
+     'w' writing, 's' the small decompressor, and a digit sets the
+     compression block size (clamped to 1..9 when writing); other
+     characters are ignored.  The stdio stream is always opened in
+     binary mode.  Returns NULL if mode is NULL, if the stdio
+     stream cannot be opened, or if the bzip2 handle cannot be
+     created (in which case the stream is closed again unless it
+     is stdin/stdout). --*/
+static
+BZFILE * bzopen_or_bzdopen
+               ( const char *path,   /* used only by bzopen  (open_mode 0) */
+                 int fd,             /* used only by bzdopen (open_mode 1) */
+                 const char *mode,
+                 int open_mode)      /* bzopen: 0, bzdopen:1 */
+{
+   int    bzerr;
+   char   unused[BZ_MAX_UNUSED];
+   int    blockSize100k = 9;
+   int    writing       = 0;
+   char   mode2[10]     = "";
+   FILE   *fp           = NULL;
+   BZFILE *bzfp         = NULL;
+   int    verbosity     = 0;
+   int    workFactor    = 30;
+   int    smallMode     = 0;
+   int    nUnused       = 0;
+
+   if (mode == NULL) return NULL;
+   while (*mode) {
+      switch (*mode) {
+      case 'r':
+         writing = 0; break;
+      case 'w':
+         writing = 1; break;
+      case 's':
+         smallMode = 1; break;
+      default:
+         /* <ctype.h> functions are only defined for values
+            representable as unsigned char (or EOF); passing a
+            plain char with the high bit set is undefined. */
+         if (isdigit((unsigned char)(*mode))) {
+            blockSize100k = *mode-BZ_HDR_0;
+         }
+         break;
+      }
+      mode++;
+   }
+   strcat(mode2, writing ? "w" : "r" );
+   strcat(mode2,"b");   /* binary mode */
+
+   if (open_mode==0) {
+      if (path==NULL || strcmp(path,"")==0) {
+        /* empty or missing path: use the standard streams */
+        fp = (writing ? stdout : stdin);
+        SET_BINARY_MODE(fp);
+      } else {
+        fp = fopen(path,mode2);
+      }
+   } else {
+#ifdef BZ_STRICT_ANSI
+      fp = NULL;
+#else
+      fp = fdopen(fd,mode2);
+#endif
+   }
+   if (fp == NULL) return NULL;
+
+   if (writing) {
+      /* Guard against total chaos and anarchy -- JRS */
+      if (blockSize100k < 1) blockSize100k = 1;
+      if (blockSize100k > 9) blockSize100k = 9;
+      bzfp = BZ2_bzWriteOpen(&bzerr,fp,blockSize100k,
+                             verbosity,workFactor);
+   } else {
+      bzfp = BZ2_bzReadOpen(&bzerr,fp,verbosity,smallMode,
+                            unused,nUnused);
+   }
+   if (bzfp == NULL) {
+      if (fp != stdin && fp != stdout) fclose(fp);
+      return NULL;
+   }
+   return bzfp;
+}
+
+
+/*---------------------------------------------------*/
+/*--
+ Open a file by name for reading or writing.
+ Example: bzopen("file","w9").
+ If path is "" or NULL, stdin or stdout is used instead.
+ Returns NULL on failure.
+--*/
+BZFILE * BZ_API(BZ2_bzopen)
+ ( const char *path,
+ const char *mode )
+{
+ return bzopen_or_bzdopen(path,-1,mode,/*bzopen*/0);
+}
+
+
+/*---------------------------------------------------
+   BZ2_bzdopen: like BZ2_bzopen, but wraps an already open file
+   descriptor fd instead of opening a path.  Returns NULL on
+   failure.
+  ---------------------------------------------------*/
+BZFILE * BZ_API(BZ2_bzdopen)
+ ( int fd,
+ const char *mode )
+{
+ return bzopen_or_bzdopen(NULL,fd,mode,/*bzdopen*/1);
+}
+
+
+/*---------------------------------------------------*/
+/*-- zlib-style read: decompress up to len bytes into buf.
+     Returns the number of bytes read, 0 once the handle has
+     already reported end of stream, or -1 on any error
+     (including a NULL handle). --*/
+int BZ_API(BZ2_bzread) (BZFILE* b, void* buf, int len )
+{
+   int bzerr, nread;
+
+   /* lastErr is read before BZ2_bzRead gets a chance to reject a
+      NULL handle, so the check has to happen here */
+   if (b == NULL) return -1;
+
+   if (((bzFile*)b)->lastErr == BZ_STREAM_END) return 0;
+   nread = BZ2_bzRead(&bzerr,b,buf,len);
+   if (bzerr == BZ_OK || bzerr == BZ_STREAM_END) {
+      return nread;
+   } else {
+      return -1;
+   }
+}
+
+
+/*---------------------------------------------------*/
+/*-- zlib-style write: compress len bytes from buf via
+     BZ2_bzWrite.  Returns len on success and -1 on any error. --*/
+int BZ_API(BZ2_bzwrite) (BZFILE* b, void* buf, int len )
+{
+   int status;
+
+   BZ2_bzWrite(&status,b,buf,len);
+   return (status == BZ_OK) ? len : -1;
+}
+
+
+/*---------------------------------------------------
+   BZ2_bzflush: placeholder kept for zlib-style API parity.  The
+   handle b is ignored, nothing is flushed, and 0 is always
+   returned.
+  ---------------------------------------------------*/
+int BZ_API(BZ2_bzflush) (BZFILE *b)
+{
+ /* do nothing now... */
+ return 0;
+}
+
+
+/*---------------------------------------------------*/
+/*-- zlib-style close: finish and free the bzip2 handle, then
+     close the underlying stdio stream unless it is stdin or
+     stdout.  A NULL handle is ignored.  For write handles a
+     failed normal close is followed by an abandoning close so
+     that the handle is always released. --*/
+void BZ_API(BZ2_bzclose) (BZFILE* b)
+{
+   bzFile* handle = (bzFile*)b;
+   FILE*   stream;
+   int     status;
+
+   if (handle == NULL) return;
+
+   /* fetch the stream first: the handle is freed below */
+   stream = handle->handle;
+
+   if (!handle->writing) {
+      BZ2_bzReadClose(&status,b);
+   } else {
+      BZ2_bzWriteClose(&status,b,0,NULL,NULL);
+      if (status != BZ_OK) {
+         BZ2_bzWriteClose(NULL,b,1,NULL,NULL);
+      }
+   }
+
+   if (stream != stdin && stream != stdout) {
+      fclose(stream);
+   }
+}
+
+
+/*---------------------------------------------------*/
+/*--
+ Names of the error codes, indexed by the negated code
+ (entry 0 is "OK"); used by BZ2_bzerror to report the
+ last error of a handle.
+--*/
+static const char *bzerrorstrings[] = {
+ "OK"
+ ,"SEQUENCE_ERROR"
+ ,"PARAM_ERROR"
+ ,"MEM_ERROR"
+ ,"DATA_ERROR"
+ ,"DATA_ERROR_MAGIC"
+ ,"IO_ERROR"
+ ,"UNEXPECTED_EOF"
+ ,"OUTBUFF_FULL"
+ ,"CONFIG_ERROR"
+ ,"???" /* for future */
+ ,"???" /* for future */
+ ,"???" /* for future */
+ ,"???" /* for future */
+ ,"???" /* for future */
+ ,"???" /* for future */
+};
+
+
+const char * BZ_API(BZ2_bzerror) (BZFILE *b, int *errnum)
+{
+ int err = ((bzFile *)b)->lastErr;
+
+ if(err>0) err = 0;
+ *errnum = err;
+ return bzerrorstrings[err*-1];
+}
+#endif
+
+
+/*-------------------------------------------------------------*/
+/*--- end bzlib.c ---*/
+/*-------------------------------------------------------------*/
diff --git a/src/misc/bzlib/bzlib.h b/src/misc/bzlib/bzlib.h
new file mode 100644
index 00000000..798f248c
--- /dev/null
+++ b/src/misc/bzlib/bzlib.h
@@ -0,0 +1,286 @@
+
+/*-------------------------------------------------------------*/
+/*--- Public header file for the library. ---*/
+/*--- bzlib.h ---*/
+/*-------------------------------------------------------------*/
+
+/* ------------------------------------------------------------------
+ This file is part of bzip2/libbzip2, a program and library for
+ lossless, block-sorting data compression.
+
+ bzip2/libbzip2 version 1.0.5 of 10 December 2007
+ Copyright (C) 1996-2007 Julian Seward <jseward@bzip.org>
+
+ Please read the WARNING, DISCLAIMER and PATENTS sections in the
+ README file.
+
+ This program is released under the terms of the license contained
+ in the file LICENSE.
+ ------------------------------------------------------------------ */
+
+//#define O_BINARY 0
+#ifdef __STDC__
+#undef __STDC__
+#endif
+
+#ifndef _BZLIB_H
+#define _BZLIB_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define BZ_RUN 0
+#define BZ_FLUSH 1
+#define BZ_FINISH 2
+
+#define BZ_OK 0
+#define BZ_RUN_OK 1
+#define BZ_FLUSH_OK 2
+#define BZ_FINISH_OK 3
+#define BZ_STREAM_END 4
+#define BZ_SEQUENCE_ERROR (-1)
+#define BZ_PARAM_ERROR (-2)
+#define BZ_MEM_ERROR (-3)
+#define BZ_DATA_ERROR (-4)
+#define BZ_DATA_ERROR_MAGIC (-5)
+#define BZ_IO_ERROR (-6)
+#define BZ_UNEXPECTED_EOF (-7)
+#define BZ_OUTBUFF_FULL (-8)
+#define BZ_CONFIG_ERROR (-9)
+
+/*
+ Stream descriptor taken by the core BZ2_bzCompress... and
+ BZ2_bzDecompress... functions declared further down in this header.
+*/
+typedef
+ struct {
+ /* NOTE(review): presumably the input cursor, the bytes available at it,
+ and a running input byte count split into low/high 32-bit halves --
+ confirm against the library manual. */
+ char *next_in;
+ unsigned int avail_in;
+ unsigned int total_in_lo32;
+ unsigned int total_in_hi32;
+
+ /* NOTE(review): presumably the same four quantities for the output side. */
+ char *next_out;
+ unsigned int avail_out;
+ unsigned int total_out_lo32;
+ unsigned int total_out_hi32;
+
+ /* opaque to callers; presumably the library's private per-stream state */
+ void *state;
+
+ /* Allocator hooks. bzlib_private.h invokes them as
+ bzalloc(opaque, n, 1) and bzfree(opaque, p)
+ through its BZALLOC / BZFREE macros. */
+ void *(*bzalloc)(void *,int,int);
+ void (*bzfree)(void *,void *);
+ void *opaque;
+ }
+ bz_stream;
+
+
+#ifndef BZ_IMPORT
+#define BZ_EXPORT
+#endif
+
+#ifndef BZ_NO_STDIO
+/* Need a definition for FILE */
+#include <stdio.h>
+#endif
+
+#ifdef _WIN32
+# include <windows.h>
+# ifdef small
+ /* windows.h define small to char */
+# undef small
+# endif
+# ifdef BZ_EXPORT
+# define BZ_API(func) WINAPI func
+# define BZ_EXTERN extern
+# else
+ /* import windows dll dynamically */
+# define BZ_API(func) (WINAPI * func)
+# define BZ_EXTERN
+# endif
+#else
+# define BZ_API(func) func
+# define BZ_EXTERN extern
+#endif
+
+
+/*-- Core (low-level) library functions --*/
+
+BZ_EXTERN int BZ_API(BZ2_bzCompressInit) (
+ bz_stream* strm,
+ int blockSize100k,
+ int verbosity,
+ int workFactor
+ );
+
+BZ_EXTERN int BZ_API(BZ2_bzCompress) (
+ bz_stream* strm,
+ int action
+ );
+
+BZ_EXTERN int BZ_API(BZ2_bzCompressEnd) (
+ bz_stream* strm
+ );
+
+BZ_EXTERN int BZ_API(BZ2_bzDecompressInit) (
+ bz_stream *strm,
+ int verbosity,
+ int small
+ );
+
+BZ_EXTERN int BZ_API(BZ2_bzDecompress) (
+ bz_stream* strm
+ );
+
+BZ_EXTERN int BZ_API(BZ2_bzDecompressEnd) (
+ bz_stream *strm
+ );
+
+
+
+/*-- High(er) level library functions --*/
+
+#ifndef BZ_NO_STDIO
+#define BZ_MAX_UNUSED 5000
+
+typedef void BZFILE;
+
+BZ_EXTERN BZFILE* BZ_API(BZ2_bzReadOpen) (
+ int* bzerror,
+ FILE* f,
+ int verbosity,
+ int small,
+ void* unused,
+ int nUnused
+ );
+
+BZ_EXTERN void BZ_API(BZ2_bzReadClose) (
+ int* bzerror,
+ BZFILE* b
+ );
+
+BZ_EXTERN void BZ_API(BZ2_bzReadGetUnused) (
+ int* bzerror,
+ BZFILE* b,
+ void** unused,
+ int* nUnused
+ );
+
+BZ_EXTERN int BZ_API(BZ2_bzRead) (
+ int* bzerror,
+ BZFILE* b,
+ void* buf,
+ int len
+ );
+
+BZ_EXTERN BZFILE* BZ_API(BZ2_bzWriteOpen) (
+ int* bzerror,
+ FILE* f,
+ int blockSize100k,
+ int verbosity,
+ int workFactor
+ );
+
+BZ_EXTERN void BZ_API(BZ2_bzWrite) (
+ int* bzerror,
+ BZFILE* b,
+ void* buf,
+ int len
+ );
+
+BZ_EXTERN void BZ_API(BZ2_bzWriteClose) (
+ int* bzerror,
+ BZFILE* b,
+ int abandon,
+ unsigned int* nbytes_in,
+ unsigned int* nbytes_out
+ );
+
+BZ_EXTERN void BZ_API(BZ2_bzWriteClose64) (
+ int* bzerror,
+ BZFILE* b,
+ int abandon,
+ unsigned int* nbytes_in_lo32,
+ unsigned int* nbytes_in_hi32,
+ unsigned int* nbytes_out_lo32,
+ unsigned int* nbytes_out_hi32
+ );
+#endif
+
+
+/*-- Utility functions --*/
+
+BZ_EXTERN int BZ_API(BZ2_bzBuffToBuffCompress) (
+ char* dest,
+ unsigned int* destLen,
+ char* source,
+ unsigned int sourceLen,
+ int blockSize100k,
+ int verbosity,
+ int workFactor
+ );
+
+BZ_EXTERN int BZ_API(BZ2_bzBuffToBuffDecompress) (
+ char* dest,
+ unsigned int* destLen,
+ char* source,
+ unsigned int sourceLen,
+ int small,
+ int verbosity
+ );
+
+
+/*--
+ Code contributed by Yoshioka Tsuneo (tsuneo@rr.iij4u.or.jp)
+ to support better zlib compatibility.
+ This code is not _officially_ part of libbzip2 (yet);
+ I haven't tested it, documented it, or considered the
+ threading-safeness of it.
+ If this code breaks, please contact both Yoshioka and me.
+--*/
+
+BZ_EXTERN const char * BZ_API(BZ2_bzlibVersion) (
+ void
+ );
+
+#ifndef BZ_NO_STDIO
+BZ_EXTERN BZFILE * BZ_API(BZ2_bzopen) (
+ const char *path,
+ const char *mode
+ );
+
+BZ_EXTERN BZFILE * BZ_API(BZ2_bzdopen) (
+ int fd,
+ const char *mode
+ );
+
+BZ_EXTERN int BZ_API(BZ2_bzread) (
+ BZFILE* b,
+ void* buf,
+ int len
+ );
+
+BZ_EXTERN int BZ_API(BZ2_bzwrite) (
+ BZFILE* b,
+ void* buf,
+ int len
+ );
+
+BZ_EXTERN int BZ_API(BZ2_bzflush) (
+ BZFILE* b
+ );
+
+BZ_EXTERN void BZ_API(BZ2_bzclose) (
+ BZFILE* b
+ );
+
+BZ_EXTERN const char * BZ_API(BZ2_bzerror) (
+ BZFILE *b,
+ int *errnum
+ );
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
+
+/*-------------------------------------------------------------*/
+/*--- end bzlib.h ---*/
+/*-------------------------------------------------------------*/
diff --git a/src/misc/bzlib/bzlib_private.h b/src/misc/bzlib/bzlib_private.h
new file mode 100644
index 00000000..23427879
--- /dev/null
+++ b/src/misc/bzlib/bzlib_private.h
@@ -0,0 +1,509 @@
+
+/*-------------------------------------------------------------*/
+/*--- Private header file for the library. ---*/
+/*--- bzlib_private.h ---*/
+/*-------------------------------------------------------------*/
+
+/* ------------------------------------------------------------------
+ This file is part of bzip2/libbzip2, a program and library for
+ lossless, block-sorting data compression.
+
+ bzip2/libbzip2 version 1.0.5 of 10 December 2007
+ Copyright (C) 1996-2007 Julian Seward <jseward@bzip.org>
+
+ Please read the WARNING, DISCLAIMER and PATENTS sections in the
+ README file.
+
+ This program is released under the terms of the license contained
+ in the file LICENSE.
+ ------------------------------------------------------------------ */
+
+
+#ifndef _BZLIB_PRIVATE_H
+#define _BZLIB_PRIVATE_H
+
+#include <stdlib.h>
+
+#ifndef BZ_NO_STDIO
+#include <stdio.h>
+#include <ctype.h>
+#include <string.h>
+#endif
+
+#include "bzlib.h"
+
+
+
+/*-- General stuff. --*/
+
+#define BZ_VERSION "1.0.5, 10-Dec-2007"
+
+typedef char Char;
+typedef unsigned char Bool;
+typedef unsigned char UChar;
+typedef int Int32;
+typedef unsigned int UInt32;
+typedef short Int16;
+typedef unsigned short UInt16;
+
+#define True ((Bool)1)
+#define False ((Bool)0)
+
+#ifndef __GNUC__
+#define __inline__ /* */
+#endif
+
+#ifndef BZ_NO_STDIO
+
+extern void BZ2_bz__AssertH__fail ( int errcode );
+#define AssertH(cond,errcode) \
+ { if (!(cond)) BZ2_bz__AssertH__fail ( errcode ); }
+
+#if BZ_DEBUG
+#define AssertD(cond,msg) \
+ { if (!(cond)) { \
+ fprintf ( stderr, \
+ "\n\nlibbzip2(debug build): internal error\n\t%s\n", msg );\
+ exit(1); \
+ }}
+#else
+#define AssertD(cond,msg) /* */
+#endif
+
+#define VPrintf0(zf) \
+ fprintf(stderr,zf)
+#define VPrintf1(zf,za1) \
+ fprintf(stderr,zf,za1)
+#define VPrintf2(zf,za1,za2) \
+ fprintf(stderr,zf,za1,za2)
+#define VPrintf3(zf,za1,za2,za3) \
+ fprintf(stderr,zf,za1,za2,za3)
+#define VPrintf4(zf,za1,za2,za3,za4) \
+ fprintf(stderr,zf,za1,za2,za3,za4)
+#define VPrintf5(zf,za1,za2,za3,za4,za5) \
+ fprintf(stderr,zf,za1,za2,za3,za4,za5)
+
+#else
+
+extern void bz_internal_error ( int errcode );
+#define AssertH(cond,errcode) \
+ { if (!(cond)) bz_internal_error ( errcode ); }
+#define AssertD(cond,msg) do { } while (0)
+#define VPrintf0(zf) do { } while (0)
+#define VPrintf1(zf,za1) do { } while (0)
+#define VPrintf2(zf,za1,za2) do { } while (0)
+#define VPrintf3(zf,za1,za2,za3) do { } while (0)
+#define VPrintf4(zf,za1,za2,za3,za4) do { } while (0)
+#define VPrintf5(zf,za1,za2,za3,za4,za5) do { } while (0)
+
+#endif
+
+
+#define BZALLOC(nnn) (strm->bzalloc)(strm->opaque,(nnn),1)
+#define BZFREE(ppp) (strm->bzfree)(strm->opaque,(ppp))
+
+
+/*-- Header bytes. --*/
+
+#define BZ_HDR_B 0x42 /* 'B' */
+#define BZ_HDR_Z 0x5a /* 'Z' */
+#define BZ_HDR_h 0x68 /* 'h' */
+#define BZ_HDR_0 0x30 /* '0' */
+
+/*-- Constants for the back end. --*/
+
+#define BZ_MAX_ALPHA_SIZE 258
+#define BZ_MAX_CODE_LEN 23
+
+#define BZ_RUNA 0
+#define BZ_RUNB 1
+
+#define BZ_N_GROUPS 6
+#define BZ_G_SIZE 50
+#define BZ_N_ITERS 4
+
+#define BZ_MAX_SELECTORS (2 + (900000 / BZ_G_SIZE))
+
+
+
+/*-- Stuff for randomising repetitive blocks. --*/
+
+extern Int32 BZ2_rNums[512];
+
+/* All BZ_RAND_* macros except BZ_RAND_DECLS expect a pointer named `s'
+ (to a struct containing the BZ_RAND_DECLS fields) to be in scope. */
+
+/* Declares the two counters; the use site supplies the final semicolon. */
+#define BZ_RAND_DECLS \
+ Int32 rNToGo; \
+ Int32 rTPos \
+
+/* Resets both counters to zero; the use site supplies the final semicolon. */
+#define BZ_RAND_INIT_MASK \
+ s->rNToGo = 0; \
+ s->rTPos = 0 \
+
+/* Evaluates to 1 exactly when rNToGo == 1, otherwise 0. */
+#define BZ_RAND_MASK ((s->rNToGo == 1) ? 1 : 0)
+
+/* When rNToGo has reached 0, reloads it from BZ2_rNums[rTPos] and advances
+ rTPos, wrapping at 512 (the table size); then always decrements rNToGo.
+ This expands to several unbraced statements, so it must not be used as
+ the sole body of an unbraced if/else/loop. */
+#define BZ_RAND_UPD_MASK \
+ if (s->rNToGo == 0) { \
+ s->rNToGo = BZ2_rNums[s->rTPos]; \
+ s->rTPos++; \
+ if (s->rTPos == 512) s->rTPos = 0; \
+ } \
+ s->rNToGo--;
+
+
+
+/*-- Stuff for doing CRCs. --*/
+
+extern UInt32 BZ2_crc32Table[256];
+
+/* Start a CRC: set crcVar (an lvalue) to all ones. */
+#define BZ_INITIALISE_CRC(crcVar)              \
+{                                              \
+   crcVar = 0xffffffffL;                       \
+}
+
+/* Finish a CRC: replace crcVar by its bitwise complement. */
+#define BZ_FINALISE_CRC(crcVar)                \
+{                                              \
+   crcVar = ~(crcVar);                         \
+}
+
+/* Fold one byte into crcVar using BZ2_crc32Table.
+   crcVar must be a side-effect-free lvalue (it is evaluated three
+   times).  The table index relies on crcVar being 32 bits wide so
+   that (crcVar >> 24) is in 0..255.
+   cha is fully parenthesised before the UChar cast, so a compound
+   argument such as `a + b' is reduced to a byte as a whole rather
+   than having the cast bind to `a' only. */
+#define BZ_UPDATE_CRC(crcVar,cha)              \
+{                                              \
+   crcVar = ((crcVar) << 8) ^                  \
+            BZ2_crc32Table[((crcVar) >> 24) ^  \
+                           ((UChar)(cha))];    \
+}
+
+
+
+/*-- States and modes for compression. --*/
+
+#define BZ_M_IDLE 1
+#define BZ_M_RUNNING 2
+#define BZ_M_FLUSHING 3
+#define BZ_M_FINISHING 4
+
+#define BZ_S_OUTPUT 1
+#define BZ_S_INPUT 2
+
+#define BZ_N_RADIX 2
+#define BZ_N_QSORT 12
+#define BZ_N_SHELL 18
+#define BZ_N_OVERSHOOT (BZ_N_RADIX + BZ_N_QSORT + BZ_N_SHELL + 2)
+
+
+
+
+/*-- Structure holding all the compression-side stuff. --*/
+
+/* Per-stream compressor state; every routine in compress.c takes an EState*. */
+typedef
+ struct {
+ /* pointer back to the struct bz_stream */
+ bz_stream* strm;
+
+ /* mode this stream is in, and whether inputting */
+ /* or outputting data */
+ Int32 mode;
+ Int32 state;
+
+ /* remembers avail_in when flush/finish requested */
+ UInt32 avail_in_expect;
+
+ /* for doing the block sorting */
+ UInt32* arr1;
+ UInt32* arr2;
+ UInt32* ftab;
+ Int32 origPtr;
+
+ /* aliases for arr1 and arr2 */
+ /* (zbits is re-pointed by BZ2_compressBlock at arr2 + nblock bytes) */
+ UInt32* ptr;
+ UChar* block;
+ UInt16* mtfv;
+ UChar* zbits;
+
+ /* for deciding when to use the fallback sorting algorithm */
+ Int32 workFactor;
+
+ /* run-length-encoding of the input */
+ UInt32 state_in_ch;
+ Int32 state_in_len;
+ BZ_RAND_DECLS;
+
+ /* input and output limits and current posns */
+ /* (numZ counts the bytes the bit writer has stored into zbits) */
+ Int32 nblock;
+ Int32 nblockMAX;
+ Int32 numZ;
+ Int32 state_out_pos;
+
+ /* map of bytes used in block */
+ /* (unseqToSeq maps a byte value to its rank among the used bytes) */
+ Int32 nInUse;
+ Bool inUse[256];
+ UChar unseqToSeq[256];
+
+ /* the buffer for bit stream creation */
+ /* (bsLive pending bits are held at the top of bsBuff; see bsW) */
+ UInt32 bsBuff;
+ Int32 bsLive;
+
+ /* block and combined CRCs */
+ UInt32 blockCRC;
+ UInt32 combinedCRC;
+
+ /* misc administratium */
+ Int32 verbosity;
+ Int32 blockNo;
+ Int32 blockSize100k;
+
+ /* stuff for coding the MTF values */
+ /* (nMTF is the number of entries in mtfv, including the final EOB) */
+ Int32 nMTF;
+ Int32 mtfFreq [BZ_MAX_ALPHA_SIZE];
+ UChar selector [BZ_MAX_SELECTORS];
+ UChar selectorMtf[BZ_MAX_SELECTORS];
+
+ UChar len [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
+ Int32 code [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
+ Int32 rfreq [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
+ /* second dimension: only 3 needed; 4 makes index calculations faster */
+ UInt32 len_pack[BZ_MAX_ALPHA_SIZE][4];
+
+ }
+ EState;
+
+
+
+/*-- externs for compression. --*/
+
+extern void
+BZ2_blockSort ( EState* );
+
+extern void
+BZ2_compressBlock ( EState*, Bool );
+
+extern void
+BZ2_bsInitWrite ( EState* );
+
+extern void
+BZ2_hbAssignCodes ( Int32*, UChar*, Int32, Int32, Int32 );
+
+extern void
+BZ2_hbMakeCodeLengths ( UChar*, Int32*, Int32, Int32 );
+
+
+
+/*-- states for decompression. --*/
+
+#define BZ_X_IDLE 1
+#define BZ_X_OUTPUT 2
+
+#define BZ_X_MAGIC_1 10
+#define BZ_X_MAGIC_2 11
+#define BZ_X_MAGIC_3 12
+#define BZ_X_MAGIC_4 13
+#define BZ_X_BLKHDR_1 14
+#define BZ_X_BLKHDR_2 15
+#define BZ_X_BLKHDR_3 16
+#define BZ_X_BLKHDR_4 17
+#define BZ_X_BLKHDR_5 18
+#define BZ_X_BLKHDR_6 19
+#define BZ_X_BCRC_1 20
+#define BZ_X_BCRC_2 21
+#define BZ_X_BCRC_3 22
+#define BZ_X_BCRC_4 23
+#define BZ_X_RANDBIT 24
+#define BZ_X_ORIGPTR_1 25
+#define BZ_X_ORIGPTR_2 26
+#define BZ_X_ORIGPTR_3 27
+#define BZ_X_MAPPING_1 28
+#define BZ_X_MAPPING_2 29
+#define BZ_X_SELECTOR_1 30
+#define BZ_X_SELECTOR_2 31
+#define BZ_X_SELECTOR_3 32
+#define BZ_X_CODING_1 33
+#define BZ_X_CODING_2 34
+#define BZ_X_CODING_3 35
+#define BZ_X_MTF_1 36
+#define BZ_X_MTF_2 37
+#define BZ_X_MTF_3 38
+#define BZ_X_MTF_4 39
+#define BZ_X_MTF_5 40
+#define BZ_X_MTF_6 41
+#define BZ_X_ENDHDR_2 42
+#define BZ_X_ENDHDR_3 43
+#define BZ_X_ENDHDR_4 44
+#define BZ_X_ENDHDR_5 45
+#define BZ_X_ENDHDR_6 46
+#define BZ_X_CCRC_1 47
+#define BZ_X_CCRC_2 48
+#define BZ_X_CCRC_3 49
+#define BZ_X_CCRC_4 50
+
+
+
+/*-- Constants for the fast MTF decoder. --*/
+
+#define MTFA_SIZE 4096
+#define MTFL_SIZE 16
+
+
+
+/*-- Structure holding all the decompression-side stuff. --*/
+
+/* Per-stream decompressor state; the BZ_GET_* and *_LL* macros below
+ access it through a pointer named `s'. */
+typedef
+ struct {
+ /* pointer back to the struct bz_stream */
+ bz_stream* strm;
+
+ /* state indicator for this stream */
+ Int32 state;
+
+ /* for doing the final run-length decoding */
+ UChar state_out_ch;
+ Int32 state_out_len;
+ Bool blockRandomised;
+ BZ_RAND_DECLS;
+
+ /* the buffer for bit stream reading */
+ UInt32 bsBuff;
+ Int32 bsLive;
+
+ /* misc administratium */
+ Int32 blockSize100k;
+ Bool smallDecompress;
+ Int32 currBlockNo;
+ Int32 verbosity;
+
+ /* for undoing the Burrows-Wheeler transform */
+ Int32 origPtr;
+ UInt32 tPos;
+ Int32 k0;
+ Int32 unzftab[256];
+ Int32 nblock_used;
+ Int32 cftab[257];
+ Int32 cftabCopy[257];
+
+ /* for undoing the Burrows-Wheeler transform (FAST) */
+ /* (BZ_GET_FAST reads tt[tPos]: low 8 bits are the byte, the rest the next tPos) */
+ UInt32 *tt;
+
+ /* for undoing the Burrows-Wheeler transform (SMALL) */
+ /* (GET_LL joins ll16[i] with a 4-bit nibble from ll4 into a 20-bit value) */
+ UInt16 *ll16;
+ UChar *ll4;
+
+ /* stored and calculated CRCs */
+ UInt32 storedBlockCRC;
+ UInt32 storedCombinedCRC;
+ UInt32 calculatedBlockCRC;
+ UInt32 calculatedCombinedCRC;
+
+ /* map of bytes used in block */
+ Int32 nInUse;
+ Bool inUse[256];
+ Bool inUse16[16];
+ UChar seqToUnseq[256];
+
+ /* for decoding the MTF values */
+ UChar mtfa [MTFA_SIZE];
+ Int32 mtfbase[256 / MTFL_SIZE];
+ UChar selector [BZ_MAX_SELECTORS];
+ UChar selectorMtf[BZ_MAX_SELECTORS];
+ UChar len [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
+
+ Int32 limit [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
+ Int32 base [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
+ Int32 perm [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
+ Int32 minLens[BZ_N_GROUPS];
+
+ /* save area for scalars in the main decompress code */
+ Int32 save_i;
+ Int32 save_j;
+ Int32 save_t;
+ Int32 save_alphaSize;
+ Int32 save_nGroups;
+ Int32 save_nSelectors;
+ Int32 save_EOB;
+ Int32 save_groupNo;
+ Int32 save_groupPos;
+ Int32 save_nextSym;
+ Int32 save_nblockMAX;
+ Int32 save_nblock;
+ Int32 save_es;
+ Int32 save_N;
+ Int32 save_curr;
+ Int32 save_zt;
+ Int32 save_zn;
+ Int32 save_zvec;
+ Int32 save_zj;
+ Int32 save_gSel;
+ Int32 save_gMinlen;
+ Int32* save_gLimit;
+ Int32* save_gBase;
+ Int32* save_gPerm;
+
+ }
+ DState;
+
+
+
+/*-- Macros for decompression. --*/
+
+/* Fetch the next byte into cccc by following s->tt from s->tPos.
+ If tPos is not below 100000 * blockSize100k, the macro executes
+ `return True' in the ENCLOSING function. Otherwise tPos is replaced
+ by tt[tPos], whose low 8 bits become cccc and whose remaining bits
+ (after >> 8) become the new tPos.
+ Expands to several unbraced statements; requires `s' in scope. */
+#define BZ_GET_FAST(cccc) \
+ /* c_tPos is unsigned, hence test < 0 is pointless. */ \
+ if (s->tPos >= (UInt32)100000 * (UInt32)s->blockSize100k) return True; \
+ s->tPos = s->tt[s->tPos]; \
+ cccc = (UChar)(s->tPos & 0xff); \
+ s->tPos >>= 8;
+
+/* Same steps as BZ_GET_FAST, but working on the names c_tPos, c_tt and
+ ro_blockSize100k, which the use site must have in scope, instead of
+ the fields of `s'. */
+#define BZ_GET_FAST_C(cccc) \
+ /* c_tPos is unsigned, hence test < 0 is pointless. */ \
+ if (c_tPos >= (UInt32)100000 * (UInt32)ro_blockSize100k) return True; \
+ c_tPos = c_tt[c_tPos]; \
+ cccc = (UChar)(c_tPos & 0xff); \
+ c_tPos >>= 8;
+
+/* The four macros below store 20-bit values split across two arrays of
+   the state `s' (which must be in scope): the low 16 bits live in
+   s->ll16[i], bits 16..19 live in a nibble of s->ll4[i >> 1] (even i:
+   low nibble, odd i: high nibble).  Arguments are evaluated more than
+   once, so they must be free of side effects. */
+
+/* Store the 4-bit value n into nibble i of s->ll4, keeping the other
+   nibble of that byte.  n is expected to fit in 4 bits. */
+#define SET_LL4(i,n)                                          \
+   { if (((i) & 0x1) == 0)                                    \
+        s->ll4[(i) >> 1] = (s->ll4[(i) >> 1] & 0xf0) | (n); else    \
+        s->ll4[(i) >> 1] = (s->ll4[(i) >> 1] & 0x0f) | ((n) << 4);  \
+   }
+
+/* Read nibble i of s->ll4. */
+#define GET_LL4(i)                             \
+   ((((UInt32)(s->ll4[(i) >> 1])) >> (((i) << 2) & 0x4)) & 0xF)
+
+/* Store the 20-bit value n at index i.  n is parenthesised before the
+   mask and the shift, so a compound argument such as `a | b' is split
+   as a whole instead of being re-associated by operator precedence. */
+#define SET_LL(i,n)                            \
+   { s->ll16[(i)] = (UInt16)((n) & 0x0000ffff);  \
+     SET_LL4((i), (n) >> 16);                  \
+   }
+
+/* Read back the 20-bit value stored at index i. */
+#define GET_LL(i) \
+   (((UInt32)s->ll16[(i)]) | (GET_LL4(i) << 16))
+
+/* Fetch the next byte into cccc for the ll16/ll4 representation.
+ If tPos is not below 100000 * blockSize100k, the macro executes
+ `return True' in the ENCLOSING function. Otherwise cccc is obtained
+ from BZ2_indexIntoF(tPos, cftab) and tPos advances to GET_LL(tPos).
+ Expands to several unbraced statements; requires `s' in scope. */
+#define BZ_GET_SMALL(cccc) \
+ /* c_tPos is unsigned, hence test < 0 is pointless. */ \
+ if (s->tPos >= (UInt32)100000 * (UInt32)s->blockSize100k) return True; \
+ cccc = BZ2_indexIntoF ( s->tPos, s->cftab ); \
+ s->tPos = GET_LL(s->tPos);
+
+
+/*-- externs for decompression. --*/
+
+extern Int32
+BZ2_indexIntoF ( Int32, Int32* );
+
+extern Int32
+BZ2_decompress ( DState* );
+
+extern void
+BZ2_hbCreateDecodeTables ( Int32*, Int32*, Int32*, UChar*,
+ Int32, Int32, Int32 );
+
+
+#endif
+
+
+/*-- BZ_NO_STDIO seems to make NULL disappear on some platforms. --*/
+
+#ifdef BZ_NO_STDIO
+#ifndef NULL
+#define NULL 0
+#endif
+#endif
+
+
+/*-------------------------------------------------------------*/
+/*--- end bzlib_private.h ---*/
+/*-------------------------------------------------------------*/
diff --git a/src/misc/bzlib/compress.c b/src/misc/bzlib/compress.c
new file mode 100644
index 00000000..7d9b3da7
--- /dev/null
+++ b/src/misc/bzlib/compress.c
@@ -0,0 +1,672 @@
+
+/*-------------------------------------------------------------*/
+/*--- Compression machinery (not incl block sorting) ---*/
+/*--- compress.c ---*/
+/*-------------------------------------------------------------*/
+
+/* ------------------------------------------------------------------
+ This file is part of bzip2/libbzip2, a program and library for
+ lossless, block-sorting data compression.
+
+ bzip2/libbzip2 version 1.0.5 of 10 December 2007
+ Copyright (C) 1996-2007 Julian Seward <jseward@bzip.org>
+
+ Please read the WARNING, DISCLAIMER and PATENTS sections in the
+ README file.
+
+ This program is released under the terms of the license contained
+ in the file LICENSE.
+ ------------------------------------------------------------------ */
+
+
+/* CHANGES
+ 0.9.0 -- original version.
+ 0.9.0a/b -- no changes in this file.
+ 0.9.0c -- changed setting of nGroups in sendMTFValues()
+ so as to do a bit better on small files
+*/
+
+#include "bzlib_private.h"
+
+
+/*---------------------------------------------------*/
+/*--- Bit stream I/O ---*/
+/*---------------------------------------------------*/
+
+/*---------------------------------------------------*/
+void BZ2_bsInitWrite ( EState* s )
+{
+   /* start with an empty bit accumulator and no pending bits */
+   s->bsBuff = 0;
+   s->bsLive = 0;
+}
+
+
+/*---------------------------------------------------*/
+/* Drain the bit accumulator: emit its top byte into zbits until no
+   pending bits remain (a final partial byte is emitted as-is). */
+static
+void bsFinishWrite ( EState* s )
+{
+   for ( ; s->bsLive > 0; s->bsLive -= 8) {
+      s->zbits[s->numZ++] = (UChar)(s->bsBuff >> 24);
+      s->bsBuff <<= 8;
+   }
+}
+
+
+/*---------------------------------------------------*/
+/* Flush every complete byte held in the bit accumulator of `s' (which
+ must be in scope) to s->zbits, leaving fewer than 8 pending bits.
+ The parameter nz is not used by the expansion. */
+#define bsNEEDW(nz) \
+{ \
+ while (s->bsLive >= 8) { \
+ s->zbits[s->numZ] \
+ = (UChar)(s->bsBuff >> 24); \
+ s->numZ++; \
+ s->bsBuff <<= 8; \
+ s->bsLive -= 8; \
+ } \
+}
+
+
+/*---------------------------------------------------*/
+/* Append the low n bits of v to the output bit stream.  Complete bytes
+   are flushed first, then v is placed directly below the bits that
+   are still pending at the top of bsBuff. */
+static
+__inline__
+void bsW ( EState* s, Int32 n, UInt32 v )
+{
+   Int32 shift;
+
+   bsNEEDW ( n );
+   shift = 32 - s->bsLive - n;
+   s->bsBuff |= (v << shift);
+   s->bsLive += n;
+}
+
+
+/*---------------------------------------------------*/
+/* Write u as four bytes, most significant byte first. */
+static
+void bsPutUInt32 ( EState* s, UInt32 u )
+{
+   Int32 shift;
+
+   for (shift = 24; shift >= 0; shift -= 8)
+      bsW ( s, 8, (u >> shift) & 0xffL );
+}
+
+
+/*---------------------------------------------------*/
+/* Write the single byte c as 8 bits. */
+static
+void bsPutUChar ( EState* s, UChar c )
+{
+   UInt32 byte = (UInt32)c;
+
+   bsW ( s, 8, byte );
+}
+
+
+/*---------------------------------------------------*/
+/*--- The back end proper ---*/
+/*---------------------------------------------------*/
+
+/*---------------------------------------------------*/
+/* Number the byte values flagged in s->inUse consecutively from 0, in
+   increasing byte order: unseqToSeq[b] receives the number of byte b
+   and s->nInUse the count of flagged bytes.  Entries of unflagged
+   bytes are left untouched. */
+static
+void makeMaps_e ( EState* s )
+{
+   Int32 byteVal;
+   Int32 nUsed = 0;
+
+   for (byteVal = 0; byteVal < 256; byteVal++) {
+      if (!s->inUse[byteVal]) continue;
+      s->unseqToSeq[byteVal] = nUsed;
+      nUsed++;
+   }
+   s->nInUse = nUsed;
+}
+
+
+/*---------------------------------------------------*/
+/*
+ Turn the sorted block into the MTF / zero-run symbol sequence.
+ Reads s->ptr, s->block, s->inUse and s->nblock; writes s->mtfv[0 .. nMTF-1],
+ s->mtfFreq[0 .. EOB], s->nMTF, and (via makeMaps_e) s->nInUse and
+ s->unseqToSeq. Symbols 0 and 1 are BZ_RUNA / BZ_RUNB, a move-to-front
+ position p >= 1 is written as p+1, and EOB (= nInUse+1) ends the sequence.
+*/
+static
+void generateMTFValues ( EState* s )
+{
+ UChar yy[256];
+ Int32 i, j;
+ Int32 zPend;
+ Int32 wr;
+ Int32 EOB;
+
+ /*
+ After sorting (eg, here),
+ s->arr1 [ 0 .. s->nblock-1 ] holds sorted order,
+ and
+ ((UChar*)s->arr2) [ 0 .. s->nblock-1 ]
+ holds the original block data.
+
+ The first thing to do is generate the MTF values,
+ and put them in
+ ((UInt16*)s->arr1) [ 0 .. s->nblock-1 ].
+ Because there are strictly fewer or equal MTF values
+ than block values, ptr values in this area are overwritten
+ with MTF values only when they are no longer needed.
+
+ The final compressed bitstream is generated into the
+ area starting at
+ (UChar*) (&((UChar*)s->arr2)[s->nblock])
+
+ These storage aliases are set up in bzCompressInit(),
+ except for the last one, which is arranged in
+ compressBlock().
+ */
+ UInt32* ptr = s->ptr;
+ UChar* block = s->block;
+ UInt16* mtfv = s->mtfv;
+
+ makeMaps_e ( s );
+ EOB = s->nInUse+1;
+
+ for (i = 0; i <= EOB; i++) s->mtfFreq[i] = 0;
+
+ /* wr: next write index into mtfv; zPend: length of the current run of
+ symbols that are already at the front of the MTF list */
+ wr = 0;
+ zPend = 0;
+ /* yy is the move-to-front list, initially the identity over used symbols */
+ for (i = 0; i < s->nInUse; i++) yy[i] = (UChar) i;
+
+ for (i = 0; i < s->nblock; i++) {
+ UChar ll_i;
+ AssertD ( wr <= i, "generateMTFValues(1)" );
+ /* the byte cyclically preceding sorted position i, mapped to its dense index */
+ j = ptr[i]-1; if (j < 0) j += s->nblock;
+ ll_i = s->unseqToSeq[block[j]];
+ AssertD ( ll_i < s->nInUse, "generateMTFValues(2a)" );
+
+ if (yy[0] == ll_i) {
+ zPend++;
+ } else {
+
+ /* Flush the pending run as RUNA/RUNB digits, least significant
+ first: a run of 1 gives RUNA, 2 gives RUNB, 3 gives RUNA RUNA. */
+ if (zPend > 0) {
+ zPend--;
+ while (True) {
+ if (zPend & 1) {
+ mtfv[wr] = BZ_RUNB; wr++;
+ s->mtfFreq[BZ_RUNB]++;
+ } else {
+ mtfv[wr] = BZ_RUNA; wr++;
+ s->mtfFreq[BZ_RUNA]++;
+ }
+ if (zPend < 2) break;
+ zPend = (zPend - 2) / 2;
+ };
+ zPend = 0;
+ }
+ /* Find ll_i in yy while shifting the entries in front of it one
+ place towards the back, then put ll_i at the front. */
+ {
+ register UChar rtmp;
+ register UChar* ryy_j;
+ register UChar rll_i;
+ rtmp = yy[1];
+ yy[1] = yy[0];
+ ryy_j = &(yy[1]);
+ rll_i = ll_i;
+ while ( rll_i != rtmp ) {
+ register UChar rtmp2;
+ ryy_j++;
+ rtmp2 = rtmp;
+ rtmp = *ryy_j;
+ *ryy_j = rtmp2;
+ };
+ yy[0] = rtmp;
+ /* j = position ll_i had in the list; emitted as j+1 because
+ symbols 0 and 1 are taken by RUNA / RUNB */
+ j = ryy_j - &(yy[0]);
+ mtfv[wr] = j+1; wr++; s->mtfFreq[j+1]++;
+ }
+
+ }
+ }
+
+ /* flush a run that extends to the end of the block (same coding as above) */
+ if (zPend > 0) {
+ zPend--;
+ while (True) {
+ if (zPend & 1) {
+ mtfv[wr] = BZ_RUNB; wr++;
+ s->mtfFreq[BZ_RUNB]++;
+ } else {
+ mtfv[wr] = BZ_RUNA; wr++;
+ s->mtfFreq[BZ_RUNA]++;
+ }
+ if (zPend < 2) break;
+ zPend = (zPend - 2) / 2;
+ };
+ zPend = 0;
+ }
+
+ mtfv[wr] = EOB; wr++; s->mtfFreq[EOB]++;
+
+ s->nMTF = wr;
+}
+
+
+/*---------------------------------------------------*/
+/* Initial pseudo code lengths: symbols inside a table's starting range get
+ the LESSER cost, all other symbols the GREATER cost. */
+#define BZ_LESSER_ICOST 0
+#define BZ_GREATER_ICOST 15
+
+/*
+ Entropy-code s->mtfv[0 .. nMTF-1] into the bit stream.
+ Steps, in order: choose the number of coding tables (2..6) from nMTF;
+ seed the tables by splitting the alphabet into frequency ranges; refine
+ them for BZ_N_ITERS passes, assigning each group of BZ_G_SIZE symbols to
+ its cheapest table; then emit the in-use byte map, the selectors, the
+ code lengths and finally the coded symbols.
+ Requires s->nMTF > 0 (checked with AssertH 3001).
+*/
+static
+void sendMTFValues ( EState* s )
+{
+ Int32 v, t, i, j, gs, ge, totc, bt, bc, iter;
+ Int32 nSelectors, alphaSize, minLen, maxLen, selCtr;
+ Int32 nGroups, nBytes;
+
+ /*--
+ UChar len [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
+ is a global since the decoder also needs it.
+
+ Int32 code[BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
+ Int32 rfreq[BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
+ are also globals only used in this proc.
+ Made global to keep stack frame size small.
+ --*/
+
+
+ UInt16 cost[BZ_N_GROUPS];
+ Int32 fave[BZ_N_GROUPS];
+
+ UInt16* mtfv = s->mtfv;
+
+ if (s->verbosity >= 3)
+ VPrintf3( " %d in block, %d after MTF & 1-2 coding, "
+ "%d+2 syms in use\n",
+ s->nblock, s->nMTF, s->nInUse );
+
+ alphaSize = s->nInUse+2;
+ for (t = 0; t < BZ_N_GROUPS; t++)
+ for (v = 0; v < alphaSize; v++)
+ s->len[t][v] = BZ_GREATER_ICOST;
+
+ /*--- Decide how many coding tables to use ---*/
+ AssertH ( s->nMTF > 0, 3001 );
+ if (s->nMTF < 200) nGroups = 2; else
+ if (s->nMTF < 600) nGroups = 3; else
+ if (s->nMTF < 1200) nGroups = 4; else
+ if (s->nMTF < 2400) nGroups = 5; else
+ nGroups = 6;
+
+ /*--- Generate an initial set of coding tables ---*/
+ {
+ Int32 nPart, remF, tFreq, aFreq;
+
+ nPart = nGroups;
+ remF = s->nMTF;
+ gs = 0;
+ while (nPart > 0) {
+ /* grow [gs .. ge] until it holds about 1/nPart of the remaining frequency */
+ tFreq = remF / nPart;
+ ge = gs-1;
+ aFreq = 0;
+ while (aFreq < tFreq && ge < alphaSize-1) {
+ ge++;
+ aFreq += s->mtfFreq[ge];
+ }
+
+ /* for every second interior table, give back the last symbol taken */
+ if (ge > gs
+ && nPart != nGroups && nPart != 1
+ && ((nGroups-nPart) % 2 == 1)) {
+ aFreq -= s->mtfFreq[ge];
+ ge--;
+ }
+
+ if (s->verbosity >= 3)
+ VPrintf5( " initial group %d, [%d .. %d], "
+ "has %d syms (%4.1f%%)\n",
+ nPart, gs, ge, aFreq,
+ (100.0 * (float)aFreq) / (float)(s->nMTF) );
+
+ for (v = 0; v < alphaSize; v++)
+ if (v >= gs && v <= ge)
+ s->len[nPart-1][v] = BZ_LESSER_ICOST; else
+ s->len[nPart-1][v] = BZ_GREATER_ICOST;
+
+ nPart--;
+ gs = ge+1;
+ remF -= aFreq;
+ }
+ }
+
+ /*---
+ Iterate up to BZ_N_ITERS times to improve the tables.
+ ---*/
+ for (iter = 0; iter < BZ_N_ITERS; iter++) {
+
+ for (t = 0; t < nGroups; t++) fave[t] = 0;
+
+ for (t = 0; t < nGroups; t++)
+ for (v = 0; v < alphaSize; v++)
+ s->rfreq[t][v] = 0;
+
+ /*---
+ Set up an auxiliary length table which is used to fast-track
+ the common case (nGroups == 6).
+ ---*/
+ /* each 32-bit entry packs the lengths of two tables (odd table in the
+ high half, even table in the low half), so one add updates two costs */
+ if (nGroups == 6) {
+ for (v = 0; v < alphaSize; v++) {
+ s->len_pack[v][0] = (s->len[1][v] << 16) | s->len[0][v];
+ s->len_pack[v][1] = (s->len[3][v] << 16) | s->len[2][v];
+ s->len_pack[v][2] = (s->len[5][v] << 16) | s->len[4][v];
+ }
+ }
+
+ nSelectors = 0;
+ totc = 0;
+ gs = 0;
+ while (True) {
+
+ /*--- Set group start & end marks. --*/
+ if (gs >= s->nMTF) break;
+ ge = gs + BZ_G_SIZE - 1;
+ if (ge >= s->nMTF) ge = s->nMTF-1;
+
+ /*--
+ Calculate the cost of this group as coded
+ by each of the coding tables.
+ --*/
+ for (t = 0; t < nGroups; t++) cost[t] = 0;
+
+ if (nGroups == 6 && 50 == ge-gs+1) {
+ /*--- fast track the common case ---*/
+ register UInt32 cost01, cost23, cost45;
+ register UInt16 icv;
+ cost01 = cost23 = cost45 = 0;
+
+# define BZ_ITER(nn) \
+ icv = mtfv[gs+(nn)]; \
+ cost01 += s->len_pack[icv][0]; \
+ cost23 += s->len_pack[icv][1]; \
+ cost45 += s->len_pack[icv][2]; \
+
+ BZ_ITER(0); BZ_ITER(1); BZ_ITER(2); BZ_ITER(3); BZ_ITER(4);
+ BZ_ITER(5); BZ_ITER(6); BZ_ITER(7); BZ_ITER(8); BZ_ITER(9);
+ BZ_ITER(10); BZ_ITER(11); BZ_ITER(12); BZ_ITER(13); BZ_ITER(14);
+ BZ_ITER(15); BZ_ITER(16); BZ_ITER(17); BZ_ITER(18); BZ_ITER(19);
+ BZ_ITER(20); BZ_ITER(21); BZ_ITER(22); BZ_ITER(23); BZ_ITER(24);
+ BZ_ITER(25); BZ_ITER(26); BZ_ITER(27); BZ_ITER(28); BZ_ITER(29);
+ BZ_ITER(30); BZ_ITER(31); BZ_ITER(32); BZ_ITER(33); BZ_ITER(34);
+ BZ_ITER(35); BZ_ITER(36); BZ_ITER(37); BZ_ITER(38); BZ_ITER(39);
+ BZ_ITER(40); BZ_ITER(41); BZ_ITER(42); BZ_ITER(43); BZ_ITER(44);
+ BZ_ITER(45); BZ_ITER(46); BZ_ITER(47); BZ_ITER(48); BZ_ITER(49);
+
+# undef BZ_ITER
+
+ /* unpack the three paired sums into the six per-table costs */
+ cost[0] = cost01 & 0xffff; cost[1] = cost01 >> 16;
+ cost[2] = cost23 & 0xffff; cost[3] = cost23 >> 16;
+ cost[4] = cost45 & 0xffff; cost[5] = cost45 >> 16;
+
+ } else {
+ /*--- slow version which correctly handles all situations ---*/
+ for (i = gs; i <= ge; i++) {
+ UInt16 icv = mtfv[i];
+ for (t = 0; t < nGroups; t++) cost[t] += s->len[t][icv];
+ }
+ }
+
+ /*--
+ Find the coding table which is best for this group,
+ and record its identity in the selector table.
+ --*/
+ /* strict '<' means the lowest-numbered table wins a tie */
+ bc = 999999999; bt = -1;
+ for (t = 0; t < nGroups; t++)
+ if (cost[t] < bc) { bc = cost[t]; bt = t; };
+ totc += bc;
+ fave[bt]++;
+ s->selector[nSelectors] = bt;
+ nSelectors++;
+
+ /*--
+ Increment the symbol frequencies for the selected table.
+ --*/
+ if (nGroups == 6 && 50 == ge-gs+1) {
+ /*--- fast track the common case ---*/
+
+# define BZ_ITUR(nn) s->rfreq[bt][ mtfv[gs+(nn)] ]++
+
+ BZ_ITUR(0); BZ_ITUR(1); BZ_ITUR(2); BZ_ITUR(3); BZ_ITUR(4);
+ BZ_ITUR(5); BZ_ITUR(6); BZ_ITUR(7); BZ_ITUR(8); BZ_ITUR(9);
+ BZ_ITUR(10); BZ_ITUR(11); BZ_ITUR(12); BZ_ITUR(13); BZ_ITUR(14);
+ BZ_ITUR(15); BZ_ITUR(16); BZ_ITUR(17); BZ_ITUR(18); BZ_ITUR(19);
+ BZ_ITUR(20); BZ_ITUR(21); BZ_ITUR(22); BZ_ITUR(23); BZ_ITUR(24);
+ BZ_ITUR(25); BZ_ITUR(26); BZ_ITUR(27); BZ_ITUR(28); BZ_ITUR(29);
+ BZ_ITUR(30); BZ_ITUR(31); BZ_ITUR(32); BZ_ITUR(33); BZ_ITUR(34);
+ BZ_ITUR(35); BZ_ITUR(36); BZ_ITUR(37); BZ_ITUR(38); BZ_ITUR(39);
+ BZ_ITUR(40); BZ_ITUR(41); BZ_ITUR(42); BZ_ITUR(43); BZ_ITUR(44);
+ BZ_ITUR(45); BZ_ITUR(46); BZ_ITUR(47); BZ_ITUR(48); BZ_ITUR(49);
+
+# undef BZ_ITUR
+
+ } else {
+ /*--- slow version which correctly handles all situations ---*/
+ for (i = gs; i <= ge; i++)
+ s->rfreq[bt][ mtfv[i] ]++;
+ }
+
+ gs = ge+1;
+ }
+ if (s->verbosity >= 3) {
+ VPrintf2 ( " pass %d: size is %d, grp uses are ",
+ iter+1, totc/8 );
+ for (t = 0; t < nGroups; t++)
+ VPrintf1 ( "%d ", fave[t] );
+ VPrintf0 ( "\n" );
+ }
+
+ /*--
+ Recompute the tables based on the accumulated frequencies.
+ --*/
+ /* maxLen was changed from 20 to 17 in bzip2-1.0.3. See
+ comment in huffman.c for details. */
+ for (t = 0; t < nGroups; t++)
+ BZ2_hbMakeCodeLengths ( &(s->len[t][0]), &(s->rfreq[t][0]),
+ alphaSize, 17 /*20*/ );
+ }
+
+
+ /* nGroups is sent in 3 bits and nSelectors in 15 bits further down */
+ AssertH( nGroups < 8, 3002 );
+ AssertH( nSelectors < 32768 &&
+ nSelectors <= (2 + (900000 / BZ_G_SIZE)),
+ 3003 );
+
+
+ /*--- Compute MTF values for the selectors. ---*/
+ {
+ UChar pos[BZ_N_GROUPS], ll_i, tmp2, tmp;
+ for (i = 0; i < nGroups; i++) pos[i] = i;
+ for (i = 0; i < nSelectors; i++) {
+ ll_i = s->selector[i];
+ j = 0;
+ tmp = pos[j];
+ while ( ll_i != tmp ) {
+ j++;
+ tmp2 = tmp;
+ tmp = pos[j];
+ pos[j] = tmp2;
+ };
+ pos[0] = tmp;
+ s->selectorMtf[i] = j;
+ }
+ };
+
+ /*--- Assign actual codes for the tables. --*/
+ for (t = 0; t < nGroups; t++) {
+ minLen = 32;
+ maxLen = 0;
+ for (i = 0; i < alphaSize; i++) {
+ if (s->len[t][i] > maxLen) maxLen = s->len[t][i];
+ if (s->len[t][i] < minLen) minLen = s->len[t][i];
+ }
+ AssertH ( !(maxLen > 17 /*20*/ ), 3004 );
+ AssertH ( !(minLen < 1), 3005 );
+ BZ2_hbAssignCodes ( &(s->code[t][0]), &(s->len[t][0]),
+ minLen, maxLen, alphaSize );
+ }
+
+ /*--- Transmit the mapping table. ---*/
+ /* two levels: 16 bits flag which 16-byte ranges contain a used byte,
+ then 16 bits for each flagged range */
+ {
+ Bool inUse16[16];
+ for (i = 0; i < 16; i++) {
+ inUse16[i] = False;
+ for (j = 0; j < 16; j++)
+ if (s->inUse[i * 16 + j]) inUse16[i] = True;
+ }
+
+ /* nBytes only feeds the verbosity >= 3 size reports */
+ nBytes = s->numZ;
+ for (i = 0; i < 16; i++)
+ if (inUse16[i]) bsW(s,1,1); else bsW(s,1,0);
+
+ for (i = 0; i < 16; i++)
+ if (inUse16[i])
+ for (j = 0; j < 16; j++) {
+ if (s->inUse[i * 16 + j]) bsW(s,1,1); else bsW(s,1,0);
+ }
+
+ if (s->verbosity >= 3)
+ VPrintf1( " bytes: mapping %d, ", s->numZ-nBytes );
+ }
+
+ /*--- Now the selectors. ---*/
+ /* each selector's MTF value j is written as j one-bits and a zero-bit */
+ nBytes = s->numZ;
+ bsW ( s, 3, nGroups );
+ bsW ( s, 15, nSelectors );
+ for (i = 0; i < nSelectors; i++) {
+ for (j = 0; j < s->selectorMtf[i]; j++) bsW(s,1,1);
+ bsW(s,1,0);
+ }
+ if (s->verbosity >= 3)
+ VPrintf1( "selectors %d, ", s->numZ-nBytes );
+
+ /*--- Now the coding tables. ---*/
+ /* per table: a 5-bit starting length, then for each symbol the bit pairs
+ 10 (add one) / 11 (subtract one) until the running length matches,
+ closed by a single 0 bit */
+ nBytes = s->numZ;
+
+ for (t = 0; t < nGroups; t++) {
+ Int32 curr = s->len[t][0];
+ bsW ( s, 5, curr );
+ for (i = 0; i < alphaSize; i++) {
+ while (curr < s->len[t][i]) { bsW(s,2,2); curr++; /* 10 */ };
+ while (curr > s->len[t][i]) { bsW(s,2,3); curr--; /* 11 */ };
+ bsW ( s, 1, 0 );
+ }
+ }
+
+ if (s->verbosity >= 3)
+ VPrintf1 ( "code lengths %d, ", s->numZ-nBytes );
+
+ /*--- And finally, the block data proper ---*/
+ /* walk the same BZ_G_SIZE groups as above, coding each with its selected table */
+ nBytes = s->numZ;
+ selCtr = 0;
+ gs = 0;
+ while (True) {
+ if (gs >= s->nMTF) break;
+ ge = gs + BZ_G_SIZE - 1;
+ if (ge >= s->nMTF) ge = s->nMTF-1;
+ AssertH ( s->selector[selCtr] < nGroups, 3006 );
+
+ if (nGroups == 6 && 50 == ge-gs+1) {
+ /*--- fast track the common case ---*/
+ UInt16 mtfv_i;
+ UChar* s_len_sel_selCtr
+ = &(s->len[s->selector[selCtr]][0]);
+ Int32* s_code_sel_selCtr
+ = &(s->code[s->selector[selCtr]][0]);
+
+# define BZ_ITAH(nn) \
+ mtfv_i = mtfv[gs+(nn)]; \
+ bsW ( s, \
+ s_len_sel_selCtr[mtfv_i], \
+ s_code_sel_selCtr[mtfv_i] )
+
+ BZ_ITAH(0); BZ_ITAH(1); BZ_ITAH(2); BZ_ITAH(3); BZ_ITAH(4);
+ BZ_ITAH(5); BZ_ITAH(6); BZ_ITAH(7); BZ_ITAH(8); BZ_ITAH(9);
+ BZ_ITAH(10); BZ_ITAH(11); BZ_ITAH(12); BZ_ITAH(13); BZ_ITAH(14);
+ BZ_ITAH(15); BZ_ITAH(16); BZ_ITAH(17); BZ_ITAH(18); BZ_ITAH(19);
+ BZ_ITAH(20); BZ_ITAH(21); BZ_ITAH(22); BZ_ITAH(23); BZ_ITAH(24);
+ BZ_ITAH(25); BZ_ITAH(26); BZ_ITAH(27); BZ_ITAH(28); BZ_ITAH(29);
+ BZ_ITAH(30); BZ_ITAH(31); BZ_ITAH(32); BZ_ITAH(33); BZ_ITAH(34);
+ BZ_ITAH(35); BZ_ITAH(36); BZ_ITAH(37); BZ_ITAH(38); BZ_ITAH(39);
+ BZ_ITAH(40); BZ_ITAH(41); BZ_ITAH(42); BZ_ITAH(43); BZ_ITAH(44);
+ BZ_ITAH(45); BZ_ITAH(46); BZ_ITAH(47); BZ_ITAH(48); BZ_ITAH(49);
+
+# undef BZ_ITAH
+
+ } else {
+ /*--- slow version which correctly handles all situations ---*/
+ for (i = gs; i <= ge; i++) {
+ bsW ( s,
+ s->len [s->selector[selCtr]] [mtfv[i]],
+ s->code [s->selector[selCtr]] [mtfv[i]] );
+ }
+ }
+
+
+ gs = ge+1;
+ selCtr++;
+ }
+ AssertH( selCtr == nSelectors, 3007 );
+
+ if (s->verbosity >= 3)
+ VPrintf1( "codes %d\n", s->numZ-nBytes );
+}
+
+
+/*---------------------------------------------------*/
+/*
+ Compress the block currently held in s and append it to the bit stream.
+ A non-empty block (nblock > 0) has its CRC finalised and folded into
+ combinedCRC, is sorted with BZ2_blockSort, and is written as: 6-byte block
+ magic, block CRC, randomisation bit (always 0), 24-bit origPtr, then the
+ output of generateMTFValues / sendMTFValues.
+ The stream header is written first when blockNo == 1, and the stream
+ trailer (6-byte magic plus combinedCRC) last when is_last_block is set,
+ after which the bit buffer is drained with bsFinishWrite.
+*/
+void BZ2_compressBlock ( EState* s, Bool is_last_block )
+{
+ if (s->nblock > 0) {
+
+ BZ_FINALISE_CRC ( s->blockCRC );
+ /* rotate the running CRC left by one bit, then mix in this block's CRC */
+ s->combinedCRC = (s->combinedCRC << 1) | (s->combinedCRC >> 31);
+ s->combinedCRC ^= s->blockCRC;
+ /* restart the output byte count for every block after the first */
+ if (s->blockNo > 1) s->numZ = 0;
+
+ if (s->verbosity >= 2)
+ VPrintf4( " block %d: crc = 0x%08x, "
+ "combined CRC = 0x%08x, size = %d\n",
+ s->blockNo, s->blockCRC, s->combinedCRC, s->nblock );
+
+ BZ2_blockSort ( s );
+ }
+
+ /* output bytes are placed in arr2 directly after the nblock input bytes */
+ s->zbits = (UChar*) (&((UChar*)s->arr2)[s->nblock]);
+
+ /*-- If this is the first block, create the stream header. --*/
+ if (s->blockNo == 1) {
+ BZ2_bsInitWrite ( s );
+ bsPutUChar ( s, BZ_HDR_B );
+ bsPutUChar ( s, BZ_HDR_Z );
+ bsPutUChar ( s, BZ_HDR_h );
+ bsPutUChar ( s, (UChar)(BZ_HDR_0 + s->blockSize100k) );
+ }
+
+ if (s->nblock > 0) {
+
+ /* 6-byte block header magic */
+ bsPutUChar ( s, 0x31 ); bsPutUChar ( s, 0x41 );
+ bsPutUChar ( s, 0x59 ); bsPutUChar ( s, 0x26 );
+ bsPutUChar ( s, 0x53 ); bsPutUChar ( s, 0x59 );
+
+ /*-- Now the block's CRC, so it is in a known place. --*/
+ bsPutUInt32 ( s, s->blockCRC );
+
+ /*--
+ Now a single bit indicating (non-)randomisation.
+ As of version 0.9.5, we use a better sorting algorithm
+ which makes randomisation unnecessary. So always set
+ the randomised bit to 'no'. Of course, the decoder
+ still needs to be able to handle randomised blocks
+ so as to maintain backwards compatibility with
+ older versions of bzip2.
+ --*/
+ bsW(s,1,0);
+
+ bsW ( s, 24, s->origPtr );
+ generateMTFValues ( s );
+ sendMTFValues ( s );
+ }
+
+
+ /*-- If this is the last block, add the stream trailer. --*/
+ if (is_last_block) {
+
+ /* 6-byte end-of-stream magic, followed by the combined CRC */
+ bsPutUChar ( s, 0x17 ); bsPutUChar ( s, 0x72 );
+ bsPutUChar ( s, 0x45 ); bsPutUChar ( s, 0x38 );
+ bsPutUChar ( s, 0x50 ); bsPutUChar ( s, 0x90 );
+ bsPutUInt32 ( s, s->combinedCRC );
+ if (s->verbosity >= 2)
+ VPrintf1( " final combined CRC = 0x%08x\n ", s->combinedCRC );
+ bsFinishWrite ( s );
+ }
+}
+
+
+/*-------------------------------------------------------------*/
+/*--- end compress.c ---*/
+/*-------------------------------------------------------------*/
diff --git a/src/misc/bzlib/crctable.c b/src/misc/bzlib/crctable.c
new file mode 100644
index 00000000..215687b2
--- /dev/null
+++ b/src/misc/bzlib/crctable.c
@@ -0,0 +1,104 @@
+
+/*-------------------------------------------------------------*/
+/*--- Table for doing CRCs ---*/
+/*--- crctable.c ---*/
+/*-------------------------------------------------------------*/
+
+/* ------------------------------------------------------------------
+ This file is part of bzip2/libbzip2, a program and library for
+ lossless, block-sorting data compression.
+
+ bzip2/libbzip2 version 1.0.5 of 10 December 2007
+ Copyright (C) 1996-2007 Julian Seward <jseward@bzip.org>
+
+ Please read the WARNING, DISCLAIMER and PATENTS sections in the
+ README file.
+
+ This program is released under the terms of the license contained
+ in the file LICENSE.
+ ------------------------------------------------------------------ */
+
+
+#include "bzlib_private.h"
+
+/*--
+ I think this is an implementation of the AUTODIN-II,
+ Ethernet & FDDI 32-bit CRC standard. Vaguely derived
+ from code by Rob Warnock, in Section 51 of the
+ comp.compression FAQ.
+--*/
+
+/*--
+ 256-entry table of 32-bit CRC constants, one entry per possible
+ byte value (indices 0..255).
+
+ NOTE(review): entry 1 is 0x04c11db7, which is the generator
+ polynomial of the AUTODIN-II / Ethernet CRC-32 written most
+ significant bit first. That suggests the table is meant to be
+ indexed by the top byte of the running CRC; the code that indexes
+ it is not in this file, so confirm against its user.
+--*/
+UInt32 BZ2_crc32Table[256] = {
+
+ /*-- Ugly, innit? --*/
+
+ 0x00000000L, 0x04c11db7L, 0x09823b6eL, 0x0d4326d9L,
+ 0x130476dcL, 0x17c56b6bL, 0x1a864db2L, 0x1e475005L,
+ 0x2608edb8L, 0x22c9f00fL, 0x2f8ad6d6L, 0x2b4bcb61L,
+ 0x350c9b64L, 0x31cd86d3L, 0x3c8ea00aL, 0x384fbdbdL,
+ 0x4c11db70L, 0x48d0c6c7L, 0x4593e01eL, 0x4152fda9L,
+ 0x5f15adacL, 0x5bd4b01bL, 0x569796c2L, 0x52568b75L,
+ 0x6a1936c8L, 0x6ed82b7fL, 0x639b0da6L, 0x675a1011L,
+ 0x791d4014L, 0x7ddc5da3L, 0x709f7b7aL, 0x745e66cdL,
+ 0x9823b6e0L, 0x9ce2ab57L, 0x91a18d8eL, 0x95609039L,
+ 0x8b27c03cL, 0x8fe6dd8bL, 0x82a5fb52L, 0x8664e6e5L,
+ 0xbe2b5b58L, 0xbaea46efL, 0xb7a96036L, 0xb3687d81L,
+ 0xad2f2d84L, 0xa9ee3033L, 0xa4ad16eaL, 0xa06c0b5dL,
+ 0xd4326d90L, 0xd0f37027L, 0xddb056feL, 0xd9714b49L,
+ 0xc7361b4cL, 0xc3f706fbL, 0xceb42022L, 0xca753d95L,
+ 0xf23a8028L, 0xf6fb9d9fL, 0xfbb8bb46L, 0xff79a6f1L,
+ 0xe13ef6f4L, 0xe5ffeb43L, 0xe8bccd9aL, 0xec7dd02dL,
+ 0x34867077L, 0x30476dc0L, 0x3d044b19L, 0x39c556aeL,
+ 0x278206abL, 0x23431b1cL, 0x2e003dc5L, 0x2ac12072L,
+ 0x128e9dcfL, 0x164f8078L, 0x1b0ca6a1L, 0x1fcdbb16L,
+ 0x018aeb13L, 0x054bf6a4L, 0x0808d07dL, 0x0cc9cdcaL,
+ 0x7897ab07L, 0x7c56b6b0L, 0x71159069L, 0x75d48ddeL,
+ 0x6b93dddbL, 0x6f52c06cL, 0x6211e6b5L, 0x66d0fb02L,
+ 0x5e9f46bfL, 0x5a5e5b08L, 0x571d7dd1L, 0x53dc6066L,
+ 0x4d9b3063L, 0x495a2dd4L, 0x44190b0dL, 0x40d816baL,
+ 0xaca5c697L, 0xa864db20L, 0xa527fdf9L, 0xa1e6e04eL,
+ 0xbfa1b04bL, 0xbb60adfcL, 0xb6238b25L, 0xb2e29692L,
+ 0x8aad2b2fL, 0x8e6c3698L, 0x832f1041L, 0x87ee0df6L,
+ 0x99a95df3L, 0x9d684044L, 0x902b669dL, 0x94ea7b2aL,
+ 0xe0b41de7L, 0xe4750050L, 0xe9362689L, 0xedf73b3eL,
+ 0xf3b06b3bL, 0xf771768cL, 0xfa325055L, 0xfef34de2L,
+ 0xc6bcf05fL, 0xc27dede8L, 0xcf3ecb31L, 0xcbffd686L,
+ 0xd5b88683L, 0xd1799b34L, 0xdc3abdedL, 0xd8fba05aL,
+ 0x690ce0eeL, 0x6dcdfd59L, 0x608edb80L, 0x644fc637L,
+ 0x7a089632L, 0x7ec98b85L, 0x738aad5cL, 0x774bb0ebL,
+ 0x4f040d56L, 0x4bc510e1L, 0x46863638L, 0x42472b8fL,
+ 0x5c007b8aL, 0x58c1663dL, 0x558240e4L, 0x51435d53L,
+ 0x251d3b9eL, 0x21dc2629L, 0x2c9f00f0L, 0x285e1d47L,
+ 0x36194d42L, 0x32d850f5L, 0x3f9b762cL, 0x3b5a6b9bL,
+ 0x0315d626L, 0x07d4cb91L, 0x0a97ed48L, 0x0e56f0ffL,
+ 0x1011a0faL, 0x14d0bd4dL, 0x19939b94L, 0x1d528623L,
+ 0xf12f560eL, 0xf5ee4bb9L, 0xf8ad6d60L, 0xfc6c70d7L,
+ 0xe22b20d2L, 0xe6ea3d65L, 0xeba91bbcL, 0xef68060bL,
+ 0xd727bbb6L, 0xd3e6a601L, 0xdea580d8L, 0xda649d6fL,
+ 0xc423cd6aL, 0xc0e2d0ddL, 0xcda1f604L, 0xc960ebb3L,
+ 0xbd3e8d7eL, 0xb9ff90c9L, 0xb4bcb610L, 0xb07daba7L,
+ 0xae3afba2L, 0xaafbe615L, 0xa7b8c0ccL, 0xa379dd7bL,
+ 0x9b3660c6L, 0x9ff77d71L, 0x92b45ba8L, 0x9675461fL,
+ 0x8832161aL, 0x8cf30badL, 0x81b02d74L, 0x857130c3L,
+ 0x5d8a9099L, 0x594b8d2eL, 0x5408abf7L, 0x50c9b640L,
+ 0x4e8ee645L, 0x4a4ffbf2L, 0x470cdd2bL, 0x43cdc09cL,
+ 0x7b827d21L, 0x7f436096L, 0x7200464fL, 0x76c15bf8L,
+ 0x68860bfdL, 0x6c47164aL, 0x61043093L, 0x65c52d24L,
+ 0x119b4be9L, 0x155a565eL, 0x18197087L, 0x1cd86d30L,
+ 0x029f3d35L, 0x065e2082L, 0x0b1d065bL, 0x0fdc1becL,
+ 0x3793a651L, 0x3352bbe6L, 0x3e119d3fL, 0x3ad08088L,
+ 0x2497d08dL, 0x2056cd3aL, 0x2d15ebe3L, 0x29d4f654L,
+ 0xc5a92679L, 0xc1683bceL, 0xcc2b1d17L, 0xc8ea00a0L,
+ 0xd6ad50a5L, 0xd26c4d12L, 0xdf2f6bcbL, 0xdbee767cL,
+ 0xe3a1cbc1L, 0xe760d676L, 0xea23f0afL, 0xeee2ed18L,
+ 0xf0a5bd1dL, 0xf464a0aaL, 0xf9278673L, 0xfde69bc4L,
+ 0x89b8fd09L, 0x8d79e0beL, 0x803ac667L, 0x84fbdbd0L,
+ 0x9abc8bd5L, 0x9e7d9662L, 0x933eb0bbL, 0x97ffad0cL,
+ 0xafb010b1L, 0xab710d06L, 0xa6322bdfL, 0xa2f33668L,
+ 0xbcb4666dL, 0xb8757bdaL, 0xb5365d03L, 0xb1f740b4L
+};
+
+
+/*-------------------------------------------------------------*/
+/*--- end crctable.c ---*/
+/*-------------------------------------------------------------*/
diff --git a/src/misc/bzlib/decompress.c b/src/misc/bzlib/decompress.c
new file mode 100644
index 00000000..bba5e0fa
--- /dev/null
+++ b/src/misc/bzlib/decompress.c
@@ -0,0 +1,626 @@
+
+/*-------------------------------------------------------------*/
+/*--- Decompression machinery ---*/
+/*--- decompress.c ---*/
+/*-------------------------------------------------------------*/
+
+/* ------------------------------------------------------------------
+ This file is part of bzip2/libbzip2, a program and library for
+ lossless, block-sorting data compression.
+
+ bzip2/libbzip2 version 1.0.5 of 10 December 2007
+ Copyright (C) 1996-2007 Julian Seward <jseward@bzip.org>
+
+ Please read the WARNING, DISCLAIMER and PATENTS sections in the
+ README file.
+
+ This program is released under the terms of the license contained
+ in the file LICENSE.
+ ------------------------------------------------------------------ */
+
+
+#include "bzlib_private.h"
+
+
+/*---------------------------------------------------*/
+/* Build the dense symbol map for the current block: every byte value
+   whose flag is set in s->inUse[] is appended, in ascending order, to
+   s->seqToUnseq[], and s->nInUse ends up holding how many were added. */
+static
+void makeMaps_d ( DState* s )
+{
+   Int32 byteVal;
+   Int32 count = 0;
+
+   for (byteVal = 0; byteVal < 256; byteVal++) {
+      if (!s->inUse[byteVal]) continue;
+      s->seqToUnseq[count] = byteVal;
+      count++;
+   }
+   s->nInUse = count;
+}
+
+
+/*---------------------------------------------------*/
+/* Leave BZ2_decompress with result rrr: store it in the local retVal
+   and jump to the save_state_and_return label, where the decoder's
+   local variables are written back before returning.
+
+   Wrapped in do/while(0) so that "RETURN(x);" is exactly one statement.
+   The earlier "{ ... };" form expanded to two statements once the
+   caller's own semicolon was added, which breaks an unbraced
+   "if (c) RETURN(x); else ...".  Every use must end with a semicolon. */
+#define RETURN(rrr) \
+   do { retVal = (rrr); goto save_state_and_return; } while (0)
+
+/* Resumable bit reader.
+
+   Expands to "case lll:", so it has to sit inside a switch on s->state;
+   it records lll in s->state and then loops until at least nnn bits
+   are buffered.  Once they are, the most recently buffered nnn bits
+   are taken from s->bsBuff (s->bsLive is reduced by nnn) and stored
+   in vvv.
+
+   While short of bits it shifts one more byte in from s->strm->next_in,
+   decrementing avail_in and incrementing the total_in_lo32/total_in_hi32
+   pair (the high word is bumped when the low word wraps to zero).  If
+   no input is left it exits through RETURN(BZ_OK); because s->state was
+   set first, a later switch on s->state lands on the same case label
+   and the read is retried.
+
+   nnn is parenthesised at every use so that an expression argument
+   expands with the intended precedence.  It is evaluated several times
+   and must therefore have no side effects. */
+#define GET_BITS(lll,vvv,nnn) \
+   case lll: s->state = lll; \
+   while (True) { \
+      if (s->bsLive >= (nnn)) { \
+         UInt32 v; \
+         v = (s->bsBuff >> \
+             (s->bsLive-(nnn))) & ((1 << (nnn))-1); \
+         s->bsLive -= (nnn); \
+         vvv = v; \
+         break; \
+      } \
+      if (s->strm->avail_in == 0) RETURN(BZ_OK); \
+      s->bsBuff \
+         = (s->bsBuff << 8) | \
+           ((UInt32) \
+              (*((UChar*)(s->strm->next_in)))); \
+      s->bsLive += 8; \
+      s->strm->next_in++; \
+      s->strm->avail_in--; \
+      s->strm->total_in_lo32++; \
+      if (s->strm->total_in_lo32 == 0) \
+         s->strm->total_in_hi32++; \
+   }
+
+/* Read 8 bits into uuu through GET_BITS, using lll as the state
+ value / case label for this read. */
+#define GET_UCHAR(lll,uuu) \
+ GET_BITS(lll,uuu,8)
+
+/* Read a single bit into uuu through GET_BITS, using lll as the state
+ value / case label for this read. */
+#define GET_BIT(lll,uuu) \
+ GET_BITS(lll,uuu,1)
+
+/*---------------------------------------------------*/
+/* Decode one Huffman-coded symbol into lval.
+
+ Works on the locals of the enclosing function (groupNo, groupPos,
+ nSelectors, gSel, gMinlen, gLimit, gPerm, gBase, zn, zvec, zj).
+
+ - When groupPos is 0 a new group starts: groupNo is advanced,
+ BZ_DATA_ERROR is returned if it reaches nSelectors, groupPos is
+ reset to BZ_G_SIZE, and the minLens/limit/perm/base entries of
+ the table chosen by s->selector[groupNo] are loaded.
+ - gMinlen bits are read at state label1; then, while zvec is
+ greater than gLimit[zn], one more bit is read at state label2
+ and appended to zvec. A length above 20 is BZ_DATA_ERROR.
+ - zvec - gBase[zn] is checked to lie in [0, BZ_MAX_ALPHA_SIZE)
+ before it is used to index gPerm.
+
+ label1 and label2 become case labels via GET_BITS / GET_BIT, so the
+ macro can be re-entered in the middle of a symbol. */
+#define GET_MTF_VAL(label1,label2,lval) \
+{ \
+ if (groupPos == 0) { \
+ groupNo++; \
+ if (groupNo >= nSelectors) \
+ RETURN(BZ_DATA_ERROR); \
+ groupPos = BZ_G_SIZE; \
+ gSel = s->selector[groupNo]; \
+ gMinlen = s->minLens[gSel]; \
+ gLimit = &(s->limit[gSel][0]); \
+ gPerm = &(s->perm[gSel][0]); \
+ gBase = &(s->base[gSel][0]); \
+ } \
+ groupPos--; \
+ zn = gMinlen; \
+ GET_BITS(label1, zvec, zn); \
+ while (1) { \
+ if (zn > 20 /* the longest code */) \
+ RETURN(BZ_DATA_ERROR); \
+ if (zvec <= gLimit[zn]) break; \
+ zn++; \
+ GET_BIT(label2, zj); \
+ zvec = (zvec << 1) | zj; \
+ }; \
+ if (zvec - gBase[zn] < 0 \
+ || zvec - gBase[zn] >= BZ_MAX_ALPHA_SIZE) \
+ RETURN(BZ_DATA_ERROR); \
+ lval = gPerm[zvec - gBase[zn]]; \
+}
+
+
+/*---------------------------------------------------*/
+/* Resumable decoder for the header/entropy-coded part of a bzip2 stream.
+
+   The function is a state machine driven by s->state.  Every GET_BITS /
+   GET_UCHAR / GET_BIT / GET_MTF_VAL below expands to a case label of the
+   switch, so when the input buffer runs dry the function returns BZ_OK
+   and the next call jumps straight back to the read that could not be
+   completed.  All locals that must survive such a return are copied
+   from s->save_* on entry and written back at save_state_and_return.
+
+   Per call it parses, as far as input allows: the stream magic and
+   block size (allocating s->tt, or s->ll16/s->ll4 when
+   s->smallDecompress is set), then either a block (header, CRC,
+   symbol map, selectors, coding tables, MTF/run-length data, inverse
+   BWT set-up) or the end-of-stream trailer.
+
+   Returns
+     BZ_OK                more input is needed, or a block has been fully
+                          decoded and s->state is now BZ_X_OUTPUT
+     BZ_STREAM_END        the stream trailer and combined CRC were read
+     BZ_DATA_ERROR_MAGIC  the stream header is not a valid bzip2 header
+     BZ_DATA_ERROR        the compressed data is inconsistent
+     BZ_MEM_ERROR         a working buffer could not be allocated
+
+   The input is untrusted, so every count taken from it is bounded
+   before it is used as an array index or accumulated:
+     - selectors beyond BZ_MAX_SELECTORS are decoded but not stored;
+     - the run-length multiplier N is capped so that N and es cannot
+       overflow;
+     - unzftab/cftab are checked for range and monotonicity before
+       they index the block arrays. */
+Int32 BZ2_decompress ( DState* s )
+{
+   UChar      uc;
+   Int32      retVal;
+   Int32      minLen, maxLen;
+   bz_stream* strm = s->strm;
+
+   /* stuff that needs to be saved/restored */
+   Int32  i;
+   Int32  j;
+   Int32  t;
+   Int32  alphaSize;
+   Int32  nGroups;
+   Int32  nSelectors;
+   Int32  EOB;
+   Int32  groupNo;
+   Int32  groupPos;
+   Int32  nextSym;
+   Int32  nblockMAX;
+   Int32  nblock;
+   Int32  es;
+   Int32  N;
+   Int32  curr;
+   Int32  zt;
+   Int32  zn;
+   Int32  zvec;
+   Int32  zj;
+   Int32  gSel;
+   Int32  gMinlen;
+   Int32* gLimit;
+   Int32* gBase;
+   Int32* gPerm;
+
+   if (s->state == BZ_X_MAGIC_1) {
+      /*initialise the save area*/
+      s->save_i           = 0;
+      s->save_j           = 0;
+      s->save_t           = 0;
+      s->save_alphaSize   = 0;
+      s->save_nGroups     = 0;
+      s->save_nSelectors  = 0;
+      s->save_EOB         = 0;
+      s->save_groupNo     = 0;
+      s->save_groupPos    = 0;
+      s->save_nextSym     = 0;
+      s->save_nblockMAX   = 0;
+      s->save_nblock      = 0;
+      s->save_es          = 0;
+      s->save_N           = 0;
+      s->save_curr        = 0;
+      s->save_zt          = 0;
+      s->save_zn          = 0;
+      s->save_zvec        = 0;
+      s->save_zj          = 0;
+      s->save_gSel        = 0;
+      s->save_gMinlen     = 0;
+      s->save_gLimit      = NULL;
+      s->save_gBase       = NULL;
+      s->save_gPerm       = NULL;
+   }
+
+   /*restore from the save area*/
+   i           = s->save_i;
+   j           = s->save_j;
+   t           = s->save_t;
+   alphaSize   = s->save_alphaSize;
+   nGroups     = s->save_nGroups;
+   nSelectors  = s->save_nSelectors;
+   EOB         = s->save_EOB;
+   groupNo     = s->save_groupNo;
+   groupPos    = s->save_groupPos;
+   nextSym     = s->save_nextSym;
+   nblockMAX   = s->save_nblockMAX;
+   nblock      = s->save_nblock;
+   es          = s->save_es;
+   N           = s->save_N;
+   curr        = s->save_curr;
+   zt          = s->save_zt;
+   zn          = s->save_zn;
+   zvec        = s->save_zvec;
+   zj          = s->save_zj;
+   gSel        = s->save_gSel;
+   gMinlen     = s->save_gMinlen;
+   gLimit      = s->save_gLimit;
+   gBase       = s->save_gBase;
+   gPerm       = s->save_gPerm;
+
+   retVal = BZ_OK;
+
+   switch (s->state) {
+
+      /*--- Stream header: 'B' 'Z' 'h' and the block size digit ---*/
+      GET_UCHAR(BZ_X_MAGIC_1, uc);
+      if (uc != BZ_HDR_B) RETURN(BZ_DATA_ERROR_MAGIC);
+
+      GET_UCHAR(BZ_X_MAGIC_2, uc);
+      if (uc != BZ_HDR_Z) RETURN(BZ_DATA_ERROR_MAGIC);
+
+      GET_UCHAR(BZ_X_MAGIC_3, uc);
+      if (uc != BZ_HDR_h) RETURN(BZ_DATA_ERROR_MAGIC);
+
+      GET_BITS(BZ_X_MAGIC_4, s->blockSize100k, 8);
+      if (s->blockSize100k < (BZ_HDR_0 + 1) ||
+          s->blockSize100k > (BZ_HDR_0 + 9)) RETURN(BZ_DATA_ERROR_MAGIC);
+      s->blockSize100k -= BZ_HDR_0;
+
+      /* Working storage is sized from the (validated) block size. */
+      if (s->smallDecompress) {
+         s->ll16 = BZALLOC( s->blockSize100k * 100000 * sizeof(UInt16) );
+         s->ll4  = BZALLOC(
+                      ((1 + s->blockSize100k * 100000) >> 1) * sizeof(UChar)
+                   );
+         if (s->ll16 == NULL || s->ll4 == NULL) RETURN(BZ_MEM_ERROR);
+      } else {
+         s->tt  = BZALLOC( s->blockSize100k * 100000 * sizeof(Int32) );
+         if (s->tt == NULL) RETURN(BZ_MEM_ERROR);
+      }
+
+      /*--- Block header, or the first byte of the stream trailer ---*/
+      GET_UCHAR(BZ_X_BLKHDR_1, uc);
+
+      if (uc == 0x17) goto endhdr_2;
+      if (uc != 0x31) RETURN(BZ_DATA_ERROR);
+      GET_UCHAR(BZ_X_BLKHDR_2, uc);
+      if (uc != 0x41) RETURN(BZ_DATA_ERROR);
+      GET_UCHAR(BZ_X_BLKHDR_3, uc);
+      if (uc != 0x59) RETURN(BZ_DATA_ERROR);
+      GET_UCHAR(BZ_X_BLKHDR_4, uc);
+      if (uc != 0x26) RETURN(BZ_DATA_ERROR);
+      GET_UCHAR(BZ_X_BLKHDR_5, uc);
+      if (uc != 0x53) RETURN(BZ_DATA_ERROR);
+      GET_UCHAR(BZ_X_BLKHDR_6, uc);
+      if (uc != 0x59) RETURN(BZ_DATA_ERROR);
+
+      s->currBlockNo++;
+      if (s->verbosity >= 2)
+         VPrintf1 ( "\n [%d: huff+mtf ", s->currBlockNo );
+
+      /* Stored block CRC, most significant byte first. */
+      s->storedBlockCRC = 0;
+      GET_UCHAR(BZ_X_BCRC_1, uc);
+      s->storedBlockCRC = (s->storedBlockCRC << 8) | ((UInt32)uc);
+      GET_UCHAR(BZ_X_BCRC_2, uc);
+      s->storedBlockCRC = (s->storedBlockCRC << 8) | ((UInt32)uc);
+      GET_UCHAR(BZ_X_BCRC_3, uc);
+      s->storedBlockCRC = (s->storedBlockCRC << 8) | ((UInt32)uc);
+      GET_UCHAR(BZ_X_BCRC_4, uc);
+      s->storedBlockCRC = (s->storedBlockCRC << 8) | ((UInt32)uc);
+
+      GET_BITS(BZ_X_RANDBIT, s->blockRandomised, 1);
+
+      /* 24-bit origPtr, most significant byte first. */
+      s->origPtr = 0;
+      GET_UCHAR(BZ_X_ORIGPTR_1, uc);
+      s->origPtr = (s->origPtr << 8) | ((Int32)uc);
+      GET_UCHAR(BZ_X_ORIGPTR_2, uc);
+      s->origPtr = (s->origPtr << 8) | ((Int32)uc);
+      GET_UCHAR(BZ_X_ORIGPTR_3, uc);
+      s->origPtr = (s->origPtr << 8) | ((Int32)uc);
+
+      if (s->origPtr < 0)
+         RETURN(BZ_DATA_ERROR);
+      if (s->origPtr > 10 + 100000*s->blockSize100k)
+         RETURN(BZ_DATA_ERROR);
+
+      /*--- Receive the mapping table ---*/
+      for (i = 0; i < 16; i++) {
+         GET_BIT(BZ_X_MAPPING_1, uc);
+         if (uc == 1)
+            s->inUse16[i] = True; else
+            s->inUse16[i] = False;
+      }
+
+      for (i = 0; i < 256; i++) s->inUse[i] = False;
+
+      for (i = 0; i < 16; i++)
+         if (s->inUse16[i])
+            for (j = 0; j < 16; j++) {
+               GET_BIT(BZ_X_MAPPING_2, uc);
+               if (uc == 1) s->inUse[i * 16 + j] = True;
+            }
+      makeMaps_d ( s );
+      if (s->nInUse == 0) RETURN(BZ_DATA_ERROR);
+      alphaSize = s->nInUse+2;
+
+      /*--- Now the selectors ---*/
+      GET_BITS(BZ_X_SELECTOR_1, nGroups, 3);
+      if (nGroups < 2 || nGroups > 6) RETURN(BZ_DATA_ERROR);
+      GET_BITS(BZ_X_SELECTOR_2, nSelectors, 15);
+      if (nSelectors < 1) RETURN(BZ_DATA_ERROR);
+      for (i = 0; i < nSelectors; i++) {
+         j = 0;
+         while (True) {
+            GET_BIT(BZ_X_SELECTOR_3, uc);
+            if (uc == 0) break;
+            j++;
+            if (j >= nGroups) RETURN(BZ_DATA_ERROR);
+         }
+         /* The 15-bit count can exceed what the selector arrays hold
+            (assumed to be BZ_MAX_SELECTORS entries; the declaration is
+            in bzlib_private.h).  Surplus selectors can never be used,
+            so their bits are consumed but nothing is stored. */
+         if (i < BZ_MAX_SELECTORS)
+            s->selectorMtf[i] = j;
+      }
+      if (nSelectors > BZ_MAX_SELECTORS)
+         nSelectors = BZ_MAX_SELECTORS;
+
+      /*--- Undo the MTF values for the selectors. ---*/
+      {
+         UChar pos[BZ_N_GROUPS], tmp, v;
+         for (v = 0; v < nGroups; v++) pos[v] = v;
+
+         for (i = 0; i < nSelectors; i++) {
+            v = s->selectorMtf[i];
+            tmp = pos[v];
+            while (v > 0) { pos[v] = pos[v-1]; v--; }
+            pos[0] = tmp;
+            s->selector[i] = tmp;
+         }
+      }
+
+      /*--- Now the coding tables ---*/
+      for (t = 0; t < nGroups; t++) {
+         GET_BITS(BZ_X_CODING_1, curr, 5);
+         for (i = 0; i < alphaSize; i++) {
+            while (True) {
+               if (curr < 1 || curr > 20) RETURN(BZ_DATA_ERROR);
+               GET_BIT(BZ_X_CODING_2, uc);
+               if (uc == 0) break;
+               GET_BIT(BZ_X_CODING_3, uc);
+               if (uc == 0) curr++; else curr--;
+            }
+            s->len[t][i] = curr;
+         }
+      }
+
+      /*--- Create the Huffman decoding tables ---*/
+      for (t = 0; t < nGroups; t++) {
+         minLen = 32;
+         maxLen = 0;
+         for (i = 0; i < alphaSize; i++) {
+            if (s->len[t][i] > maxLen) maxLen = s->len[t][i];
+            if (s->len[t][i] < minLen) minLen = s->len[t][i];
+         }
+         BZ2_hbCreateDecodeTables (
+            &(s->limit[t][0]),
+            &(s->base[t][0]),
+            &(s->perm[t][0]),
+            &(s->len[t][0]),
+            minLen, maxLen, alphaSize
+         );
+         s->minLens[t] = minLen;
+      }
+
+      /*--- Now the MTF values ---*/
+
+      EOB      = s->nInUse+1;
+      nblockMAX = 100000 * s->blockSize100k;
+      groupNo  = -1;
+      groupPos = 0;
+
+      for (i = 0; i <= 255; i++) s->unzftab[i] = 0;
+
+      /*-- MTF init --*/
+      {
+         Int32 ii, jj, kk;
+         kk = MTFA_SIZE-1;
+         for (ii = 256 / MTFL_SIZE - 1; ii >= 0; ii--) {
+            for (jj = MTFL_SIZE-1; jj >= 0; jj--) {
+               s->mtfa[kk] = (UChar)(ii * MTFL_SIZE + jj);
+               kk--;
+            }
+            s->mtfbase[ii] = kk + 1;
+         }
+      }
+      /*-- end MTF init --*/
+
+      nblock = 0;
+      GET_MTF_VAL(BZ_X_MTF_1, BZ_X_MTF_2, nextSym);
+
+      while (True) {
+
+         if (nextSym == EOB) break;
+
+         if (nextSym == BZ_RUNA || nextSym == BZ_RUNB) {
+
+            es = -1;
+            N = 1;
+            do {
+               /* A run longer than the largest block is already
+                  invalid; stopping here keeps N and es far from
+                  Int32 overflow on hostile input. */
+               if (N >= 2*1024*1024) RETURN(BZ_DATA_ERROR);
+               if (nextSym == BZ_RUNA) es = es + (0+1) * N; else
+               if (nextSym == BZ_RUNB) es = es + (1+1) * N;
+               N = N * 2;
+               GET_MTF_VAL(BZ_X_MTF_3, BZ_X_MTF_4, nextSym);
+            }
+               while (nextSym == BZ_RUNA || nextSym == BZ_RUNB);
+
+            es++;
+            uc = s->seqToUnseq[ s->mtfa[s->mtfbase[0]] ];
+            s->unzftab[uc] += es;
+
+            if (s->smallDecompress)
+               while (es > 0) {
+                  if (nblock >= nblockMAX) RETURN(BZ_DATA_ERROR);
+                  s->ll16[nblock] = (UInt16)uc;
+                  nblock++;
+                  es--;
+               }
+            else
+               while (es > 0) {
+                  if (nblock >= nblockMAX) RETURN(BZ_DATA_ERROR);
+                  s->tt[nblock] = (UInt32)uc;
+                  nblock++;
+                  es--;
+               };
+
+            continue;
+
+         } else {
+
+            if (nblock >= nblockMAX) RETURN(BZ_DATA_ERROR);
+
+            /*-- uc = MTF ( nextSym-1 ) --*/
+            {
+               Int32 ii, jj, kk, pp, lno, off;
+               UInt32 nn;
+               nn = (UInt32)(nextSym - 1);
+
+               if (nn < MTFL_SIZE) {
+                  /* avoid general-case expense */
+                  pp = s->mtfbase[0];
+                  uc = s->mtfa[pp+nn];
+                  while (nn > 3) {
+                     Int32 z = pp+nn;
+                     s->mtfa[(z)  ] = s->mtfa[(z)-1];
+                     s->mtfa[(z)-1] = s->mtfa[(z)-2];
+                     s->mtfa[(z)-2] = s->mtfa[(z)-3];
+                     s->mtfa[(z)-3] = s->mtfa[(z)-4];
+                     nn -= 4;
+                  }
+                  while (nn > 0) {
+                     s->mtfa[(pp+nn)] = s->mtfa[(pp+nn)-1]; nn--;
+                  };
+                  s->mtfa[pp] = uc;
+               } else {
+                  /* general case */
+                  lno = nn / MTFL_SIZE;
+                  off = nn % MTFL_SIZE;
+                  pp = s->mtfbase[lno] + off;
+                  uc = s->mtfa[pp];
+                  while (pp > s->mtfbase[lno]) {
+                     s->mtfa[pp] = s->mtfa[pp-1]; pp--;
+                  };
+                  s->mtfbase[lno]++;
+                  while (lno > 0) {
+                     s->mtfbase[lno]--;
+                     s->mtfa[s->mtfbase[lno]]
+                        = s->mtfa[s->mtfbase[lno-1] + MTFL_SIZE - 1];
+                     lno--;
+                  }
+                  s->mtfbase[0]--;
+                  s->mtfa[s->mtfbase[0]] = uc;
+                  if (s->mtfbase[0] == 0) {
+                     /* ran out of room at the front: repack the lists
+                        at the top end of mtfa */
+                     kk = MTFA_SIZE-1;
+                     for (ii = 256 / MTFL_SIZE-1; ii >= 0; ii--) {
+                        for (jj = MTFL_SIZE-1; jj >= 0; jj--) {
+                           s->mtfa[kk] = s->mtfa[s->mtfbase[ii] + jj];
+                           kk--;
+                        }
+                        s->mtfbase[ii] = kk + 1;
+                     }
+                  }
+               }
+            }
+            /*-- end uc = MTF ( nextSym-1 ) --*/
+
+            s->unzftab[s->seqToUnseq[uc]]++;
+            if (s->smallDecompress)
+               s->ll16[nblock] = (UInt16)(s->seqToUnseq[uc]); else
+               s->tt[nblock]   = (UInt32)(s->seqToUnseq[uc]);
+            nblock++;
+
+            GET_MTF_VAL(BZ_X_MTF_5, BZ_X_MTF_6, nextSym);
+            continue;
+         }
+      }
+
+      /* Now we know what nblock is, we can do a better sanity
+         check on s->origPtr.
+      */
+      if (s->origPtr < 0 || s->origPtr >= nblock)
+         RETURN(BZ_DATA_ERROR);
+
+      /*-- Set up cftab to facilitate generation of T^(-1) --*/
+      /* Check: unzftab entries in range. */
+      for (i = 0; i <= 255; i++) {
+         if (s->unzftab[i] < 0 || s->unzftab[i] > nblock)
+            RETURN(BZ_DATA_ERROR);
+      }
+      /* Actually generate cftab. */
+      s->cftab[0] = 0;
+      for (i = 1; i <= 256; i++) s->cftab[i] = s->unzftab[i-1];
+      for (i = 1; i <= 256; i++) s->cftab[i] += s->cftab[i-1];
+      /* Check: cftab entries in range. */
+      for (i = 0; i <= 256; i++) {
+         if (s->cftab[i] < 0 || s->cftab[i] > nblock) {
+            /* s->cftab[i] can legitimately be == nblock */
+            RETURN(BZ_DATA_ERROR);
+         }
+      }
+      /* Check: cftab entries non-descending, since they are used
+         below as write positions into the block arrays. */
+      for (i = 1; i <= 256; i++) {
+         if (s->cftab[i-1] > s->cftab[i]) {
+            RETURN(BZ_DATA_ERROR);
+         }
+      }
+
+      s->state_out_len = 0;
+      s->state_out_ch  = 0;
+      BZ_INITIALISE_CRC ( s->calculatedBlockCRC );
+      s->state = BZ_X_OUTPUT;
+      if (s->verbosity >= 2) VPrintf0 ( "rt+rld" );
+
+      if (s->smallDecompress) {
+
+         /*-- Make a copy of cftab, used in generation of T --*/
+         for (i = 0; i <= 256; i++) s->cftabCopy[i] = s->cftab[i];
+
+         /*-- compute the T vector --*/
+         for (i = 0; i < nblock; i++) {
+            uc = (UChar)(s->ll16[i]);
+            SET_LL(i, s->cftabCopy[uc]);
+            s->cftabCopy[uc]++;
+         }
+
+         /*-- Compute T^(-1) by pointer reversal on T --*/
+         i = s->origPtr;
+         j = GET_LL(i);
+         do {
+            Int32 tmp = GET_LL(j);
+            SET_LL(j, i);
+            i = j;
+            j = tmp;
+         }
+            while (i != s->origPtr);
+
+         s->tPos = s->origPtr;
+         s->nblock_used = 0;
+         if (s->blockRandomised) {
+            BZ_RAND_INIT_MASK;
+            BZ_GET_SMALL(s->k0); s->nblock_used++;
+            BZ_RAND_UPD_MASK; s->k0 ^= BZ_RAND_MASK;
+         } else {
+            BZ_GET_SMALL(s->k0); s->nblock_used++;
+         }
+
+      } else {
+
+         /*-- compute the T^(-1) vector --*/
+         for (i = 0; i < nblock; i++) {
+            uc = (UChar)(s->tt[i] & 0xff);
+            s->tt[s->cftab[uc]] |= (i << 8);
+            s->cftab[uc]++;
+         }
+
+         s->tPos = s->tt[s->origPtr] >> 8;
+         s->nblock_used = 0;
+         if (s->blockRandomised) {
+            BZ_RAND_INIT_MASK;
+            BZ_GET_FAST(s->k0); s->nblock_used++;
+            BZ_RAND_UPD_MASK; s->k0 ^= BZ_RAND_MASK;
+         } else {
+            BZ_GET_FAST(s->k0); s->nblock_used++;
+         }
+
+      }
+
+      RETURN(BZ_OK);
+
+
+
+    endhdr_2:
+
+      /*--- Rest of the stream trailer, then the combined CRC ---*/
+      GET_UCHAR(BZ_X_ENDHDR_2, uc);
+      if (uc != 0x72) RETURN(BZ_DATA_ERROR);
+      GET_UCHAR(BZ_X_ENDHDR_3, uc);
+      if (uc != 0x45) RETURN(BZ_DATA_ERROR);
+      GET_UCHAR(BZ_X_ENDHDR_4, uc);
+      if (uc != 0x38) RETURN(BZ_DATA_ERROR);
+      GET_UCHAR(BZ_X_ENDHDR_5, uc);
+      if (uc != 0x50) RETURN(BZ_DATA_ERROR);
+      GET_UCHAR(BZ_X_ENDHDR_6, uc);
+      if (uc != 0x90) RETURN(BZ_DATA_ERROR);
+
+      s->storedCombinedCRC = 0;
+      GET_UCHAR(BZ_X_CCRC_1, uc);
+      s->storedCombinedCRC = (s->storedCombinedCRC << 8) | ((UInt32)uc);
+      GET_UCHAR(BZ_X_CCRC_2, uc);
+      s->storedCombinedCRC = (s->storedCombinedCRC << 8) | ((UInt32)uc);
+      GET_UCHAR(BZ_X_CCRC_3, uc);
+      s->storedCombinedCRC = (s->storedCombinedCRC << 8) | ((UInt32)uc);
+      GET_UCHAR(BZ_X_CCRC_4, uc);
+      s->storedCombinedCRC = (s->storedCombinedCRC << 8) | ((UInt32)uc);
+
+      s->state = BZ_X_IDLE;
+      RETURN(BZ_STREAM_END);
+
+      default: AssertH ( False, 4001 );
+   }
+
+   AssertH ( False, 4002 );
+
+   save_state_and_return:
+
+   s->save_i           = i;
+   s->save_j           = j;
+   s->save_t           = t;
+   s->save_alphaSize   = alphaSize;
+   s->save_nGroups     = nGroups;
+   s->save_nSelectors  = nSelectors;
+   s->save_EOB         = EOB;
+   s->save_groupNo     = groupNo;
+   s->save_groupPos    = groupPos;
+   s->save_nextSym     = nextSym;
+   s->save_nblockMAX   = nblockMAX;
+   s->save_nblock      = nblock;
+   s->save_es          = es;
+   s->save_N           = N;
+   s->save_curr        = curr;
+   s->save_zt          = zt;
+   s->save_zn          = zn;
+   s->save_zvec        = zvec;
+   s->save_zj          = zj;
+   s->save_gSel        = gSel;
+   s->save_gMinlen     = gMinlen;
+   s->save_gLimit      = gLimit;
+   s->save_gBase       = gBase;
+   s->save_gPerm       = gPerm;
+
+   return retVal;
+}
+
+
+/*-------------------------------------------------------------*/
+/*--- end decompress.c ---*/
+/*-------------------------------------------------------------*/
diff --git a/src/misc/bzlib/huffman.c b/src/misc/bzlib/huffman.c
new file mode 100644
index 00000000..87e79e38
--- /dev/null
+++ b/src/misc/bzlib/huffman.c
@@ -0,0 +1,205 @@
+
+/*-------------------------------------------------------------*/
+/*--- Huffman coding low-level stuff ---*/
+/*--- huffman.c ---*/
+/*-------------------------------------------------------------*/
+
+/* ------------------------------------------------------------------
+ This file is part of bzip2/libbzip2, a program and library for
+ lossless, block-sorting data compression.
+
+ bzip2/libbzip2 version 1.0.5 of 10 December 2007
+ Copyright (C) 1996-2007 Julian Seward <jseward@bzip.org>
+
+ Please read the WARNING, DISCLAIMER and PATENTS sections in the
+ README file.
+
+ This program is released under the terms of the license contained
+ in the file LICENSE.
+ ------------------------------------------------------------------ */
+
+
+#include "bzlib_private.h"
+
+/*---------------------------------------------------*/
+/* A node weight packs two fields into one 32-bit value: the upper 24
+   bits carry the weight proper and the low 8 bits carry a depth. */
+#define WEIGHTOF(zz0)  ((zz0) & 0xffffff00)
+#define DEPTHOF(zz1)   ((zz1) & 0x000000ff)
+#define MYMAX(zz2,zz3) ((zz2) > (zz3) ? (zz2) : (zz3))
+
+/* Packed weight of a parent node: the sum of the children's weight
+   fields, with a depth one greater than the deeper child.  The whole
+   expansion is parenthesised so it behaves as a single operand when
+   used inside a larger expression. */
+#define ADDWEIGHTS(zw1,zw2)                           \
+   ((WEIGHTOF(zw1)+WEIGHTOF(zw2)) |                   \
+    (1 + MYMAX(DEPTHOF(zw1),DEPTHOF(zw2))))
+
+/* Sift the entry at heap index z up towards the root of a min-heap
+ ordered by weight[]. Uses the caller's heap[] and weight[] arrays.
+ The parent of index zz is zz >> 1. There is no explicit test for
+ the root: the loop relies on index 0 acting as a sentinel whose
+ weight is not greater than any real entry (BZ2_hbMakeCodeLengths
+ sets heap[0] = 0 and weight[0] = 0). */
+#define UPHEAP(z) \
+{ \
+ Int32 zz, tmp; \
+ zz = z; tmp = heap[zz]; \
+ while (weight[tmp] < weight[heap[zz >> 1]]) { \
+ heap[zz] = heap[zz >> 1]; \
+ zz >>= 1; \
+ } \
+ heap[zz] = tmp; \
+}
+
+/* Sift the entry at heap index z down a min-heap ordered by weight[].
+ Uses the caller's heap[], weight[] and nHeap (index of the last
+ valid entry). The children of index zz are zz << 1 and
+ (zz << 1) + 1; at each level the lighter child is chosen, and the
+ walk stops when there is no child or the moved entry is lighter
+ than that child. */
+#define DOWNHEAP(z) \
+{ \
+ Int32 zz, yy, tmp; \
+ zz = z; tmp = heap[zz]; \
+ while (True) { \
+ yy = zz << 1; \
+ if (yy > nHeap) break; \
+ if (yy < nHeap && \
+ weight[heap[yy+1]] < weight[heap[yy]]) \
+ yy++; \
+ if (weight[tmp] < weight[heap[yy]]) break; \
+ heap[zz] = heap[yy]; \
+ zz = yy; \
+ } \
+ heap[zz] = tmp; \
+}
+
+
+/*---------------------------------------------------*/
+/* Compute Huffman code lengths for an alphabet of alphaSize symbols.
+
+   len        out: len[k] receives the code length of symbol k
+   freq       in:  freq[k] is the frequency of symbol k; a zero
+                   frequency is treated as 1
+   alphaSize  number of symbols
+   maxLen     longest code length allowed
+
+   A Huffman tree is built with a min-heap of packed weights.  If any
+   resulting length exceeds maxLen, every symbol weight is roughly
+   halved (never dropping below 1) and the tree is rebuilt, repeating
+   until all lengths fit.
+
+   Nodes and heap entries are numbered from 1; entry 0 of both heap[]
+   and weight[] is a sentinel used by UPHEAP. */
+void BZ2_hbMakeCodeLengths ( UChar *len,
+                             Int32 *freq,
+                             Int32 alphaSize,
+                             Int32 maxLen )
+{
+   Int32 heap   [ BZ_MAX_ALPHA_SIZE + 2 ];
+   Int32 weight [ BZ_MAX_ALPHA_SIZE * 2 ];
+   Int32 parent [ BZ_MAX_ALPHA_SIZE * 2 ];
+
+   Int32 nNodes, nHeap;
+   Int32 sym, node, depth, low1, low2;
+   Bool  tooLong;
+
+   /* Initial leaf weights: frequency in the upper 24 bits, depth 0. */
+   for (sym = 1; sym <= alphaSize; sym++) {
+      Int32 f = freq[sym-1];
+      if (f == 0) f = 1;
+      weight[sym] = f << 8;
+   }
+
+   for (;;) {
+
+      nNodes = alphaSize;
+      nHeap  = 0;
+
+      heap[0]   = 0;
+      weight[0] = 0;
+      parent[0] = -2;
+
+      /* Push every leaf onto the heap. */
+      for (sym = 1; sym <= alphaSize; sym++) {
+         parent[sym] = -1;
+         heap[++nHeap] = sym;
+         UPHEAP(nHeap);
+      }
+
+      AssertH( nHeap < (BZ_MAX_ALPHA_SIZE+2), 2001 );
+
+      /* Repeatedly join the two lightest nodes under a new parent. */
+      while (nHeap > 1) {
+         low1 = heap[1]; heap[1] = heap[nHeap]; nHeap--; DOWNHEAP(1);
+         low2 = heap[1]; heap[1] = heap[nHeap]; nHeap--; DOWNHEAP(1);
+         nNodes++;
+         parent[low2]   = nNodes;
+         parent[low1]   = nNodes;
+         weight[nNodes] = ADDWEIGHTS(weight[low1], weight[low2]);
+         parent[nNodes] = -1;
+         heap[++nHeap]  = nNodes;
+         UPHEAP(nHeap);
+      }
+
+      AssertH( nNodes < (BZ_MAX_ALPHA_SIZE * 2), 2002 );
+
+      /* A leaf's code length is its distance from the root. */
+      tooLong = False;
+      for (sym = 1; sym <= alphaSize; sym++) {
+         depth = 0;
+         for (node = sym; parent[node] >= 0; node = parent[node])
+            depth++;
+         len[sym-1] = depth;
+         if (depth > maxLen) tooLong = True;
+      }
+
+      if (! tooLong) break;
+
+      /* Some code is longer than maxLen: flatten the distribution by
+         scaling every leaf weight to 1 + weight/2 and try again.
+
+         History (17 Oct 04): this loop used to stop at 'i < alphaSize'
+         and so skipped the last symbol, which in theory could let the
+         compressor loop forever.  The step was almost never reached
+         while maxLen was 20 bits (up to and including 1.0.2); since
+         1.0.3 maxLen is 17 bits, so it now runs from time to time.
+         Consequently 1.0.3 and later only emit codes of at most 17
+         bits, but the decompressor must still accept lengths of up
+         to 20 for streams written by older versions. */
+      for (sym = 1; sym <= alphaSize; sym++) {
+         Int32 scaled = 1 + ((weight[sym] >> 8) / 2);
+         weight[sym] = scaled << 8;
+      }
+   }
+}
+
+
+/*---------------------------------------------------*/
+/* Assign code values from code lengths.
+
+   code       out: code[k] receives the code value of symbol k
+   length     in:  length[k] is the code length of symbol k
+   minLen     shortest length to process
+   maxLen     longest length to process
+   alphaSize  number of symbols
+
+   Lengths are visited from minLen to maxLen; within one length the
+   symbols receive consecutive values in index order, and the running
+   value is shifted left by one bit before the next length. */
+void BZ2_hbAssignCodes ( Int32 *code,
+                         UChar *length,
+                         Int32 minLen,
+                         Int32 maxLen,
+                         Int32 alphaSize )
+{
+   Int32 nextCode = 0;
+   Int32 bits;
+   Int32 sym;
+
+   for (bits = minLen; bits <= maxLen; bits++) {
+      for (sym = 0; sym < alphaSize; sym++) {
+         if (length[sym] != bits) continue;
+         code[sym] = nextCode;
+         nextCode++;
+      }
+      nextCode = nextCode << 1;
+   }
+}
+
+
+/*---------------------------------------------------*/
+/* Build the per-length decoding tables from a set of code lengths.
+
+   limit      out: limit[n] is the largest code value of length n
+   base       out: base[n] is the amount subtracted from a length-n
+                   code value to obtain an index into perm[]
+   perm       out: symbols ordered by code length, then by index
+   length     in:  length[k] is the code length of symbol k
+   minLen     shortest code length present
+   maxLen     longest code length present
+   alphaSize  number of symbols
+
+   limit[] and base[] are cleared for indices 0..BZ_MAX_CODE_LEN-1
+   before being filled in. */
+void BZ2_hbCreateDecodeTables ( Int32 *limit,
+                                Int32 *base,
+                                Int32 *perm,
+                                UChar *length,
+                                Int32 minLen,
+                                Int32 maxLen,
+                                Int32 alphaSize )
+{
+   Int32 bits, sym, slot, code;
+
+   /* perm[]: list the symbols of each length in turn. */
+   slot = 0;
+   for (bits = minLen; bits <= maxLen; bits++) {
+      for (sym = 0; sym < alphaSize; sym++) {
+         if (length[sym] != bits) continue;
+         perm[slot] = sym;
+         slot++;
+      }
+   }
+
+   /* base[n+1] first counts the symbols of length n, then becomes a
+      running total. */
+   for (bits = 0; bits < BZ_MAX_CODE_LEN; bits++) base[bits] = 0;
+   for (sym = 0; sym < alphaSize; sym++) base[length[sym]+1] += 1;
+   for (bits = 1; bits < BZ_MAX_CODE_LEN; bits++)
+      base[bits] = base[bits] + base[bits-1];
+
+   for (bits = 0; bits < BZ_MAX_CODE_LEN; bits++) limit[bits] = 0;
+
+   /* limit[n]: last code value of length n. */
+   code = 0;
+   for (bits = minLen; bits <= maxLen; bits++) {
+      code = code + (base[bits+1] - base[bits]);
+      limit[bits] = code - 1;
+      code = code << 1;
+   }
+
+   /* Turn base[n] into the offset between a length-n code value and
+      its position in perm[]. */
+   for (bits = minLen + 1; bits <= maxLen; bits++)
+      base[bits] = ((limit[bits-1] + 1) << 1) - base[bits];
+}
+
+
+/*-------------------------------------------------------------*/
+/*--- end huffman.c ---*/
+/*-------------------------------------------------------------*/
diff --git a/src/misc/bzlib/link.txt b/src/misc/bzlib/link.txt
new file mode 100644
index 00000000..cdfca2df
--- /dev/null
+++ b/src/misc/bzlib/link.txt
@@ -0,0 +1,2 @@
+http://www.bzip.org
+http://gnuwin32.sourceforge.net/packages/bzip2.htm
diff --git a/src/misc/bzlib/manual.html b/src/misc/bzlib/manual.html
new file mode 100644
index 00000000..68259b27
--- /dev/null
+++ b/src/misc/bzlib/manual.html
@@ -0,0 +1,2540 @@
+<html>
+<head>
+<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
+<title>bzip2 and libbzip2, version 1.0.5</title>
+<meta name="generator" content="DocBook XSL Stylesheets V1.69.1">
+<style type="text/css" media="screen">/* Colours:
+#74240f dark brown h1, h2, h3, h4
+#336699 medium blue links
+#339999 turquoise link hover colour
+#202020 almost black general text
+#761596 purple md5sum text
+#626262 dark gray pre border
+#eeeeee very light gray pre background
+#f2f2f9 very light blue nav table background
+#3366cc medium blue nav table border
+*/
+
+a, a:link, a:visited, a:active { color: #336699; }
+a:hover { color: #339999; }
+
+body { font: 80%/126% sans-serif; }
+h1, h2, h3, h4 { color: #74240f; }
+
+dt { color: #336699; font-weight: bold }
+dd {
+ margin-left: 1.5em;
+ padding-bottom: 0.8em;
+}
+
+/* -- ruler -- */
+div.hr_blue {
+ height: 3px;
+ background:#ffffff url("/images/hr_blue.png") repeat-x; }
+div.hr_blue hr { display:none; }
+
+/* release styles */
+#release p { margin-top: 0.4em; }
+#release .md5sum { color: #761596; }
+
+
+/* ------ styles for docs|manuals|howto ------ */
+/* -- lists -- */
+ul {
+ margin: 0px 4px 16px 16px;
+ padding: 0px;
+ list-style: url("/images/li-blue.png");
+}
+ul li {
+ margin-bottom: 10px;
+}
+ul ul {
+ list-style-type: none;
+ list-style-image: none;
+ margin-left: 0px;
+}
+
+/* header / footer nav tables */
+table.nav {
+ border: solid 1px #3366cc;
+ background: #f2f2f9;
+ background-color: #f2f2f9;
+ margin-bottom: 0.5em;
+}
+/* don't have underlined links in chunked nav menus */
+table.nav a { text-decoration: none; }
+table.nav a:hover { text-decoration: underline; }
+table.nav td { font-size: 85%; }
+
+code, tt, pre { font-size: 120%; }
+code, tt { color: #761596; }
+
+div.literallayout, pre.programlisting, pre.screen {
+ color: #000000;
+ padding: 0.5em;
+ background: #eeeeee;
+ border: 1px solid #626262;
+ background-color: #eeeeee;
+ margin: 4px 0px 4px 0px;
+}
+</style>
+</head>
+<body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF"><div class="book" lang="en">
+<div class="titlepage">
+<div>
+<div><h1 class="title">
+<a name="userman"></a>bzip2 and libbzip2, version 1.0.5</h1></div>
+<div><h2 class="subtitle">A program and library for data compression</h2></div>
+<div><div class="authorgroup"><div class="author">
+<h3 class="author">
+<span class="firstname">Julian</span> <span class="surname">Seward</span>
+</h3>
+<div class="affiliation"><span class="orgname">http://www.bzip.org<br></span></div>
+</div></div></div>
+<div><p class="releaseinfo">Version 1.0.5 of 10 December 2007</p></div>
+<div><p class="copyright">Copyright © 1996-2007 Julian Seward</p></div>
+<div><div class="legalnotice">
+<a name="id2499833"></a><p>This program, <code class="computeroutput">bzip2</code>, the
+ associated library <code class="computeroutput">libbzip2</code>, and
+ all documentation, are copyright © 1996-2007 Julian Seward.
+ All rights reserved.</p>
+<p>Redistribution and use in source and binary forms, with
+ or without modification, are permitted provided that the
+ following conditions are met:</p>
+<div class="itemizedlist"><ul type="bullet">
+<li style="list-style-type: disc"><p>Redistributions of source code must retain the
+ above copyright notice, this list of conditions and the
+ following disclaimer.</p></li>
+<li style="list-style-type: disc"><p>The origin of this software must not be
+ misrepresented; you must not claim that you wrote the original
+ software. If you use this software in a product, an
+ acknowledgment in the product documentation would be
+ appreciated but is not required.</p></li>
+<li style="list-style-type: disc"><p>Altered source versions must be plainly marked
+ as such, and must not be misrepresented as being the original
+ software.</p></li>
+<li style="list-style-type: disc"><p>The name of the author may not be used to
+ endorse or promote products derived from this software without
+ specific prior written permission.</p></li>
+</ul></div>
+<p>THIS SOFTWARE IS PROVIDED BY THE AUTHOR "AS IS" AND ANY
+ EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ THE POSSIBILITY OF SUCH DAMAGE.</p>
+<p>PATENTS: To the best of my knowledge,
+ <code class="computeroutput">bzip2</code> and
+ <code class="computeroutput">libbzip2</code> do not use any patented
+ algorithms. However, I do not have the resources to carry
+ out a patent search. Therefore I cannot give any guarantee of
+ the above statement.
+ </p>
+</div></div>
+</div>
+<hr>
+</div>
+<div class="toc">
+<p><b>Table of Contents</b></p>
+<dl>
+<dt><span class="chapter"><a href="#intro">1. Introduction</a></span></dt>
+<dt><span class="chapter"><a href="#using">2. How to use bzip2</a></span></dt>
+<dd><dl>
+<dt><span class="sect1"><a href="#name">2.1. NAME</a></span></dt>
+<dt><span class="sect1"><a href="#synopsis">2.2. SYNOPSIS</a></span></dt>
+<dt><span class="sect1"><a href="#description">2.3. DESCRIPTION</a></span></dt>
+<dt><span class="sect1"><a href="#options">2.4. OPTIONS</a></span></dt>
+<dt><span class="sect1"><a href="#memory-management">2.5. MEMORY MANAGEMENT</a></span></dt>
+<dt><span class="sect1"><a href="#recovering">2.6. RECOVERING DATA FROM DAMAGED FILES</a></span></dt>
+<dt><span class="sect1"><a href="#performance">2.7. PERFORMANCE NOTES</a></span></dt>
+<dt><span class="sect1"><a href="#caveats">2.8. CAVEATS</a></span></dt>
+<dt><span class="sect1"><a href="#author">2.9. AUTHOR</a></span></dt>
+</dl></dd>
+<dt><span class="chapter"><a href="#libprog">3.
+Programming with <code class="computeroutput">libbzip2</code>
+</a></span></dt>
+<dd><dl>
+<dt><span class="sect1"><a href="#top-level">3.1. Top-level structure</a></span></dt>
+<dd><dl>
+<dt><span class="sect2"><a href="#ll-summary">3.1.1. Low-level summary</a></span></dt>
+<dt><span class="sect2"><a href="#hl-summary">3.1.2. High-level summary</a></span></dt>
+<dt><span class="sect2"><a href="#util-fns-summary">3.1.3. Utility functions summary</a></span></dt>
+</dl></dd>
+<dt><span class="sect1"><a href="#err-handling">3.2. Error handling</a></span></dt>
+<dt><span class="sect1"><a href="#low-level">3.3. Low-level interface</a></span></dt>
+<dd><dl>
+<dt><span class="sect2"><a href="#bzcompress-init">3.3.1. <code class="computeroutput">BZ2_bzCompressInit</code></a></span></dt>
+<dt><span class="sect2"><a href="#bzCompress">3.3.2. <code class="computeroutput">BZ2_bzCompress</code></a></span></dt>
+<dt><span class="sect2"><a href="#bzCompress-end">3.3.3. <code class="computeroutput">BZ2_bzCompressEnd</code></a></span></dt>
+<dt><span class="sect2"><a href="#bzDecompress-init">3.3.4. <code class="computeroutput">BZ2_bzDecompressInit</code></a></span></dt>
+<dt><span class="sect2"><a href="#bzDecompress">3.3.5. <code class="computeroutput">BZ2_bzDecompress</code></a></span></dt>
+<dt><span class="sect2"><a href="#bzDecompress-end">3.3.6. <code class="computeroutput">BZ2_bzDecompressEnd</code></a></span></dt>
+</dl></dd>
+<dt><span class="sect1"><a href="#hl-interface">3.4. High-level interface</a></span></dt>
+<dd><dl>
+<dt><span class="sect2"><a href="#bzreadopen">3.4.1. <code class="computeroutput">BZ2_bzReadOpen</code></a></span></dt>
+<dt><span class="sect2"><a href="#bzread">3.4.2. <code class="computeroutput">BZ2_bzRead</code></a></span></dt>
+<dt><span class="sect2"><a href="#bzreadgetunused">3.4.3. <code class="computeroutput">BZ2_bzReadGetUnused</code></a></span></dt>
+<dt><span class="sect2"><a href="#bzreadclose">3.4.4. <code class="computeroutput">BZ2_bzReadClose</code></a></span></dt>
+<dt><span class="sect2"><a href="#bzwriteopen">3.4.5. <code class="computeroutput">BZ2_bzWriteOpen</code></a></span></dt>
+<dt><span class="sect2"><a href="#bzwrite">3.4.6. <code class="computeroutput">BZ2_bzWrite</code></a></span></dt>
+<dt><span class="sect2"><a href="#bzwriteclose">3.4.7. <code class="computeroutput">BZ2_bzWriteClose</code></a></span></dt>
+<dt><span class="sect2"><a href="#embed">3.4.8. Handling embedded compressed data streams</a></span></dt>
+<dt><span class="sect2"><a href="#std-rdwr">3.4.9. Standard file-reading/writing code</a></span></dt>
+</dl></dd>
+<dt><span class="sect1"><a href="#util-fns">3.5. Utility functions</a></span></dt>
+<dd><dl>
+<dt><span class="sect2"><a href="#bzbufftobuffcompress">3.5.1. <code class="computeroutput">BZ2_bzBuffToBuffCompress</code></a></span></dt>
+<dt><span class="sect2"><a href="#bzbufftobuffdecompress">3.5.2. <code class="computeroutput">BZ2_bzBuffToBuffDecompress</code></a></span></dt>
+</dl></dd>
+<dt><span class="sect1"><a href="#zlib-compat">3.6. <code class="computeroutput">zlib</code> compatibility functions</a></span></dt>
+<dt><span class="sect1"><a href="#stdio-free">3.7. Using the library in a <code class="computeroutput">stdio</code>-free environment</a></span></dt>
+<dd><dl>
+<dt><span class="sect2"><a href="#stdio-bye">3.7.1. Getting rid of <code class="computeroutput">stdio</code></a></span></dt>
+<dt><span class="sect2"><a href="#critical-error">3.7.2. Critical error handling</a></span></dt>
+</dl></dd>
+<dt><span class="sect1"><a href="#win-dll">3.8. Making a Windows DLL</a></span></dt>
+</dl></dd>
+<dt><span class="chapter"><a href="#misc">4. Miscellanea</a></span></dt>
+<dd><dl>
+<dt><span class="sect1"><a href="#limits">4.1. Limitations of the compressed file format</a></span></dt>
+<dt><span class="sect1"><a href="#port-issues">4.2. Portability issues</a></span></dt>
+<dt><span class="sect1"><a href="#bugs">4.3. Reporting bugs</a></span></dt>
+<dt><span class="sect1"><a href="#package">4.4. Did you get the right package?</a></span></dt>
+<dt><span class="sect1"><a href="#reading">4.5. Further Reading</a></span></dt>
+</dl></dd>
+</dl>
+</div>
+<div class="chapter" lang="en">
+<div class="titlepage"><div><div><h2 class="title">
+<a name="intro"></a>1. Introduction</h2></div></div></div>
+<p><code class="computeroutput">bzip2</code> compresses files
+using the Burrows-Wheeler block-sorting text compression
+algorithm, and Huffman coding. Compression is generally
+considerably better than that achieved by more conventional
+LZ77/LZ78-based compressors, and approaches the performance of
+the PPM family of statistical compressors.</p>
+<p><code class="computeroutput">bzip2</code> is built on top of
+<code class="computeroutput">libbzip2</code>, a flexible library for
+handling compressed data in the
+<code class="computeroutput">bzip2</code> format. This manual
+describes both how to use the program and how to work with the
+library interface. Most of the manual is devoted to this
+library, not the program, which is good news if your interest is
+only in the program.</p>
+<div class="itemizedlist"><ul type="bullet">
+<li style="list-style-type: disc"><p><a href="#using">How to use bzip2</a> describes how to use
+ <code class="computeroutput">bzip2</code>; this is the only part
+ you need to read if you just want to know how to operate the
+ program.</p></li>
+<li style="list-style-type: disc"><p><a href="#libprog">Programming with libbzip2</a> describes the
+ programming interfaces in detail, and</p></li>
+<li style="list-style-type: disc"><p><a href="#misc">Miscellanea</a> records some
+ miscellaneous notes which I thought ought to be recorded
+ somewhere.</p></li>
+</ul></div>
+</div>
+<div class="chapter" lang="en">
+<div class="titlepage"><div><div><h2 class="title">
+<a name="using"></a>2. How to use bzip2</h2></div></div></div>
+<div class="toc">
+<p><b>Table of Contents</b></p>
+<dl>
+<dt><span class="sect1"><a href="#name">2.1. NAME</a></span></dt>
+<dt><span class="sect1"><a href="#synopsis">2.2. SYNOPSIS</a></span></dt>
+<dt><span class="sect1"><a href="#description">2.3. DESCRIPTION</a></span></dt>
+<dt><span class="sect1"><a href="#options">2.4. OPTIONS</a></span></dt>
+<dt><span class="sect1"><a href="#memory-management">2.5. MEMORY MANAGEMENT</a></span></dt>
+<dt><span class="sect1"><a href="#recovering">2.6. RECOVERING DATA FROM DAMAGED FILES</a></span></dt>
+<dt><span class="sect1"><a href="#performance">2.7. PERFORMANCE NOTES</a></span></dt>
+<dt><span class="sect1"><a href="#caveats">2.8. CAVEATS</a></span></dt>
+<dt><span class="sect1"><a href="#author">2.9. AUTHOR</a></span></dt>
+</dl>
+</div>
+<p>This chapter contains a copy of the
+<code class="computeroutput">bzip2</code> man page, and nothing
+else.</p>
+<div class="sect1" lang="en">
+<div class="titlepage"><div><div><h2 class="title" style="clear: both">
+<a name="name"></a>2.1. NAME</h2></div></div></div>
+<div class="itemizedlist"><ul type="bullet">
+<li style="list-style-type: disc"><p><code class="computeroutput">bzip2</code>,
+ <code class="computeroutput">bunzip2</code> - a block-sorting file
+ compressor, v1.0.5</p></li>
+<li style="list-style-type: disc"><p><code class="computeroutput">bzcat</code> -
+ decompresses files to stdout</p></li>
+<li style="list-style-type: disc"><p><code class="computeroutput">bzip2recover</code> -
+ recovers data from damaged bzip2 files</p></li>
+</ul></div>
+</div>
+<div class="sect1" lang="en">
+<div class="titlepage"><div><div><h2 class="title" style="clear: both">
+<a name="synopsis"></a>2.2. SYNOPSIS</h2></div></div></div>
+<div class="itemizedlist"><ul type="bullet">
+<li style="list-style-type: disc"><p><code class="computeroutput">bzip2</code> [
+ -cdfkqstvzVL123456789 ] [ filenames ... ]</p></li>
+<li style="list-style-type: disc"><p><code class="computeroutput">bunzip2</code> [
+ -fkvsVL ] [ filenames ... ]</p></li>
+<li style="list-style-type: disc"><p><code class="computeroutput">bzcat</code> [ -s ] [
+ filenames ... ]</p></li>
+<li style="list-style-type: disc"><p><code class="computeroutput">bzip2recover</code>
+ filename</p></li>
+</ul></div>
+</div>
+<div class="sect1" lang="en">
+<div class="titlepage"><div><div><h2 class="title" style="clear: both">
+<a name="description"></a>2.3. DESCRIPTION</h2></div></div></div>
+<p><code class="computeroutput">bzip2</code> compresses files
+using the Burrows-Wheeler block sorting text compression
+algorithm, and Huffman coding. Compression is generally
+considerably better than that achieved by more conventional
+LZ77/LZ78-based compressors, and approaches the performance of
+the PPM family of statistical compressors.</p>
+<p>The command-line options are deliberately very similar to
+those of GNU <code class="computeroutput">gzip</code>, but they are
+not identical.</p>
+<p><code class="computeroutput">bzip2</code> expects a list of
+file names to accompany the command-line flags. Each file is
+replaced by a compressed version of itself, with the name
+<code class="computeroutput">original_name.bz2</code>. Each
+compressed file has the same modification date, permissions, and,
+when possible, ownership as the corresponding original, so that
+these properties can be correctly restored at decompression time.
+File name handling is naive in the sense that there is no
+mechanism for preserving original file names, permissions,
+ownerships or dates in filesystems which lack these concepts, or
+have serious file name length restrictions, such as
+MS-DOS.</p>
+<p><code class="computeroutput">bzip2</code> and
+<code class="computeroutput">bunzip2</code> will by default not
+overwrite existing files. If you want this to happen, specify
+the <code class="computeroutput">-f</code> flag.</p>
+<p>If no file names are specified,
+<code class="computeroutput">bzip2</code> compresses from standard
+input to standard output. In this case,
+<code class="computeroutput">bzip2</code> will decline to write
+compressed output to a terminal, as this would be entirely
+incomprehensible and therefore pointless.</p>
+<p><code class="computeroutput">bunzip2</code> (or
+<code class="computeroutput">bzip2 -d</code>) decompresses all
+specified files. Files which were not created by
+<code class="computeroutput">bzip2</code> will be detected and
+ignored, and a warning issued.
+<code class="computeroutput">bzip2</code> attempts to guess the
+filename for the decompressed file from that of the compressed
+file as follows:</p>
+<div class="itemizedlist"><ul type="bullet">
+<li style="list-style-type: disc"><p><code class="computeroutput">filename.bz2 </code>
+ becomes
+ <code class="computeroutput">filename</code></p></li>
+<li style="list-style-type: disc"><p><code class="computeroutput">filename.bz </code>
+ becomes
+ <code class="computeroutput">filename</code></p></li>
+<li style="list-style-type: disc"><p><code class="computeroutput">filename.tbz2</code>
+ becomes
+ <code class="computeroutput">filename.tar</code></p></li>
+<li style="list-style-type: disc"><p><code class="computeroutput">filename.tbz </code>
+ becomes
+ <code class="computeroutput">filename.tar</code></p></li>
+<li style="list-style-type: disc"><p><code class="computeroutput">anyothername </code>
+ becomes
+ <code class="computeroutput">anyothername.out</code></p></li>
+</ul></div>
+<p>If the file does not end in one of the recognised endings,
+<code class="computeroutput">.bz2</code>,
+<code class="computeroutput">.bz</code>,
+<code class="computeroutput">.tbz2</code> or
+<code class="computeroutput">.tbz</code>,
+<code class="computeroutput">bzip2</code> complains that it cannot
+guess the name of the original file, and uses the original name
+with <code class="computeroutput">.out</code> appended.</p>
+<p>As with compression, supplying no filenames causes
+decompression from standard input to standard output.</p>
+<p><code class="computeroutput">bunzip2</code> will correctly
+decompress a file which is the concatenation of two or more
+compressed files. The result is the concatenation of the
+corresponding uncompressed files. Integrity testing
+(<code class="computeroutput">-t</code>) of concatenated compressed
+files is also supported.</p>
+<p>You can also compress or decompress files to the standard
+output by giving the <code class="computeroutput">-c</code> flag.
+Multiple files may be compressed and decompressed like this. The
+resulting outputs are fed sequentially to stdout. Compression of
+multiple files in this manner generates a stream containing
+multiple compressed file representations. Such a stream can be
+decompressed correctly only by
+<code class="computeroutput">bzip2</code> version 0.9.0 or later.
+Earlier versions of <code class="computeroutput">bzip2</code> will
+stop after decompressing the first file in the stream.</p>
+<p><code class="computeroutput">bzcat</code> (or
+<code class="computeroutput">bzip2 -dc</code>) decompresses all
+specified files to the standard output.</p>
+<p><code class="computeroutput">bzip2</code> will read arguments
+from the environment variables
+<code class="computeroutput">BZIP2</code> and
+<code class="computeroutput">BZIP</code>, in that order, and will
+process them before any arguments read from the command line.
+This gives a convenient way to supply default arguments.</p>
+<p>Compression is always performed, even if the compressed
+file is slightly larger than the original. Files of less than
+about one hundred bytes tend to get larger, since the compression
+mechanism has a constant overhead in the region of 50 bytes.
+Random data (including the output of most file compressors) is
+coded at about 8.05 bits per byte, giving an expansion of around
+0.5%.</p>
+<p>As a self-check for your protection,
+<code class="computeroutput">bzip2</code> uses 32-bit CRCs to make
+sure that the decompressed version of a file is identical to the
+original. This guards against corruption of the compressed data,
+and against undetected bugs in
+<code class="computeroutput">bzip2</code> (hopefully very unlikely).
+The chance of data corruption going undetected is microscopic,
+about one chance in four billion for each file processed. Be
+aware, though, that the check occurs upon decompression, so it
+can only tell you that something is wrong. It can't help you
+recover the original uncompressed data. You can use
+<code class="computeroutput">bzip2recover</code> to try to recover
+data from damaged files.</p>
+<p>Return values: 0 for a normal exit, 1 for environmental
+problems (file not found, invalid flags, I/O errors, etc.), 2
+to indicate a corrupt compressed file, 3 for an internal
+consistency error (e.g., a bug) which caused
+<code class="computeroutput">bzip2</code> to panic.</p>
+</div>
+<div class="sect1" lang="en">
+<div class="titlepage"><div><div><h2 class="title" style="clear: both">
+<a name="options"></a>2.4. OPTIONS</h2></div></div></div>
+<div class="variablelist"><dl>
+<dt><span class="term"><code class="computeroutput">-c --stdout</code></span></dt>
+<dd><p>Compress or decompress to standard
+ output.</p></dd>
+<dt><span class="term"><code class="computeroutput">-d --decompress</code></span></dt>
+<dd><p>Force decompression.
+ <code class="computeroutput">bzip2</code>,
+ <code class="computeroutput">bunzip2</code> and
+ <code class="computeroutput">bzcat</code> are really the same
+ program, and the decision about what actions to take is done on
+ the basis of which name is used. This flag overrides that
+ mechanism, and forces bzip2 to decompress.</p></dd>
+<dt><span class="term"><code class="computeroutput">-z --compress</code></span></dt>
+<dd><p>The complement to
+ <code class="computeroutput">-d</code>: forces compression,
+ regardless of the invocation name.</p></dd>
+<dt><span class="term"><code class="computeroutput">-t --test</code></span></dt>
+<dd><p>Check integrity of the specified file(s), but
+ don't decompress them. This really performs a trial
+ decompression and throws away the result.</p></dd>
+<dt><span class="term"><code class="computeroutput">-f --force</code></span></dt>
+<dd>
+<p>Force overwrite of output files. Normally,
+ <code class="computeroutput">bzip2</code> will not overwrite
+ existing output files. Also forces
+ <code class="computeroutput">bzip2</code> to break hard links to
+ files, which it otherwise wouldn't do.</p>
+<p><code class="computeroutput">bzip2</code> normally declines
+ to decompress files which don't have the correct magic header
+ bytes. If forced (<code class="computeroutput">-f</code>),
+ however, it will pass such files through unmodified. This is
+ how GNU <code class="computeroutput">gzip</code> behaves.</p>
+</dd>
+<dt><span class="term"><code class="computeroutput">-k --keep</code></span></dt>
+<dd><p>Keep (don't delete) input files during
+ compression or decompression.</p></dd>
+<dt><span class="term"><code class="computeroutput">-s --small</code></span></dt>
+<dd>
+<p>Reduce memory usage, for compression,
+ decompression and testing. Files are decompressed and tested
+ using a modified algorithm which only requires 2.5 bytes per
+ block byte. This means any file can be decompressed in 2300k
+ of memory, albeit at about half the normal speed.</p>
+<p>During compression, <code class="computeroutput">-s</code>
+ selects a block size of 200k, which limits memory use to around
+ the same figure, at the expense of your compression ratio. In
+ short, if your machine is low on memory (8 megabytes or less),
+ use <code class="computeroutput">-s</code> for everything. See
+ <a href="#memory-management">MEMORY MANAGEMENT</a> below.</p>
+</dd>
+<dt><span class="term"><code class="computeroutput">-q --quiet</code></span></dt>
+<dd><p>Suppress non-essential warning messages.
+ Messages pertaining to I/O errors and other critical events
+ will not be suppressed.</p></dd>
+<dt><span class="term"><code class="computeroutput">-v --verbose</code></span></dt>
+<dd><p>Verbose mode -- show the compression ratio for
+ each file processed. Further
+ <code class="computeroutput">-v</code>'s increase the verbosity
+ level, spewing out lots of information which is primarily of
+ interest for diagnostic purposes.</p></dd>
+<dt><span class="term"><code class="computeroutput">-L --license -V --version</code></span></dt>
+<dd><p>Display the software version, license terms and
+ conditions.</p></dd>
+<dt><span class="term"><code class="computeroutput">-1</code> (or
+ <code class="computeroutput">--fast</code>) to
+ <code class="computeroutput">-9</code> (or
+ <code class="computeroutput">--best</code>)</span></dt>
+<dd><p>Set the block size to 100 k, 200 k ... 900 k
+ when compressing. Has no effect when decompressing. See <a href="#memory-management">MEMORY MANAGEMENT</a> below. The
+ <code class="computeroutput">--fast</code> and
+ <code class="computeroutput">--best</code> aliases are primarily
+ for GNU <code class="computeroutput">gzip</code> compatibility.
+ In particular, <code class="computeroutput">--fast</code> doesn't
+ make things significantly faster. And
+ <code class="computeroutput">--best</code> merely selects the
+ default behaviour.</p></dd>
+<dt><span class="term"><code class="computeroutput">--</code></span></dt>
+<dd><p>Treats all subsequent arguments as file names,
+ even if they start with a dash. This is so you can handle
+ files with names beginning with a dash, for example:
+ <code class="computeroutput">bzip2 --
+ -myfilename</code>.</p></dd>
+<dt>
+<span class="term"><code class="computeroutput">--repetitive-fast</code>, </span><span class="term"><code class="computeroutput">--repetitive-best</code></span>
+</dt>
+<dd><p>These flags are redundant in versions 0.9.5 and
+ above. They provided some coarse control over the behaviour of
+ the sorting algorithm in earlier versions, which was sometimes
+ useful. 0.9.5 and above have an improved algorithm which
+ renders these flags irrelevant.</p></dd>
+</dl></div>
+</div>
+<div class="sect1" lang="en">
+<div class="titlepage"><div><div><h2 class="title" style="clear: both">
+<a name="memory-management"></a>2.5. MEMORY MANAGEMENT</h2></div></div></div>
+<p><code class="computeroutput">bzip2</code> compresses large
+files in blocks. The block size affects both the compression
+ratio achieved, and the amount of memory needed for compression
+and decompression. The flags <code class="computeroutput">-1</code>
+through <code class="computeroutput">-9</code> specify the block
+size to be 100,000 bytes through 900,000 bytes (the default)
+respectively. At decompression time, the block size used for
+compression is read from the header of the compressed file, and
+<code class="computeroutput">bunzip2</code> then allocates itself
+just enough memory to decompress the file. Since block sizes are
+stored in compressed files, it follows that the flags
+<code class="computeroutput">-1</code> to
+<code class="computeroutput">-9</code> are irrelevant to and so
+ignored during decompression.</p>
+<p>Compression and decompression requirements, in bytes, can be
+estimated as:</p>
+<pre class="programlisting">Compression: 400k + ( 8 x block size )
+
+Decompression: 100k + ( 4 x block size ), or
+ 100k + ( 2.5 x block size )</pre>
+<p>Larger block sizes give rapidly diminishing marginal
+returns. Most of the compression comes from the first two or
+three hundred k of block size, a fact worth bearing in mind when
+using <code class="computeroutput">bzip2</code> on small machines.
+It is also important to appreciate that the decompression memory
+requirement is set at compression time by the choice of block
+size.</p>
+<p>For files compressed with the default 900k block size,
+<code class="computeroutput">bunzip2</code> will require about 3700
+kbytes to decompress. To support decompression of any file on a
+4 megabyte machine, <code class="computeroutput">bunzip2</code> has
+an option to decompress using approximately half this amount of
+memory, about 2300 kbytes. Decompression speed is also halved,
+so you should use this option only where necessary. The relevant
+flag is <code class="computeroutput">-s</code>.</p>
+<p>In general, try and use the largest block size memory
+constraints allow, since that maximises the compression achieved.
+Compression and decompression speed are virtually unaffected by
+block size.</p>
+<p>Another significant point applies to files which fit in a
+single block -- that means most files you'd encounter using a
+large block size. The amount of real memory touched is
+proportional to the size of the file, since the file is smaller
+than a block. For example, compressing a file 20,000 bytes long
+with the flag <code class="computeroutput">-9</code> will cause the
+compressor to allocate around 7600k of memory, but only touch
+400k + 20000 * 8 = 560 kbytes of it. Similarly, the decompressor
+will allocate 3700k but only touch 100k + 20000 * 4 = 180
+kbytes.</p>
+<p>Here is a table which summarises the maximum memory usage
+for different block sizes. Also recorded is the total compressed
+size for 14 files of the Calgary Text Compression Corpus
+totalling 3,141,622 bytes. This column gives some feel for how
+compression varies with block size. These figures tend to
+understate the advantage of larger block sizes for larger files,
+since the Corpus is dominated by smaller files.</p>
+<pre class="programlisting"> Compress Decompress Decompress Corpus
+Flag usage usage -s usage Size
+
+ -1 1200k 500k 350k 914704
+ -2 2000k 900k 600k 877703
+ -3 2800k 1300k 850k 860338
+ -4 3600k 1700k 1100k 846899
+ -5 4400k 2100k 1350k 845160
+ -6 5200k 2500k 1600k 838626
+ -7 6000k 2900k 1850k 834096
+ -8 6800k 3300k 2100k 828642
+ -9 7600k 3700k 2350k 828642</pre>
+</div>
+<div class="sect1" lang="en">
+<div class="titlepage"><div><div><h2 class="title" style="clear: both">
+<a name="recovering"></a>2.6. RECOVERING DATA FROM DAMAGED FILES</h2></div></div></div>
+<p><code class="computeroutput">bzip2</code> compresses files in
+blocks, usually 900kbytes long. Each block is handled
+independently. If a media or transmission error causes a
+multi-block <code class="computeroutput">.bz2</code> file to become
+damaged, it may be possible to recover data from the undamaged
+blocks in the file.</p>
+<p>The compressed representation of each block is delimited by
+a 48-bit pattern, which makes it possible to find the block
+boundaries with reasonable certainty. Each block also carries
+its own 32-bit CRC, so damaged blocks can be distinguished from
+undamaged ones.</p>
+<p><code class="computeroutput">bzip2recover</code> is a simple
+program whose purpose is to search for blocks in
+<code class="computeroutput">.bz2</code> files, and write each block
+out into its own <code class="computeroutput">.bz2</code> file. You
+can then use <code class="computeroutput">bzip2 -t</code> to test
+the integrity of the resulting files, and decompress those which
+are undamaged.</p>
+<p><code class="computeroutput">bzip2recover</code> takes a
+single argument, the name of the damaged file, and writes a
+number of files <code class="computeroutput">rec0001file.bz2</code>,
+<code class="computeroutput">rec0002file.bz2</code>, etc, containing
+the extracted blocks. The output filenames are designed so that
+the use of wildcards in subsequent processing -- for example,
+<code class="computeroutput">bzip2 -dc rec*file.bz2 &gt;
+recovered_data</code> -- lists the files in the correct
+order.</p>
+<p><code class="computeroutput">bzip2recover</code> should be of
+most use dealing with large <code class="computeroutput">.bz2</code>
+files, as these will contain many blocks. It is clearly futile
+to use it on damaged single-block files, since a damaged block
+cannot be recovered. If you wish to minimise any potential data
+loss through media or transmission errors, you might consider
+compressing with a smaller block size.</p>
+</div>
+<div class="sect1" lang="en">
+<div class="titlepage"><div><div><h2 class="title" style="clear: both">
+<a name="performance"></a>2.7. PERFORMANCE NOTES</h2></div></div></div>
+<p>The sorting phase of compression gathers together similar
+strings in the file. Because of this, files containing very long
+runs of repeated symbols, like "aabaabaabaab ..." (repeated
+several hundred times) may compress more slowly than normal.
+Versions 0.9.5 and above fare much better than previous versions
+in this respect. The ratio between worst-case and average-case
+compression time is in the region of 10:1. For previous
+versions, this figure was more like 100:1. You can use the
+<code class="computeroutput">-vvvv</code> option to monitor progress
+in great detail, if you want.</p>
+<p>Decompression speed is unaffected by these
+phenomena.</p>
+<p><code class="computeroutput">bzip2</code> usually allocates
+several megabytes of memory to operate in, and then charges all
+over it in a fairly random fashion. This means that performance,
+both for compressing and decompressing, is largely determined by
+the speed at which your machine can service cache misses.
+Because of this, small changes to the code to reduce the miss
+rate have been observed to give disproportionately large
+performance improvements. I imagine
+<code class="computeroutput">bzip2</code> will perform best on
+machines with very large caches.</p>
+</div>
+<div class="sect1" lang="en">
+<div class="titlepage"><div><div><h2 class="title" style="clear: both">
+<a name="caveats"></a>2.8. CAVEATS</h2></div></div></div>
+<p>I/O error messages are not as helpful as they could be.
+<code class="computeroutput">bzip2</code> tries hard to detect I/O
+errors and exit cleanly, but the details of what the problem is
+sometimes seem rather misleading.</p>
+<p>This manual page pertains to version 1.0.5 of
+<code class="computeroutput">bzip2</code>. Compressed data created by
+this version is entirely forwards and backwards compatible with the
+previous public releases, versions 0.1pl2, 0.9.0, 0.9.5, 1.0.0,
+1.0.1, 1.0.2 and 1.0.3, but with the following exception: 0.9.0 and
+above can correctly decompress multiple concatenated compressed files.
+0.1pl2 cannot do this; it will stop after decompressing just the first
+file in the stream.</p>
+<p><code class="computeroutput">bzip2recover</code> versions
+prior to 1.0.2 used 32-bit integers to represent bit positions in
+compressed files, so they could not handle compressed files more
+than 512 megabytes long. Versions 1.0.2 and above use 64-bit ints
+on some platforms which support them (GNU supported targets, and
+Windows). To establish whether or not
+<code class="computeroutput">bzip2recover</code> was built with such
+a limitation, run it without arguments. In any event you can
+build yourself an unlimited version if you can recompile it with
+<code class="computeroutput">MaybeUInt64</code> set to be an
+unsigned 64-bit integer.</p>
+</div>
+<div class="sect1" lang="en">
+<div class="titlepage"><div><div><h2 class="title" style="clear: both">
+<a name="author"></a>2.9. AUTHOR</h2></div></div></div>
+<p>Julian Seward,
+<code class="computeroutput">jseward@bzip.org</code></p>
+<p>The ideas embodied in
+<code class="computeroutput">bzip2</code> are due to (at least) the
+following people: Michael Burrows and David Wheeler (for the
+block sorting transformation), David Wheeler (again, for the
+Huffman coder), Peter Fenwick (for the structured coding model in
+the original <code class="computeroutput">bzip</code>, and many
+refinements), and Alistair Moffat, Radford Neal and Ian Witten
+(for the arithmetic coder in the original
+<code class="computeroutput">bzip</code>). I am much indebted for
+their help, support and advice. See the manual in the source
+distribution for pointers to sources of documentation. Christian
+von Roques encouraged me to look for faster sorting algorithms,
+so as to speed up compression. Bela Lubkin encouraged me to
+improve the worst-case compression performance.
+Donna Robinson XMLised the documentation.
+Many people sent
+patches, helped with portability problems, lent machines, gave
+advice and were generally helpful.</p>
+</div>
+</div>
+<div class="chapter" lang="en">
+<div class="titlepage"><div><div><h2 class="title">
+<a name="libprog"></a>3. 
+Programming with <code class="computeroutput">libbzip2</code>
+</h2></div></div></div>
+<div class="toc">
+<p><b>Table of Contents</b></p>
+<dl>
+<dt><span class="sect1"><a href="#top-level">3.1. Top-level structure</a></span></dt>
+<dd><dl>
+<dt><span class="sect2"><a href="#ll-summary">3.1.1. Low-level summary</a></span></dt>
+<dt><span class="sect2"><a href="#hl-summary">3.1.2. High-level summary</a></span></dt>
+<dt><span class="sect2"><a href="#util-fns-summary">3.1.3. Utility functions summary</a></span></dt>
+</dl></dd>
+<dt><span class="sect1"><a href="#err-handling">3.2. Error handling</a></span></dt>
+<dt><span class="sect1"><a href="#low-level">3.3. Low-level interface</a></span></dt>
+<dd><dl>
+<dt><span class="sect2"><a href="#bzcompress-init">3.3.1. <code class="computeroutput">BZ2_bzCompressInit</code></a></span></dt>
+<dt><span class="sect2"><a href="#bzCompress">3.3.2. <code class="computeroutput">BZ2_bzCompress</code></a></span></dt>
+<dt><span class="sect2"><a href="#bzCompress-end">3.3.3. <code class="computeroutput">BZ2_bzCompressEnd</code></a></span></dt>
+<dt><span class="sect2"><a href="#bzDecompress-init">3.3.4. <code class="computeroutput">BZ2_bzDecompressInit</code></a></span></dt>
+<dt><span class="sect2"><a href="#bzDecompress">3.3.5. <code class="computeroutput">BZ2_bzDecompress</code></a></span></dt>
+<dt><span class="sect2"><a href="#bzDecompress-end">3.3.6. <code class="computeroutput">BZ2_bzDecompressEnd</code></a></span></dt>
+</dl></dd>
+<dt><span class="sect1"><a href="#hl-interface">3.4. High-level interface</a></span></dt>
+<dd><dl>
+<dt><span class="sect2"><a href="#bzreadopen">3.4.1. <code class="computeroutput">BZ2_bzReadOpen</code></a></span></dt>
+<dt><span class="sect2"><a href="#bzread">3.4.2. <code class="computeroutput">BZ2_bzRead</code></a></span></dt>
+<dt><span class="sect2"><a href="#bzreadgetunused">3.4.3. <code class="computeroutput">BZ2_bzReadGetUnused</code></a></span></dt>
+<dt><span class="sect2"><a href="#bzreadclose">3.4.4. <code class="computeroutput">BZ2_bzReadClose</code></a></span></dt>
+<dt><span class="sect2"><a href="#bzwriteopen">3.4.5. <code class="computeroutput">BZ2_bzWriteOpen</code></a></span></dt>
+<dt><span class="sect2"><a href="#bzwrite">3.4.6. <code class="computeroutput">BZ2_bzWrite</code></a></span></dt>
+<dt><span class="sect2"><a href="#bzwriteclose">3.4.7. <code class="computeroutput">BZ2_bzWriteClose</code></a></span></dt>
+<dt><span class="sect2"><a href="#embed">3.4.8. Handling embedded compressed data streams</a></span></dt>
+<dt><span class="sect2"><a href="#std-rdwr">3.4.9. Standard file-reading/writing code</a></span></dt>
+</dl></dd>
+<dt><span class="sect1"><a href="#util-fns">3.5. Utility functions</a></span></dt>
+<dd><dl>
+<dt><span class="sect2"><a href="#bzbufftobuffcompress">3.5.1. <code class="computeroutput">BZ2_bzBuffToBuffCompress</code></a></span></dt>
+<dt><span class="sect2"><a href="#bzbufftobuffdecompress">3.5.2. <code class="computeroutput">BZ2_bzBuffToBuffDecompress</code></a></span></dt>
+</dl></dd>
+<dt><span class="sect1"><a href="#zlib-compat">3.6. <code class="computeroutput">zlib</code> compatibility functions</a></span></dt>
+<dt><span class="sect1"><a href="#stdio-free">3.7. Using the library in a <code class="computeroutput">stdio</code>-free environment</a></span></dt>
+<dd><dl>
+<dt><span class="sect2"><a href="#stdio-bye">3.7.1. Getting rid of <code class="computeroutput">stdio</code></a></span></dt>
+<dt><span class="sect2"><a href="#critical-error">3.7.2. Critical error handling</a></span></dt>
+</dl></dd>
+<dt><span class="sect1"><a href="#win-dll">3.8. Making a Windows DLL</a></span></dt>
+</dl>
+</div>
+<p>This chapter describes the programming interface to
+<code class="computeroutput">libbzip2</code>.</p>
+<p>For general background information, particularly about
+memory use and performance aspects, you'd be well advised to read
+<a href="#using">How to use bzip2</a> as well.</p>
+<div class="sect1" lang="en">
+<div class="titlepage"><div><div><h2 class="title" style="clear: both">
+<a name="top-level"></a>3.1. Top-level structure</h2></div></div></div>
+<p><code class="computeroutput">libbzip2</code> is a flexible
+library for compressing and decompressing data in the
+<code class="computeroutput">bzip2</code> data format. Although
+packaged as a single entity, it helps to regard the library as
+three separate parts: the low level interface, the high level
+interface, and some utility functions.</p>
+<p>The structure of
+<code class="computeroutput">libbzip2</code>'s interfaces is similar
+to that of Jean-loup Gailly's and Mark Adler's excellent
+<code class="computeroutput">zlib</code> library.</p>
+<p>All externally visible symbols have names beginning
+<code class="computeroutput">BZ2_</code>. This is new in version
+1.0. The intention is to minimise pollution of the namespaces of
+library clients.</p>
+<p>To use any part of the library, you need to
+<code class="computeroutput">#include &lt;bzlib.h&gt;</code>
+into your sources.</p>
+<div class="sect2" lang="en">
+<div class="titlepage"><div><div><h3 class="title">
+<a name="ll-summary"></a>3.1.1. Low-level summary</h3></div></div></div>
+<p>This interface provides services for compressing and
+decompressing data in memory. There's no provision for dealing
+with files, streams or any other I/O mechanisms, just straight
+memory-to-memory work. In fact, this part of the library can be
+compiled without inclusion of
+<code class="computeroutput">stdio.h</code>, which may be helpful
+for embedded applications.</p>
+<p>The low-level part of the library has no global variables
+and is therefore thread-safe.</p>
+<p>Six routines make up the low level interface:
+<code class="computeroutput">BZ2_bzCompressInit</code>,
+<code class="computeroutput">BZ2_bzCompress</code>, and
+<code class="computeroutput">BZ2_bzCompressEnd</code> for
+compression, and a corresponding trio
+<code class="computeroutput">BZ2_bzDecompressInit</code>,
+<code class="computeroutput">BZ2_bzDecompress</code> and
+<code class="computeroutput">BZ2_bzDecompressEnd</code> for
+decompression. The <code class="computeroutput">*Init</code>
+functions allocate memory for compression/decompression and do
+other initialisations, whilst the
+<code class="computeroutput">*End</code> functions close down
+operations and release memory.</p>
+<p>The real work is done by
+<code class="computeroutput">BZ2_bzCompress</code> and
+<code class="computeroutput">BZ2_bzDecompress</code>. These
+compress and decompress data from a user-supplied input buffer to
+a user-supplied output buffer. These buffers can be any size;
+arbitrary quantities of data are handled by making repeated calls
+to these functions. This is a flexible mechanism allowing a
+consumer-pull style of activity, or producer-push, or a mixture
+of both.</p>
+</div>
+<div class="sect2" lang="en">
+<div class="titlepage"><div><div><h3 class="title">
+<a name="hl-summary"></a>3.1.2. High-level summary</h3></div></div></div>
+<p>This interface provides some handy wrappers around the
+low-level interface to facilitate reading and writing
+<code class="computeroutput">bzip2</code> format files
+(<code class="computeroutput">.bz2</code> files). The routines
+provide hooks to facilitate reading files in which the
+<code class="computeroutput">bzip2</code> data stream is embedded
+within some larger-scale file structure, or where there are
+multiple <code class="computeroutput">bzip2</code> data streams
+concatenated end-to-end.</p>
+<p>For reading files,
+<code class="computeroutput">BZ2_bzReadOpen</code>,
+<code class="computeroutput">BZ2_bzRead</code>,
+<code class="computeroutput">BZ2_bzReadClose</code> and
+<code class="computeroutput">BZ2_bzReadGetUnused</code> are
+supplied. For writing files,
+<code class="computeroutput">BZ2_bzWriteOpen</code>,
+<code class="computeroutput">BZ2_bzWrite</code> and
+<code class="computeroutput">BZ2_bzWriteClose</code> are
+available.</p>
+<p>As with the low-level library, no global variables are used
+so the library is per se thread-safe. However, if I/O errors
+occur whilst reading or writing the underlying compressed files,
+you may have to consult <code class="computeroutput">errno</code> to
+determine the cause of the error. In that case, you'd need a C
+library which correctly supports
+<code class="computeroutput">errno</code> in a multithreaded
+environment.</p>
+<p>To make the library a little simpler and more portable,
+<code class="computeroutput">BZ2_bzReadOpen</code> and
+<code class="computeroutput">BZ2_bzWriteOpen</code> require you to
+pass them file handles (<code class="computeroutput">FILE*</code>s)
+which have previously been opened for reading or writing
+respectively. That avoids portability problems associated with
+file operations and file attributes, whilst not being much of an
+imposition on the programmer.</p>
+</div>
+<div class="sect2" lang="en">
+<div class="titlepage"><div><div><h3 class="title">
+<a name="util-fns-summary"></a>3.1.3. Utility functions summary</h3></div></div></div>
+<p>For very simple needs,
+<code class="computeroutput">BZ2_bzBuffToBuffCompress</code> and
+<code class="computeroutput">BZ2_bzBuffToBuffDecompress</code> are
+provided. These compress and decompress data in memory from one buffer to
+another buffer in a single function call. You should assess
+whether these functions fulfill your memory-to-memory
+compression/decompression requirements before investing effort in
+understanding the more general but more complex low-level
+interface.</p>
+<p>Yoshioka Tsuneo
+(<code class="computeroutput">tsuneo@rr.iij4u.or.jp</code>) has
+contributed some functions to give better
+<code class="computeroutput">zlib</code> compatibility. These
+functions are <code class="computeroutput">BZ2_bzopen</code>,
+<code class="computeroutput">BZ2_bzread</code>,
+<code class="computeroutput">BZ2_bzwrite</code>,
+<code class="computeroutput">BZ2_bzflush</code>,
+<code class="computeroutput">BZ2_bzclose</code>,
+<code class="computeroutput">BZ2_bzerror</code> and
+<code class="computeroutput">BZ2_bzlibVersion</code>. You may find
+these functions more convenient for simple file reading and
+writing, than those in the high-level interface. These functions
+are not (yet) officially part of the library, and are minimally
+documented here. If they break, you get to keep all the pieces.
+I hope to document them properly when time permits.</p>
+<p>Yoshioka also contributed modifications to allow the
+library to be built as a Windows DLL.</p>
+</div>
+</div>
+<div class="sect1" lang="en">
+<div class="titlepage"><div><div><h2 class="title" style="clear: both">
+<a name="err-handling"></a>3.2. Error handling</h2></div></div></div>
+<p>The library is designed to recover cleanly in all
+situations, including the worst-case situation of decompressing
+random data. I'm not 100% sure that it can always do this, so
+you might want to add a signal handler to catch segmentation
+violations during decompression if you are feeling especially
+paranoid. I would be interested in hearing more about the
+robustness of the library to corrupted compressed data.</p>
+<p>Version 1.0.3 is more robust in this respect than any
+previous version. Investigations with Valgrind (a tool for detecting
+problems with memory management) indicate
+that, at least for the few files I tested, all single-bit errors
+in the compressed data are caught properly, with no
+segmentation faults, no uses of uninitialised data, no out of
+range reads or writes, and no infinite looping in the decompressor.
+So it's certainly pretty robust, although
+I wouldn't claim it to be totally bombproof.</p>
+<p>The file <code class="computeroutput">bzlib.h</code> contains
+all definitions needed to use the library. In particular, you
+should definitely not include
+<code class="computeroutput">bzlib_private.h</code>.</p>
+<p>In <code class="computeroutput">bzlib.h</code>, the various
+return values are defined. The following list is not intended as
+an exhaustive description of the circumstances in which a given
+value may be returned -- those descriptions are given later.
+Rather, it is intended to convey the rough meaning of each return
+value. The first five values are normal and not intended to
+denote an error situation.</p>
+<div class="variablelist"><dl>
+<dt><span class="term"><code class="computeroutput">BZ_OK</code></span></dt>
+<dd><p>The requested action was completed
+ successfully.</p></dd>
+<dt><span class="term"><code class="computeroutput">BZ_RUN_OK, BZ_FLUSH_OK,
+ BZ_FINISH_OK</code></span></dt>
+<dd><p>In
+ <code class="computeroutput">BZ2_bzCompress</code>, the requested
+ flush/finish/nothing-special action was completed
+ successfully.</p></dd>
+<dt><span class="term"><code class="computeroutput">BZ_STREAM_END</code></span></dt>
+<dd><p>Compression of data was completed, or the
+ logical stream end was detected during
+ decompression.</p></dd>
+</dl></div>
+<p>The following return values indicate an error of some
+kind.</p>
+<div class="variablelist"><dl>
+<dt><span class="term"><code class="computeroutput">BZ_CONFIG_ERROR</code></span></dt>
+<dd><p>Indicates that the library has been improperly
+ compiled on your platform -- a major configuration error.
+ Specifically, it means that
+ <code class="computeroutput">sizeof(char)</code>,
+ <code class="computeroutput">sizeof(short)</code> and
+ <code class="computeroutput">sizeof(int)</code> are not 1, 2 and
+ 4 respectively, as they should be. Note that the library
+ should still work properly on 64-bit platforms which follow
+ the LP64 programming model -- that is, where
+ <code class="computeroutput">sizeof(long)</code> and
+ <code class="computeroutput">sizeof(void*)</code> are 8. Under
+ LP64, <code class="computeroutput">sizeof(int)</code> is still 4,
+ so <code class="computeroutput">libbzip2</code>, which doesn't
+ use the <code class="computeroutput">long</code> type, is
+ OK.</p></dd>
+<dt><span class="term"><code class="computeroutput">BZ_SEQUENCE_ERROR</code></span></dt>
+<dd><p>When using the library, it is important to call
+ the functions in the correct sequence and with data structures
+ (buffers etc) in the correct states.
+ <code class="computeroutput">libbzip2</code> checks as much as it
+ can to ensure this is happening, and returns
+ <code class="computeroutput">BZ_SEQUENCE_ERROR</code> if not.
+ Code which complies precisely with the function semantics, as
+ detailed below, should never receive this value; such an event
+ denotes buggy code which you should
+ investigate.</p></dd>
+<dt><span class="term"><code class="computeroutput">BZ_PARAM_ERROR</code></span></dt>
+<dd><p>Returned when a parameter to a function call is
+ out of range or otherwise manifestly incorrect. As with
+ <code class="computeroutput">BZ_SEQUENCE_ERROR</code>, this
+ denotes a bug in the client code. The distinction between
+ <code class="computeroutput">BZ_PARAM_ERROR</code> and
+ <code class="computeroutput">BZ_SEQUENCE_ERROR</code> is a bit
+ hazy, but still worth making.</p></dd>
+<dt><span class="term"><code class="computeroutput">BZ_MEM_ERROR</code></span></dt>
+<dd><p>Returned when a request to allocate memory
+ failed. Note that the quantity of memory needed to decompress
+ a stream cannot be determined until the stream's header has
+ been read. So
+ <code class="computeroutput">BZ2_bzDecompress</code> and
+ <code class="computeroutput">BZ2_bzRead</code> may return
+ <code class="computeroutput">BZ_MEM_ERROR</code> even though some
+ of the compressed data has been read. The same is not true
+ for compression; once
+ <code class="computeroutput">BZ2_bzCompressInit</code> or
+ <code class="computeroutput">BZ2_bzWriteOpen</code> have
+ successfully completed,
+ <code class="computeroutput">BZ_MEM_ERROR</code> cannot
+ occur.</p></dd>
+<dt><span class="term"><code class="computeroutput">BZ_DATA_ERROR</code></span></dt>
+<dd><p>Returned when a data integrity error is
+ detected during decompression. Most importantly, this means
+ when stored and computed CRCs for the data do not match. This
+ value is also returned upon detection of any other anomaly in
+ the compressed data.</p></dd>
+<dt><span class="term"><code class="computeroutput">BZ_DATA_ERROR_MAGIC</code></span></dt>
+<dd><p>As a special case of
+ <code class="computeroutput">BZ_DATA_ERROR</code>, it is
+ sometimes useful to know when the compressed stream does not
+ start with the correct magic bytes (<code class="computeroutput">'B' 'Z'
+ 'h'</code>).</p></dd>
+<dt><span class="term"><code class="computeroutput">BZ_IO_ERROR</code></span></dt>
+<dd><p>Returned by
+ <code class="computeroutput">BZ2_bzRead</code> and
+ <code class="computeroutput">BZ2_bzWrite</code> when there is an
+ error reading or writing in the compressed file, and by
+ <code class="computeroutput">BZ2_bzReadOpen</code> and
+ <code class="computeroutput">BZ2_bzWriteOpen</code> for attempts
+ to use a file for which the error indicator (viz,
+ <code class="computeroutput">ferror(f)</code>) is set. On
+ receipt of <code class="computeroutput">BZ_IO_ERROR</code>, the
+ caller should consult <code class="computeroutput">errno</code>
+ and/or <code class="computeroutput">perror</code> to acquire
+ operating-system specific information about the
+ problem.</p></dd>
+<dt><span class="term"><code class="computeroutput">BZ_UNEXPECTED_EOF</code></span></dt>
+<dd><p>Returned by
+ <code class="computeroutput">BZ2_bzRead</code> when the
+ compressed file finishes before the logical end of stream is
+ detected.</p></dd>
+<dt><span class="term"><code class="computeroutput">BZ_OUTBUFF_FULL</code></span></dt>
+<dd><p>Returned by
+ <code class="computeroutput">BZ2_bzBuffToBuffCompress</code> and
+ <code class="computeroutput">BZ2_bzBuffToBuffDecompress</code> to
+ indicate that the output data will not fit into the output
+ buffer provided.</p></dd>
+</dl></div>
+</div>
+<div class="sect1" lang="en">
+<div class="titlepage"><div><div><h2 class="title" style="clear: both">
+<a name="low-level"></a>3.3. Low-level interface</h2></div></div></div>
+<div class="sect2" lang="en">
+<div class="titlepage"><div><div><h3 class="title">
+<a name="bzcompress-init"></a>3.3.1. <code class="computeroutput">BZ2_bzCompressInit</code></h3></div></div></div>
+<pre class="programlisting">typedef struct {
+ char *next_in;
+ unsigned int avail_in;
+ unsigned int total_in_lo32;
+ unsigned int total_in_hi32;
+
+ char *next_out;
+ unsigned int avail_out;
+ unsigned int total_out_lo32;
+ unsigned int total_out_hi32;
+
+ void *state;
+
+ void *(*bzalloc)(void *,int,int);
+ void (*bzfree)(void *,void *);
+ void *opaque;
+} bz_stream;
+
+int BZ2_bzCompressInit ( bz_stream *strm,
+ int blockSize100k,
+ int verbosity,
+ int workFactor );</pre>
+<p>Prepares for compression. The
+<code class="computeroutput">bz_stream</code> structure holds all
+data pertaining to the compression activity. A
+<code class="computeroutput">bz_stream</code> structure should be
+allocated and initialised prior to the call. The fields of
+<code class="computeroutput">bz_stream</code> comprise the entirety
+of the user-visible data. <code class="computeroutput">state</code>
+is a pointer to the private data structures required for
+compression.</p>
+<p>Custom memory allocators are supported, via fields
+<code class="computeroutput">bzalloc</code>,
+<code class="computeroutput">bzfree</code>, and
+<code class="computeroutput">opaque</code>. The value
+<code class="computeroutput">opaque</code> is passed as the first
+argument to all calls to <code class="computeroutput">bzalloc</code>
+and <code class="computeroutput">bzfree</code>, but is otherwise
+ignored by the library. The call <code class="computeroutput">bzalloc (
+opaque, n, m )</code> is expected to return a pointer
+<code class="computeroutput">p</code> to <code class="computeroutput">n *
+m</code> bytes of memory, and <code class="computeroutput">bzfree (
+opaque, p )</code> should free that memory.</p>
+<p>If you don't want to use a custom memory allocator, set
+<code class="computeroutput">bzalloc</code>,
+<code class="computeroutput">bzfree</code> and
+<code class="computeroutput">opaque</code> to
+<code class="computeroutput">NULL</code>, and the library will then
+use the standard <code class="computeroutput">malloc</code> /
+<code class="computeroutput">free</code> routines.</p>
+<p>Before calling
+<code class="computeroutput">BZ2_bzCompressInit</code>, fields
+<code class="computeroutput">bzalloc</code>,
+<code class="computeroutput">bzfree</code> and
+<code class="computeroutput">opaque</code> should be filled
+appropriately, as just described. Upon return, the internal
+state will have been allocated and initialised, and
+<code class="computeroutput">total_in_lo32</code>,
+<code class="computeroutput">total_in_hi32</code>,
+<code class="computeroutput">total_out_lo32</code> and
+<code class="computeroutput">total_out_hi32</code> will have been
+set to zero. These four fields are used by the library to inform
+the caller of the total amount of data passed into and out of the
+library, respectively. You should not try to change them. As of
+version 1.0, 64-bit counts are maintained, even on 32-bit
+platforms, using the <code class="computeroutput">_hi32</code>
+fields to store the upper 32 bits of the count. So, for example,
+the total amount of data in is <code class="computeroutput">(total_in_hi32
+&lt;&lt; 32) + total_in_lo32</code>.</p>
+<p>Parameter <code class="computeroutput">blockSize100k</code>
+specifies the block size to be used for compression. It should
+be a value between 1 and 9 inclusive, and the actual block size
+used is 100000 x this figure. 9 gives the best compression but
+takes most memory.</p>
+<p>Parameter <code class="computeroutput">verbosity</code> should
+be set to a number between 0 and 4 inclusive. 0 is silent, and
+greater numbers give increasingly verbose monitoring/debugging
+output. If the library has been compiled with
+<code class="computeroutput">-DBZ_NO_STDIO</code>, no such output
+will appear for any verbosity setting.</p>
+<p>Parameter <code class="computeroutput">workFactor</code>
+controls how the compression phase behaves when presented with
+worst case, highly repetitive, input data. If compression runs
+into difficulties caused by repetitive data, the library switches
+from the standard sorting algorithm to a fallback algorithm. The
+fallback is slower than the standard algorithm by perhaps a
+factor of three, but always behaves reasonably, no matter how bad
+the input.</p>
+<p>Lower values of <code class="computeroutput">workFactor</code>
+reduce the amount of effort the standard algorithm will expend
+before resorting to the fallback. You should set this parameter
+carefully; too low, and many inputs will be handled by the
+fallback algorithm and so compress rather slowly; too high, and
+your average-to-worst case compression times can become very
+large. The default value of 30 gives reasonable behaviour over a
+wide range of circumstances.</p>
+<p>Allowable values range from 0 to 250 inclusive. 0 is a
+special case, equivalent to using the default value of 30.</p>
+<p>Note that the compressed output generated is the same
+regardless of whether or not the fallback algorithm is
+used.</p>
+<p>Be aware also that this parameter may disappear entirely in
+future versions of the library. In principle it should be
+possible to devise a good way to automatically choose which
+algorithm to use. Such a mechanism would render the parameter
+obsolete.</p>
+<p>Possible return values:</p>
+<pre class="programlisting">BZ_CONFIG_ERROR
+ if the library has been mis-compiled
+BZ_PARAM_ERROR
+ if strm is NULL
+ or blockSize &lt; 1 or blockSize &gt; 9
+ or verbosity &lt; 0 or verbosity &gt; 4
+ or workFactor &lt; 0 or workFactor &gt; 250
+BZ_MEM_ERROR
+ if not enough memory is available
+BZ_OK
+ otherwise</pre>
+<p>Allowable next actions:</p>
+<pre class="programlisting">BZ2_bzCompress
+ if BZ_OK is returned
+ no specific action needed in case of error</pre>
+</div>
+<div class="sect2" lang="en">
+<div class="titlepage"><div><div><h3 class="title">
+<a name="bzCompress"></a>3.3.2. <code class="computeroutput">BZ2_bzCompress</code></h3></div></div></div>
+<pre class="programlisting">int BZ2_bzCompress ( bz_stream *strm, int action );</pre>
+<p>Provides more input and/or output buffer space for the
+library. The caller maintains input and output buffers, and
+calls <code class="computeroutput">BZ2_bzCompress</code> to transfer
+data between them.</p>
+<p>Before each call to
+<code class="computeroutput">BZ2_bzCompress</code>,
+<code class="computeroutput">next_in</code> should point at the data
+to be compressed, and <code class="computeroutput">avail_in</code>
+should indicate how many bytes the library may read.
+<code class="computeroutput">BZ2_bzCompress</code> updates
+<code class="computeroutput">next_in</code>,
+<code class="computeroutput">avail_in</code> and
+<code class="computeroutput">total_in</code> to reflect the number
+of bytes it has read.</p>
+<p>Similarly, <code class="computeroutput">next_out</code> should
+point to a buffer in which the compressed data is to be placed,
+with <code class="computeroutput">avail_out</code> indicating how
+much output space is available.
+<code class="computeroutput">BZ2_bzCompress</code> updates
+<code class="computeroutput">next_out</code>,
+<code class="computeroutput">avail_out</code> and
+<code class="computeroutput">total_out</code> to reflect the number
+of bytes output.</p>
+<p>You may provide and remove as little or as much data as you
+like on each call of
+<code class="computeroutput">BZ2_bzCompress</code>. In the limit,
+it is acceptable to supply and remove data one byte at a time,
+although this would be terribly inefficient. You should always
+ensure that at least one byte of output space is available at
+each call.</p>
+<p>A second purpose of
+<code class="computeroutput">BZ2_bzCompress</code> is to request a
+change of mode of the compressed stream.</p>
+<p>Conceptually, a compressed stream can be in one of four
+states: IDLE, RUNNING, FLUSHING and FINISHING. Before
+initialisation
+(<code class="computeroutput">BZ2_bzCompressInit</code>) and after
+termination (<code class="computeroutput">BZ2_bzCompressEnd</code>),
+a stream is regarded as IDLE.</p>
+<p>Upon initialisation
+(<code class="computeroutput">BZ2_bzCompressInit</code>), the stream
+is placed in the RUNNING state. Subsequent calls to
+<code class="computeroutput">BZ2_bzCompress</code> should pass
+<code class="computeroutput">BZ_RUN</code> as the requested action;
+other actions are illegal and will result in
+<code class="computeroutput">BZ_SEQUENCE_ERROR</code>.</p>
+<p>At some point, the calling program will have provided all
+the input data it wants to. It will then want to finish up -- in
+effect, asking the library to process any data it might have
+buffered internally. In this state,
+<code class="computeroutput">BZ2_bzCompress</code> will no longer
+attempt to read data from
+<code class="computeroutput">next_in</code>, but it will want to
+write data to <code class="computeroutput">next_out</code>. Because
+the output buffer supplied by the user can be arbitrarily small,
+the finishing-up operation cannot necessarily be done with a
+single call of
+<code class="computeroutput">BZ2_bzCompress</code>.</p>
+<p>Instead, the calling program passes
+<code class="computeroutput">BZ_FINISH</code> as an action to
+<code class="computeroutput">BZ2_bzCompress</code>. This changes
+the stream's state to FINISHING. Any remaining input (ie,
+<code class="computeroutput">next_in[0 .. avail_in-1]</code>) is
+compressed and transferred to the output buffer. To do this,
+<code class="computeroutput">BZ2_bzCompress</code> must be called
+repeatedly until all the output has been consumed. At that
+point, <code class="computeroutput">BZ2_bzCompress</code> returns
+<code class="computeroutput">BZ_STREAM_END</code>, and the stream's
+state is set back to IDLE.
+<code class="computeroutput">BZ2_bzCompressEnd</code> should then be
+called.</p>
+<p>Just to make sure the calling program does not cheat, the
+library makes a note of <code class="computeroutput">avail_in</code>
+at the time of the first call to
+<code class="computeroutput">BZ2_bzCompress</code> which has
+<code class="computeroutput">BZ_FINISH</code> as an action (ie, at
+the time the program has announced its intention to not supply
+any more input). By comparing this value with that of
+<code class="computeroutput">avail_in</code> over subsequent calls
+to <code class="computeroutput">BZ2_bzCompress</code>, the library
+can detect any attempts to slip in more data to compress. Any
+calls for which this is detected will return
+<code class="computeroutput">BZ_SEQUENCE_ERROR</code>. This
+indicates a programming mistake which should be corrected.</p>
+<p>Instead of asking to finish, the calling program may ask
+<code class="computeroutput">BZ2_bzCompress</code> to take all the
+remaining input, compress it and terminate the current
+(Burrows-Wheeler) compression block. This could be useful for
+error control purposes. The mechanism is analogous to that for
+finishing: call <code class="computeroutput">BZ2_bzCompress</code>
+with an action of <code class="computeroutput">BZ_FLUSH</code>,
+remove output data, and persist with the
+<code class="computeroutput">BZ_FLUSH</code> action until the value
+<code class="computeroutput">BZ_RUN_OK</code> is returned. As with
+finishing, <code class="computeroutput">BZ2_bzCompress</code>
+detects any attempt to provide more input data once the flush has
+begun.</p>
+<p>Once the flush is complete, the stream returns to the
+normal RUNNING state.</p>
+<p>This all sounds pretty complex, but isn't really. Here's a
+table which shows which actions are allowable in each state, what
+action will be taken, what the next state is, and what the
+non-error return values are. Note that you can't explicitly ask
+what state the stream is in, but nor do you need to -- it can be
+inferred from the values returned by
+<code class="computeroutput">BZ2_bzCompress</code>.</p>
+<pre class="programlisting">IDLE/any
+ Illegal. IDLE state only exists after BZ2_bzCompressEnd or
+ before BZ2_bzCompressInit.
+ Return value = BZ_SEQUENCE_ERROR
+
+RUNNING/BZ_RUN
+ Compress from next_in to next_out as much as possible.
+ Next state = RUNNING
+ Return value = BZ_RUN_OK
+
+RUNNING/BZ_FLUSH
+ Remember current value of avail_in. Compress from next_in
+ to next_out as much as possible, but do not accept any more input.
+ Next state = FLUSHING
+ Return value = BZ_FLUSH_OK
+
+RUNNING/BZ_FINISH
+ Remember current value of avail_in. Compress from next_in
+ to next_out as much as possible, but do not accept any more input.
+ Next state = FINISHING
+ Return value = BZ_FINISH_OK
+
+FLUSHING/BZ_FLUSH
+ Compress from next_in to next_out as much as possible,
+ but do not accept any more input.
+ If all the existing input has been used up and all compressed
+ output has been removed
+ Next state = RUNNING; Return value = BZ_RUN_OK
+ else
+ Next state = FLUSHING; Return value = BZ_FLUSH_OK
+
+FLUSHING/other
+ Illegal.
+ Return value = BZ_SEQUENCE_ERROR
+
+FINISHING/BZ_FINISH
+ Compress from next_in to next_out as much as possible,
+ but do not accept any more input.
+ If all the existing input has been used up and all compressed
+ output has been removed
+ Next state = IDLE; Return value = BZ_STREAM_END
+ else
+ Next state = FINISHING; Return value = BZ_FINISH_OK
+
+FINISHING/other
+ Illegal.
+ Return value = BZ_SEQUENCE_ERROR</pre>
+<p>That still looks complicated? Well, fair enough. The
+usual sequence of calls for compressing a load of data is:</p>
+<div class="orderedlist"><ol type="1">
+<li><p>Get started with
+ <code class="computeroutput">BZ2_bzCompressInit</code>.</p></li>
+<li><p>Shovel data in and shlurp out its compressed form
+ using zero or more calls of
+ <code class="computeroutput">BZ2_bzCompress</code> with action =
+ <code class="computeroutput">BZ_RUN</code>.</p></li>
+<li><p>Finish up. Repeatedly call
+ <code class="computeroutput">BZ2_bzCompress</code> with action =
+ <code class="computeroutput">BZ_FINISH</code>, copying out the
+ compressed output, until
+ <code class="computeroutput">BZ_STREAM_END</code> is
+ returned.</p></li>
+<li><p>Close up and go home. Call
+ <code class="computeroutput">BZ2_bzCompressEnd</code>.</p></li>
+</ol></div>
+<p>If the data you want to compress fits into your input
+buffer all at once, you can skip the calls of
+<code class="computeroutput">BZ2_bzCompress ( ..., BZ_RUN )</code>
+and just do the <code class="computeroutput">BZ2_bzCompress ( ..., BZ_FINISH
+)</code> calls.</p>
+<p>All required memory is allocated by
+<code class="computeroutput">BZ2_bzCompressInit</code>. The
+compression library can accept any data at all (obviously). So
+you shouldn't get any error return values from the
+<code class="computeroutput">BZ2_bzCompress</code> calls. If you
+do, they will be
+<code class="computeroutput">BZ_SEQUENCE_ERROR</code>, and indicate
+a bug in your programming.</p>
+<p>Trivial other possible return values:</p>
+<pre class="programlisting">BZ_PARAM_ERROR
+ if strm is NULL, or strm-&gt;s is NULL</pre>
+</div>
+<div class="sect2" lang="en">
+<div class="titlepage"><div><div><h3 class="title">
+<a name="bzCompress-end"></a>3.3.3. <code class="computeroutput">BZ2_bzCompressEnd</code></h3></div></div></div>
+<pre class="programlisting">int BZ2_bzCompressEnd ( bz_stream *strm );</pre>
+<p>Releases all memory associated with a compression
+stream.</p>
+<p>Possible return values:</p>
+<pre class="programlisting">BZ_PARAM_ERROR if strm is NULL or strm-&gt;s is NULL
+BZ_OK otherwise</pre>
+</div>
+<div class="sect2" lang="en">
+<div class="titlepage"><div><div><h3 class="title">
+<a name="bzDecompress-init"></a>3.3.4. <code class="computeroutput">BZ2_bzDecompressInit</code></h3></div></div></div>
+<pre class="programlisting">int BZ2_bzDecompressInit ( bz_stream *strm, int verbosity, int small );</pre>
+<p>Prepares for decompression. As with
+<code class="computeroutput">BZ2_bzCompressInit</code>, a
+<code class="computeroutput">bz_stream</code> record should be
+allocated and initialised before the call. Fields
+<code class="computeroutput">bzalloc</code>,
+<code class="computeroutput">bzfree</code> and
+<code class="computeroutput">opaque</code> should be set if a custom
+memory allocator is required, or made
+<code class="computeroutput">NULL</code> for the normal
+<code class="computeroutput">malloc</code> /
+<code class="computeroutput">free</code> routines. Upon return, the
+internal state will have been initialised, and
+<code class="computeroutput">total_in</code> and
+<code class="computeroutput">total_out</code> will be zero.</p>
+<p>For the meaning of parameter
+<code class="computeroutput">verbosity</code>, see
+<code class="computeroutput">BZ2_bzCompressInit</code>.</p>
+<p>If <code class="computeroutput">small</code> is nonzero, the
+library will use an alternative decompression algorithm which
+uses less memory but at the cost of decompressing more slowly
+(roughly speaking, half the speed, but the maximum memory
+requirement drops to around 2300k). See <a href="#using">How to use bzip2</a>
+for more information on memory management.</p>
+<p>Note that the amount of memory needed to decompress a
+stream cannot be determined until the stream's header has been
+read, so even if
+<code class="computeroutput">BZ2_bzDecompressInit</code> succeeds, a
+subsequent <code class="computeroutput">BZ2_bzDecompress</code>
+could fail with
+<code class="computeroutput">BZ_MEM_ERROR</code>.</p>
+<p>Possible return values:</p>
+<pre class="programlisting">BZ_CONFIG_ERROR
+ if the library has been mis-compiled
+BZ_PARAM_ERROR
+ if ( small != 0 &amp;&amp; small != 1 )
+ or (verbosity &lt; 0 || verbosity &gt; 4)
+BZ_MEM_ERROR
+ if insufficient memory is available</pre>
+<p>Allowable next actions:</p>
+<pre class="programlisting">BZ2_bzDecompress
+ if BZ_OK was returned
+ no specific action required in case of error</pre>
+</div>
+<div class="sect2" lang="en">
+<div class="titlepage"><div><div><h3 class="title">
+<a name="bzDecompress"></a>3.3.5. <code class="computeroutput">BZ2_bzDecompress</code></h3></div></div></div>
+<pre class="programlisting">int BZ2_bzDecompress ( bz_stream *strm );</pre>
+<p>Provides more input and/or output buffer space for the
+library. The caller maintains input and output buffers, and uses
+<code class="computeroutput">BZ2_bzDecompress</code> to transfer
+data between them.</p>
+<p>Before each call to
+<code class="computeroutput">BZ2_bzDecompress</code>,
+<code class="computeroutput">next_in</code> should point at the
+compressed data, and <code class="computeroutput">avail_in</code>
+should indicate how many bytes the library may read.
+<code class="computeroutput">BZ2_bzDecompress</code> updates
+<code class="computeroutput">next_in</code>,
+<code class="computeroutput">avail_in</code> and
+<code class="computeroutput">total_in</code> to reflect the number
+of bytes it has read.</p>
+<p>Similarly, <code class="computeroutput">next_out</code> should
+point to a buffer in which the uncompressed output is to be
+placed, with <code class="computeroutput">avail_out</code>
+indicating how much output space is available.
+<code class="computeroutput">BZ2_bzDecompress</code> updates
+<code class="computeroutput">next_out</code>,
+<code class="computeroutput">avail_out</code> and
+<code class="computeroutput">total_out</code> to reflect the number
+of bytes output.</p>
+<p>You may provide and remove as little or as much data as you
+like on each call of
+<code class="computeroutput">BZ2_bzDecompress</code>. In the limit,
+it is acceptable to supply and remove data one byte at a time,
+although this would be terribly inefficient. You should always
+ensure that at least one byte of output space is available at
+each call.</p>
+<p>Use of <code class="computeroutput">BZ2_bzDecompress</code> is
+simpler than
+<code class="computeroutput">BZ2_bzCompress</code>.</p>
+<p>You should provide input and remove output as described
+above, and repeatedly call
+<code class="computeroutput">BZ2_bzDecompress</code> until
+<code class="computeroutput">BZ_STREAM_END</code> is returned.
+Appearance of <code class="computeroutput">BZ_STREAM_END</code>
+denotes that <code class="computeroutput">BZ2_bzDecompress</code>
+has detected the logical end of the compressed stream.
+<code class="computeroutput">BZ2_bzDecompress</code> will not
+produce <code class="computeroutput">BZ_STREAM_END</code> until all
+output data has been placed into the output buffer, so once
+<code class="computeroutput">BZ_STREAM_END</code> appears, you are
+guaranteed to have available all the decompressed output, and
+<code class="computeroutput">BZ2_bzDecompressEnd</code> can safely
+be called.</p>
+<p>In case of an error return value, you should call
+<code class="computeroutput">BZ2_bzDecompressEnd</code> to clean up
+and release memory.</p>
+<p>Possible return values:</p>
+<pre class="programlisting">BZ_PARAM_ERROR
+ if strm is NULL or strm-&gt;s is NULL
+ or strm-&gt;avail_out &lt; 1
+BZ_DATA_ERROR
+ if a data integrity error is detected in the compressed stream
+BZ_DATA_ERROR_MAGIC
+ if the compressed stream doesn't begin with the right magic bytes
+BZ_MEM_ERROR
+ if there wasn't enough memory available
+BZ_STREAM_END
+ if the logical end of the data stream was detected and all
+ output has been consumed, eg s-&gt;avail_out &gt; 0
+BZ_OK
+ otherwise</pre>
+<p>Allowable next actions:</p>
+<pre class="programlisting">BZ2_bzDecompress
+ if BZ_OK was returned
+BZ2_bzDecompressEnd
+ otherwise</pre>
+</div>
+<div class="sect2" lang="en">
+<div class="titlepage"><div><div><h3 class="title">
+<a name="bzDecompress-end"></a>3.3.6. <code class="computeroutput">BZ2_bzDecompressEnd</code></h3></div></div></div>
+<pre class="programlisting">int BZ2_bzDecompressEnd ( bz_stream *strm );</pre>
+<p>Releases all memory associated with a decompression
+stream.</p>
+<p>Possible return values:</p>
+<pre class="programlisting">BZ_PARAM_ERROR
+ if strm is NULL or strm-&gt;s is NULL
+BZ_OK
+ otherwise</pre>
+<p>Allowable next actions:</p>
+<pre class="programlisting"> None.</pre>
+</div>
+</div>
+<div class="sect1" lang="en">
+<div class="titlepage"><div><div><h2 class="title" style="clear: both">
+<a name="hl-interface"></a>3.4. High-level interface</h2></div></div></div>
+<p>This interface provides functions for reading and writing
+<code class="computeroutput">bzip2</code> format files. First, some
+general points.</p>
+<div class="itemizedlist"><ul type="bullet">
+<li style="list-style-type: disc"><p>All of the functions take an
+ <code class="computeroutput">int*</code> first argument,
+ <code class="computeroutput">bzerror</code>. After each call,
+ <code class="computeroutput">bzerror</code> should be consulted
+ first to determine the outcome of the call. If
+ <code class="computeroutput">bzerror</code> is
+ <code class="computeroutput">BZ_OK</code>, the call completed
+ successfully, and only then should the return value of the
+ function (if any) be consulted. If
+ <code class="computeroutput">bzerror</code> is
+ <code class="computeroutput">BZ_IO_ERROR</code>, there was an
+ error reading/writing the underlying compressed file, and you
+ should then consult <code class="computeroutput">errno</code> /
+ <code class="computeroutput">perror</code> to determine the cause
+ of the difficulty. <code class="computeroutput">bzerror</code>
+ may also be set to various other values; precise details are
+ given on a per-function basis below.</p></li>
+<li style="list-style-type: disc"><p>If <code class="computeroutput">bzerror</code> indicates
+ an error (ie, anything except
+ <code class="computeroutput">BZ_OK</code> and
+ <code class="computeroutput">BZ_STREAM_END</code>), you should
+ immediately call
+ <code class="computeroutput">BZ2_bzReadClose</code> (or
+ <code class="computeroutput">BZ2_bzWriteClose</code>, depending on
+ whether you are attempting to read or to write) to free up all
+ resources associated with the stream. Once an error has been
+ indicated, behaviour of all calls except
+ <code class="computeroutput">BZ2_bzReadClose</code>
+ (<code class="computeroutput">BZ2_bzWriteClose</code>) is
+ undefined. The implication is that (1)
+ <code class="computeroutput">bzerror</code> should be checked
+ after each call, and (2) if
+ <code class="computeroutput">bzerror</code> indicates an error,
+ <code class="computeroutput">BZ2_bzReadClose</code>
+ (<code class="computeroutput">BZ2_bzWriteClose</code>) should then
+ be called to clean up.</p></li>
+<li style="list-style-type: disc"><p>The <code class="computeroutput">FILE*</code> arguments
+ passed to <code class="computeroutput">BZ2_bzReadOpen</code> /
+ <code class="computeroutput">BZ2_bzWriteOpen</code> should be set
+ to binary mode. Most Unix systems will do this by default, but
+ other platforms, including Windows and Mac, will not. If you
+ omit this, you may encounter problems when moving code to new
+ platforms.</p></li>
+<li style="list-style-type: disc"><p>Memory allocation requests are handled by
+ <code class="computeroutput">malloc</code> /
+ <code class="computeroutput">free</code>. At present there is no
+ facility for user-defined memory allocators in the file I/O
+ functions (could easily be added, though).</p></li>
+</ul></div>
+<div class="sect2" lang="en">
+<div class="titlepage"><div><div><h3 class="title">
+<a name="bzreadopen"></a>3.4.1. <code class="computeroutput">BZ2_bzReadOpen</code></h3></div></div></div>
+<pre class="programlisting">typedef void BZFILE;
+
+BZFILE *BZ2_bzReadOpen( int *bzerror, FILE *f,
+ int verbosity, int small,
+ void *unused, int nUnused );</pre>
+<p>Prepare to read compressed data from file handle
+<code class="computeroutput">f</code>.
+<code class="computeroutput">f</code> should refer to a file which
+has been opened for reading, and for which the error indicator
+(<code class="computeroutput">ferror(f)</code>) is not set. If
+<code class="computeroutput">small</code> is 1, the library will try
+to decompress using less memory, at the expense of speed.</p>
+<p>For reasons explained below,
+<code class="computeroutput">BZ2_bzRead</code> will decompress the
+<code class="computeroutput">nUnused</code> bytes starting at
+<code class="computeroutput">unused</code>, before starting to read
+from the file <code class="computeroutput">f</code>. At most
+<code class="computeroutput">BZ_MAX_UNUSED</code> bytes may be
+supplied like this. If this facility is not required, you should
+pass <code class="computeroutput">NULL</code> and
+<code class="computeroutput">0</code> for
+<code class="computeroutput">unused</code> and
+<code class="computeroutput">nUnused</code> respectively.</p>
+<p>For the meaning of parameters
+<code class="computeroutput">small</code> and
+<code class="computeroutput">verbosity</code>, see
+<code class="computeroutput">BZ2_bzDecompressInit</code>.</p>
+<p>The amount of memory needed to decompress a file cannot be
+determined until the file's header has been read. So it is
+possible that <code class="computeroutput">BZ2_bzReadOpen</code>
+returns <code class="computeroutput">BZ_OK</code> but a subsequent
+call of <code class="computeroutput">BZ2_bzRead</code> will return
+<code class="computeroutput">BZ_MEM_ERROR</code>.</p>
+<p>Possible assignments to
+<code class="computeroutput">bzerror</code>:</p>
+<pre class="programlisting">BZ_CONFIG_ERROR
+ if the library has been mis-compiled
+BZ_PARAM_ERROR
+ if f is NULL
+ or small is neither 0 nor 1
+ or ( unused == NULL &amp;&amp; nUnused != 0 )
+ or ( unused != NULL &amp;&amp; !(0 &lt;= nUnused &lt;= BZ_MAX_UNUSED) )
+BZ_IO_ERROR
+ if ferror(f) is nonzero
+BZ_MEM_ERROR
+ if insufficient memory is available
+BZ_OK
+ otherwise.</pre>
+<p>Possible return values:</p>
+<pre class="programlisting">Pointer to an abstract BZFILE
+ if bzerror is BZ_OK
+NULL
+ otherwise</pre>
+<p>Allowable next actions:</p>
+<pre class="programlisting">BZ2_bzRead
+ if bzerror is BZ_OK
+BZ2_bzReadClose
+ otherwise</pre>
+</div>
+<div class="sect2" lang="en">
+<div class="titlepage"><div><div><h3 class="title">
+<a name="bzread"></a>3.4.2. <code class="computeroutput">BZ2_bzRead</code></h3></div></div></div>
+<pre class="programlisting">int BZ2_bzRead ( int *bzerror, BZFILE *b, void *buf, int len );</pre>
+<p>Reads up to <code class="computeroutput">len</code>
+(uncompressed) bytes from the compressed file
+<code class="computeroutput">b</code> into the buffer
+<code class="computeroutput">buf</code>. If the read was
+successful, <code class="computeroutput">bzerror</code> is set to
+<code class="computeroutput">BZ_OK</code> and the number of bytes
+read is returned. If the logical end-of-stream was detected,
+<code class="computeroutput">bzerror</code> will be set to
+<code class="computeroutput">BZ_STREAM_END</code>, and the number of
+bytes read is returned. All other
+<code class="computeroutput">bzerror</code> values denote an
+error.</p>
+<p><code class="computeroutput">BZ2_bzRead</code> will supply
+<code class="computeroutput">len</code> bytes, unless the logical
+stream end is detected or an error occurs. Because of this, it
+is possible to detect the stream end by observing when the number
+of bytes returned is less than the number requested.
+Nevertheless, this is regarded as inadvisable; you should instead
+check <code class="computeroutput">bzerror</code> after every call
+and watch out for
+<code class="computeroutput">BZ_STREAM_END</code>.</p>
+<p>Internally, <code class="computeroutput">BZ2_bzRead</code>
+copies data from the compressed file in chunks of size
+<code class="computeroutput">BZ_MAX_UNUSED</code> bytes before
+decompressing it. If the file contains more bytes than strictly
+needed to reach the logical end-of-stream,
+<code class="computeroutput">BZ2_bzRead</code> will almost certainly
+read some of the trailing data before signalling
+<code class="computeroutput">BZ_STREAM_END</code>. To collect the
+read but unused data once
+<code class="computeroutput">BZ_STREAM_END</code> has appeared,
+call <code class="computeroutput">BZ2_bzReadGetUnused</code>
+immediately before
+<code class="computeroutput">BZ2_bzReadClose</code>.</p>
+<p>Possible assignments to
+<code class="computeroutput">bzerror</code>:</p>
+<pre class="programlisting">BZ_PARAM_ERROR
+ if b is NULL or buf is NULL or len &lt; 0
+BZ_SEQUENCE_ERROR
+ if b was opened with BZ2_bzWriteOpen
+BZ_IO_ERROR
+ if there is an error reading from the compressed file
+BZ_UNEXPECTED_EOF
+ if the compressed file ended before
+ the logical end-of-stream was detected
+BZ_DATA_ERROR
+ if a data integrity error was detected in the compressed stream
+BZ_DATA_ERROR_MAGIC
+ if the stream does not begin with the requisite header bytes
+ (ie, is not a bzip2 data file). This is really
+ a special case of BZ_DATA_ERROR.
+BZ_MEM_ERROR
+ if insufficient memory was available
+BZ_STREAM_END
+ if the logical end of stream was detected.
+BZ_OK
+ otherwise.</pre>
+<p>Possible return values:</p>
+<pre class="programlisting">number of bytes read
+ if bzerror is BZ_OK or BZ_STREAM_END
+undefined
+ otherwise</pre>
+<p>Allowable next actions:</p>
+<pre class="programlisting">collect data from buf, then BZ2_bzRead or BZ2_bzReadClose
+ if bzerror is BZ_OK
+collect data from buf, then BZ2_bzReadClose or BZ2_bzReadGetUnused
+ if bzerror is BZ_STREAM_END
+BZ2_bzReadClose
+ otherwise</pre>
+</div>
+<div class="sect2" lang="en">
+<div class="titlepage"><div><div><h3 class="title">
+<a name="bzreadgetunused"></a>3.4.3. <code class="computeroutput">BZ2_bzReadGetUnused</code></h3></div></div></div>
+<pre class="programlisting">void BZ2_bzReadGetUnused( int* bzerror, BZFILE *b,
+ void** unused, int* nUnused );</pre>
+<p>Returns data which was read from the compressed file but
+was not needed to get to the logical end-of-stream.
+<code class="computeroutput">*unused</code> is set to the address of
+the data, and <code class="computeroutput">*nUnused</code> to the
+number of bytes. <code class="computeroutput">*nUnused</code> will
+be set to a value between <code class="computeroutput">0</code> and
+<code class="computeroutput">BZ_MAX_UNUSED</code> inclusive.</p>
+<p>This function may only be called once
+<code class="computeroutput">BZ2_bzRead</code> has signalled
+<code class="computeroutput">BZ_STREAM_END</code> but before
+<code class="computeroutput">BZ2_bzReadClose</code>.</p>
+<p>Possible assignments to
+<code class="computeroutput">bzerror</code>:</p>
+<pre class="programlisting">BZ_PARAM_ERROR
+ if b is NULL
+ or unused is NULL or nUnused is NULL
+BZ_SEQUENCE_ERROR
+ if BZ_STREAM_END has not been signalled
+ or if b was opened with BZ2_bzWriteOpen
+BZ_OK
+ otherwise</pre>
+<p>Allowable next actions:</p>
+<pre class="programlisting">BZ2_bzReadClose</pre>
+</div>
+<div class="sect2" lang="en">
+<div class="titlepage"><div><div><h3 class="title">
+<a name="bzreadclose"></a>3.4.4. <code class="computeroutput">BZ2_bzReadClose</code></h3></div></div></div>
+<pre class="programlisting">void BZ2_bzReadClose ( int *bzerror, BZFILE *b );</pre>
+<p>Releases all memory pertaining to the compressed file
+<code class="computeroutput">b</code>.
+<code class="computeroutput">BZ2_bzReadClose</code> does not call
+<code class="computeroutput">fclose</code> on the underlying file
+handle, so you should do that yourself if appropriate.
+<code class="computeroutput">BZ2_bzReadClose</code> should be called
+to clean up after all error situations.</p>
+<p>Possible assignments to
+<code class="computeroutput">bzerror</code>:</p>
+<pre class="programlisting">BZ_SEQUENCE_ERROR
+ if b was opened with BZ2_bzWriteOpen
+BZ_OK
+ otherwise</pre>
+<p>Allowable next actions:</p>
+<pre class="programlisting">none</pre>
+</div>
+<div class="sect2" lang="en">
+<div class="titlepage"><div><div><h3 class="title">
+<a name="bzwriteopen"></a>3.4.5. <code class="computeroutput">BZ2_bzWriteOpen</code></h3></div></div></div>
+<pre class="programlisting">BZFILE *BZ2_bzWriteOpen( int *bzerror, FILE *f,
+ int blockSize100k, int verbosity,
+ int workFactor );</pre>
+<p>Prepare to write compressed data to file handle
+<code class="computeroutput">f</code>.
+<code class="computeroutput">f</code> should refer to a file which
+has been opened for writing, and for which the error indicator
+(<code class="computeroutput">ferror(f)</code>) is not set.</p>
+<p>For the meaning of parameters
+<code class="computeroutput">blockSize100k</code>,
+<code class="computeroutput">verbosity</code> and
+<code class="computeroutput">workFactor</code>, see
+<code class="computeroutput">BZ2_bzCompressInit</code>.</p>
+<p>All required memory is allocated at this stage, so if the
+call completes successfully,
+<code class="computeroutput">BZ_MEM_ERROR</code> cannot be signalled
+by a subsequent call to
+<code class="computeroutput">BZ2_bzWrite</code>.</p>
+<p>Possible assignments to
+<code class="computeroutput">bzerror</code>:</p>
+<pre class="programlisting">BZ_CONFIG_ERROR
+ if the library has been mis-compiled
+BZ_PARAM_ERROR
+ if f is NULL
+ or blockSize100k &lt; 1 or blockSize100k &gt; 9
+BZ_IO_ERROR
+ if ferror(f) is nonzero
+BZ_MEM_ERROR
+ if insufficient memory is available
+BZ_OK
+ otherwise</pre>
+<p>Possible return values:</p>
+<pre class="programlisting">Pointer to an abstract BZFILE
+ if bzerror is BZ_OK
+NULL
+ otherwise</pre>
+<p>Allowable next actions:</p>
+<pre class="programlisting">BZ2_bzWrite
+ if bzerror is BZ_OK
+ (you could go directly to BZ2_bzWriteClose, but this would be pretty pointless)
+BZ2_bzWriteClose
+ otherwise</pre>
+</div>
+<div class="sect2" lang="en">
+<div class="titlepage"><div><div><h3 class="title">
+<a name="bzwrite"></a>3.4.6. <code class="computeroutput">BZ2_bzWrite</code></h3></div></div></div>
+<pre class="programlisting">void BZ2_bzWrite ( int *bzerror, BZFILE *b, void *buf, int len );</pre>
+<p>Absorbs <code class="computeroutput">len</code> bytes from the
+buffer <code class="computeroutput">buf</code>, eventually to be
+compressed and written to the file.</p>
+<p>Possible assignments to
+<code class="computeroutput">bzerror</code>:</p>
+<pre class="programlisting">BZ_PARAM_ERROR
+ if b is NULL or buf is NULL or len &lt; 0
+BZ_SEQUENCE_ERROR
+ if b was opened with BZ2_bzReadOpen
+BZ_IO_ERROR
+ if there is an error writing the compressed file.
+BZ_OK
+ otherwise</pre>
+</div>
+<div class="sect2" lang="en">
+<div class="titlepage"><div><div><h3 class="title">
+<a name="bzwriteclose"></a>3.4.7. <code class="computeroutput">BZ2_bzWriteClose</code></h3></div></div></div>
+<pre class="programlisting">void BZ2_bzWriteClose( int *bzerror, BZFILE* b,
+ int abandon,
+ unsigned int* nbytes_in,
+ unsigned int* nbytes_out );
+
+void BZ2_bzWriteClose64( int *bzerror, BZFILE* b,
+ int abandon,
+ unsigned int* nbytes_in_lo32,
+ unsigned int* nbytes_in_hi32,
+ unsigned int* nbytes_out_lo32,
+ unsigned int* nbytes_out_hi32 );</pre>
+<p>Compresses and flushes to the compressed file all data so
+far supplied by <code class="computeroutput">BZ2_bzWrite</code>.
+The logical end-of-stream markers are also written, so subsequent
+calls to <code class="computeroutput">BZ2_bzWrite</code> are
+illegal. All memory associated with the compressed file
+<code class="computeroutput">b</code> is released.
+<code class="computeroutput">fflush</code> is called on the
+compressed file, but it is not
+<code class="computeroutput">fclose</code>'d.</p>
+<p>If <code class="computeroutput">BZ2_bzWriteClose</code> is
+called to clean up after an error, the only action is to release
+the memory. The library records the error codes issued by
+previous calls, so this situation will be detected automatically.
+There is no attempt to complete the compression operation, nor to
+<code class="computeroutput">fflush</code> the compressed file. You
+can force this behaviour to happen even in the case of no error,
+by passing a nonzero value to
+<code class="computeroutput">abandon</code>.</p>
+<p>If <code class="computeroutput">nbytes_in</code> is non-null,
+<code class="computeroutput">*nbytes_in</code> will be set to be the
+total volume of uncompressed data handled. Similarly,
+<code class="computeroutput">nbytes_out</code> will be set to the
+total volume of compressed data written. For compatibility with
+older versions of the library,
+<code class="computeroutput">BZ2_bzWriteClose</code> only yields the
+lower 32 bits of these counts. Use
+<code class="computeroutput">BZ2_bzWriteClose64</code> if you want
+the full 64 bit counts. These two functions are otherwise
+absolutely identical.</p>
+<p>Possible assignments to
+<code class="computeroutput">bzerror</code>:</p>
+<pre class="programlisting">BZ_SEQUENCE_ERROR
+ if b was opened with BZ2_bzReadOpen
+BZ_IO_ERROR
+ if there is an error writing the compressed file
+BZ_OK
+ otherwise</pre>
+</div>
+<div class="sect2" lang="en">
+<div class="titlepage"><div><div><h3 class="title">
+<a name="embed"></a>3.4.8. Handling embedded compressed data streams</h3></div></div></div>
+<p>The high-level library facilitates use of
+<code class="computeroutput">bzip2</code> data streams which form
+some part of a surrounding, larger data stream.</p>
+<div class="itemizedlist"><ul type="bullet">
+<li style="list-style-type: disc"><p>For writing, the library takes an open file handle,
+ writes compressed data to it,
+ <code class="computeroutput">fflush</code>es it but does not
+ <code class="computeroutput">fclose</code> it. The calling
+ application can write its own data before and after the
+ compressed data stream, using that same file handle.</p></li>
+<li style="list-style-type: disc"><p>Reading is more complex, and the facilities are not as
+ general as they could be since generality is hard to reconcile
+ with efficiency. <code class="computeroutput">BZ2_bzRead</code>
+ reads from the compressed file in blocks of size
+ <code class="computeroutput">BZ_MAX_UNUSED</code> bytes, and in
+ doing so probably will overshoot the logical end of compressed
+ stream. To recover this data once decompression has ended,
+ call <code class="computeroutput">BZ2_bzReadGetUnused</code> after
+ the last call of <code class="computeroutput">BZ2_bzRead</code>
+ (the one returning
+ <code class="computeroutput">BZ_STREAM_END</code>) but before
+ calling
+ <code class="computeroutput">BZ2_bzReadClose</code>.</p></li>
+</ul></div>
+<p>This mechanism makes it easy to decompress multiple
+<code class="computeroutput">bzip2</code> streams placed end-to-end.
+At the end of one stream, when
+<code class="computeroutput">BZ2_bzRead</code> returns
+<code class="computeroutput">BZ_STREAM_END</code>, call
+<code class="computeroutput">BZ2_bzReadGetUnused</code> to collect
+the unused data (copy it into your own buffer somewhere). That
+data forms the start of the next compressed stream. To start
+uncompressing that next stream, call
+<code class="computeroutput">BZ2_bzReadOpen</code> again, feeding in
+the unused data via the <code class="computeroutput">unused</code> /
+<code class="computeroutput">nUnused</code> parameters. Keep doing
+this until <code class="computeroutput">BZ_STREAM_END</code> return
+coincides with the physical end of file
+(<code class="computeroutput">feof(f)</code>). In this situation
+<code class="computeroutput">BZ2_bzReadGetUnused</code> will of
+course return no data.</p>
+<p>This should give some feel for how the high-level interface
+can be used. If you require extra flexibility, you'll have to
+bite the bullet and get to grips with the low-level
+interface.</p>
+</div>
+<div class="sect2" lang="en">
+<div class="titlepage"><div><div><h3 class="title">
+<a name="std-rdwr"></a>3.4.9. Standard file-reading/writing code</h3></div></div></div>
+<p>Here's how you'd write data to a compressed file:</p>
+<pre class="programlisting">FILE* f;
+BZFILE* b;
+int nBuf;
+char buf[ /* whatever size you like */ ];
+int bzerror;
+int nWritten;
+
+f = fopen ( "myfile.bz2", "w" );
+if ( !f ) {
+ /* handle error */
+}
+b = BZ2_bzWriteOpen( &amp;bzerror, f, 9 );
+if (bzerror != BZ_OK) {
+ BZ2_bzWriteClose ( &amp;bzerror, b );
+ /* handle error */
+}
+
+while ( /* condition */ ) {
+ /* get data to write into buf, and set nBuf appropriately */
+ nWritten = BZ2_bzWrite ( &amp;bzerror, b, buf, nBuf );
+ if (bzerror == BZ_IO_ERROR) {
+ BZ2_bzWriteClose ( &amp;bzerror, b );
+ /* handle error */
+ }
+}
+
+BZ2_bzWriteClose( &amp;bzerror, b );
+if (bzerror == BZ_IO_ERROR) {
+ /* handle error */
+}</pre>
+<p>And to read from a compressed file:</p>
+<pre class="programlisting">FILE* f;
+BZFILE* b;
+int nBuf;
+char buf[ /* whatever size you like */ ];
+int bzerror;
+int nWritten;
+
+f = fopen ( "myfile.bz2", "r" );
+if ( !f ) {
+ /* handle error */
+}
+b = BZ2_bzReadOpen ( &amp;bzerror, f, 0, NULL, 0 );
+if ( bzerror != BZ_OK ) {
+ BZ2_bzReadClose ( &amp;bzerror, b );
+ /* handle error */
+}
+
+bzerror = BZ_OK;
+while ( bzerror == BZ_OK &amp;&amp; /* arbitrary other conditions */) {
+ nBuf = BZ2_bzRead ( &amp;bzerror, b, buf, /* size of buf */ );
+ if ( bzerror == BZ_OK ) {
+ /* do something with buf[0 .. nBuf-1] */
+ }
+}
+if ( bzerror != BZ_STREAM_END ) {
+ BZ2_bzReadClose ( &amp;bzerror, b );
+ /* handle error */
+} else {
+ BZ2_bzReadClose ( &amp;bzerror, b );
+}</pre>
+</div>
+</div>
+<div class="sect1" lang="en">
+<div class="titlepage"><div><div><h2 class="title" style="clear: both">
+<a name="util-fns"></a>3.5. Utility functions</h2></div></div></div>
+<div class="sect2" lang="en">
+<div class="titlepage"><div><div><h3 class="title">
+<a name="bzbufftobuffcompress"></a>3.5.1. <code class="computeroutput">BZ2_bzBuffToBuffCompress</code></h3></div></div></div>
+<pre class="programlisting">int BZ2_bzBuffToBuffCompress( char* dest,
+ unsigned int* destLen,
+ char* source,
+ unsigned int sourceLen,
+ int blockSize100k,
+ int verbosity,
+ int workFactor );</pre>
+<p>Attempts to compress the data in <code class="computeroutput">source[0
+.. sourceLen-1]</code> into the destination buffer,
+<code class="computeroutput">dest[0 .. *destLen-1]</code>. If the
+destination buffer is big enough,
+<code class="computeroutput">*destLen</code> is set to the size of
+the compressed data, and <code class="computeroutput">BZ_OK</code>
+is returned. If the compressed data won't fit,
+<code class="computeroutput">*destLen</code> is unchanged, and
+<code class="computeroutput">BZ_OUTBUFF_FULL</code> is
+returned.</p>
+<p>Compression in this manner is a one-shot event, done with a
+single call to this function. The resulting compressed data is a
+complete <code class="computeroutput">bzip2</code> format data
+stream. There is no mechanism for making additional calls to
+provide extra input data. If you want that kind of mechanism,
+use the low-level interface.</p>
+<p>For the meaning of parameters
+<code class="computeroutput">blockSize100k</code>,
+<code class="computeroutput">verbosity</code> and
+<code class="computeroutput">workFactor</code>, see
+<code class="computeroutput">BZ2_bzCompressInit</code>.</p>
+<p>To guarantee that the compressed data will fit in its
+buffer, allocate an output buffer of size 1% larger than the
+uncompressed data, plus six hundred extra bytes.</p>
+<p><code class="computeroutput">BZ2_bzBuffToBuffCompress</code>
+will not write data at or beyond
+<code class="computeroutput">dest[*destLen]</code>, even in case of
+buffer overflow.</p>
+<p>Possible return values:</p>
+<pre class="programlisting">BZ_CONFIG_ERROR
+ if the library has been mis-compiled
+BZ_PARAM_ERROR
+ if dest is NULL or destLen is NULL
+ or blockSize100k &lt; 1 or blockSize100k &gt; 9
+ or verbosity &lt; 0 or verbosity &gt; 4
+ or workFactor &lt; 0 or workFactor &gt; 250
+BZ_MEM_ERROR
+ if insufficient memory is available
+BZ_OUTBUFF_FULL
+ if the size of the compressed data exceeds *destLen
+BZ_OK
+ otherwise</pre>
+</div>
+<div class="sect2" lang="en">
+<div class="titlepage"><div><div><h3 class="title">
+<a name="bzbufftobuffdecompress"></a>3.5.2. <code class="computeroutput">BZ2_bzBuffToBuffDecompress</code></h3></div></div></div>
+<pre class="programlisting">int BZ2_bzBuffToBuffDecompress( char* dest,
+ unsigned int* destLen,
+ char* source,
+ unsigned int sourceLen,
+ int small,
+ int verbosity );</pre>
+<p>Attempts to decompress the data in <code class="computeroutput">source[0
+.. sourceLen-1]</code> into the destination buffer,
+<code class="computeroutput">dest[0 .. *destLen-1]</code>. If the
+destination buffer is big enough,
+<code class="computeroutput">*destLen</code> is set to the size of
+the uncompressed data, and <code class="computeroutput">BZ_OK</code>
+is returned. If the uncompressed data won't fit,
+<code class="computeroutput">*destLen</code> is unchanged, and
+<code class="computeroutput">BZ_OUTBUFF_FULL</code> is
+returned.</p>
+<p><code class="computeroutput">source</code> is assumed to hold
+a complete <code class="computeroutput">bzip2</code> format data
+stream.
+<code class="computeroutput">BZ2_bzBuffToBuffDecompress</code> tries
+to decompress the entirety of the stream into the output
+buffer.</p>
+<p>For the meaning of parameters
+<code class="computeroutput">small</code> and
+<code class="computeroutput">verbosity</code>, see
+<code class="computeroutput">BZ2_bzDecompressInit</code>.</p>
+<p>Because the compression ratio of the compressed data cannot
+be known in advance, there is no easy way to guarantee that the
+output buffer will be big enough. You may of course make
+arrangements in your code to record the size of the uncompressed
+data, but such a mechanism is beyond the scope of this
+library.</p>
+<p><code class="computeroutput">BZ2_bzBuffToBuffDecompress</code>
+will not write data at or beyond
+<code class="computeroutput">dest[*destLen]</code>, even in case of
+buffer overflow.</p>
+<p>Possible return values:</p>
+<pre class="programlisting">BZ_CONFIG_ERROR
+ if the library has been mis-compiled
+BZ_PARAM_ERROR
+ if dest is NULL or destLen is NULL
+ or small != 0 &amp;&amp; small != 1
+ or verbosity &lt; 0 or verbosity &gt; 4
+BZ_MEM_ERROR
+ if insufficient memory is available
+BZ_OUTBUFF_FULL
+ if the size of the uncompressed data exceeds *destLen
+BZ_DATA_ERROR
+ if a data integrity error was detected in the compressed data
+BZ_DATA_ERROR_MAGIC
+ if the compressed data doesn't begin with the right magic bytes
+BZ_UNEXPECTED_EOF
+ if the compressed data ends unexpectedly
+BZ_OK
+ otherwise</pre>
+</div>
+</div>
+<div class="sect1" lang="en">
+<div class="titlepage"><div><div><h2 class="title" style="clear: both">
+<a name="zlib-compat"></a>3.6. <code class="computeroutput">zlib</code> compatibility functions</h2></div></div></div>
+<p>Yoshioka Tsuneo has contributed some functions to give
+better <code class="computeroutput">zlib</code> compatibility.
+These functions are <code class="computeroutput">BZ2_bzopen</code>,
+<code class="computeroutput">BZ2_bzread</code>,
+<code class="computeroutput">BZ2_bzwrite</code>,
+<code class="computeroutput">BZ2_bzflush</code>,
+<code class="computeroutput">BZ2_bzclose</code>,
+<code class="computeroutput">BZ2_bzerror</code> and
+<code class="computeroutput">BZ2_bzlibVersion</code>. These
+functions are not (yet) officially part of the library. If they
+break, you get to keep all the pieces. Nevertheless, I think
+they work ok.</p>
+<pre class="programlisting">typedef void BZFILE;
+
+const char * BZ2_bzlibVersion ( void );</pre>
+<p>Returns a string indicating the library version.</p>
+<pre class="programlisting">BZFILE * BZ2_bzopen ( const char *path, const char *mode );
+BZFILE * BZ2_bzdopen ( int fd, const char *mode );</pre>
+<p>Opens a <code class="computeroutput">.bz2</code> file for
+reading or writing, using either its name or a pre-existing file
+descriptor. Analogous to <code class="computeroutput">fopen</code>
+and <code class="computeroutput">fdopen</code>.</p>
+<pre class="programlisting">int BZ2_bzread ( BZFILE* b, void* buf, int len );
+int BZ2_bzwrite ( BZFILE* b, void* buf, int len );</pre>
+<p>Reads/writes data from/to a previously opened
+<code class="computeroutput">BZFILE</code>. Analogous to
+<code class="computeroutput">fread</code> and
+<code class="computeroutput">fwrite</code>.</p>
+<pre class="programlisting">int BZ2_bzflush ( BZFILE* b );
+void BZ2_bzclose ( BZFILE* b );</pre>
+<p>Flushes/closes a <code class="computeroutput">BZFILE</code>.
+<code class="computeroutput">BZ2_bzflush</code> doesn't actually do
+anything. Analogous to <code class="computeroutput">fflush</code>
+and <code class="computeroutput">fclose</code>.</p>
+<pre class="programlisting">const char * BZ2_bzerror ( BZFILE *b, int *errnum );</pre>
+<p>Returns a string describing the most recent error status of
+<code class="computeroutput">b</code>, and also sets
+<code class="computeroutput">*errnum</code> to its numerical
+value.</p>
+</div>
+<div class="sect1" lang="en">
+<div class="titlepage"><div><div><h2 class="title" style="clear: both">
+<a name="stdio-free"></a>3.7. Using the library in a <code class="computeroutput">stdio</code>-free environment</h2></div></div></div>
+<div class="sect2" lang="en">
+<div class="titlepage"><div><div><h3 class="title">
+<a name="stdio-bye"></a>3.7.1. Getting rid of <code class="computeroutput">stdio</code></h3></div></div></div>
+<p>In a deeply embedded application, you might want to use
+just the memory-to-memory functions. You can do this
+conveniently by compiling the library with preprocessor symbol
+<code class="computeroutput">BZ_NO_STDIO</code> defined. Doing this
+gives you a library containing only the following eight
+functions:</p>
+<p><code class="computeroutput">BZ2_bzCompressInit</code>,
+<code class="computeroutput">BZ2_bzCompress</code>,
+<code class="computeroutput">BZ2_bzCompressEnd</code>,
+<code class="computeroutput">BZ2_bzDecompressInit</code>,
+<code class="computeroutput">BZ2_bzDecompress</code>,
+<code class="computeroutput">BZ2_bzDecompressEnd</code>,
+<code class="computeroutput">BZ2_bzBuffToBuffCompress</code>,
+<code class="computeroutput">BZ2_bzBuffToBuffDecompress</code></p>
+<p>When compiled like this, all functions will ignore
+<code class="computeroutput">verbosity</code> settings.</p>
+</div>
+<div class="sect2" lang="en">
+<div class="titlepage"><div><div><h3 class="title">
+<a name="critical-error"></a>3.7.2. Critical error handling</h3></div></div></div>
+<p><code class="computeroutput">libbzip2</code> contains a number
+of internal assertion checks which should, needless to say, never
+be activated. Nevertheless, if an assertion should fail,
+behaviour depends on whether or not the library was compiled with
+<code class="computeroutput">BZ_NO_STDIO</code> set.</p>
+<p>For a normal compile, an assertion failure yields the
+message:</p>
+<div class="blockquote"><blockquote class="blockquote">
+<p>bzip2/libbzip2: internal error number N.</p>
+<p>This is a bug in bzip2/libbzip2, 1.0.5 of 10 December 2007.
+Please report it to me at: jseward@bzip.org. If this happened
+when you were using some program which uses libbzip2 as a
+component, you should also report this bug to the author(s)
+of that program. Please make an effort to report this bug;
+timely and accurate bug reports eventually lead to higher
+quality software. Thanks. Julian Seward, 10 December 2007.
+</p>
+</blockquote></div>
+<p>where <code class="computeroutput">N</code> is some error code
+number. If <code class="computeroutput">N == 1007</code>, it also
+prints some extra text advising the reader that unreliable memory
+is often associated with internal error 1007. (This is a
+frequently-observed-phenomenon with versions 1.0.0/1.0.1).</p>
+<p><code class="computeroutput">exit(3)</code> is then
+called.</p>
+<p>For a <code class="computeroutput">stdio</code>-free library,
+assertion failures result in a call to a function declared
+as:</p>
+<pre class="programlisting">extern void bz_internal_error ( int errcode );</pre>
+<p>The relevant code is passed as a parameter. You should
+supply such a function.</p>
+<p>In either case, once an assertion failure has occurred, any
+<code class="computeroutput">bz_stream</code> records involved can
+be regarded as invalid. You should not attempt to resume normal
+operation with them.</p>
+<p>You may, of course, change critical error handling to suit
+your needs. As I said above, critical errors indicate bugs in
+the library and should not occur. All "normal" error situations
+are indicated via error return codes from functions, and can be
+recovered from.</p>
+</div>
+</div>
+<div class="sect1" lang="en">
+<div class="titlepage"><div><div><h2 class="title" style="clear: both">
+<a name="win-dll"></a>3.8. Making a Windows DLL</h2></div></div></div>
+<p>Everything related to Windows has been contributed by
+Yoshioka Tsuneo
+(<code class="computeroutput">tsuneo@rr.iij4u.or.jp</code>), so
+you should send your queries to him (but perhaps Cc: me,
+<code class="computeroutput">jseward@bzip.org</code>).</p>
+<p>My vague understanding of what to do is: using Visual C++
+5.0, open the project file
+<code class="computeroutput">libbz2.dsp</code>, and build. That's
+all.</p>
+<p>If you can't open the project file for some reason, make a
+new one, naming these files:
+<code class="computeroutput">blocksort.c</code>,
+<code class="computeroutput">bzlib.c</code>,
+<code class="computeroutput">compress.c</code>,
+<code class="computeroutput">crctable.c</code>,
+<code class="computeroutput">decompress.c</code>,
+<code class="computeroutput">huffman.c</code>,
+<code class="computeroutput">randtable.c</code> and
+<code class="computeroutput">libbz2.def</code>. You will also need
+to name the header files <code class="computeroutput">bzlib.h</code>
+and <code class="computeroutput">bzlib_private.h</code>.</p>
+<p>If you don't use VC++, you may need to define the
+preprocessor symbol
+<code class="computeroutput">_WIN32</code>.</p>
+<p>Finally, <code class="computeroutput">dlltest.c</code> is a
+sample program using the DLL. It has a project file,
+<code class="computeroutput">dlltest.dsp</code>.</p>
+<p>If you just want a makefile for Visual C, have a look at
+<code class="computeroutput">makefile.msc</code>.</p>
+<p>Be aware that if you compile
+<code class="computeroutput">bzip2</code> itself on Win32, you must
+set <code class="computeroutput">BZ_UNIX</code> to 0 and
+<code class="computeroutput">BZ_LCCWIN32</code> to 1, in the file
+<code class="computeroutput">bzip2.c</code>, before compiling.
+Otherwise the resulting binary won't work correctly.</p>
+<p>I haven't tried any of this stuff myself, but it all looks
+plausible.</p>
+</div>
+</div>
+<div class="chapter" lang="en">
+<div class="titlepage"><div><div><h2 class="title">
+<a name="misc"></a>4. Miscellanea</h2></div></div></div>
+<div class="toc">
+<p><b>Table of Contents</b></p>
+<dl>
+<dt><span class="sect1"><a href="#limits">4.1. Limitations of the compressed file format</a></span></dt>
+<dt><span class="sect1"><a href="#port-issues">4.2. Portability issues</a></span></dt>
+<dt><span class="sect1"><a href="#bugs">4.3. Reporting bugs</a></span></dt>
+<dt><span class="sect1"><a href="#package">4.4. Did you get the right package?</a></span></dt>
+<dt><span class="sect1"><a href="#reading">4.5. Further Reading</a></span></dt>
+</dl>
+</div>
+<p>These are just some random thoughts of mine. Your mileage
+may vary.</p>
+<div class="sect1" lang="en">
+<div class="titlepage"><div><div><h2 class="title" style="clear: both">
+<a name="limits"></a>4.1. Limitations of the compressed file format</h2></div></div></div>
+<p><code class="computeroutput">bzip2-1.0.X</code>,
+<code class="computeroutput">0.9.5</code> and
+<code class="computeroutput">0.9.0</code> use exactly the same file
+format as the original version,
+<code class="computeroutput">bzip2-0.1</code>. This decision was
+made in the interests of stability. Creating yet another
+incompatible compressed file format would create further
+confusion and disruption for users.</p>
+<p>Nevertheless, this is not a painless decision. Development
+work since the release of
+<code class="computeroutput">bzip2-0.1</code> in August 1997 has
+shown complexities in the file format which slow down
+decompression and, in retrospect, are unnecessary. These
+are:</p>
+<div class="itemizedlist"><ul type="bullet">
+<li style="list-style-type: disc"><p>The run-length encoder, which is the first of the
+ compression transformations, is entirely irrelevant. The
+ original purpose was to protect the sorting algorithm from the
+ very worst case input: a string of repeated symbols. But
+ algorithm steps Q6a and Q6b in the original Burrows-Wheeler
+ technical report (SRC-124) show how repeats can be handled
+ without difficulty in block sorting.</p></li>
+<li style="list-style-type: disc">
+<p>The randomisation mechanism doesn't really need to be
+ there. Udi Manber and Gene Myers published a suffix array
+ construction algorithm a few years back, which can be employed
+ to sort any block, no matter how repetitive, in O(N log N)
+ time. Subsequent work by Kunihiko Sadakane has produced a
+ derivative O(N (log N)^2) algorithm which usually outperforms
+ the Manber-Myers algorithm.</p>
+<p>I could have changed to Sadakane's algorithm, but I find
+ it to be slower than <code class="computeroutput">bzip2</code>'s
+ existing algorithm for most inputs, and the randomisation
+ mechanism protects adequately against bad cases. I didn't
+ think it was a good tradeoff to make. Partly this is due to
+ the fact that I was not flooded with email complaints about
+ <code class="computeroutput">bzip2-0.1</code>'s performance on
+ repetitive data, so perhaps it isn't a problem for real
+ inputs.</p>
+<p>Probably the best long-term solution, and the one I have
+ incorporated into 0.9.5 and above, is to use the existing
+ sorting algorithm initially, and fall back to a O(N (log N)^2)
+ algorithm if the standard algorithm gets into
+ difficulties.</p>
+</li>
+<li style="list-style-type: disc"><p>The compressed file format was never designed to be
+ handled by a library, and I have had to jump through some hoops
+ to produce an efficient implementation of decompression. It's
+ a bit hairy. Try passing
+ <code class="computeroutput">decompress.c</code> through the C
+ preprocessor and you'll see what I mean. Much of this
+ complexity could have been avoided if the compressed size of
+ each block of data was recorded in the data stream.</p></li>
+<li style="list-style-type: disc"><p>An Adler-32 checksum, rather than a CRC32 checksum,
+ would be faster to compute.</p></li>
+</ul></div>
+<p>It would be fair to say that the
+<code class="computeroutput">bzip2</code> format was frozen before I
+properly and fully understood the performance consequences of
+doing so.</p>
+<p>Improvements which I was able to incorporate into 0.9.0,
+despite using the same file format, are:</p>
+<div class="itemizedlist"><ul type="bullet">
+<li style="list-style-type: disc"><p>Single array implementation of the inverse BWT. This
+ significantly speeds up decompression, presumably because it
+ reduces the number of cache misses.</p></li>
+<li style="list-style-type: disc"><p>Faster inverse MTF transform for large MTF values.
+ The new implementation is based on the notion of sliding blocks
+ of values.</p></li>
+<li style="list-style-type: disc"><p><code class="computeroutput">bzip2-0.9.0</code> now reads
+ and writes files with <code class="computeroutput">fread</code>
+ and <code class="computeroutput">fwrite</code>; version 0.1 used
+ <code class="computeroutput">putc</code> and
+ <code class="computeroutput">getc</code>. Duh! Well, you live
+ and learn.</p></li>
+</ul></div>
+<p>Further ahead, it would be nice to be able to do random
+access into files. This will require some careful design of
+compressed file formats.</p>
+</div>
+<div class="sect1" lang="en">
+<div class="titlepage"><div><div><h2 class="title" style="clear: both">
+<a name="port-issues"></a>4.2. Portability issues</h2></div></div></div>
+<p>After some consideration, I have decided not to use GNU
+<code class="computeroutput">autoconf</code> to configure 0.9.5 or
+1.0.</p>
+<p><code class="computeroutput">autoconf</code>, admirable and
+wonderful though it is, mainly assists with portability problems
+between Unix-like platforms. But
+<code class="computeroutput">bzip2</code> doesn't have much in the
+way of portability problems on Unix; most of the difficulties
+appear when porting to the Mac, or to Microsoft's operating
+systems. <code class="computeroutput">autoconf</code> doesn't help
+in those cases, and brings in a whole load of new
+complexity.</p>
+<p>Most people should be able to compile the library and
+program under Unix straight out-of-the-box, so to speak,
+especially if you have a version of GNU C available.</p>
+<p>There are a couple of
+<code class="computeroutput">__inline__</code> directives in the
+code. GNU C (<code class="computeroutput">gcc</code>) should be
+able to handle them. If you're not using GNU C, your C compiler
+shouldn't see them at all. If your compiler does, for some
+reason, see them and doesn't like them, just
+<code class="computeroutput">#define</code>
+<code class="computeroutput">__inline__</code> to be
+<code class="computeroutput">/* */</code>. One easy way to do this
+is to compile with the flag
+<code class="computeroutput">-D__inline__=</code>, which should be
+understood by most Unix compilers.</p>
+<p>If you still have difficulties, try compiling with the
+macro <code class="computeroutput">BZ_STRICT_ANSI</code> defined.
+This should enable you to build the library in a strictly ANSI
+compliant environment. Building the program itself like this is
+dangerous and not supported, since you remove
+<code class="computeroutput">bzip2</code>'s checks against
+compressing directories, symbolic links, devices, and other
+not-really-a-file entities. This could cause filesystem
+corruption!</p>
+<p>One other thing: if you create a
+<code class="computeroutput">bzip2</code> binary for public distribution,
+please consider linking it statically (<code class="computeroutput">gcc
+-static</code>). This avoids all sorts of library-version
+issues that others may encounter later on.</p>
+<p>If you build <code class="computeroutput">bzip2</code> on
+Win32, you must set <code class="computeroutput">BZ_UNIX</code> to 0
+and <code class="computeroutput">BZ_LCCWIN32</code> to 1, in the
+file <code class="computeroutput">bzip2.c</code>, before compiling.
+Otherwise the resulting binary won't work correctly.</p>
+</div>
+<div class="sect1" lang="en">
+<div class="titlepage"><div><div><h2 class="title" style="clear: both">
+<a name="bugs"></a>4.3. Reporting bugs</h2></div></div></div>
+<p>I tried pretty hard to make sure
+<code class="computeroutput">bzip2</code> is bug free, both by
+design and by testing. Hopefully you'll never need to read this
+section for real.</p>
+<p>Nevertheless, if <code class="computeroutput">bzip2</code> dies
+with a segmentation fault, a bus error or an internal assertion
+failure, it will ask you to email me a bug report. Experience from
+years of feedback from bzip2 users indicates that almost all these
+problems can be traced to either compiler bugs or hardware
+problems.</p>
+<div class="itemizedlist"><ul type="bullet">
+<li style="list-style-type: disc">
+<p>Recompile the program with no optimisation, and
+ see if it works. And/or try a different compiler. I heard all
+ sorts of stories about various flavours of GNU C (and other
+ compilers) generating bad code for
+ <code class="computeroutput">bzip2</code>, and I've run across two
+ such examples myself.</p>
+<p>2.7.X versions of GNU C are known to generate bad code
+ from time to time, at high optimisation levels. If you get
+ problems, try using the flags
+ <code class="computeroutput">-O2</code>
+ <code class="computeroutput">-fomit-frame-pointer</code>
+ <code class="computeroutput">-fno-strength-reduce</code>. You
+ should specifically <span class="emphasis"><em>not</em></span> use
+ <code class="computeroutput">-funroll-loops</code>.</p>
+<p>You may notice that the Makefile runs six tests as part
+ of the build process. If the program passes all of these, it's
+ a pretty good (but not 100%) indication that the compiler has
+ done its job correctly.</p>
+</li>
+<li style="list-style-type: disc">
+<p>If <code class="computeroutput">bzip2</code>
+ crashes randomly, and the crashes are not repeatable, you may
+ have a flaky memory subsystem.
+ <code class="computeroutput">bzip2</code> really hammers your
+ memory hierarchy, and if it's a bit marginal, you may get these
+ problems. Ditto if your disk or I/O subsystem is slowly
+ failing. Yup, this really does happen.</p>
+<p>Try using a different machine of the same type, and see
+ if you can repeat the problem.</p>
+</li>
+<li style="list-style-type: disc"><p>This isn't really a bug, but ... If
+ <code class="computeroutput">bzip2</code> tells you your file is
+ corrupted on decompression, and you obtained the file via FTP,
+ there is a possibility that you forgot to tell FTP to do a
+ binary mode transfer. That absolutely will cause the file to
+ be non-decompressible. You'll have to transfer it
+ again.</p></li>
+</ul></div>
+<p>If you've incorporated
+<code class="computeroutput">libbzip2</code> into your own program
+and are getting problems, please, please, please, check that the
+parameters you are passing in calls to the library, are correct,
+and in accordance with what the documentation says is allowable.
+I have tried to make the library robust against such problems,
+but I'm sure I haven't succeeded.</p>
+<p>Finally, if the above comments don't help, you'll have to
+send me a bug report. Now, it's just amazing how many people
+will send me a bug report saying something like:</p>
+<pre class="programlisting">bzip2 crashed with segmentation fault on my machine</pre>
+<p>and absolutely nothing else. Needless to say, such a
+report is <span class="emphasis"><em>totally, utterly, completely and
+comprehensively 100% useless; a waste of your time, my time, and
+net bandwidth</em></span>. With no details at all, there's no way
+I can possibly begin to figure out what the problem is.</p>
+<p>The rules of the game are: facts, facts, facts. Don't omit
+them because "oh, they won't be relevant". At the bare
+minimum:</p>
+<pre class="programlisting">Machine type. Operating system version.
+Exact version of bzip2 (do bzip2 -V).
+Exact version of the compiler used.
+Flags passed to the compiler.</pre>
+<p>However, the most important single thing that will help me
+is the file that you were trying to compress or decompress at the
+time the problem happened. Without that, my ability to do
+anything more than speculate about the cause, is limited.</p>
+</div>
+<div class="sect1" lang="en">
+<div class="titlepage"><div><div><h2 class="title" style="clear: both">
+<a name="package"></a>4.4. Did you get the right package?</h2></div></div></div>
+<p><code class="computeroutput">bzip2</code> is a resource hog.
+It soaks up large amounts of CPU cycles and memory. Also, it
+gives very large latencies. In the worst case, you can feed many
+megabytes of uncompressed data into the library before getting
+any compressed output, so this probably rules out applications
+requiring interactive behaviour.</p>
+<p>These aren't faults of my implementation, I hope, but more
+an intrinsic property of the Burrows-Wheeler transform
+(unfortunately). Maybe this isn't what you want.</p>
+<p>If you want a compressor and/or library which is faster,
+uses less memory but gets pretty good compression, and has
+minimal latency, consider Jean-loup Gailly's and Mark Adler's
+work, <code class="computeroutput">zlib-1.2.1</code> and
+<code class="computeroutput">gzip-1.2.4</code>. Look for them at
+<a href="http://www.zlib.org" target="_top">http://www.zlib.org</a> and
+<a href="http://www.gzip.org" target="_top">http://www.gzip.org</a>
+respectively.</p>
+<p>For something faster and lighter still, you might try Markus F
+X J Oberhumer's <code class="computeroutput">LZO</code> real-time
+compression/decompression library, at
+<a href="http://www.oberhumer.com/opensource" target="_top">http://www.oberhumer.com/opensource</a>.</p>
+</div>
+<div class="sect1" lang="en">
+<div class="titlepage"><div><div><h2 class="title" style="clear: both">
+<a name="reading"></a>4.5. Further Reading</h2></div></div></div>
+<p><code class="computeroutput">bzip2</code> is not research
+work, in the sense that it doesn't present any new ideas.
+Rather, it's an engineering exercise based on existing
+ideas.</p>
+<p>Four documents describe essentially all the ideas behind
+<code class="computeroutput">bzip2</code>:</p>
+<div class="literallayout"><p>Michael Burrows and D. J. Wheeler:<br>
+  "A block-sorting lossless data compression algorithm"<br>
+   10th May 1994. <br>
+   Digital SRC Research Report 124.<br>
+   ftp://ftp.digital.com/pub/DEC/SRC/research-reports/SRC-124.ps.gz<br>
+   If you have trouble finding it, try searching at the<br>
+   New Zealand Digital Library, http://www.nzdl.org.<br>
+<br>
+Daniel S. Hirschberg and Debra A. Lelewer<br>
+  "Efficient Decoding of Prefix Codes"<br>
+   Communications of the ACM, April 1990, Vol 33, Number 4.<br>
+   You might be able to get an electronic copy of this<br>
+   from the ACM Digital Library.<br>
+<br>
+David J. Wheeler<br>
+   Program bred3.c and accompanying document bred3.ps.<br>
+   This contains the idea behind the multi-table Huffman coding scheme.<br>
+   ftp://ftp.cl.cam.ac.uk/users/djw3/<br>
+<br>
+Jon L. Bentley and Robert Sedgewick<br>
+  "Fast Algorithms for Sorting and Searching Strings"<br>
+   Available from Sedgewick's web page,<br>
+   www.cs.princeton.edu/~rs<br>
+</p></div>
+<p>The following paper gives valuable additional insights into
+the algorithm, but is not immediately the basis of any code used
+in bzip2.</p>
+<div class="literallayout"><p>Peter Fenwick:<br>
+   Block Sorting Text Compression<br>
+   Proceedings of the 19th Australasian Computer Science Conference,<br>
+     Melbourne, Australia.  Jan 31 - Feb 2, 1996.<br>
+   ftp://ftp.cs.auckland.ac.nz/pub/peter-f/ACSC96paper.ps</p></div>
+<p>Kunihiko Sadakane's sorting algorithm, mentioned above, is
+available from:</p>
+<div class="literallayout"><p>http://naomi.is.s.u-tokyo.ac.jp/~sada/papers/Sada98b.ps.gz<br>
+</p></div>
+<p>The Manber-Myers suffix array construction algorithm is
+described in a paper available from:</p>
+<div class="literallayout"><p>http://www.cs.arizona.edu/people/gene/PAPERS/suffix.ps<br>
+</p></div>
+<p>Finally, the following papers document some
+investigations I made into the performance of sorting
+and decompression algorithms:</p>
+<div class="literallayout"><p>Julian Seward<br>
+   On the Performance of BWT Sorting Algorithms<br>
+   Proceedings of the IEEE Data Compression Conference 2000<br>
+     Snowbird, Utah.  28-30 March 2000.<br>
+<br>
+Julian Seward<br>
+   Space-time Tradeoffs in the Inverse B-W Transform<br>
+   Proceedings of the IEEE Data Compression Conference 2001<br>
+     Snowbird, Utah.  27-29 March 2001.<br>
+</p></div>
+</div>
+</div>
+</div></body>
+</html>
diff --git a/src/misc/bzlib/manual.pdf b/src/misc/bzlib/manual.pdf
new file mode 100644
index 00000000..f043e164
--- /dev/null
+++ b/src/misc/bzlib/manual.pdf
Binary files differ
diff --git a/src/misc/bzlib/randtable.c b/src/misc/bzlib/randtable.c
new file mode 100644
index 00000000..068b7636
--- /dev/null
+++ b/src/misc/bzlib/randtable.c
@@ -0,0 +1,84 @@
+
+/*-------------------------------------------------------------*/
+/*--- Table for randomising repetitive blocks ---*/
+/*--- randtable.c ---*/
+/*-------------------------------------------------------------*/
+
+/* ------------------------------------------------------------------
+ This file is part of bzip2/libbzip2, a program and library for
+ lossless, block-sorting data compression.
+
+ bzip2/libbzip2 version 1.0.5 of 10 December 2007
+ Copyright (C) 1996-2007 Julian Seward <jseward@bzip.org>
+
+ Please read the WARNING, DISCLAIMER and PATENTS sections in the
+ README file.
+
+ This program is released under the terms of the license contained
+ in the file LICENSE.
+ ------------------------------------------------------------------ */
+
+
+#include "bzlib_private.h"
+
+
+/*---------------------------------------------*/
+Int32 BZ2_rNums[512] = { /* 512 fixed integer constants; per the file header, the table used for randomising repetitive blocks */
+ 619, 720, 127, 481, 931, 816, 813, 233, 566, 247,
+ 985, 724, 205, 454, 863, 491, 741, 242, 949, 214,
+ 733, 859, 335, 708, 621, 574, 73, 654, 730, 472,
+ 419, 436, 278, 496, 867, 210, 399, 680, 480, 51,
+ 878, 465, 811, 169, 869, 675, 611, 697, 867, 561,
+ 862, 687, 507, 283, 482, 129, 807, 591, 733, 623,
+ 150, 238, 59, 379, 684, 877, 625, 169, 643, 105,
+ 170, 607, 520, 932, 727, 476, 693, 425, 174, 647,
+ 73, 122, 335, 530, 442, 853, 695, 249, 445, 515,
+ 909, 545, 703, 919, 874, 474, 882, 500, 594, 612,
+ 641, 801, 220, 162, 819, 984, 589, 513, 495, 799,
+ 161, 604, 958, 533, 221, 400, 386, 867, 600, 782,
+ 382, 596, 414, 171, 516, 375, 682, 485, 911, 276,
+ 98, 553, 163, 354, 666, 933, 424, 341, 533, 870,
+ 227, 730, 475, 186, 263, 647, 537, 686, 600, 224,
+ 469, 68, 770, 919, 190, 373, 294, 822, 808, 206,
+ 184, 943, 795, 384, 383, 461, 404, 758, 839, 887,
+ 715, 67, 618, 276, 204, 918, 873, 777, 604, 560,
+ 951, 160, 578, 722, 79, 804, 96, 409, 713, 940,
+ 652, 934, 970, 447, 318, 353, 859, 672, 112, 785,
+ 645, 863, 803, 350, 139, 93, 354, 99, 820, 908,
+ 609, 772, 154, 274, 580, 184, 79, 626, 630, 742,
+ 653, 282, 762, 623, 680, 81, 927, 626, 789, 125,
+ 411, 521, 938, 300, 821, 78, 343, 175, 128, 250,
+ 170, 774, 972, 275, 999, 639, 495, 78, 352, 126,
+ 857, 956, 358, 619, 580, 124, 737, 594, 701, 612,
+ 669, 112, 134, 694, 363, 992, 809, 743, 168, 974,
+ 944, 375, 748, 52, 600, 747, 642, 182, 862, 81,
+ 344, 805, 988, 739, 511, 655, 814, 334, 249, 515,
+ 897, 955, 664, 981, 649, 113, 974, 459, 893, 228,
+ 433, 837, 553, 268, 926, 240, 102, 654, 459, 51,
+ 686, 754, 806, 760, 493, 403, 415, 394, 687, 700,
+ 946, 670, 656, 610, 738, 392, 760, 799, 887, 653,
+ 978, 321, 576, 617, 626, 502, 894, 679, 243, 440,
+ 680, 879, 194, 572, 640, 724, 926, 56, 204, 700,
+ 707, 151, 457, 449, 797, 195, 791, 558, 945, 679,
+ 297, 59, 87, 824, 713, 663, 412, 693, 342, 606,
+ 134, 108, 571, 364, 631, 212, 174, 643, 304, 329,
+ 343, 97, 430, 751, 497, 314, 983, 374, 822, 928,
+ 140, 206, 73, 263, 980, 736, 876, 478, 430, 305,
+ 170, 514, 364, 692, 829, 82, 855, 953, 676, 246,
+ 369, 970, 294, 750, 807, 827, 150, 790, 288, 923,
+ 804, 378, 215, 828, 592, 281, 565, 555, 710, 82,
+ 896, 831, 547, 261, 524, 462, 293, 465, 502, 56,
+ 661, 821, 976, 991, 658, 869, 905, 758, 745, 193,
+ 768, 550, 608, 933, 378, 286, 215, 979, 792, 961,
+ 61, 688, 793, 644, 986, 403, 106, 366, 905, 644,
+ 372, 567, 466, 434, 645, 210, 389, 550, 919, 135,
+ 780, 773, 635, 389, 707, 100, 626, 958, 165, 504,
+ 920, 176, 193, 713, 857, 265, 203, 50, 668, 108,
+ 645, 990, 626, 197, 510, 357, 358, 850, 858, 364,
+ 936, 638
+}; /* NOTE(review): presumably read by code outside this file that expects these exact values -- confirm before editing any entry */
+
+
+/*-------------------------------------------------------------*/
+/*--- end randtable.c ---*/
+/*-------------------------------------------------------------*/