SHOGUN
v1.1.0
|
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 2009 Soeren Sonnenburg 00008 * Copyright (C) 2009 Berlin Institute of Technology 00009 */ 00010 #include <shogun/lib/Compressor.h> 00011 #include <shogun/mathematics/Math.h> 00012 #include <string.h> 00013 00014 #ifdef USE_LZO 00015 #include <lzo/lzoconf.h> 00016 #include <lzo/lzoutil.h> 00017 #include <lzo/lzo1x.h> 00018 #endif 00019 00020 #ifdef USE_GZIP 00021 #include <zlib.h> 00022 #endif 00023 00024 #ifdef USE_BZIP2 00025 #include <bzlib.h> 00026 #endif 00027 00028 #ifdef USE_LZMA 00029 #include <lzma.h> 00030 #endif 00031 00032 #ifdef USE_SNAPPY 00033 #include <snappy.h> 00034 #endif 00035 00036 using namespace shogun; 00037 00038 CCompressor::CCompressor() 00039 :CSGObject(), compression_type(UNCOMPRESSED) 00040 { 00041 SG_UNSTABLE("CCompressor::CCompressor()", "\n"); 00042 } 00043 00044 void CCompressor::compress(uint8_t* uncompressed, uint64_t uncompressed_size, 00045 uint8_t* &compressed, uint64_t &compressed_size, int32_t level) 00046 { 00047 uint64_t initial_buffer_size=0; 00048 00049 if (uncompressed_size==0) 00050 { 00051 compressed=NULL; 00052 compressed_size=0; 00053 return; 00054 } 00055 00056 switch (compression_type) 00057 { 00058 case UNCOMPRESSED: 00059 { 00060 initial_buffer_size=uncompressed_size; 00061 compressed_size=uncompressed_size; 00062 compressed=SG_MALLOC(uint8_t, compressed_size); 00063 memcpy(compressed, uncompressed, uncompressed_size); 00064 break; 00065 } 00066 #ifdef USE_LZO 00067 case LZO: 00068 { 00069 if (lzo_init() != LZO_E_OK) 00070 SG_ERROR("Error initializing LZO Compression\n"); 00071 00072 lzo_bytep lzo_wrkmem = (lzo_bytep) lzo_malloc(LZO1X_999_MEM_COMPRESS); 00073 if (!lzo_wrkmem) 00074 SG_ERROR("Error allocating LZO workmem\n"); 00075 00076 initial_buffer_size=uncompressed_size + 00077 uncompressed_size / 16+ 64 + 3; 00078 00079 compressed_size=initial_buffer_size; 00080 compressed=SG_MALLOC(uint8_t, initial_buffer_size); 00081 00082 lzo_uint lzo_size=compressed_size; 00083 00084 int ret; 00085 if (level<9) 00086 { 00087 ret=lzo1x_1_15_compress(uncompressed, uncompressed_size, 00088 compressed, &lzo_size, lzo_wrkmem); 00089 } 00090 else 00091 { 00092 ret=lzo1x_999_compress(uncompressed, uncompressed_size, 00093 compressed, &lzo_size, lzo_wrkmem); 00094 } 00095 00096 compressed_size=lzo_size; 00097 lzo_free(lzo_wrkmem); 00098 00099 if (ret!= LZO_E_OK) 00100 SG_ERROR("Error lzo-compressing data\n"); 00101 00102 break; 00103 } 00104 #endif 00105 #ifdef USE_GZIP 00106 case GZIP: 00107 { 00108 initial_buffer_size=1.001*uncompressed_size + 12; 00109 compressed_size=initial_buffer_size; 00110 compressed=SG_MALLOC(uint8_t, initial_buffer_size); 00111 uLongf gz_size=compressed_size; 00112 00113 if (compress2(compressed, &gz_size, uncompressed, 00114 uncompressed_size, level) != Z_OK) 00115 { 00116 SG_ERROR("Error gzip-compressing data\n"); 00117 } 00118 compressed_size=gz_size; 00119 break; 00120 } 00121 #endif 00122 #ifdef USE_BZIP2 00123 case BZIP2: 00124 { 00125 bz_stream strm; 00126 strm.bzalloc=NULL; 00127 strm.bzfree=NULL; 00128 strm.opaque=NULL; 00129 initial_buffer_size=1.01*uncompressed_size + 600; 00130 compressed_size=initial_buffer_size; 00131 compressed=SG_MALLOC(uint8_t, initial_buffer_size); 00132 if (BZ2_bzCompressInit(&strm, level, 0, 0)!=BZ_OK) 00133 SG_ERROR("Error initializing bzip2 compressor\n"); 00134 00135 strm.next_in=(char*) uncompressed; 00136 strm.avail_in=(unsigned int) uncompressed_size; 00137 strm.next_out=(char*) compressed; 00138 strm.avail_out=(unsigned int) compressed_size; 00139 if (BZ2_bzCompress(&strm, BZ_RUN) != BZ_RUN_OK) 00140 SG_ERROR("Error bzip2-compressing data (BZ_RUN)\n"); 00141 00142 int ret=0; 00143 while (true) 00144 { 00145 ret=BZ2_bzCompress(&strm, BZ_FINISH); 00146 if (ret==BZ_FINISH_OK) 00147 continue; 00148 if (ret==BZ_STREAM_END) 00149 break; 00150 else 00151 SG_ERROR("Error bzip2-compressing data (BZ_FINISH)\n"); 00152 } 00153 BZ2_bzCompressEnd(&strm); 00154 compressed_size=(((uint64_t) strm.total_out_hi32) << 32) + strm.total_out_lo32; 00155 break; 00156 } 00157 #endif 00158 #ifdef USE_LZMA 00159 case LZMA: 00160 { 00161 lzma_stream strm = LZMA_STREAM_INIT; 00162 initial_buffer_size = lzma_stream_buffer_bound(uncompressed_size); 00163 compressed_size=initial_buffer_size; 00164 compressed=SG_MALLOC(uint8_t, initial_buffer_size); 00165 strm.next_in=uncompressed; 00166 strm.avail_in=(size_t) uncompressed_size; 00167 strm.next_out=compressed; 00168 strm.avail_out=(size_t) compressed_size; 00169 00170 if (lzma_easy_encoder(&strm, level, LZMA_CHECK_CRC32) != LZMA_OK) 00171 SG_ERROR("Error initializing lzma compressor\n"); 00172 if (lzma_code(&strm, LZMA_RUN) != LZMA_OK) 00173 SG_ERROR("Error lzma-compressing data (LZMA_RUN)\n"); 00174 00175 lzma_ret ret; 00176 while (true) 00177 { 00178 ret=lzma_code(&strm, LZMA_FINISH); 00179 if (ret==LZMA_OK) 00180 continue; 00181 if (ret==LZMA_STREAM_END) 00182 break; 00183 else 00184 SG_ERROR("Error lzma-compressing data (LZMA_FINISH)\n"); 00185 } 00186 lzma_end(&strm); 00187 compressed_size=strm.total_out; 00188 break; 00189 } 00190 #endif 00191 #ifdef USE_SNAPPY 00192 case SNAPPY: 00193 { 00194 compressed=SG_MALLOC(uint8_t, snappy::MaxCompressedLength((size_t) uncompressed_size)); 00195 size_t output_length; 00196 snappy::RawCompress((char*) uncompressed, size_t(uncompressed_size), (char*) compressed, &output_length); 00197 compressed_size=(uint64_t) output_length; 00198 break; 00199 } 00200 #endif 00201 default: 00202 SG_ERROR("Unknown compression type\n"); 00203 } 00204 00205 if (compressed) 00206 CMath::resize(compressed, initial_buffer_size, compressed_size); 00207 } 00208 00209 void CCompressor::decompress(uint8_t* compressed, uint64_t compressed_size, 00210 uint8_t* uncompressed, uint64_t& uncompressed_size) 00211 { 00212 if (compressed_size==0) 00213 { 00214 uncompressed_size=0; 00215 return; 00216 } 00217 00218 switch (compression_type) 00219 { 00220 case UNCOMPRESSED: 00221 { 00222 ASSERT(uncompressed_size>=compressed_size); 00223 uncompressed_size=compressed_size; 00224 memcpy(uncompressed, compressed, uncompressed_size); 00225 break; 00226 } 00227 #ifdef USE_LZO 00228 case LZO: 00229 { 00230 if (lzo_init() != LZO_E_OK) 00231 SG_ERROR("Error initializing LZO Compression\n"); 00232 00233 lzo_bytep lzo_wrkmem = (lzo_bytep) lzo_malloc(LZO1X_999_MEM_COMPRESS); 00234 if (!lzo_wrkmem) 00235 SG_ERROR("Error allocating LZO workmem\n"); 00236 00237 lzo_uint lzo_size=uncompressed_size; 00238 if (lzo1x_decompress(compressed, compressed_size, uncompressed, 00239 &lzo_size, NULL) != LZO_E_OK) 00240 { 00241 SG_ERROR("Error uncompressing lzo-data\n"); 00242 } 00243 uncompressed_size=lzo_size; 00244 00245 lzo_free(lzo_wrkmem); 00246 break; 00247 } 00248 #endif 00249 #ifdef USE_GZIP 00250 case GZIP: 00251 { 00252 uLongf gz_size=uncompressed_size; 00253 if (uncompress(uncompressed, &gz_size, compressed, 00254 compressed_size) != Z_OK) 00255 { 00256 SG_ERROR("Error uncompressing gzip-data\n"); 00257 } 00258 uncompressed_size=gz_size; 00259 break; 00260 } 00261 #endif 00262 #ifdef USE_BZIP2 00263 case BZIP2: 00264 { 00265 bz_stream strm; 00266 strm.bzalloc=NULL; 00267 strm.bzfree=NULL; 00268 strm.opaque=NULL; 00269 if (BZ2_bzDecompressInit(&strm, 0, 0)!=BZ_OK) 00270 SG_ERROR("Error initializing bzip2 decompressor\n"); 00271 strm.next_in=(char*) compressed; 00272 strm.avail_in=(unsigned int) compressed_size; 00273 strm.next_out=(char*) uncompressed; 00274 strm.avail_out=(unsigned int) uncompressed_size; 00275 if (BZ2_bzDecompress(&strm) != BZ_STREAM_END || strm.avail_in!=0) 00276 SG_ERROR("Error uncompressing bzip2-data\n"); 00277 BZ2_bzDecompressEnd(&strm); 00278 break; 00279 } 00280 #endif 00281 #ifdef USE_LZMA 00282 case LZMA: 00283 { 00284 lzma_stream strm = LZMA_STREAM_INIT; 00285 strm.next_in=compressed; 00286 strm.avail_in=(size_t) compressed_size; 00287 strm.next_out=uncompressed; 00288 strm.avail_out=(size_t) uncompressed_size; 00289 00290 uint64_t memory_limit=lzma_easy_decoder_memusage(9); 00291 00292 if (lzma_stream_decoder(&strm, memory_limit, 0)!= LZMA_OK) 00293 SG_ERROR("Error initializing lzma decompressor\n"); 00294 if (lzma_code(&strm, LZMA_RUN) != LZMA_STREAM_END) 00295 SG_ERROR("Error decompressing lzma data\n"); 00296 lzma_end(&strm); 00297 break; 00298 } 00299 #endif 00300 #ifdef USE_SNAPPY 00301 case SNAPPY: 00302 { 00303 size_t uncompressed_length; 00304 if (!snappy::GetUncompressedLength( (char*) compressed, 00305 (size_t) compressed_size, &uncompressed_length)) 00306 SG_ERROR("Error obtaining uncompressed length\n"); 00307 00308 ASSERT(uncompressed_length<=uncompressed_size); 00309 uncompressed_size=uncompressed_length; 00310 if (!snappy::RawUncompress((char*) compressed, 00311 (size_t) compressed_size, 00312 (char*) uncompressed)) 00313 SG_ERROR("Error uncompressing snappy data\n"); 00314 00315 break; 00316 } 00317 #endif 00318 default: 00319 SG_ERROR("Unknown compression type\n"); 00320 } 00321 }