diff --git a/src/Makefile b/src/Makefile index 820b94397665d84bfd3307289bfba771004964cd..9e66e12b09eae14deb95c6d1593bf7c03f85a606 100644 --- a/src/Makefile +++ b/src/Makefile @@ -1,7 +1,7 @@ CC = gcc #CFLAGS = -g -pg -lc -O2 -Wall -fprofile-arcs -ftest-coverage -DPROFILE #CFLAGS = -g -pg -lc -O2 -Wall -fprofile-arcs -ftest-coverage -DUNROLL_BLOCK_8 -CFLAGS = -g -O0 -Wall -DUNROLL_BLOCK_8 +CFLAGS = -g -O3 -Wall OBJS = aee.o aed.o sz_compat.o @@ -46,5 +46,6 @@ check: test_encode test_decode test_szcomp ./test_decode 101 101 < ../data/test.ae > ../data/test diff ../data/test ../data/example_data ./test_szcomp 65536 < ../data/example_data_16 > ../data/test - diff ../data/test ../data/example_data_16 - + diff ../data/test ../data/example_data_16 + ./test_szcomp 2097257 < ../data/zero_test > ../data/test + diff ../data/test ../data/zero_test diff --git a/src/aed.c b/src/aed.c index acef5ad4445bc0d187f8ce3d9dd64497dfc4b249..9d4214d929c192095194ef0d0336d54b4ab1f6ff 100644 --- a/src/aed.c +++ b/src/aed.c @@ -37,6 +37,7 @@ typedef struct internal_state { int fs; /* last fundamental sequence in accumulator */ int ref; /* 1 if current block has reference sample */ int pp; /* 1 if postprocessor has to be used */ + int byte_per_sample; size_t samples_out; } decode_state; @@ -263,6 +264,7 @@ int ae_decode_init(ae_streamp strm) if (strm->bit_per_sample > 16) { + state->byte_per_sample = 4; state->id_len = 5; state->out_blklen = strm->block_size * 4; if (strm->flags & AE_DATA_MSB) @@ -272,6 +274,7 @@ int ae_decode_init(ae_streamp strm) } else if (strm->bit_per_sample > 8) { + state->byte_per_sample = 2; state->id_len = 4; state->out_blklen = strm->block_size * 2; if (strm->flags & AE_DATA_MSB) @@ -281,6 +284,7 @@ int ae_decode_init(ae_streamp strm) } else { + state->byte_per_sample = 1; state->id_len = 3; state->out_blklen = strm->block_size; state->put_sample = put_8; @@ -498,6 +502,10 @@ int ae_decode(ae_streamp strm, int flush) (state->samples_out / strm->block_size) % strm->segment_size); } + else if (zero_blocks > ROS) + { + zero_blocks--; + } if (state->ref) @@ -505,7 +513,7 @@ int ae_decode(ae_streamp strm, int flush) else state->i = zero_blocks * strm->block_size; - if (strm->avail_out >= state->i) + if (strm->avail_out >= state->i * state->byte_per_sample) { fast_zero(strm); state->mode = M_ID; diff --git a/src/aee.c b/src/aee.c index 9943d9ed9fb37725f9822f3897e034fd7b431a32..aa718e7652660071ecc78ad0fcbf17fec674e396 100644 --- a/src/aee.c +++ b/src/aee.c @@ -9,9 +9,10 @@ #include "libae.h" -#define ROS 5 +#define ROS -1 #define MIN(a, b) (((a) < (b))? (a): (b)) +#define MAX(a, b) (((a) > (b))? (a): (b)) enum { @@ -38,7 +39,7 @@ typedef struct internal_state { int64_t *block_in; /* input block buffer */ uint8_t *block_out; /* output block buffer */ uint8_t *bp_out; /* pointer to current output */ - size_t total_blocks; + int64_t total_blocks; int bitp; /* bit pointer to the next unused bit in accumulator */ int block_deferred; /* there is a block in the input buffer but we first have to emit a zero block */ @@ -181,9 +182,11 @@ int ae_encode_init(ae_streamp strm) return AE_MEM_ERROR; } - blklen = (strm->block_size * strm->bit_per_sample - + state->id_len) / 8 + 16; - + /* Zero blocks can span a segment and thus need up to segment_size + bits in encoded block */ + blklen = MAX(strm->block_size * strm->bit_per_sample, + strm->segment_size + 10); + blklen = (blklen + state->id_len) / 8 + 3; state->block_out = (uint8_t *)malloc(blklen); if (state->block_out == NULL) { @@ -414,11 +417,11 @@ int ae_encode(ae_streamp strm, int flush) if (state->total_blocks % strm->segment_size == 0) { - if (state->zero_blocks > ROS) - state->zero_blocks = ROS; #ifdef PROFILE state->prof[0] += state->zero_blocks; #endif + if (state->zero_blocks > 4) + state->zero_blocks = ROS; state->mode = M_ENCODE_ZERO; break; } @@ -476,6 +479,7 @@ int ae_encode(ae_streamp strm, int flush) split_len_min = split_len; k = j; +#if 0 if (fs_len < this_bs) { /* Next can't get better because what we lose @@ -486,6 +490,9 @@ int ae_encode(ae_streamp strm, int flush) } else break; +#else + } +#endif } /* Count bits for 2nd extension */ @@ -596,7 +603,14 @@ int ae_encode(ae_streamp strm, int flush) { emit(state, state->zero_ref_sample, strm->bit_per_sample); } - emitfs(state, state->zero_blocks - 1); + if (state->zero_blocks == ROS) + { + emitfs(state, 4); + } + else if (state->zero_blocks >= 5) + emitfs(state, state->zero_blocks); + else + emitfs(state, state->zero_blocks - 1); state->zero_blocks = 0; state->mode = M_FLUSH_BLOCK; break; diff --git a/src/sz_compat.c b/src/sz_compat.c index a8b594cae2187425c6c0ab5d4be92bab9ce554a7..740982835f035d09ccb4b4d09fe3e33be1cd0021 100644 --- a/src/sz_compat.c +++ b/src/sz_compat.c @@ -8,7 +8,8 @@ int SZ_BufftoBuffCompress(void *dest, size_t *destLen, const void *source, size_ strm.bit_per_sample = param->bits_per_pixel; strm.block_size = param->pixels_per_block; - strm.segment_size = param->pixels_per_scanline / param->pixels_per_block; +// strm.segment_size = param->pixels_per_scanline / param->pixels_per_block; + strm.segment_size = 8; strm.flags = param->options_mask; strm.avail_in = sourceLen; strm.avail_out = *destLen; @@ -36,7 +37,8 @@ int SZ_BufftoBuffDecompress(void *dest, size_t *destLen, const void *source, siz strm.bit_per_sample = param->bits_per_pixel; strm.block_size = param->pixels_per_block; - strm.segment_size = param->pixels_per_scanline / param->pixels_per_block; +// strm.segment_size = param->pixels_per_scanline / param->pixels_per_block; + strm.segment_size = 8; strm.flags = param->options_mask; strm.avail_in = sourceLen; strm.avail_out = *destLen;