Commit 4172a0b4 authored by Mathis Rosenhauer

Buffer complete RSI of preprocessed input data for substantial speedup

parent b3e52ccd
......@@ -43,7 +43,7 @@ check: encode decode test_szcomp
./encode -c -b1 -B8 -R128 -J8 ../data/example_data > ../data/test.aee
./decode -b1 -B8 -R128 -J8 ../data/test.aee
diff ../data/test ../data/example_data
./encode -c -b1024 -B32 -R128 -J64 ../data/example_data > ../data/test.aee
./decode -b1024 -B32 -R128 -J64 ../data/test.aee
./encode -c -b 512 -B8 -R128 -J8 ../data/example_data > ../data/test.aee
./decode -b 512 -B8 -R128 -J8 ../data/test.aee
diff ../data/test ../data/example_data
./test_szcomp 65536 ../data/example_data_16
This diff is collapsed.
......@@ -13,21 +13,19 @@ typedef struct internal_state {
int64_t (*get_sample)(ae_streamp);
int id_len; /* bit length of code option identification key */
int64_t last_in; /* previous input for preprocessing */
int64_t xmin; /* minimum integer for preprocessing */
int64_t xmax; /* maximum integer for preprocessing */
int i; /* counter for samples */
int64_t *in_block; /* input block buffer */
int in_blklen; /* input block length in byte */
int64_t in_total_blocks;/* total blocks in */
uint8_t *out_block; /* output block buffer */
int out_blklen; /* output block length in byte */
uint8_t *out_bp; /* pointer to current output */
int out_direct; /* output to strm->next_out (1)
or out_block (0) */
int bitp; /* bit pointer to the next unused bit in accumulator */
int block_deferred; /* there is a block in the input buffer
but we first have to emit a zero block */
int i; /* counter */
int64_t *block_buf; /* RSI blocks of input */
int blocks_avail; /* remaining blocks in buffer */
int64_t *block_p; /* pointer to current block */
int block_len; /* input block length in byte */
uint8_t *cds_buf; /* Buffer for one Coded Data Set */
int cds_len; /* max cds length in byte */
uint8_t *cds_p; /* pointer to current output */
int direct_out; /* output to strm->next_out (1)
or cds_buf (0) */
int bit_p; /* bit pointer to the next unused bit in accumulator */
int ref; /* length of reference sample in current block
i.e. 0 or 1 depending on whether the block has
a reference sample or not */
......
......@@ -68,81 +68,89 @@ int64_t get_8(ae_streamp strm)
/*
 * Fill an int64_t sample buffer from strm->next_in, combining each pair of
 * consecutive input elements into one value with the first element in the
 * high 8 bits. Samples are handled in unrolled groups of eight (16 input
 * elements per group).
 *
 * NOTE(review): this listing appears to interleave the pre-change and
 * post-change lines of a diff whose +/- markers were lost: `block` is
 * declared twice in the same scope, once from state->in_block and once
 * from state->block_buf. Confirm against the repository which lines are
 * current before building.
 */
void get_block_msb_16_bs_8(ae_streamp strm)
{
/* Variant 1: read a single group of eight samples into state->in_block. */
int64_t *block = strm->state->in_block;
block[0] = ((int64_t)strm->next_in[0] << 8) | (int64_t)strm->next_in[1];
block[1] = ((int64_t)strm->next_in[2] << 8) | (int64_t)strm->next_in[3];
block[2] = ((int64_t)strm->next_in[4] << 8) | (int64_t)strm->next_in[5];
block[3] = ((int64_t)strm->next_in[6] << 8) | (int64_t)strm->next_in[7];
block[4] = ((int64_t)strm->next_in[8] << 8) | (int64_t)strm->next_in[9];
block[5] = ((int64_t)strm->next_in[10] << 8) | (int64_t)strm->next_in[11];
block[6] = ((int64_t)strm->next_in[12] << 8) | (int64_t)strm->next_in[13];
block[7] = ((int64_t)strm->next_in[14] << 8) | (int64_t)strm->next_in[15];
/* Account for the 16 input elements consumed by the group above. */
strm->next_in += 16;
strm->total_in += 16;
strm->avail_in -= 16;
/* Variant 2: read strm->rsi groups of eight samples into state->block_buf. */
int i;
int64_t *block = strm->state->block_buf;
for (i = 0; i < 8 * strm->rsi; i += 8)
{
block[i + 0] = ((int64_t)strm->next_in[0] << 8) | (int64_t)strm->next_in[1];
block[i + 1] = ((int64_t)strm->next_in[2] << 8) | (int64_t)strm->next_in[3];
block[i + 2] = ((int64_t)strm->next_in[4] << 8) | (int64_t)strm->next_in[5];
block[i + 3] = ((int64_t)strm->next_in[6] << 8) | (int64_t)strm->next_in[7];
block[i + 4] = ((int64_t)strm->next_in[8] << 8) | (int64_t)strm->next_in[9];
block[i + 5] = ((int64_t)strm->next_in[10] << 8) | (int64_t)strm->next_in[11];
block[i + 6] = ((int64_t)strm->next_in[12] << 8) | (int64_t)strm->next_in[13];
block[i + 7] = ((int64_t)strm->next_in[14] << 8) | (int64_t)strm->next_in[15];
/* next_in moves forward every pass, which is why the input indices
 * above stay fixed at 0..15 while the output index advances by i. */
strm->next_in += 16;
}
/* The counters are adjusted once for all strm->rsi groups. */
strm->total_in += 16 * strm->rsi;
strm->avail_in -= 16 * strm->rsi;
}
/*
 * Fill an int64_t sample buffer from strm->next_in, combining each pair of
 * consecutive input elements into one value with the first element in the
 * high 8 bits, then advance next_in/total_in and decrease avail_in by the
 * number of input elements read.
 *
 * NOTE(review): this listing appears to interleave the pre-change and
 * post-change lines of a diff whose +/- markers were lost: `block` is
 * declared twice and two `for` headers follow each other, so as written
 * the second loop is the body of the first. Confirm against the repository
 * which lines are current before building.
 */
void get_block_msb_16(ae_streamp strm)
{
int i;
/* Destination buffer: state->in_block in one variant, state->block_buf
 * in the other. */
int64_t *block = strm->state->in_block;
int64_t *block = strm->state->block_buf;
/* Sample count: strm->block_size in one variant,
 * strm->block_size * strm->rsi in the other. */
for (i = 0; i < strm->block_size; i++)
for (i = 0; i < strm->block_size * strm->rsi; i++)
{
/* Sample i is built from input elements 2*i (high part) and 2*i + 1. */
block[i] = ((int64_t)strm->next_in[2 * i] << 8)
| (int64_t)strm->next_in[2 * i + 1];
}
/* Bookkeeping for the block_size variant: two input elements per sample. */
strm->next_in += 2 * strm->block_size;
strm->total_in += 2 * strm->block_size;
strm->avail_in -= 2 * strm->block_size;
/* Bookkeeping for the block_size * rsi variant. */
strm->next_in += 2 * strm->block_size * strm->rsi;
strm->total_in += 2 * strm->block_size * strm->rsi;
strm->avail_in -= 2 * strm->block_size * strm->rsi;
}
/*
 * Fill an int64_t sample buffer from strm->next_in, combining each run of
 * four consecutive input elements into one value (shifts of 24, 16, 8 and
 * 0 bits, first element highest), then advance next_in/total_in and
 * decrease avail_in by the number of input elements read.
 *
 * NOTE(review): this listing appears to interleave the pre-change and
 * post-change lines of a diff whose +/- markers were lost: `block` is
 * declared twice and two `for` headers follow each other, so as written
 * the second loop is the body of the first. Confirm against the repository
 * which lines are current before building.
 */
void get_block_msb_32(ae_streamp strm)
{
int i;
/* Destination buffer: state->in_block in one variant, state->block_buf
 * in the other. */
int64_t *block = strm->state->in_block;
int64_t *block = strm->state->block_buf;
/* Sample count: strm->block_size in one variant,
 * strm->block_size * strm->rsi in the other. */
for (i = 0; i < strm->block_size; i++)
for (i = 0; i < strm->block_size * strm->rsi; i++)
{
/* Sample i is assembled from input elements 4*i .. 4*i + 3. */
block[i] = ((int64_t)strm->next_in[4 * i] << 24)
| ((int64_t)strm->next_in[4 * i + 1] << 16)
| ((int64_t)strm->next_in[4 * i + 2] << 8)
| (int64_t)strm->next_in[4 * i + 3];
}
/* Bookkeeping for the block_size variant: four input elements per sample. */
strm->next_in += 4 * strm->block_size;
strm->total_in += 4 * strm->block_size;
strm->avail_in -= 4 * strm->block_size;
/* Bookkeeping for the block_size * rsi variant. */
strm->next_in += 4 * strm->block_size * strm->rsi;
strm->total_in += 4 * strm->block_size * strm->rsi;
strm->avail_in -= 4 * strm->block_size * strm->rsi;
}
/*
 * Copy input elements from strm->next_in one-to-one into an int64_t sample
 * buffer, in unrolled groups of eight.
 *
 * NOTE(review): this listing appears to interleave the pre-change and
 * post-change lines of a diff whose +/- markers were lost: `block` is
 * declared twice in the same scope, once from state->in_block and once
 * from state->block_buf. Confirm against the repository which lines are
 * current before building.
 */
void get_block_8_bs_8(ae_streamp strm)
{
/* Variant 1: copy a single group of eight samples into state->in_block. */
int64_t *block = strm->state->in_block;
block[0] = strm->next_in[0];
block[1] = strm->next_in[1];
block[2] = strm->next_in[2];
block[3] = strm->next_in[3];
block[4] = strm->next_in[4];
block[5] = strm->next_in[5];
block[6] = strm->next_in[6];
block[7] = strm->next_in[7];
/* Account for the eight input elements consumed by the group above. */
strm->next_in += 8;
strm->total_in += 8;
strm->avail_in -= 8;
/* Variant 2: copy strm->rsi groups of eight samples into state->block_buf. */
int i;
int64_t *block = strm->state->block_buf;
for (i = 0; i < 8 * strm->rsi; i += 8)
{
block[i + 0] = strm->next_in[0];
block[i + 1] = strm->next_in[1];
block[i + 2] = strm->next_in[2];
block[i + 3] = strm->next_in[3];
block[i + 4] = strm->next_in[4];
block[i + 5] = strm->next_in[5];
block[i + 6] = strm->next_in[6];
block[i + 7] = strm->next_in[7];
/* Here all three stream fields are updated on every pass; the
 * 16-bit sibling get_block_msb_16_bs_8 instead updates total_in and
 * avail_in once after its loop. */
strm->next_in += 8;
strm->total_in += 8;
strm->avail_in -= 8;
}
}
/*
 * Copy input elements from strm->next_in one-to-one into an int64_t sample
 * buffer, then advance next_in/total_in and decrease avail_in by the number
 * of elements copied.
 *
 * NOTE(review): this listing appears to interleave the pre-change and
 * post-change lines of a diff whose +/- markers were lost: `block` is
 * declared twice and two `for` headers follow each other, so as written
 * the second loop is the body of the first. Confirm against the repository
 * which lines are current before building.
 */
void get_block_8(ae_streamp strm)
{
int i;
/* Destination buffer: state->in_block in one variant, state->block_buf
 * in the other. */
int64_t *block = strm->state->in_block;
int64_t *block = strm->state->block_buf;
/* Sample count: strm->block_size in one variant,
 * strm->block_size * strm->rsi in the other. */
for (i = 0; i < strm->block_size; i++)
for (i = 0; i < strm->block_size * strm->rsi; i++)
block[i] = strm->next_in[i];
/* Bookkeeping for the block_size variant: one input element per sample. */
strm->next_in += strm->block_size;
strm->total_in += strm->block_size;
strm->avail_in -= strm->block_size;
/* Bookkeeping for the block_size * rsi variant. */
strm->next_in += strm->block_size * strm->rsi;
strm->total_in += strm->block_size * strm->rsi;
strm->avail_in -= strm->block_size * strm->rsi;
}
......@@ -35,10 +35,6 @@ typedef ae_stream *ae_streamp;
/* Option values. AE_DATA_SZ_COMPAT is OR-ed into strm.flags by
 * SZ_BufftoBuffCompress elsewhere in this listing; the others presumably
 * combine into the same flags field in the same way (TODO confirm).
 */
#define AE_DATA_LSB 8 /* presumably: least significant byte first (TODO confirm) */
#define AE_DATA_MSB 16 /* presumably: most significant byte first (TODO confirm) */
#define AE_DATA_PREPROCESS 32 /* Set if preprocessor should be used */
/* NOTE(review): the hunk header for this section reports 10 lines before
 * and 6 after, which suggests the four-line AE_DATA_SZ_COMPAT definition
 * below is deleted by this commit. Confirm against the repository.
 */
#define AE_DATA_SZ_COMPAT 256 /* Set this if you want szip to decode
* our output. Increases output
* slightly.
*/
/* Return codes of library functions */
#define AE_OK 0
......
......@@ -10,7 +10,7 @@ int SZ_BufftoBuffCompress(void *dest, size_t *destLen, const void *source, size_
strm.bit_per_sample = param->bits_per_pixel;
strm.block_size = param->pixels_per_block;
strm.rsi = param->pixels_per_scanline / param->pixels_per_block;
strm.flags = param->options_mask | AE_DATA_SZ_COMPAT;
strm.flags = param->options_mask;
strm.avail_in = sourceLen;
strm.avail_out = *destLen;
strm.next_out = dest;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment