Commit 082c9c46 authored by Mathis Rosenhauer

Reduce sample buffer to 32bit for 10% speed increase

parent 0479cfe4
......@@ -40,10 +40,10 @@ clean:
*.gcno *.gcda *.gcov gmon.out
check: encode decode test_szcomp
./encode -c -b1 -B8 -R128 -J8 ../data/example_data > ../data/test.aee
./decode -b1 -B8 -R128 -J8 ../data/test.aee
diff ../data/test ../data/example_data
./encode -c -b4 -B32 -R128 -J8 ../data/example_data > ../data/test.aee
./decode -b4 -B32 -R128 -J8 ../data/test.aee
cmp ../data/test ../data/example_data
./encode -c -b 512 -B8 -R128 -J8 ../data/example_data > ../data/test.aee
./decode -b 512 -B8 -R128 -J8 ../data/test.aee
diff ../data/test ../data/example_data
cmp ../data/test ../data/example_data
./test_szcomp 65536 ../data/example_data_16
......@@ -40,7 +40,7 @@ static int m_encode_zero(ae_streamp strm);
*
*/
static inline void emit(encode_state *state, uint64_t data, int bits)
static inline void emit(encode_state *state, uint32_t data, int bits)
{
if (bits <= state->bit_p)
{
......@@ -50,7 +50,7 @@ static inline void emit(encode_state *state, uint64_t data, int bits)
else
{
bits -= state->bit_p;
*state->cds_p++ += data >> bits;
*state->cds_p++ += (uint64_t)data >> bits;
while (bits & ~7)
{
......@@ -93,9 +93,9 @@ static inline void emitblock(ae_streamp strm, int k, int skip)
int i;
uint64_t acc;
encode_state *state = strm->state;
int64_t *in = state->block_p + skip;
int64_t *in_end = state->block_p + strm->block_size;
int64_t mask = (1ULL << k) - 1;
uint32_t *in = state->block_p + skip;
uint32_t *in_end = state->block_p + strm->block_size;
uint64_t mask = (1ULL << k) - 1;
uint8_t *out = state->cds_p;
acc = *out;
......@@ -108,7 +108,7 @@ static inline void emitblock(ae_streamp strm, int k, int skip)
while (state->bit_p > k && in < in_end)
{
state->bit_p -= k;
acc += (*in++ & mask) << state->bit_p;
acc += ((uint64_t)(*in++) & mask) << state->bit_p;
}
for (i = 56; i > (state->bit_p & ~7); i -= 8)
......@@ -136,7 +136,7 @@ static inline void preprocess(ae_streamp strm)
{
theta = MIN(last_in - state->xmin,
state->xmax - last_in);
Delta = state->block_buf[i] - last_in;
Delta = (int64_t)state->block_buf[i] - last_in;
last_in = state->block_buf[i];
if (0 <= Delta && Delta <= theta)
{
......@@ -323,8 +323,9 @@ static inline int m_check_zero_block(ae_streamp strm)
static inline int m_select_code_option(ae_streamp strm)
{
int i, j, k, this_bs, looked_bothways, direction;
int64_t d, split_len, uncomp_len;
int64_t split_len_min, se_len, fs_len;
int64_t split_len, uncomp_len;
int64_t split_len_min, fs_len;
int64_t d, se_len;
encode_state *state = strm->state;
/* Length of this block minus reference sample (if present) */
......@@ -340,20 +341,20 @@ static inline int m_select_code_option(ae_streamp strm)
*/
for (;;)
{
fs_len = (state->block_p[1] >> i)
+ (state->block_p[2] >> i)
+ (state->block_p[3] >> i)
+ (state->block_p[4] >> i)
+ (state->block_p[5] >> i)
+ (state->block_p[6] >> i)
+ (state->block_p[7] >> i);
fs_len = (int64_t)(state->block_p[1] >> i)
+ (int64_t)(state->block_p[2] >> i)
+ (int64_t)(state->block_p[3] >> i)
+ (int64_t)(state->block_p[4] >> i)
+ (int64_t)(state->block_p[5] >> i)
+ (int64_t)(state->block_p[6] >> i)
+ (int64_t)(state->block_p[7] >> i);
if (state->ref == 0)
fs_len += (state->block_p[0] >> i);
fs_len += (int64_t)(state->block_p[0] >> i);
if (strm->block_size > 8)
for (j = 8; j < strm->block_size; j++)
fs_len += state->block_p[j] >> i;
fs_len += (int64_t)(state->block_p[j] >> i);
split_len = fs_len + this_bs * (i + 1);
......@@ -441,16 +442,16 @@ static inline int m_select_code_option(ae_streamp strm)
se_len = 1;
for (i = 0; i < strm->block_size; i+= 2)
{
d = state->block_p[i] + state->block_p[i + 1];
d = (int64_t)state->block_p[i] + (int64_t)state->block_p[i + 1];
/* we have to worry about overflow here */
if (d > split_len_min)
{
se_len = d;
se_len = INT64_MAX;
break;
}
else
{
se_len += d * (d + 1) / 2 + state->block_p[i + 1];
se_len += d * (d + 1) / 2 + (int64_t)state->block_p[i + 1];
}
}
......@@ -516,7 +517,7 @@ static inline int m_encode_uncomp(ae_streamp strm)
static inline int m_encode_se(ae_streamp strm)
{
int i;
int64_t d;
uint32_t d;
encode_state *state = strm->state;
emit(state, 1, state->id_len + 1);
......@@ -680,7 +681,9 @@ int ae_encode_init(ae_streamp strm)
state->xmax = (1ULL << strm->bit_per_sample) - 1;
}
state->block_buf = (int64_t *)malloc(strm->rsi * strm->block_size * sizeof(int64_t));
state->block_buf = (uint32_t *)malloc(strm->rsi
* strm->block_size
* sizeof(uint32_t));
if (state->block_buf == NULL)
{
return AE_MEM_ERROR;
......
......@@ -10,15 +10,15 @@
typedef struct internal_state {
int (*mode)(ae_streamp);
void (*get_block)(ae_streamp);
int64_t (*get_sample)(ae_streamp);
uint32_t (*get_sample)(ae_streamp);
int id_len; /* bit length of code option identification key */
int64_t xmin; /* minimum integer for preprocessing */
int64_t xmax; /* maximum integer for preprocessing */
int i; /* counter */
int64_t *block_buf; /* RSI blocks of input */
uint32_t *block_buf; /* RSI blocks of input */
int blocks_avail; /* remaining blocks in buffer */
int64_t *block_p; /* pointer to current block */
uint32_t *block_p; /* pointer to current block */
int block_len; /* input block length in byte */
uint8_t *cds_buf; /* Buffer for one Coded Data Set */
int cds_len; /* max cds length in byte */
......
......@@ -3,14 +3,14 @@
#include "aee.h"
#include "aee_mutators.h"
int64_t get_lsb_32(ae_streamp strm)
uint32_t get_lsb_32(ae_streamp strm)
{
int64_t data;
uint32_t data;
data = ((int64_t)strm->next_in[3] << 24)
| ((int64_t)strm->next_in[2] << 16)
| ((int64_t)strm->next_in[1] << 8)
| (int64_t)strm->next_in[0];
data = ((uint32_t)strm->next_in[3] << 24)
| ((uint32_t)strm->next_in[2] << 16)
| ((uint32_t)strm->next_in[1] << 8)
| (uint32_t)strm->next_in[0];
strm->next_in += 4;
strm->total_in += 4;
......@@ -18,12 +18,12 @@ int64_t get_lsb_32(ae_streamp strm)
return data;
}
int64_t get_lsb_16(ae_streamp strm)
uint32_t get_lsb_16(ae_streamp strm)
{
int64_t data;
uint32_t data;
data = ((int64_t)strm->next_in[1] << 8)
| (int64_t)strm->next_in[0];
data = ((uint32_t)strm->next_in[1] << 8)
| (uint32_t)strm->next_in[0];
strm->next_in += 2;
strm->total_in += 2;
......@@ -31,14 +31,14 @@ int64_t get_lsb_16(ae_streamp strm)
return data;
}
int64_t get_msb_32(ae_streamp strm)
uint32_t get_msb_32(ae_streamp strm)
{
int64_t data;
uint32_t data;
data = ((int64_t)strm->next_in[0] << 24)
| ((int64_t)strm->next_in[1] << 16)
| ((int64_t)strm->next_in[2] << 8)
| (int64_t)strm->next_in[3];
data = ((uint32_t)strm->next_in[0] << 24)
| ((uint32_t)strm->next_in[1] << 16)
| ((uint32_t)strm->next_in[2] << 8)
| (uint32_t)strm->next_in[3];
strm->next_in += 4;
strm->total_in += 4;
......@@ -46,12 +46,12 @@ int64_t get_msb_32(ae_streamp strm)
return data;
}
int64_t get_msb_16(ae_streamp strm)
uint32_t get_msb_16(ae_streamp strm)
{
int64_t data;
uint32_t data;
data = ((int64_t)strm->next_in[0] << 8)
| (int64_t)strm->next_in[1];
data = ((uint32_t)strm->next_in[0] << 8)
| (uint32_t)strm->next_in[1];
strm->next_in += 2;
strm->total_in += 2;
......@@ -59,7 +59,7 @@ int64_t get_msb_16(ae_streamp strm)
return data;
}
int64_t get_8(ae_streamp strm)
uint32_t get_8(ae_streamp strm)
{
strm->avail_in--;
strm->total_in++;
......@@ -69,18 +69,18 @@ int64_t get_8(ae_streamp strm)
void get_block_msb_16_bs_8(ae_streamp strm)
{
int i;
int64_t *block = strm->state->block_buf;
uint32_t *block = strm->state->block_buf;
for (i = 0; i < 8 * strm->rsi; i += 8)
{
block[i + 0] = ((int64_t)strm->next_in[0] << 8) | (int64_t)strm->next_in[1];
block[i + 1] = ((int64_t)strm->next_in[2] << 8) | (int64_t)strm->next_in[3];
block[i + 2] = ((int64_t)strm->next_in[4] << 8) | (int64_t)strm->next_in[5];
block[i + 3] = ((int64_t)strm->next_in[6] << 8) | (int64_t)strm->next_in[7];
block[i + 4] = ((int64_t)strm->next_in[8] << 8) | (int64_t)strm->next_in[9];
block[i + 5] = ((int64_t)strm->next_in[10] << 8) | (int64_t)strm->next_in[11];
block[i + 6] = ((int64_t)strm->next_in[12] << 8) | (int64_t)strm->next_in[13];
block[i + 7] = ((int64_t)strm->next_in[14] << 8) | (int64_t)strm->next_in[15];
block[i + 0] = ((uint32_t)strm->next_in[0] << 8) | (uint32_t)strm->next_in[1];
block[i + 1] = ((uint32_t)strm->next_in[2] << 8) | (uint32_t)strm->next_in[3];
block[i + 2] = ((uint32_t)strm->next_in[4] << 8) | (uint32_t)strm->next_in[5];
block[i + 3] = ((uint32_t)strm->next_in[6] << 8) | (uint32_t)strm->next_in[7];
block[i + 4] = ((uint32_t)strm->next_in[8] << 8) | (uint32_t)strm->next_in[9];
block[i + 5] = ((uint32_t)strm->next_in[10] << 8) | (uint32_t)strm->next_in[11];
block[i + 6] = ((uint32_t)strm->next_in[12] << 8) | (uint32_t)strm->next_in[13];
block[i + 7] = ((uint32_t)strm->next_in[14] << 8) | (uint32_t)strm->next_in[15];
strm->next_in += 16;
}
......@@ -91,12 +91,12 @@ void get_block_msb_16_bs_8(ae_streamp strm)
void get_block_msb_16(ae_streamp strm)
{
int i;
int64_t *block = strm->state->block_buf;
uint32_t *block = strm->state->block_buf;
for (i = 0; i < strm->block_size * strm->rsi; i++)
{
block[i] = ((int64_t)strm->next_in[2 * i] << 8)
| (int64_t)strm->next_in[2 * i + 1];
block[i] = ((uint32_t)strm->next_in[2 * i] << 8)
| (uint32_t)strm->next_in[2 * i + 1];
}
strm->next_in += 2 * strm->block_size * strm->rsi;
strm->total_in += 2 * strm->block_size * strm->rsi;
......@@ -106,14 +106,14 @@ void get_block_msb_16(ae_streamp strm)
void get_block_msb_32(ae_streamp strm)
{
int i;
int64_t *block = strm->state->block_buf;
uint32_t *block = strm->state->block_buf;
for (i = 0; i < strm->block_size * strm->rsi; i++)
{
block[i] = ((int64_t)strm->next_in[4 * i] << 24)
| ((int64_t)strm->next_in[4 * i + 1] << 16)
| ((int64_t)strm->next_in[4 * i + 2] << 8)
| (int64_t)strm->next_in[4 * i + 3];
block[i] = ((uint32_t)strm->next_in[4 * i] << 24)
| ((uint32_t)strm->next_in[4 * i + 1] << 16)
| ((uint32_t)strm->next_in[4 * i + 2] << 8)
| (uint32_t)strm->next_in[4 * i + 3];
}
strm->next_in += 4 * strm->block_size * strm->rsi;
strm->total_in += 4 * strm->block_size * strm->rsi;
......@@ -123,29 +123,28 @@ void get_block_msb_32(ae_streamp strm)
void get_block_8_bs_8(ae_streamp strm)
{
int i;
int64_t *block = strm->state->block_buf;
uint32_t *block = strm->state->block_buf;
for (i = 0; i < 8 * strm->rsi; i += 8)
{
block[i + 0] = strm->next_in[0];
block[i + 1] = strm->next_in[1];
block[i + 2] = strm->next_in[2];
block[i + 3] = strm->next_in[3];
block[i + 4] = strm->next_in[4];
block[i + 5] = strm->next_in[5];
block[i + 6] = strm->next_in[6];
block[i + 7] = strm->next_in[7];
strm->next_in += 8;
strm->total_in += 8;
strm->avail_in -= 8;
block[i + 0] = strm->next_in[i + 0];
block[i + 1] = strm->next_in[i + 1];
block[i + 2] = strm->next_in[i + 2];
block[i + 3] = strm->next_in[i + 3];
block[i + 4] = strm->next_in[i + 4];
block[i + 5] = strm->next_in[i + 5];
block[i + 6] = strm->next_in[i + 6];
block[i + 7] = strm->next_in[i + 7];
}
strm->next_in += 8 * strm->rsi;
strm->total_in += 8 * strm->rsi;
strm->avail_in -= 8 * strm->rsi;
}
void get_block_8(ae_streamp strm)
{
int i;
int64_t *block = strm->state->block_buf;
uint32_t *block = strm->state->block_buf;
for (i = 0; i < strm->block_size * strm->rsi; i++)
block[i] = strm->next_in[i];
......
......@@ -4,16 +4,16 @@
#include <inttypes.h>
#include "libae.h"
int64_t get_lsb_32(ae_streamp);
int64_t get_lsb_16(ae_streamp);
int64_t get_msb_32(ae_streamp);
int64_t get_msb_16(ae_streamp);
int64_t get_8(ae_streamp);
uint32_t get_lsb_32(ae_streamp strm);
uint32_t get_lsb_16(ae_streamp strm);
uint32_t get_msb_32(ae_streamp strm);
uint32_t get_msb_16(ae_streamp strm);
uint32_t get_8(ae_streamp strm);
void get_block_msb_32(ae_streamp);
void get_block_msb_16_bs_8(ae_streamp);
void get_block_msb_16(ae_streamp);
void get_block_8_bs_8(ae_streamp);
void get_block_8(ae_streamp);
void get_block_msb_32(ae_streamp strm);
void get_block_msb_16_bs_8(ae_streamp strm);
void get_block_msb_16(ae_streamp strm);
void get_block_8_bs_8(ae_streamp strm);
void get_block_8(ae_streamp strm);
#endif
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment