Commit 647a83fc authored by Mathis Rosenhauer

Obvious performance enhancements for encoding

parent 79136519
CC = gcc
#CFLAGS = -g -pg -lc -O2 -Wall -fprofile-arcs -ftest-coverage -DPROFILE
#CFLAGS = -g -pg -lc -O2 -Wall -fprofile-arcs -ftest-coverage -DUNROLL_BLOCK_8
CFLAGS = -g -O3 -Wall
#CFLAGS = -g -pg -lc -O0 -Wall -fprofile-arcs -ftest-coverage
CFLAGS = -g -O3 -Wall -std=c99
OBJS = aee.o aed.o sz_compat.o
OBJS = aee.o aee_mutators.o aed.o sz_compat.o
.PHONY : all clean check
......@@ -23,7 +22,9 @@ libae.a: $(OBJS)
-@ ($(RANLIB) $@ || true) >/dev/null 2>&1
aed.o: libae.h
aee.o: libae.h
aee.o: aee_mutators.h aee.h libae.h
aee_mutators.o: aee.h libae.h
sz_compat.o: libae.h
install: libae.a
mkdir -p ../lib ../include
......
/* Adaptive Entropy Encoder */
/* CCSDS 121.0-B-1 and CCSDS 120.0-G-2 */
/**
* @file aee.c
* @author Mathis Rosenhauer, Deutsches Klimarechenzentrum
* @section DESCRIPTION
*
* Adaptive Entropy Encoder
* Based on CCSDS documents 121.0-B-1 and 120.0-G-2
*
*/
#include <stdio.h>
#include <stdlib.h>
......@@ -8,623 +15,682 @@
#include <string.h>
#include "libae.h"
#include "aee.h"
#include "aee_mutators.h"
/* Sentinel for the Remainder Of Segment condition in zero block
   encoding; it is stored in place of a zero block count. */
#define ROS (-1)
/* Smaller and larger of two values. Each argument may be evaluated
   twice, so do not pass expressions with side effects. */
#define MIN(a, b) ((a) < (b) ? (a) : (b))
#define MAX(a, b) ((b) < (a) ? (a) : (b))
/* Modes of the encoder's finite-state machine. The active mode is kept
   in the mode member of the encoder state. Values are spelled out so
   that the numbering does not depend on declaration order. */
enum
{
    M_NEW_BLOCK          = 0,
    M_GET_BLOCK          = 1,
    M_CHECK_ZERO_BLOCK   = 2,
    M_SELECT_CODE_OPTION = 3,
    M_ENCODE_SPLIT       = 4,
    M_FLUSH_BLOCK        = 5,
    M_FLUSH_BLOCK_LOOP   = 6,
    M_ENCODE_UNCOMP      = 7,
    M_ENCODE_SE          = 8,
    M_ENCODE_ZERO        = 9
};
/* Internal state of the encoder. It is allocated by ae_encode_init and
   reached through strm->state. */
typedef struct internal_state {
int id_len; /* bit length of code option identification key */
int64_t last_in; /* previous input for preprocessing */
/* reads the next input sample from the stream and advances the input;
   set to one of the get_* accessors during initialisation */
int64_t (*get_sample)(ae_streamp);
int64_t xmin; /* minimum integer for preprocessing */
int64_t xmax; /* maximum integer for preprocessing */
int mode; /* current mode of FSM */
int i; /* counter for samples */
int64_t *block_in; /* input block buffer */
uint8_t *block_out; /* output block buffer */
uint8_t *bp_out; /* pointer to current output */
int64_t total_blocks; /* number of input blocks read so far */
int bitp; /* bit pointer to the next unused bit in accumulator */
int block_deferred; /* there is a block in the input buffer
but we first have to emit a zero block */
int ref; /* length of reference sample in current block
i.e. 0 or 1 depending on whether the block has
a reference sample or not */
int zero_ref; /* current zero block has a reference sample */
int64_t zero_ref_sample;/* reference sample of zero block */
int zero_blocks; /* number of contiguous zero blocks */
#ifdef PROFILE
int *prof; /* counters of blocks encoded by each coding option */
#endif
} encode_state;
/**
 * Read one 32-bit sample stored least significant byte first.
 *
 * Consumes four bytes from strm->next_in and updates next_in, total_in
 * and avail_in accordingly. No check is made that four input bytes are
 * actually available.
 *
 * @param strm stream to read from
 * @return the sample; the assembled 32-bit word is reinterpreted as a
 *         signed 32-bit integer, so words with bit 31 set are returned
 *         as negative values
 */
static int64_t get_lsb_32(ae_streamp strm)
{
    uint32_t word;

    /* Assemble in unsigned arithmetic: a byte promoted to int and
       shifted left by 24 can overflow into the sign bit, which is
       undefined behaviour.
       NOTE(review): assumes next_in points to unsigned bytes - confirm
       against libae.h. */
    word = ((uint32_t)strm->next_in[3] << 24)
        | ((uint32_t)strm->next_in[2] << 16)
        | ((uint32_t)strm->next_in[1] << 8)
        | (uint32_t)strm->next_in[0];

    strm->next_in += 4;
    strm->total_in += 4;
    strm->avail_in -= 4;

    /* Converting through int32_t keeps the sign extension that the
       previous int-typed expression produced. */
    return (int32_t)word;
}
/* Read one 16-bit sample stored low byte first and advance the input
   position by two bytes. */
static int64_t get_lsb_16(ae_streamp strm)
{
    int64_t sample = strm->next_in[0] | (strm->next_in[1] << 8);

    strm->avail_in -= 2;
    strm->total_in += 2;
    strm->next_in += 2;

    return sample;
}
/* Forward declarations of the FSM handler functions. Each handler takes
   the stream and returns an int status; handlers are assigned to
   state->mode to select the next step, so they must be declared before
   the first handler that refers to them. */
static int m_get_block(ae_streamp strm);
static int m_get_block_cautious(ae_streamp strm);
static int m_check_zero_block(ae_streamp strm);
static int m_select_code_option(ae_streamp strm);
static int m_flush_block(ae_streamp strm);
static int m_flush_block_cautious(ae_streamp strm);
static int m_encode_splitting(ae_streamp strm);
static int m_encode_uncomp(ae_streamp strm);
static int m_encode_se(ae_streamp strm);
static int m_encode_zero(ae_streamp strm);
/*
*
* Bit emitters
*
*/
static int64_t get_msb_32(ae_streamp strm)
static inline void emit(encode_state *state, uint64_t data, int bits)
{
int64_t data;
data = (strm->next_in[0] << 24)
| (strm->next_in[1] << 16)
| (strm->next_in[2] << 8)
| strm->next_in[3];
strm->next_in += 4;
strm->total_in += 4;
strm->avail_in -= 4;
return data;
for(;;)
{
data &= ((1ULL << bits) - 1);
if (bits <= state->bitp)
{
state->bitp -= bits;
*state->out_bp += data << state->bitp;
break;
}
else
{
bits -= state->bitp;
*state->out_bp += data >> bits;
*++state->out_bp = 0;
state->bitp = 8;
}
}
}
static int64_t get_msb_16(ae_streamp strm)
static inline void emitfs(encode_state *state, int fs)
{
int64_t data;
data = (strm->next_in[0] << 8) | strm->next_in[1];
/**
Emits a fundamental sequence.
strm->next_in += 2;
strm->total_in += 2;
strm->avail_in -= 2;
return data;
}
fs zero bits followed by one 1 bit.
*/
static int64_t get_8(ae_streamp strm)
{
strm->avail_in--;
strm->total_in++;
return *strm->next_in++;
fs++;
for(;;)
{
if (fs <= state->bitp)
{
state->bitp -= fs;
*state->out_bp += 1 << state->bitp;
break;
}
else
{
fs -= state->bitp;
*++state->out_bp = 0;
state->bitp = 8;
}
}
}
int ae_encode_init(ae_streamp strm)
static inline void preprocess(ae_streamp strm)
{
int blklen;
encode_state *state;
int i;
int64_t theta, d, Delta;
encode_state *state = strm->state;
/* Some sanity checks */
if (strm->bit_per_sample > 32 || strm->bit_per_sample == 0)
/* If this is the first block between reference
samples then we need to insert one.
*/
if(state->in_total_blocks % strm->rsi == 0)
{
return AE_ERRNO;
state->ref = 1;
state->last_in = state->in_block[0];
}
/* Internal state for encoder */
state = (encode_state *) malloc(sizeof(encode_state));
if (state == NULL)
else
{
return AE_MEM_ERROR;
state->ref = 0;
}
strm->state = state;
if (strm->bit_per_sample > 16)
{
state->id_len = 5;
if (strm->flags & AE_DATA_MSB)
state->get_sample = get_msb_32;
else
state->get_sample = get_lsb_32;
}
else if (strm->bit_per_sample > 8)
for (i = state->ref; i < strm->block_size; i++)
{
state->id_len = 4;
if (strm->flags & AE_DATA_MSB)
state->get_sample = get_msb_16;
theta = MIN(state->last_in - state->xmin,
state->xmax - state->last_in);
Delta = state->in_block[i] - state->last_in;
state->last_in = state->in_block[i];
if (0 <= Delta && Delta <= theta)
{
state->in_block[i] = 2 * Delta;
}
else if (-theta <= Delta && Delta < 0)
{
d = Delta < 0 ? -(uint64_t)Delta : Delta;
state->in_block[i] = 2 * d - 1;
}
else
state->get_sample = get_lsb_16;
}
else
{
state->id_len = 3;
state->get_sample = get_8;
{
state->in_block[i] = theta +
(Delta < 0 ? -(uint64_t)Delta : Delta);
}
}
}
if (strm->flags & AE_DATA_SIGNED)
/*
*
* FSM functions
*
*/
static int m_get_block(ae_streamp strm)
{
encode_state *state = strm->state;
if (strm->avail_out > state->out_blklen)
{
state->xmin = -(1ULL << (strm->bit_per_sample - 1));
state->xmax = (1ULL << (strm->bit_per_sample - 1)) - 1;
if (!state->out_direct)
{
state->out_direct = 1;
*strm->next_out = *state->out_bp;
state->out_bp = strm->next_out;
}
}
else
{
state->xmin = 0;
state->xmax = (1ULL << strm->bit_per_sample) - 1;
if (state->zero_blocks == 0 || state->out_direct)
{
/* copy leftover from last block */
*state->out_block = *state->out_bp;
state->out_bp = state->out_block;
}
state->out_direct = 0;
}
#ifdef PROFILE
state->prof = (int *)malloc((strm->bit_per_sample + 2) * sizeof(int));
if (state->prof == NULL)
if(state->block_deferred)
{
return AE_MEM_ERROR;
state->block_deferred = 0;
state->mode = m_select_code_option;
return M_CONTINUE;
}
memset(state->prof, 0, (strm->bit_per_sample + 2) * sizeof(int));
#endif
state->block_in = (int64_t *)malloc(strm->block_size * sizeof(int64_t));
if (state->block_in == NULL)
if (strm->avail_in >= state->in_blklen)
{
return AE_MEM_ERROR;
}
state->get_block(strm);
/* Largest possible block according to specs */
blklen = (5 + 16 * 32) / 8 + 3;
state->block_out = (uint8_t *)malloc(blklen);
if (state->block_out == NULL)
if (strm->flags & AE_DATA_PREPROCESS)
preprocess(strm);
state->in_total_blocks++;
return m_check_zero_block(strm);
}
else
{
return AE_MEM_ERROR;
state->i = 0;
state->mode = m_get_block_cautious;
}
state->bp_out = state->block_out;
*state->bp_out = 0;
state->bitp = 8;
strm->total_in = 0;
strm->total_out = 0;
state->mode = M_NEW_BLOCK;
state->total_blocks = 0;
state->block_deferred = 0;
state->zero_blocks = 0;
state->zero_ref = 0;
state->ref = 0;
return AE_OK;
return M_CONTINUE;
}
int ae_encode_end(ae_streamp strm)
static int m_get_block_cautious(ae_streamp strm)
{
encode_state *state;
state = strm->state;
#ifdef PROFILE
free(state->prof);
#endif
free(state->block_in);
free(state->block_out);
free(state);
return AE_OK;
}
encode_state *state = strm->state;
static inline void emit(encode_state *state, uint64_t data, int bits)
{
while(bits)
do
{
data &= ((1ULL << bits) - 1);
if (bits <= state->bitp)
if (strm->avail_in == 0)
{
state->bitp -= bits;
*state->bp_out += data << state->bitp;
bits = 0;
if (state->flush == AE_FLUSH)
{
if (state->i > 0)
{
/* pad block with last sample if we have
a partial block */
state->in_block[state->i] = state->in_block[state->i - 1];
}
else
{
if (state->zero_blocks)
{
/* Output any remaining zero blocks */
state->mode = m_encode_zero;
return M_CONTINUE;
}
/* Pad last output byte with 0 bits
if user wants to flush, i.e. we got
all input there is.
*/
emit(state, 0, state->bitp);
if (state->out_direct == 0)
*strm->next_out++ = *state->out_bp;
strm->avail_out--;
strm->total_out++;
return M_EXIT;
}
}
else
{
return M_EXIT;
}
}
else
{
bits -= state->bitp;
*state->bp_out += data >> bits;
*++state->bp_out = 0;
state->bitp = 8;
state->in_block[state->i] = state->get_sample(strm);
}
}
while (++state->i < strm->block_size);
if (strm->flags & AE_DATA_PREPROCESS)
preprocess(strm);
state->in_total_blocks++;
return m_check_zero_block(strm);
}
static inline void emitfs(encode_state *state, int fs)
static inline int m_check_zero_block(ae_streamp strm)
{
if (fs < 63)
{
emit(state, 1, fs + 1);
}
else
int i;
encode_state *state = strm->state;
i = state->ref;
while(i < strm->block_size && state->in_block[i] == 0)
i++;
if (i == strm->block_size)
{
emit(state, 0, fs);
emit(state, 1, 1);
}
}
/* remember ref on first zero block */
if (state->zero_blocks == 0)
{
state->zero_ref = state->ref;
state->zero_ref_sample = state->in_block[0];
}
#ifdef PROFILE
static inline void profile_print(ae_streamp strm)
{
int i, total;
encode_state *state;
state->zero_blocks++;
state = strm->state;
fprintf(stderr, "Blocks encoded by each coding option\n");
fprintf(stderr, "Zero blocks: %i\n", state->prof[0]);
total = state->prof[0];
fprintf(stderr, "Second Ext.: %i\n", state->prof[strm->bit_per_sample+1]);
total += state->prof[strm->bit_per_sample+1];
fprintf(stderr, "FS: %i\n", state->prof[1]);
total += state->prof[1];
for (i = 2; i < strm->bit_per_sample - 1; i++)
if (state->in_total_blocks % strm->rsi % 64 == 0)
{
if (state->zero_blocks > 4)
state->zero_blocks = ROS;
state->mode = m_encode_zero;
return M_CONTINUE;
}
state->mode = m_get_block;
return M_CONTINUE;
}
else if (state->zero_blocks)
{
fprintf(stderr, "k = %02i: %i\n", i-1, state->prof[i]);
total += state->prof[i];
/* The current block isn't zero but we have to
emit a previous zero block first. The
current block will be handled later.
*/
state->block_deferred = 1;
state->mode = m_encode_zero;
return M_CONTINUE;
}
fprintf(stderr, "Uncompressed: %i\n", state->prof[strm->bit_per_sample]);
total += state->prof[strm->bit_per_sample];
fprintf(stderr, "Total blocks: %i\n", total);
state->mode = m_select_code_option;
return M_CONTINUE;
}
#endif
int ae_encode(ae_streamp strm, int flush)
static inline int m_select_code_option(ae_streamp strm)
{
/**
Finite-state machine implementation of the adaptive entropy
encoder.
*/
int i, j, k, zb, this_bs;
int64_t split_len;
int i, k, this_bs, looked_bothways, direction;
int64_t d, split_len, uncomp_len;
int64_t split_len_min, se_len, fs_len;
int64_t d;
int64_t theta, Delta;
size_t avail_out, total_out;
encode_state *state = strm->state;
encode_state *state;
/* Length of this block minus reference sample (if present) */
this_bs = strm->block_size - state->ref;
state = strm->state;
total_out = strm->total_out;
avail_out = strm->avail_out;
split_len_min = INT64_MAX;
i = state->k;
direction = 1;
looked_bothways = 0;
/* Starting with the splitting position of the last block, look left
and possibly right to find the new minimum. */
for (;;)
{
switch(state->mode)
fs_len = (state->in_block[1] >> i)
+ (state->in_block[2] >> i)
+ (state->in_block[3] >> i)
+ (state->in_block[4] >> i)
+ (state->in_block[5] >> i)
+ (state->in_block[6] >> i)
+ (state->in_block[7] >> i);
if (state->ref == 0)
fs_len += (state->in_block[0] >> i);
if (strm->block_size == 16)
fs_len += (state->in_block[8] >> i)
+ (state->in_block[9] >> i)
+ (state->in_block[10] >> i)
+ (state->in_block[11] >> i)
+ (state->in_block[12] >> i)
+ (state->in_block[13] >> i)
+ (state->in_block[14] >> i)
+ (state->in_block[15] >> i);
split_len = fs_len + this_bs * (i + 1);
if (split_len < split_len_min)
{
case M_NEW_BLOCK:
if (state->zero_blocks == 0)
if (split_len_min < INT64_MAX)
{
/* copy leftover from last block */
*state->block_out = *state->bp_out;
state->bp_out = state->block_out;
/* We are moving towards the minimum so it can't be in
the other direction. */
looked_bothways = 1;
}
split_len_min = split_len;
k = i;
if(state->block_deferred)
if (direction == 1)
{
state->block_deferred = 0;
state->mode = M_SELECT_CODE_OPTION;
break;
}
state->i = 0;
state->mode = M_GET_BLOCK;
case M_GET_BLOCK:
do
{
if (strm->avail_in == 0)
if (fs_len < this_bs)
{
if (flush == AE_FLUSH)
/* Next can't get better because what we lose by
additional uncompressed bits isn't compensated by a
smaller FS part. Vice versa if we are coming from
the other direction.*/
if (looked_bothways)
{
if (state->i > 0)
{
/* pad block with last sample if we have
a partial block */
state->block_in[state->i] = state->block_in[state->i - 1];
}
else
{
if (state->zero_blocks)
{
/* Output any remaining zero blocks */
state->mode=M_ENCODE_ZERO;
break;
}
/* Pad last output byte with 0 bits
if user wants to flush, i.e. we got
all input there is.
*/
emit(state, 0, state->bitp);
*strm->next_out++ = *state->bp_out;
avail_out--;
total_out++;
#ifdef PROFILE
profile_print(strm);
#endif
goto req_buffer;
}
break;
}
else
{
goto req_buffer;
direction = -direction;
looked_bothways = 1;
i = state->k;
}
}
else
{
state->block_in[state->i] = state->get_sample(strm);
while (fs_len > 5 * this_bs)
{
i++;
fs_len /= 5;
}
}
}
while (++state->i < strm->block_size);
state->total_blocks++;
/* preprocess block if needed */
if (strm->flags & AE_DATA_PREPROCESS)
else if (fs_len > this_bs)
{
/* If this is the first block in a segment
then we need to insert a reference sample.
*/
if(state->total_blocks % strm->rsi == 1)
/* Since we started looking the other way there is no
need to turn back.*/
break;
}
}
else
{
/* Stop looking for better option if we
don't see any improvement. */
if (looked_bothways)
{
state->ref = 1;
state->last_in = state->block_in[0];
break;
}
else
{
state->ref = 0;
direction = -direction;
looked_bothways = 1;
i = state->k;
}
}
if (i + direction < 0
|| i + direction >= strm->bit_per_sample - 2)
{
if (looked_bothways)
break;
for (i = state->ref; i < strm->block_size; i++)
{
theta = MIN(state->last_in - state->xmin,
state->xmax - state->last_in);
Delta = state->block_in[i] - state->last_in;
state->last_in = state->block_in[i];
if (0 <= Delta && Delta <= theta)
{
state->block_in[i] = 2 * Delta;
}
else if (-theta <= Delta && Delta < 0)
{
d = Delta < 0 ? -(uint64_t)Delta : Delta;
state->block_in[i] = 2 * d - 1;
}
else
{
state->block_in[i] = theta +
(Delta < 0 ? -(uint64_t)Delta : Delta);
}
}
}