Commit 0a02c98c authored by Uwe Schulzweida
Browse files

Added new namelist parser.

parent dbc2c02b
......@@ -86,6 +86,8 @@ libcdo_la_SOURCES = \
merge_sort2.h \
modules.c \
modules.h \
namelist.c \
namelist.h \
nml.c \
nml.h \
normal.c \
......
......@@ -124,11 +124,12 @@ am_libcdo_la_OBJECTS = libcdo_la-cdo_pthread.lo libcdo_la-cdo_vlist.lo \
libcdo_la-interpol.lo libcdo_la-job.lo libcdo_la-juldate.lo \
libcdo_la-grid_search.lo libcdo_la-listarray.lo \
libcdo_la-list.lo libcdo_la-merge_sort2.lo \
libcdo_la-modules.lo libcdo_la-nml.lo libcdo_la-normal.lo \
libcdo_la-nth_element.lo libcdo_la-par_io.lo \
libcdo_la-parse_cmor_table.lo libcdo_la-percentiles_hist.lo \
libcdo_la-percentiles.lo libcdo_la-pipe.lo libcdo_la-pml.lo \
libcdo_la-pmlist.lo libcdo_la-process.lo libcdo_la-pstream.lo \
libcdo_la-modules.lo libcdo_la-namelist.lo libcdo_la-nml.lo \
libcdo_la-normal.lo libcdo_la-nth_element.lo \
libcdo_la-par_io.lo libcdo_la-parse_cmor_table.lo \
libcdo_la-percentiles_hist.lo libcdo_la-percentiles.lo \
libcdo_la-pipe.lo libcdo_la-pml.lo libcdo_la-pmlist.lo \
libcdo_la-process.lo libcdo_la-pstream.lo \
libcdo_la-pthread_debug.lo libcdo_la-readline.lo \
libcdo_la-realtime.lo libcdo_la-remaplib.lo \
libcdo_la-remapsort.lo libcdo_la-remap_scrip_io.lo \
......@@ -514,22 +515,22 @@ libcdo_la_SOURCES = cdo_int.h compare.h cdo_pthread.c cdo_vlist.c \
griddes_nc.c hetaeta.c hetaeta.h institution.c interpol.c \
interpol.h job.c juldate.c grid_search.c grid_search.h \
listarray.c listarray.h list.c list.h merge_sort2.c \
merge_sort2.h modules.c modules.h nml.c nml.h normal.c \
nth_element.c nth_element.h operator_help.h par_io.c par_io.h \
parse_cmor_table.c percentiles_hist.c percentiles_hist.h \
percentiles.c percentiles.h pipe.c pipe.h pml.c pml.h pmlist.c \
pmlist.h pragma_omp_atomic_update.h printinfo.h process.c \
process.h pstream.c pstream.h pstream_write.h pstream_int.h \
pthread_debug.c pthread_debug.h readline.c realtime.c remap.h \
remaplib.c remapsort.c remap_scrip_io.c remap_search_reg2d.c \
remap_search_latbins.c remap_store_link.c remap_store_link.h \
remap_store_link_cnsrv.c remap_store_link_cnsrv.h \
remap_conserv.c remap_conserv_scrip.c remap_distwgt.c \
remap_bicubic_scrip.c remap_bilinear_scrip.c stdnametable.c \
stdnametable.h specspace.c specspace.h statistic.c statistic.h \
table.c text.c text.h timebase.h timer.c userlog.c uthash.h \
util.c util.h zaxis.c json/jsmn.h json/jsmn.c \
kdtreelib/kdtree.h kdtreelib/kdtree_cartesian.c \
merge_sort2.h modules.c modules.h namelist.c namelist.h nml.c \
nml.h normal.c nth_element.c nth_element.h operator_help.h \
par_io.c par_io.h parse_cmor_table.c percentiles_hist.c \
percentiles_hist.h percentiles.c percentiles.h pipe.c pipe.h \
pml.c pml.h pmlist.c pmlist.h pragma_omp_atomic_update.h \
printinfo.h process.c process.h pstream.c pstream.h \
pstream_write.h pstream_int.h pthread_debug.c pthread_debug.h \
readline.c realtime.c remap.h remaplib.c remapsort.c \
remap_scrip_io.c remap_search_reg2d.c remap_search_latbins.c \
remap_store_link.c remap_store_link.h remap_store_link_cnsrv.c \
remap_store_link_cnsrv.h remap_conserv.c remap_conserv_scrip.c \
remap_distwgt.c remap_bicubic_scrip.c remap_bilinear_scrip.c \
stdnametable.c stdnametable.h specspace.c specspace.h \
statistic.c statistic.h table.c text.c text.h timebase.h \
timer.c userlog.c uthash.h util.c util.h zaxis.c json/jsmn.h \
json/jsmn.c kdtreelib/kdtree.h kdtreelib/kdtree_cartesian.c \
kdtreelib/kdtree_common.c kdtreelib/kdtree_spherical.c \
kdtreelib/qsort.c kdtreelib/pmergesort.c kdtreelib/pqueue.c \
kdtreelib/pqueue.h clipping/clipping.c clipping/clipping.h \
......@@ -1038,6 +1039,7 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libcdo_la-listarray.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libcdo_la-merge_sort2.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libcdo_la-modules.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libcdo_la-namelist.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libcdo_la-nml.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libcdo_la-normal.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libcdo_la-nth_element.Plo@am__quote@
......@@ -1455,6 +1457,13 @@ libcdo_la-modules.lo: modules.c
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libcdo_la_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o libcdo_la-modules.lo `test -f 'modules.c' || echo '$(srcdir)/'`modules.c
libcdo_la-namelist.lo: namelist.c
@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libcdo_la_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT libcdo_la-namelist.lo -MD -MP -MF $(DEPDIR)/libcdo_la-namelist.Tpo -c -o libcdo_la-namelist.lo `test -f 'namelist.c' || echo '$(srcdir)/'`namelist.c
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/libcdo_la-namelist.Tpo $(DEPDIR)/libcdo_la-namelist.Plo
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='namelist.c' object='libcdo_la-namelist.lo' libtool=yes @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libcdo_la_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o libcdo_la-namelist.lo `test -f 'namelist.c' || echo '$(srcdir)/'`namelist.c
libcdo_la-nml.lo: nml.c
@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libcdo_la_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT libcdo_la-nml.lo -MD -MP -MF $(DEPDIR)/libcdo_la-nml.Tpo -c -o libcdo_la-nml.lo `test -f 'nml.c' || echo '$(srcdir)/'`nml.c
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/libcdo_la-nml.Tpo $(DEPDIR)/libcdo_la-nml.Plo
......
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include "namelist.h"
// Puts every field of the parser into its empty state: no token array,
// zero capacity, no tokens in use, read position 0 and line counter 0.
// The token array is NOT freed here; a caller that owns one must free it
// before calling this function.
static
void namelist_init(namelist_parser *parser)
{
  parser->tokens     = NULL;
  parser->num_tokens = 0;
  parser->toknext    = 0;
  parser->pos        = 0;
  // Initialise the line counter too, so that a freshly created parser never
  // exposes an indeterminate value before namelist_parse() has been called.
  parser->lineno     = 0;
}
// Allocates a parser on the heap and puts it into its empty state.
// Returns the new parser, or NULL if the allocation failed.
// The parser must be released with namelist_destroy().
namelist_parser *namelist_new(void)
{
  namelist_parser *parser = (namelist_parser *) malloc(sizeof(*parser));
  // Do not hand a NULL pointer to namelist_init(): it writes through it
  if ( parser == NULL ) return NULL;

  namelist_init(parser);

  return parser;
}
// Releases a parser together with its token array.
// A NULL parser is accepted and ignored.
void namelist_destroy(namelist_parser *parser)
{
  if ( parser == NULL ) return;

  // free(NULL) is a no-op, so the token array needs no separate NULL test
  free(parser->tokens);
  namelist_init(parser);
  free(parser);
}
// Allocates a fresh unused token from the token pool.
// When all allocated tokens are in use the pool is enlarged by TOK_MEM_INCR
// entries. The returned token has type NAMELIST_UNDEFINED and
// start = end = -1.
// The returned pointer refers into parser->tokens; since the pool is grown
// with realloc(), a later call may move the pool and invalidate it.
// Prints a message to stderr and terminates the program if the pool cannot
// be enlarged.
static
namelisttok_t *namelist_alloc_token(namelist_parser *parser)
{
  const unsigned int TOK_MEM_INCR = 64;

  if ( parser->toknext >= parser->num_tokens )
    {
      unsigned int new_size = parser->num_tokens + TOK_MEM_INCR;
      // Keep the old pointer and capacity until realloc() has succeeded
      namelisttok_t *new_tokens = (namelisttok_t *) realloc(parser->tokens, sizeof(*new_tokens) * new_size);
      if ( new_tokens == NULL )
        {
          fprintf(stderr, "%s: Failed to allocate more memory!\n", __func__);
          exit(-1);
        }

      parser->tokens     = new_tokens;
      parser->num_tokens = new_size;
    }

  namelisttok_t *tok = &parser->tokens[parser->toknext++];
  // realloc() does not clear new memory: give every field a defined value
  tok->type  = NAMELIST_UNDEFINED;
  tok->start = tok->end = -1;

  return tok;
}
// Stores the token type together with its start and end positions.
static
void namelist_fill_token(namelisttok_t *token, int type, int start, int end)
{
  *token = (namelisttok_t) { .type = type, .start = start, .end = end };
}
// Takes the next free token and marks it as a NAMELIST object that begins at
// the current parser position. The end position is not set here.
void namelist_new_object(namelist_parser *parser)
{
  namelisttok_t *object = namelist_alloc_token(parser);

  object->start = parser->pos;
  object->type  = NAMELIST_OBJECT;
}
// Reads an unquoted NAMELIST word that begins at parser->pos and stores it
// in the next available token as NAMELIST_WORD.
// The word ends at the first separator (':', '=', ',', '&', '/'), at the
// first blank, tab, CR or LF, at a NUL byte, or at the end of the buffer.
// Returns 0 on success. Returns NAMELIST_ERROR_INVAL, with parser->pos reset
// to the start of the word, if a character with a code below 32 or of 127
// and above is found.
static
int namelist_parse_word(namelist_parser *parser, const char *buf, size_t len)
{
  const int first = parser->pos;

  while ( parser->pos < len && buf[parser->pos] != '\0' )
    {
      const char c = buf[parser->pos];

      // A separator or white space terminates the word
      if ( c == ':'  || c == '='  || c == ','  || c == '&' || c == '/' ||
           c == '\r' || c == '\n' || c == '\t' || c == ' ' )
        break;

      // Everything else has to be a printable ASCII character
      if ( c < 32 || c >= 127 )
        {
          parser->pos = first;
          return NAMELIST_ERROR_INVAL;
        }

      parser->pos++;
    }

  namelisttok_t *word = namelist_alloc_token(parser);
  namelist_fill_token(word, NAMELIST_WORD, first, parser->pos);

  // Step back onto the last character of the word; the increment in the
  // loop of namelist_parse() then lands on the terminating character.
  parser->pos--;

  return 0;
}
// Fills next token with NAMELIST string.
// parser->pos must be on the opening double quote. On success the token
// covers the text between the quotes (quotes excluded) and parser->pos is
// left on the closing quote.
// Returns 0 on success.
// Returns NAMELIST_ERROR_INVAL for an unknown escape sequence or a non-hex
// character inside \uXXXX; parser->pos is then left on the offending
// character.
// Returns NAMELIST_ERROR_PART, with parser->pos reset to the opening quote,
// if the buffer ends before the closing quote is found.
static
int namelist_parse_string(namelist_parser *parser, const char *buf, size_t len)
{
  const int start = parser->pos;

  // Skip starting quote
  parser->pos++;

  for ( ; parser->pos < len && buf[parser->pos] != '\0'; parser->pos++ )
    {
      const char c = buf[parser->pos];

      // Quote: end of string
      if ( c == '\"' )
        {
          namelisttok_t *token = namelist_alloc_token(parser);
          namelist_fill_token(token, NAMELIST_STRING, start+1, parser->pos);
          return 0;
        }

      // Backslash: Quoted symbol expected
      if ( c == '\\' && parser->pos + 1 < len )
        {
          parser->pos++;
          switch (buf[parser->pos])
            {
              // Allowed escaped symbols
            case '\"': case '\\': case 'b':
            case 'f':  case 'r':  case 'n': case 't':
              break;
              // Allows escaped symbol \uXXXX
            case 'u':
              parser->pos++;
              for ( int i = 0; i < 4 && parser->pos < len && buf[parser->pos] != '\0'; i++ )
                {
                  // If it isn't a hex character we have an error
                  if ( !isxdigit((unsigned char) buf[parser->pos]) ) return NAMELIST_ERROR_INVAL;
                  parser->pos++;
                }
              // Step back so that the loop increment continues right
              // after the escape sequence
              parser->pos--;
              break;
              // Unexpected symbol
            default:
              return NAMELIST_ERROR_INVAL;
            }
        }
    }

  // End of buffer reached without a closing quote
  parser->pos = start;

  return NAMELIST_ERROR_PART;
}
// Converts the token in front of a ':' or '=' into a key.
// A NAMELIST_WORD token is accepted unchanged. A NAMELIST_STRING token first
// has leading and trailing white space trimmed (t->start and t->end are
// modified, also when an error is returned afterwards).
// Returns 0 on success and sets t->type to NAMELIST_KEY.
// Returns NAMELIST_ERROR_EMKEY if a string key is empty after trimming,
// NAMELIST_ERROR_INKEY if a string key contains white space, and
// NAMELIST_ERROR_INTYP if the token is neither a word nor a string.
static
int namelist_check_keyname(const char *buf, namelisttok_t *t)
{
  switch (t->type)
    {
    case NAMELIST_STRING:
      // Test the bounds before reading the buffer, and pass unsigned char
      // values to isspace(): a negative char value is undefined behaviour.
      while ( t->start < t->end && isspace((unsigned char) buf[t->start]) ) t->start++;
      while ( t->start < t->end && isspace((unsigned char) buf[t->end-1]) ) t->end--;
      if ( (t->end - t->start) < 1 ) return NAMELIST_ERROR_EMKEY;
      for ( int i = t->start; i < t->end; ++i )
        if ( isspace((unsigned char) buf[i]) ) return NAMELIST_ERROR_INKEY;
      // fall through: a valid string key is handled like a word
    case NAMELIST_WORD:
      t->type = NAMELIST_KEY;
      break;
    default:
      return NAMELIST_ERROR_INTYP;
    }

  return 0;
}
// Splits a NAMELIST buffer into tokens.
//   parser  parser that receives the tokens; scanning starts at parser->pos
//   buf     text to parse
//   len     number of bytes in buf; scanning also stops at a NUL byte
// '&' opens an object and '/' closes the most recently opened one, ':' and
// '=' turn the preceding token into a key, '#' and '!' start a comment that
// extends to the end of the line, '"' starts a string and anything else
// starts a word. Blanks, tabs and commas are skipped.
// parser->lineno is set to 1 at the start and incremented at every line
// break (LF, CR or CR LF).
// Returns 0 on success or the first error (a namelisterr value) encountered.
int namelist_parse(namelist_parser *parser, const char *buf, size_t len)
{
  int status = 0;

  parser->lineno = 1;

  for ( ; parser->pos < len && buf[parser->pos] != '\0'; parser->pos++ )
    {
      switch (buf[parser->pos])
        {
        case '&':
          namelist_new_object(parser);
          break;
        case '/':
          // Close the last token that has a start but no end yet
          for ( int i = (int) parser->toknext - 1; i >= 0; i-- )
            {
              namelisttok_t *token = &parser->tokens[i];
              if ( token->start != -1 && token->end == -1 )
                {
                  if ( token->type != NAMELIST_OBJECT ) return NAMELIST_ERROR_INOBJ;
                  token->end = parser->pos + 1;
                  break;
                }
            }
          break;
        case '\t': case ' ': case ',':
          break;
        case '\r':
          // Count CR LF as a single line break
          if ( parser->pos+1 < len && buf[parser->pos+1] == '\n' ) parser->pos++;
          // fall through
        case '\n':
          parser->lineno++;
          break;
        case '#': case '!':
          // Skip to end of line: stop on the last character of the comment,
          // so that the loop increment lands on the line break, on the NUL
          // byte or on the end of the buffer and never steps past them.
          while ( parser->pos+1 < len && buf[parser->pos+1] != '\0' &&
                  buf[parser->pos+1] != '\r' && buf[parser->pos+1] != '\n' )
            parser->pos++;
          break;
        case ':': case '=':
          // Without a preceding token there is no key to check; indexing
          // tokens[toknext-1] would access memory outside the token array.
          if ( parser->toknext == 0 )
            status = NAMELIST_ERROR_EMKEY;
          else
            status = namelist_check_keyname(buf, &parser->tokens[parser->toknext-1]);
          break;
        case '\"':
          status = namelist_parse_string(parser, buf, len);
          break;
        default:
          status = namelist_parse_word(parser, buf, len);
          break;
        }

      if ( status ) return status;
    }

  return status;
}
// Prints the number of tokens and one line per token to stdout.
//   parser  parser holding the tokens
//   buf     the buffer the tokens were parsed from
// Each line shows the token number, the token type and the text the token
// covers. The text of an object is cut after 80 characters. Tokens of an
// unknown type are listed with their number only.
void namelist_dump(namelist_parser *parser, const char *buf)
{
  unsigned int ntok = parser->toknext;

  printf("Number of tokens %u\n", ntok);

  for ( unsigned int it = 0; it < ntok; ++it )
    {
      const namelisttok_t *t = &parser->tokens[it];
      const char *label = NULL;
      const char *start = buf;
      int length = 0;

      // A token without a valid start or end (for example an object that
      // was never closed with '/') is printed with empty text. A negative
      // precision would make printf ignore the precision and read on until
      // the next NUL byte.
      if ( t->start >= 0 && t->end >= t->start )
        {
          start  = buf + t->start;
          length = t->end - t->start;
        }

      switch (t->type)
        {
        case NAMELIST_OBJECT:
          label = "NAMELIST";
          if ( length > 80 ) length = 80;
          break;
        case NAMELIST_KEY:    label = "KEY";    break;
        case NAMELIST_WORD:   label = "WORD";   break;
        case NAMELIST_STRING: label = "STRING"; break;
        default:                                break;
        }

      printf("Token %u", it+1);
      if ( label ) printf(" %s='%.*s'", label, length, start);
      printf("\n");
    }
}
#ifndef __NAMELIST_H_
#define __NAMELIST_H_

#include <stddef.h>  // size_t, so that this header can be included on its own

// Type of a NAMELIST token.
enum namelisttype {
  NAMELIST_UNDEFINED = 0,
  NAMELIST_OBJECT    = 1,
  NAMELIST_KEY       = 2,
  NAMELIST_STRING    = 3,
  NAMELIST_WORD      = 4
};

// Error codes returned by namelist_parse(); 0 means success.
enum namelisterr {
  NAMELIST_ERROR_INVAL = -1,  // Invalid character inside NAMELIST string/word
  NAMELIST_ERROR_PART  = -2,  // The string is not a full NAMELIST packet, more bytes expected
  NAMELIST_ERROR_INKEY = -3,  // Invalid character inside NAMELIST key
  NAMELIST_ERROR_INTYP = -4,  // Invalid NAMELIST key type
  NAMELIST_ERROR_INOBJ = -5,  // Invalid NAMELIST object
  NAMELIST_ERROR_EMKEY = -6   // Empty key name
};

// NAMELIST token description.
typedef struct {
  int type;   // type (object, key, string, word), see enum namelisttype
  int start;  // start position in NAMELIST buffer
  int end;    // end position in NAMELIST buffer
} namelisttok_t;

// NAMELIST parser state.
typedef struct {
  namelisttok_t *tokens;    // token array
  unsigned int num_tokens;  // number of allocated entries in tokens
  unsigned int toknext;     // index of the next unused token
  unsigned int pos;         // current position in NAMELIST buffer
  unsigned int lineno;      // line counter, set to 1 at the start of namelist_parse()
} namelist_parser;

// Creates a new parser; release it with namelist_destroy().
namelist_parser *namelist_new(void);

// Releases a parser and its tokens; a NULL parser is ignored.
void namelist_destroy(namelist_parser *parser);

// Parses len bytes of buf into tokens.
// Returns 0 on success or one of the namelisterr codes.
int namelist_parse(namelist_parser *parser, const char *buf, size_t len);

// Prints all tokens of the parser to stdout; buf is the parsed buffer.
void namelist_dump(namelist_parser *parser, const char *buf);

#endif // __NAMELIST_H_
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <sys/stat.h>
#include "namelist.h"
int main(int argc, char *argv[])
{
if ( argc != 2 )
{
fprintf(stderr, "Usage: %s namelist\n", argv[0]);
return -1;
}
const char *filename = argv[1];
printf("Parse namelist %s:\n", filename);
struct stat sbuf;
size_t filesize = (stat(filename, &sbuf) == 0) ? sbuf.st_size : 0;
if ( filesize == 0 )
{
fprintf(stderr, "Empty table file: %s\n", filename);
return -1;
}
FILE *fp = fopen(filename, "r");
if ( fp == NULL )
{
fprintf(stderr, "Open failed on %s: %s\n", filename, strerror(errno));
return -1;
}
char *buffer = (char*) malloc(filesize);
size_t nitems = fread(buffer, 1, filesize, fp);
fclose(fp);
if ( nitems != filesize )
{
fprintf(stderr, "Read failed on %s!\n", filename);
return -1;
}
namelist_parser *p = namelist_new();
int status = namelist_parse(p, buffer, filesize);
printf("Processed number of lines: %d\n", p->lineno-1);
if ( status != 0 )
{
switch (status)
{
case NAMELIST_ERROR_INVAL: fprintf(stderr, "Namelist error: Invalid character in %s (line=%d character='%c')!\n", filename, p->lineno, buffer[p->pos]); break;
case NAMELIST_ERROR_PART: fprintf(stderr, "Namelist error: End of string not found in %s (line=%d)!\n", filename, p->lineno); break;
case NAMELIST_ERROR_INKEY: fprintf(stderr, "Namelist error: Invalid key word in %s (line=%d)!\n", filename, p->lineno); break;
case NAMELIST_ERROR_INTYP: fprintf(stderr, "Namelist error: Invalid key word type in %s (line=%d)!\n", filename, p->lineno); break;
case NAMELIST_ERROR_INOBJ: fprintf(stderr, "Namelist error: Invalid object in %s (line=%d)!\n", filename, p->lineno); break;
case NAMELIST_ERROR_EMKEY: fprintf(stderr, "Namelsit error: Emtry key name in %s (line=%d)!\n", filename, p->lineno); break;
default: fprintf(stderr, "Namelsit error in %s (line=%d)!\n", filename, p->lineno); break;
}
}
namelist_dump(p, buffer);
namelist_destroy(p);
free(buffer);
return 0;
}
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment