/*
 * Copyright (c) 2009, 2010 Nhat Minh Lê
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#ifndef REGXML_H
#define REGXML_H

#include <sys/cdefs.h>
#include <sys/queue.h>

#include <limits.h>
#include <regex.h>
#include <stdlib.h>
#include <string.h>

#include <regxml/bitstring.h>
#include <regxml/buffer.h>
#include <regxml/errno.h>

/*
 * Option flags.  Each enumerator is a distinct single-bit value
 * (1, 2, 4, 8, 16).
 */
enum regxml_flags {
	REGXML_LONGEST = 1 << 0,
	REGXML_ENTITIZE = 1 << 1,
	REGXML_ICASE = 1 << 2,
	REGXML_MATCHSTEP = 1 << 3,
	REGXML_PDELIM = 1 << 4
};

/*
 * Operation code of a pattern node (the type of struct regxml_node's
 * rxn_type member).  Enumerators are numbered consecutively from zero;
 * the grouping remarks below are inferred from the names only --
 * TODO confirm against the pattern compiler.
 */
enum regxml_op {
	/* REGXML_F*: presumably field/function style accessors. */
	REGXML_FNODE = 0,
	REGXML_FTEXT,
	REGXML_FCOMMENT,
	REGXML_FPI,
	REGXML_FATTR,
	REGXML_FNAME,
	REGXML_FVALUE,
	REGXML_FRELPOS,

	/* Presumably arithmetic operators. */
	REGXML_ADD,
	REGXML_SUB,
	REGXML_MUL,
	REGXML_DIV,
	REGXML_MOD,

	/* Presumably comparison and regex (non-)match operators. */
	REGXML_LT,
	REGXML_GT,
	REGXML_LE,
	REGXML_GE,
	REGXML_EQ,
	REGXML_NE,
	REGXML_RE,
	REGXML_NR,

	REGXML_CONSTANT,

	/* Remaining node kinds. */
	REGXML_SUBROOT,
	REGXML_RELPOS1,
	REGXML_RELPOSN,
	REGXML_CAPTURE,
	REGXML_ANY,
	REGXML_CHILD0,
	REGXML_NEXT,
	REGXML_SELFL,
	REGXML_SELFR,
	REGXML_THEN,
	REGXML_THENX,		/* Compilation artefact. */
	REGXML_ELSE,
	REGXML_OR
};

/*
 * Type tag kept in the _type member of every union regxml_value
 * variant; one enumerator per variant of that union.
 */
enum regxml_vtype {
	REGXML_UNIT = 0,
	REGXML_NUMBER,
	REGXML_STRING,
	REGXML_REGEX,
	REGXML_XRANGE
};

/*
 * Kind of an XML element record (the type of struct regxml_elem's
 * rxe_type member).  REGXML_XFAIL and REGXML_XWAIT presumably tag the
 * regxml_elem_fail / regxml_elem_wait sentinels -- TODO confirm.
 */
enum regxml_xtype {
	REGXML_ROOT = 0,
	REGXML_NODE,
	REGXML_TEXT,
	REGXML_COMMENT,
	REGXML_PI,
	REGXML_XFAIL,
	REGXML_XWAIT
};

/* Largest value representable by regxml_id_t (an unsigned short). */
#define REGXML_ID_MAX USHRT_MAX

/* Identifier types and short names for the enumerations above. */
typedef unsigned short regxml_id_t;
typedef unsigned int regxml_xid_t;
typedef enum regxml_op regxml_op_t;
typedef enum regxml_vtype regxml_vtype_t;
typedef enum regxml_xtype regxml_xtype_t;

/*
 * Forward declarations of the aggregate types defined in this header,
 * so that they can refer to one another through pointers.
 */
union regxml_value;
struct regxml_node;
struct regxml_elem;
struct regxml_task;
struct regxml_vstack;
struct regxml_namepair;
struct regxml_namemap;
struct regxml;

/*
 * Tagged value as stored on a struct regxml_vstack and in pattern nodes.
 *
 * Every variant starts with the same _type member, so the tag can be
 * read through rxv_unit._type whichever variant is active (this is what
 * regxml_gettype() does).  The string and regex variants additionally
 * carry a _shared bit, which the *shared* helpers set to 1; presumably
 * it tells regxml_freeval() that _data is not owned by the value --
 * TODO confirm.
 */
union regxml_value {
	/* REGXML_UNIT: tag only, no payload. */
	struct {
		regxml_vtype_t _type;
	} rxv_unit;

	/* REGXML_NUMBER. */
	struct {
		regxml_vtype_t _type;
		double _data;
	} rxv_number;

	/* REGXML_STRING: _len is strlen(_data) as set by the init helpers. */
	struct {
		regxml_vtype_t _type;
		unsigned _shared: 1;
		char *_data;
		size_t _len;
	} rxv_string;

	/* REGXML_REGEX: pointer to a compiled POSIX regex. */
	struct {
		regxml_vtype_t _type;
		unsigned _shared: 1;
		regex_t *_data;
	} rxv_regex;

	/* REGXML_XRANGE: a pair of element pointers, both NULL or both set. */
	struct {
		regxml_vtype_t _type;
		struct regxml_elem *_start;
		struct regxml_elem *_end;
	} rxv_xrange;
};

/*
 * One node of a compiled pattern: an operation code, up to two child
 * links and an attached value.  The meaning of the individual members
 * is not visible from this header.
 */
struct regxml_node {
	regxml_id_t rxn_id;
	regxml_id_t rxn_mid;
	regxml_op_t rxn_type;
	struct regxml_node *rxn_left;
	struct regxml_node *rxn_right;
	union regxml_value rxn_value;
	unsigned rxn_nullable: 1;
};

/*
 * Record describing one element of the XML stream, linked to its
 * parent, first child and siblings.  Member purposes are not visible
 * from this header; the remarks below are inferred from names and are
 * marked as such.
 */
struct regxml_elem {
	size_t rxe_refcount;	/* Presumably managed by regxml_hold() and
				 * regxml_release() -- TODO confirm. */
	regxml_xid_t rxe_lowid;
	regxml_xid_t rxe_highid;
	regxml_xtype_t rxe_type;
	char *rxe_name;
	char *rxe_value;
	struct regxml_elem *rxe_parent;
	struct regxml_elem *rxe_child0;
	struct regxml_elem *rxe_prev;
	struct regxml_elem *rxe_next;
	char **rxe_attrv;
	regxml_xid_t rxe_nchild;
	regxml_xid_t rxe_nattr;
	regxml_xid_t rxe_pos;
	unsigned int rxe_lineno;
	regxml_bitstr_t *rxe_state;
	regxml_bitstr_t *rxe_cstate0;
	regxml_bitstr_t *rxe_nstate;
	struct regxml_elem **rxe_memory;
	unsigned rxe_cpending: 1;
	unsigned rxe_npending: 1;
};

/*
 * One entry of a task queue.  Instances are linked through rxt_entries
 * into SIMPLEQ lists such as the rx_tasks and rx_taskpool heads of
 * struct regxml.
 */
struct regxml_task {
	struct regxml_elem *rxt_current;
	struct regxml_node *rxt_node;
	regxml_xid_t rxt_state;
	struct regxml_task *rxt_return;
	SIMPLEQ_ENTRY (regxml_task) rxt_entries;
};

/*
 * Stack of union regxml_value slots used by the inline helpers below.
 *
 * rxvs_base points to the slot array, rxvs_top is the number of slots
 * currently in use and rxvs_size the capacity that the push helpers
 * assert against.  rxvs_reg is the handle passed to regxml_freeval()
 * when a slot is overwritten.
 */
struct regxml_vstack {
	struct regxml *rxvs_reg;
	union regxml_value *rxvs_base;
	int rxvs_size;
	int rxvs_top;
};

/*
 * Singly-linked list node carrying one name string.  Lists of these
 * nodes are headed by the regxml_namepairq heads declared in
 * struct regxml_namemap.
 */
struct regxml_namepair {
	char *rxnp_name;
	SLIST_ENTRY (regxml_namepair) rxnp_entries;
};

/*
 * Name table.  rxnm_array points to regxml_namepairq list heads
 * (presumably an array of hash buckets described by rxnm_size and
 * rxnm_width -- TODO confirm against the implementation).
 */
struct regxml_namemap {
	SLIST_HEAD (regxml_namepairq, regxml_namepair) *rxnm_array;
	size_t rxnm_size;
	size_t rxnm_width;
};

/*
 * Iteration state used with regxml_inititer() and regxml_fetchiter():
 * a current and an end element pointer plus an element id.
 */
struct regxml_iter {
	struct regxml_elem *rxi_ptr;
	struct regxml_elem *rxi_end;
	regxml_xid_t rxi_nextid;
};

/*
 * Main matcher handle, passed to nearly every function declared in
 * this header.  It bundles the element tree, the compiled pattern, the
 * task queues, the value stack and assorted flags.  Member purposes are
 * not visible from this header.
 */
struct regxml {
	struct regxml_elem *rx_tree;
	struct regxml_elem *rx_pointer;
	struct regxml_elem *rx_start;
	struct regxml_elem *rx_elempool;
	struct regxml_namemap rx_namemap;
	struct regxml_node *rx_pattern;
	struct regxml_node **rx_pvect;
	regxml_xid_t rx_elemid;
	regxml_xid_t rx_startid;
	regxml_id_t rx_psize;
	regxml_id_t rx_msize;
	unsigned int rx_lineno;
	SIMPLEQ_HEAD (regxml_taskq, regxml_task) rx_tasks, rx_taskpool;
	struct regxml_vstack rx_vstack;
	struct regxml_elem **rx_match;
	regxml_id_t rx_maxmatch;
	int rx_reflags;
	char *rx_scope;
	unsigned rx_smatch: 1;
	unsigned rx_dofetch: 1;
	unsigned rx_longest: 1;
	unsigned rx_matchstep: 1;
	unsigned rx_inscope: 1;
};

__BEGIN_DECLS
/*
 * Two distinguished element objects, defined outside this header.
 * Their addresses serve as the sentinel pointers REGXML_FAIL and
 * REGXML_WAIT; regxml_initxrange() asserts that neither is used as a
 * range endpoint.
 */
extern struct regxml_elem regxml_elem_fail, regxml_elem_wait;
#define REGXML_FAIL (&regxml_elem_fail)
#define REGXML_WAIT (&regxml_elem_wait)

/*
 * Library entry points implemented outside this header.  The group
 * captions below are inferred from the function names only -- TODO
 * confirm against the implementation.
 */

/* Pattern handling (presumably compile / release). */
int regxml_pattern_comp(struct regxml *, const char *, char **, int);
void regxml_pattern_free(struct regxml *);

/* Handle lifetime (presumably allocate / tear down a struct regxml). */
int regxml_create(struct regxml **, const char *, int);
void regxml_destroy(struct regxml *);

/* Error reporting. */
size_t regxml_strerror(struct regxml *, int, char *, size_t);

/* Presumably feed XML events into the matcher. */
int regxml_pushopening(struct regxml *, const char *, char * const *);
int regxml_pushclosing(struct regxml *);
int regxml_pushtext(struct regxml *, const char *);
int regxml_pushcomment(struct regxml *, const char *);
int regxml_pushpi(struct regxml *, const char *, const char *);
int regxml_end(struct regxml *);

/* Presumably matching control. */
int regxml_setscope(struct regxml *, const char *);
int regxml_match(struct regxml *, struct regxml_elem **);
int regxml_fetch(struct regxml *);
void regxml_reset(struct regxml *);

/*
 * Element range handling.  regxml_hold() is called by
 * regxml_initxrange() on a non-NULL (start, end) pair.
 */
void regxml_hold(struct regxml_elem *, struct regxml_elem *);
void regxml_release(struct regxml *,
    struct regxml_elem *, struct regxml_elem *);
void regxml_inititer(struct regxml_iter *,
    struct regxml_elem *, struct regxml_elem *);
int regxml_fetchiter(struct regxml_iter *);

/*
 * Value stack management.  regxml_cleanvstack() is the first call made
 * by every regxml_retpush*() helper.
 */
int regxml_initvstack(struct regxml_vstack *, struct regxml *, int);
int regxml_checkvstack(struct regxml_vstack *, int);
void regxml_cleanvstack(struct regxml_vstack *, int);
void regxml_freevstack(struct regxml_vstack *);

/*
 * Forward declarations of the static inline helpers that are defined
 * further down in this header.
 */

/* Raw slot access: look up, overwrite or push a whole value. */
static __inline union regxml_value *regxml_getval(struct regxml_vstack *,
    int);
static __inline void regxml_setval(struct regxml_vstack *, int,
    const union regxml_value *);
static __inline void regxml_setsharedval(struct regxml_vstack *, int,
    const union regxml_value *);
static __inline void regxml_pushval(struct regxml_vstack *,
    const union regxml_value *);
static __inline void regxml_pushsharedval(struct regxml_vstack *,
    const union regxml_value *);

/* Typed accessors; each asserts the expected type tag via _DIAGASSERT. */
static __inline regxml_vtype_t regxml_gettype(struct regxml_vstack *, int);
static __inline double regxml_getnumber(struct regxml_vstack *, int);
static __inline const char *regxml_getstring(struct regxml_vstack *, int);
static __inline size_t regxml_getlen(struct regxml_vstack *, int);
static __inline const regex_t *regxml_getregex(struct regxml_vstack *, int);
static __inline struct regxml_elem *regxml_getstart(struct regxml_vstack *,
    int);
static __inline struct regxml_elem *regxml_getend(struct regxml_vstack *,
    int);

/*
 * Value constructors: fill in a caller-provided union regxml_value.
 * regxml_freeval() is defined outside this header.
 */
void regxml_freeval(struct regxml *, union regxml_value *);
static __inline void regxml_initunit(union regxml_value *);
static __inline void regxml_initbool(union regxml_value *, int);
static __inline void regxml_initnumber(union regxml_value *, double);
static __inline int regxml_initstring(union regxml_value *,
    const char *);
static __inline void regxml_initsharedstring(union regxml_value *,
    char *);
static __inline int regxml_initregex(union regxml_value *,
    const char *, int);
static __inline void regxml_initxrange(union regxml_value *,
    struct regxml_elem *, struct regxml_elem *);

/* set*: build a value and store it in an existing slot (regxml_setval). */
static __inline void regxml_setunit(struct regxml_vstack *, int);
static __inline void regxml_setbool(struct regxml_vstack *, int, int);
static __inline void regxml_setnumber(struct regxml_vstack *, int, double);
static __inline int regxml_setstring(struct regxml_vstack *, int,
    const char *);
static __inline void regxml_setsharedstring(struct regxml_vstack *, int,
    char *);
static __inline int regxml_setregex(struct regxml_vstack *, int,
    const char *, int);
static __inline void regxml_setxrange(struct regxml_vstack *, int,
    struct regxml_elem *, struct regxml_elem *);

/* push*: build a value and push it on top of the stack (regxml_pushval). */
static __inline void regxml_pushunit(struct regxml_vstack *);
static __inline void regxml_pushbool(struct regxml_vstack *, int);
static __inline void regxml_pushnumber(struct regxml_vstack *, double);
static __inline int regxml_pushstring(struct regxml_vstack *, const char *);
static __inline void regxml_pushsharedstring(struct regxml_vstack *, char *);
static __inline int regxml_pushregex(struct regxml_vstack *,
    const char *, int);
static __inline void regxml_pushxrange(struct regxml_vstack *,
    struct regxml_elem *, struct regxml_elem *);

/* retpush*: call regxml_cleanvstack(), then the matching push* helper. */
static __inline void regxml_retpushunit(struct regxml_vstack *, int);
static __inline void regxml_retpushbool(struct regxml_vstack *, int, int);
static __inline void regxml_retpushnumber(struct regxml_vstack *, int,
    double);
static __inline int regxml_retpushstring(struct regxml_vstack *, int,
    const char *);
static __inline void regxml_retpushsharedstring(struct regxml_vstack *, int,
    char *);
static __inline int regxml_retpushregex(struct regxml_vstack *, int,
    const char *, int);
static __inline void regxml_retpushxrange(struct regxml_vstack *, int,
    struct regxml_elem *, struct regxml_elem *);
__END_DECLS

/*
 * Translate stack index _k into a pointer to the corresponding slot.
 *
 * A positive index is 1-based from the bottom of the stack (1 names
 * rxvs_base[0]); a negative index counts back from the top (-1 names
 * the most recently pushed value).  Zero maps to slot -1 and therefore
 * trips the range assertion.  Bounds are only checked by _DIAGASSERT.
 */
static __inline union regxml_value *
regxml_getval(struct regxml_vstack *_vs, int _k)
{
	int _slot;

	_DIAGASSERT(_vs != NULL);
	_slot = (_k < 0) ? _k + _vs->rxvs_top : _k - 1;
	_DIAGASSERT(_slot >= 0 && _slot < _vs->rxvs_top);
	return _vs->rxvs_base + _slot;
}

/*
 * Overwrite slot _k with a copy of *_val.  The value previously held
 * by the slot is handed to regxml_freeval() first.
 */
static __inline void
regxml_setval(struct regxml_vstack *_vs, int _k,
    const union regxml_value *_val)
{
	union regxml_value *const _slot = regxml_getval(_vs, _k);

	regxml_freeval(_vs->rxvs_reg, _slot);
	*_slot = *_val;
}

/*
 * Like regxml_setval(), but the copy stored in slot _k is flagged as
 * shared when it is a string or a regex.  Other value types are copied
 * unchanged.
 */
static __inline void
regxml_setsharedval(struct regxml_vstack *_vs, int _k,
    const union regxml_value *_val)
{
	union regxml_value *const _slot = regxml_getval(_vs, _k);

	regxml_freeval(_vs->rxvs_reg, _slot);
	*_slot = *_val;
	if (_val->rxv_unit._type == REGXML_STRING)
		_slot->rxv_string._shared = 1;
	else if (_val->rxv_unit._type == REGXML_REGEX)
		_slot->rxv_regex._shared = 1;
}

/*
 * Push a copy of *_val on top of the stack.  Capacity is checked by
 * _DIAGASSERT only; the stack is not grown here.
 */
static __inline void
regxml_pushval(struct regxml_vstack *_vs, const union regxml_value *_val)
{
	_DIAGASSERT(_vs != NULL);
	_DIAGASSERT(_vs->rxvs_top < _vs->rxvs_size);
	_vs->rxvs_base[_vs->rxvs_top] = *_val;
	_vs->rxvs_top++;
}

static __inline void
regxml_pushsharedval(struct regxml_vstack *_vs,
    const union regxml_value *_val)
{
	_DIAGASSERT(_vs != NULL);
	_DIAGASSERT(_vs->rxvs_top < _vs->rxvs_size);

	_vs->rxvs_base[_vs->rxvs_top] = *_val;
	switch (_val->rxv_unit._type) {
	case REGXML_STRING:
		_vs->rxvs_base[_vs->rxvs_top].rxv_string._shared = 1;
		break;
	case REGXML_REGEX:
		_vs->rxvs_base[_vs->rxvs_top].rxv_regex._shared = 1;
		break;
	default:
		break;
	}
	++_vs->rxvs_top;
}

static __inline regxml_vtype_t
regxml_gettype(struct regxml_vstack *_vs, int _k)
{
	return regxml_getval(_vs, _k)->rxv_unit._type;
}

static __inline double
regxml_getnumber(struct regxml_vstack *_vs, int _k)
{
	union regxml_value *_val;

	_val = regxml_getval(_vs, _k);
	_DIAGASSERT(_val->rxv_unit._type == REGXML_NUMBER);
	return _val->rxv_number._data;
}

static __inline const char *
regxml_getstring(struct regxml_vstack *_vs, int _k)
{
	union regxml_value *_val;

	_val = regxml_getval(_vs, _k);
	_DIAGASSERT(_val->rxv_unit._type == REGXML_STRING);
	return _val->rxv_string._data;
}

static __inline size_t
regxml_getlen(struct regxml_vstack *_vs, int _k)
{
	union regxml_value *_val;

	_val = regxml_getval(_vs, _k);
	_DIAGASSERT(_val->rxv_unit._type == REGXML_STRING);
	return _val->rxv_string._len;
}

static __inline const regex_t *
regxml_getregex(struct regxml_vstack *_vs, int _k)
{
	union regxml_value *_val;

	_val = regxml_getval(_vs, _k);
	_DIAGASSERT(_val->rxv_unit._type == REGXML_REGEX);
	return _val->rxv_regex._data;
}

static __inline struct regxml_elem *
regxml_getstart(struct regxml_vstack *_vs, int _k)
{
	union regxml_value *_val;

	_val = regxml_getval(_vs, _k);
	_DIAGASSERT(_val->rxv_unit._type == REGXML_XRANGE);
	return _val->rxv_xrange._start;
}

static __inline struct regxml_elem *
regxml_getend(struct regxml_vstack *_vs, int _k)
{
	union regxml_value *_val;

	_val = regxml_getval(_vs, _k);
	_DIAGASSERT(_val->rxv_unit._type == REGXML_XRANGE);
	return _val->rxv_xrange._end;
}

/* Make *_val the unit value: only the type tag is written. */
static __inline void
regxml_initunit(union regxml_value *_val)
{
	_DIAGASSERT(_val != NULL);
	(*_val).rxv_unit._type = REGXML_UNIT;
}

/*
 * Encode a boolean in *_val: a non-zero _b becomes the number 1.0,
 * zero becomes the unit value.
 */
static __inline void
regxml_initbool(union regxml_value *_val, int _b)
{
	if (_b)
		regxml_initnumber(_val, 1.0);
	else
		regxml_initunit(_val);
}

/* Make *_val a REGXML_NUMBER holding _d. */
static __inline void
regxml_initnumber(union regxml_value *_val, double _d)
{
	_DIAGASSERT(_val != NULL);
	(*_val).rxv_number._type = REGXML_NUMBER;
	(*_val).rxv_number._data = _d;
}

/*
 * Make *_val an owning (non-shared) REGXML_STRING holding a strdup()
 * copy of _s.
 *
 * Returns 0 on success or REGXML_ESYSTEM if the copy cannot be
 * allocated; in the latter case the tag and flag are set, _data is
 * NULL and _len is left unwritten.
 */
static __inline int
regxml_initstring(union regxml_value *_val, const char *_s)
{
	char *_copy;

	_DIAGASSERT(_val != NULL);
	_copy = strdup(_s);
	_val->rxv_string._type = REGXML_STRING;
	_val->rxv_string._shared = 0;
	_val->rxv_string._data = _copy;
	if (_copy == NULL)
		return REGXML_ESYSTEM;
	_val->rxv_string._len = strlen(_s);
	return 0;
}

/*
 * Make *_val a REGXML_STRING that refers to _s directly (no copy is
 * made) and is flagged as shared.  _s must be a valid NUL-terminated
 * string, since its length is measured with strlen().
 */
static __inline void
regxml_initsharedstring(union regxml_value *_val, char *_s)
{
	_DIAGASSERT(_val != NULL);
	_val->rxv_string._len = strlen(_s);
	_val->rxv_string._data = _s;
	_val->rxv_string._shared = 1;
	_val->rxv_string._type = REGXML_STRING;
}

/*
 * Make *_val an owning (non-shared) REGXML_REGEX holding the pattern
 * _s compiled with regcomp().  REG_EXTENDED is always added to _flags.
 *
 * Returns 0 on success, REGXML_ESYSTEM if the regex_t cannot be
 * allocated, or REGXML_EREGEX if regcomp() rejects the pattern.  On
 * failure *_val keeps the REGXML_REGEX tag with a NULL _data pointer,
 * so it never refers to freed memory.
 */
static __inline int
regxml_initregex(union regxml_value *_val, const char *_s, int _flags)
{
	regex_t *_re;

	_DIAGASSERT(_val != NULL);
	_val->rxv_regex._type = REGXML_REGEX;
	/*
	 * The value owns its regex.  Callers usually pass an uninitialised
	 * local, so the flag must be written explicitly (as
	 * regxml_initstring() does) rather than left indeterminate.
	 */
	_val->rxv_regex._shared = 0;
	_val->rxv_regex._data = NULL;

	_re = malloc(sizeof *_re);
	if (_re == NULL)
		return REGXML_ESYSTEM;
	if (regcomp(_re, _s, _flags | REG_EXTENDED) != 0) {
		free(_re);
		return REGXML_EREGEX;
	}
	_val->rxv_regex._data = _re;
	return 0;
}

/*
 * Make *_val a REGXML_XRANGE covering _start.._end.
 *
 * Both endpoints must be NULL (empty range) or both non-NULL, and
 * neither may be REGXML_FAIL or REGXML_WAIT; these conditions are
 * checked by _DIAGASSERT only.  A non-empty range is passed to
 * regxml_hold().
 */
static __inline void
regxml_initxrange(union regxml_value *_val,
    struct regxml_elem *_start, struct regxml_elem *_end)
{
	_DIAGASSERT(_val != NULL);
	_val->rxv_xrange._type = REGXML_XRANGE;
	_val->rxv_xrange._start = _start;
	_val->rxv_xrange._end = _end;
	_DIAGASSERT(_start != REGXML_FAIL && _start != REGXML_WAIT);
	_DIAGASSERT(_end != REGXML_FAIL && _end != REGXML_WAIT);
	_DIAGASSERT((_start == NULL) == (_end == NULL));
	if (_end == NULL)
		return;
	regxml_hold(_start, _end);
}

static __inline void
regxml_setunit(struct regxml_vstack *_vs, int _k)
{
	union regxml_value _value;

	regxml_initunit(&_value);
	regxml_setval(_vs, _k, &_value);
}

static __inline void
regxml_setbool(struct regxml_vstack *_vs, int _k, int _b)
{
	union regxml_value _value;

	regxml_initbool(&_value, _b);
	regxml_setval(_vs, _k, &_value);
}

static __inline void
regxml_setnumber(struct regxml_vstack *_vs, int _k, double _d)
{
	union regxml_value _value;

	regxml_initnumber(&_value, _d);
	regxml_setval(_vs, _k, &_value);
}

static __inline int
regxml_setstring(struct regxml_vstack *_vs, int _k, const char *_s)
{
	union regxml_value _value;
	int _r;

	_r = regxml_initstring(&_value, _s);
	if (_r != 0)
		return _r;
	regxml_setval(_vs, _k, &_value);
	return 0;
}

static __inline void
regxml_setsharedstring(struct regxml_vstack *_vs, int _k, char *_s)
{
	union regxml_value _value;

	regxml_initsharedstring(&_value, _s);
	regxml_setval(_vs, _k, &_value);
}

static __inline int
regxml_setregex(struct regxml_vstack *_vs, int _k,
    const char *_s, int _flags)
{
	union regxml_value _value;
	int _r;

	_r = regxml_initregex(&_value, _s, _flags);
	if (_r != 0)
		return _r;
	regxml_setval(_vs, _k, &_value);
	return 0;
}

static __inline void
regxml_setxrange(struct regxml_vstack *_vs, int _k,
    struct regxml_elem *_start, struct regxml_elem *_end)
{
	union regxml_value _value;

	regxml_initxrange(&_value, _start, _end);
	regxml_setval(_vs, _k, &_value);
}

static __inline void
regxml_pushunit(struct regxml_vstack *_vs)
{
	union regxml_value _value;

	regxml_initunit(&_value);
	regxml_pushval(_vs, &_value);
}

static __inline void
regxml_pushbool(struct regxml_vstack *_vs, int _b)
{
	union regxml_value _value;

	regxml_initbool(&_value, _b);
	regxml_pushval(_vs, &_value);
}

static __inline void
regxml_pushnumber(struct regxml_vstack *_vs, double _d)
{
	union regxml_value _value;

	regxml_initnumber(&_value, _d);
	regxml_pushval(_vs, &_value);
}

static __inline int
regxml_pushstring(struct regxml_vstack *_vs, const char *_s)
{
	union regxml_value _value;
	int _r;

	_r = regxml_initstring(&_value, _s);
	if (_r != 0)
		return _r;
	regxml_pushval(_vs, &_value);
	return 0;
}

static __inline void
regxml_pushsharedstring(struct regxml_vstack *_vs, char *_s)
{
	union regxml_value _value;

	regxml_initsharedstring(&_value, _s);
	regxml_pushval(_vs, &_value);
}

static __inline int
regxml_pushregex(struct regxml_vstack *_vs, const char *_s, int _flags)
{
	union regxml_value _value;
	int _r;

	_r = regxml_initregex(&_value, _s, _flags);
	if (_r != 0)
		return _r;
	regxml_pushval(_vs, &_value);
	return 0;
}

static __inline void
regxml_pushxrange(struct regxml_vstack *_vs,
    struct regxml_elem *_start, struct regxml_elem *_end)
{
	union regxml_value _value;

	regxml_initxrange(&_value, _start, _end);
	regxml_pushval(_vs, &_value);
}

/*
 * Call regxml_cleanvstack(_vs, _k), then push the unit value.
 * (What the cleanup discards is decided by regxml_cleanvstack(), which
 * is defined outside this header.)
 */
static __inline void
regxml_retpushunit(struct regxml_vstack *_vs, int _k)
{
	regxml_cleanvstack(_vs, _k);
	regxml_pushunit(_vs);
}

/*
 * Call regxml_cleanvstack(_vs, _k), then push the boolean encoding
 * of _b.
 */
static __inline void
regxml_retpushbool(struct regxml_vstack *_vs, int _k, int _b)
{
	regxml_cleanvstack(_vs, _k);
	regxml_pushbool(_vs, _b);
}

/* Call regxml_cleanvstack(_vs, _k), then push the number _d. */
static __inline void
regxml_retpushnumber(struct regxml_vstack *_vs, int _k, double _d)
{
	regxml_cleanvstack(_vs, _k);
	regxml_pushnumber(_vs, _d);
}

/*
 * Call regxml_cleanvstack(_vs, _k), then push an owning copy of _s.
 * Returns the result of regxml_pushstring(); the cleanup has already
 * happened even when that push fails.
 *
 * NOTE(review): _s is read after the cleanup, so it presumably must
 * not point into a stack value that regxml_cleanvstack() releases --
 * confirm against callers.
 */
static __inline int
regxml_retpushstring(struct regxml_vstack *_vs, int _k, const char *_s)
{
	int _err;

	regxml_cleanvstack(_vs, _k);
	_err = regxml_pushstring(_vs, _s);
	return _err;
}

/*
 * Call regxml_cleanvstack(_vs, _k), then push a shared string that
 * refers to _s directly.
 */
static __inline void
regxml_retpushsharedstring(struct regxml_vstack *_vs, int _k, char *_s)
{
	regxml_cleanvstack(_vs, _k);
	regxml_pushsharedstring(_vs, _s);
}

/*
 * Call regxml_cleanvstack(_vs, _k), then push the regex compiled from
 * _s.  Returns the result of regxml_pushregex(); the cleanup has
 * already happened even when that push fails.
 */
static __inline int
regxml_retpushregex(struct regxml_vstack *_vs, int _k,
    const char *_s, int _flags)
{
	int _err;

	regxml_cleanvstack(_vs, _k);
	_err = regxml_pushregex(_vs, _s, _flags);
	return _err;
}

/*
 * Call regxml_cleanvstack(_vs, _k), then push the element range
 * _start.._end.
 *
 * NOTE(review): the range is only held (inside regxml_pushxrange())
 * after the cleanup has run; confirm that callers keep the elements
 * alive across the cleanup.
 */
static __inline void
regxml_retpushxrange(struct regxml_vstack *_vs, int _k,
    struct regxml_elem *_start, struct regxml_elem *_end)
{
	regxml_cleanvstack(_vs, _k);
	regxml_pushxrange(_vs, _start, _end);
}

#endif	/* !REGXML_H */
