This commit is contained in:
nephacks
2025-06-04 03:22:50 +02:00
parent f234f23848
commit f12416cffd
14243 changed files with 6446499 additions and 26 deletions

View File

@@ -0,0 +1,89 @@
/*===========================================================================
bistream.h
see parsifal.h for copyright info
===========================================================================*/
#ifndef BISTREAM__H
#define BISTREAM__H
#include <stddef.h>
#include <limits.h>
#ifdef ICONV_SUPPORT
#include "iconv.h"
#endif
/* BYTE: raw octet type used by the stream API; can be pre-defined by the
   including project, otherwise it defaults to unsigned char. */
#ifndef BYTE
#define BYTE unsigned char
#endif
#ifndef COUNTBUFSIZE
/* COUNTBUFSIZE(cBytes, blocksize)
   Rounds cBytes up to a whole multiple of blocksize. A request for 0 bytes
   still yields one full block. Both arguments are evaluated more than once,
   so pass expressions without side effects. */
#define COUNTBUFSIZE(cBytes, blocksize) \
	(((cBytes) == 0) \
		? (blocksize) \
		: ((((cBytes) % (blocksize)) == 0) \
			? (int)(cBytes) \
			: (int)((((cBytes) / (blocksize)) + 1) * (blocksize))))
#endif
/* Default value for the stream's maximum buffer size: the largest int. */
#define BIS_DEFAULT_MAXBUFSIZE INT_MAX
/* Default block size in bytes.
   NOTE(review): presumably the unit the buffer grows by (see COUNTBUFSIZE)
   -- confirm in bistream.c. */
#define BIS_DEFAULT_BLOCKSIZE 512
/* NOTE(review): looks like the maximum number of bytes one character may
   occupy during encoding -- confirm against the encode callbacks. */
#define BIS_CHAR_MAX 16
/* Status codes of the BufferedIStream API. All error codes are negative
   (the block starts at -40); BIS_EOF is the only positive code. */
enum BIS_ERRORS {
	BIS_ERR_MEMALLOC   = -40,
	BIS_ERR_MAXBUF     = -39,
	BIS_ERR_INVALIDARG = -38,
	BIS_ERR_ENCODING   = -37,
	BIS_ERR_INPUT      = -36,
	BIS_EOF            = 1
};
/* Input source callback. It is asked for up to cBytes bytes, which it writes
   to buf, storing the number actually delivered in *cBytesActual; inputData
   is the caller-supplied context pointer. The memory input source in
   isrcmem.h returns 0 while more data remains and 1 once the input is
   exhausted.
   NOTE(review): other return values are presumably treated as errors --
   confirm in bistream.c. */
typedef int (*LPFNINPUTSRC)(BYTE *buf, int cBytes, int *cBytesActual, void *inputData);
/* State of one buffered input stream.
   The parser reads characters from buf + pos and requests more input once
   pos reaches bytesavail; the parser casts userdata to its per-reader data
   (LPBISREADERDATA in xmldef.h).
   NOTE(review): the roles noted as "presumably" below are inferred from the
   field names only -- confirm against bistream.c. */
typedef struct tagBUFFEREDISTREAM
{
/* buffer the parser reads from */
BYTE *buf;
/* presumably raw input that has not been run through encode yet */
BYTE *inbuf;
/* presumably the allocated size of buf */
int bufsize;
/* presumably the upper limit for bufsize (cf. BIS_ERR_MAXBUF) */
int maxbufsize;
int blocksize;
/* number of valid bytes in buf; reading at or past this needs more input */
int bytesavail;
/* current read offset into buf */
int pos;
int eof;
int err;
/* presumably the errno-style code set by encode (see xmlcfg.h) */
int encerr;
int inbufrest;
/* owner-defined data; the parser stores its BISREADERDATA here */
void *userdata;
/* context pointer handed to inputsrc */
void *inputData;
LPFNINPUTSRC inputsrc;
/* encoding conversion callback; same signature as LPFNENCODE */
size_t (*encode) (struct tagBUFFEREDISTREAM *reader, const BYTE **inbuf, size_t *inbytesleft, BYTE **outbuf, size_t *outbytesleft);
#ifdef ICONV_SUPPORT
/* iconv conversion descriptor, only present when built with iconv */
iconv_t cd;
#endif
} BUFFEREDISTREAM, *LPBUFFEREDISTREAM;
/* Encoding conversion callback type. The parameter list mirrors iconv():
   in/out buffer cursors plus the bytes left in each.
   NOTE(review): presumably the cursors and counts are advanced like iconv()
   does and failures are reported through r->encerr -- confirm. */
typedef size_t (*LPFNENCODE) (LPBUFFEREDISTREAM r, const BYTE **inbuf, size_t *inbytesleft, BYTE **outbuf, size_t *outbytesleft);
#ifdef __cplusplus
extern "C" {
#endif
/* BufferedIStream API. Implementations are not part of this header; the
   notes below are limited to what callers in this code base show.
   - BufferedIStream_Peek: the parser calls it with tok == NULL, len == 1 and
     offset == 0 to make sure at least one more byte is buffered. A nonzero
     result means no more input is available, and a result below -2 is
     treated as a fatal reader error (see EINPUT in xmldef.h).
   - NOTE(review): the int results presumably use the BIS_ERRORS codes --
     confirm in bistream.c. */
int BufferedIStream_EncodeBuffer(LPBUFFEREDISTREAM r);
int BufferedIStream_Peek(LPBUFFEREDISTREAM r,
const BYTE *tok,
int len,
int offset);
int BufferedIStream_ResetBuf(LPBUFFEREDISTREAM r,
int numBytes);
LPBUFFEREDISTREAM BufferedIStream_Init(LPBUFFEREDISTREAM r,
int blocksize);
int BufferedIStream_AppendBytes(LPBUFFEREDISTREAM r,
const BYTE *bytes,
int len);
void BufferedIStream_Free(LPBUFFEREDISTREAM r);
#ifdef __cplusplus
}
#endif /* __cplusplus */
#endif /* BISTREAM__H */

61
public/parsifal/isrcmem.h Normal file
View File

@@ -0,0 +1,61 @@
/*===========================================================================
isrcmem.h
Provides helper macros and typedefs for parsing memory buffers
see parsifal.h for copyright info
USAGE
declare inputsource handler function (note param names
must be exactly buf, cBytes etc.):
int MemInputsrc(BYTE *buf, int cBytes, int *cBytesActual, void *inputData);
int MemInputsrc(BYTE *buf, int cBytes, int *cBytesActual, void *inputData)
{
XMLMEMINPUTSRC_HANDLE
}
.....
LPXMLPARSER parser;
XMLMEMINPUTSRC meminput;
char *xml = "<root><child1/><child2>text</child2></root>";
XMLMEMINPUTSRC_INIT(&meminput, xml, strlen(xml));
XMLParser_Create(&parser);
XMLParser_Parse(parser, MemInputsrc, &meminput, NULL);
.....
===========================================================================*/
#ifndef ISRCMEM__H
#define ISRCMEM__H
/* Cursor over an in-memory document, passed as inputData to the input
   source handler built from XMLMEMINPUTSRC_HANDLE. */
typedef struct tagXMLMEMINPUTSRC
{
/* start of the memory block to parse */
BYTE *pBuf;
/* total size of the block in bytes */
unsigned long cTotal;
/* number of bytes already handed to the parser (read offset) */
unsigned long cBytes;
} XMLMEMINPUTSRC, *LPXMLMEMINPUTSRC;
/* XMLMEMINPUTSRC_INIT(lpMemISrc, buf, size)
   Points the memory input source at buf (size bytes) and resets its read
   offset to 0.
   - lpMemISrc is parenthesized before the cast so that pointer expressions
     such as (base + offset) address the intended object.
   - buf is cast to BYTE* so that a plain char* document can be passed
     without a pointer-signedness diagnostic.
   lpMemISrc is evaluated three times; pass an expression without side
   effects. */
#define XMLMEMINPUTSRC_INIT(lpMemISrc,buf,size) \
	(((LPXMLMEMINPUTSRC)(lpMemISrc))->cTotal = (size), \
	 ((LPXMLMEMINPUTSRC)(lpMemISrc))->cBytes = 0, \
	 ((LPXMLMEMINPUTSRC)(lpMemISrc))->pBuf = (BYTE*)(buf))
/* XMLMEMINPUTSRC_HANDLE
   Complete body of an input source handler whose parameters are named
   buf, cBytes, cBytesActual and inputData. It copies the next chunk of the
   memory block into buf and returns 0 while more data remains; when the
   request reaches or passes the end of the block it copies whatever is left
   (possibly nothing) and returns 1. Requires <string.h> for memcpy. */
#define XMLMEMINPUTSRC_HANDLE \
	{ \
		LPXMLMEMINPUTSRC memSrc_ = (LPXMLMEMINPUTSRC)inputData; \
		if ((memSrc_->cBytes + cBytes) >= memSrc_->cTotal) { \
			/* last chunk: hand over the remainder and signal end of input */ \
			*cBytesActual = memSrc_->cTotal - memSrc_->cBytes; \
			if (*cBytesActual != 0) { \
				memcpy(buf, memSrc_->pBuf + memSrc_->cBytes, *cBytesActual); \
				memSrc_->cBytes += *cBytesActual; \
			} \
			return 1; \
		} \
		memcpy(buf, memSrc_->pBuf + memSrc_->cBytes, cBytes); \
		*cBytesActual = cBytes; \
		memSrc_->cBytes += cBytes; \
		return 0; \
	}
#endif /* ISRCMEM__H */

257
public/parsifal/nametab.h Normal file
View File

@@ -0,0 +1,257 @@
/*===========================================================================
nametab.h
this header contains ASCII and UTF-8 tables for qualified
name checking + macros for comparing multibyte UTF-8 sequences:
UTF8_GET_NAMING2 and UTF8_GET_NAMING3
UTF8_GET_NAMING... and UTF-8 tables are stolen from EXPAT
note: DO NOT increment parameters in these macro calls i.e.
ISMAPCH(whitespace, *c++) will return invalid value
memory usage:
namingBitmap (16x80): 1280 bytes
nmstrtPages & namePages (2x8x32) 512 bytes
4 ascii tables (4x32) 128 bytes
---------------------------------------------------
total 1920 bytes
TODO: fix ascii tables (separate to pages to save space)
see parsifal.h for copyright info
===========================================================================*/
#ifndef NAMETAB__H
#define NAMETAB__H
#include "xmlcfg.h" /* for UINT32 */
/* ascii tables generated by GENMAPS.C */
/* 256-bit map over byte values (32 bytes x 8 bits): character c is a member
   when bit (c & 7) of element (c >> 3) is set. Members are the ASCII
   characters that may start a name. */
static const XMLCH nameStartAscii[32] = {
0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x04,
0xFE, 0xFF, 0xFF, 0x87,
0xFE, 0xFF, 0xFF, 0x07,
0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00
}; /* ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_: */
/* 256-bit map (same layout: bit (c & 7) of element (c >> 3)) of the ASCII
   characters allowed inside a name: the name start set plus digits,
   '.' and '-'. */
static const XMLCH nameAscii[32] = {
0x00, 0x00, 0x00, 0x00,
0x00, 0x60, 0xFF, 0x07,
0xFE, 0xFF, 0xFF, 0x87,
0xFE, 0xFF, 0xFF, 0x07,
0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00
}; /* ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_:0123456789.- */
/* 256-bit map (bit (c & 7) of element (c >> 3)) of the whitespace
   characters: 0x9 (tab), 0xA (LF), 0xD (CR) and 0x20 (space). */
static const XMLCH whitespace[32] = {
0x00, 0x26, 0x00, 0x00,
0x01, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00
};
/* 256-bit map (bit (c & 7) of element (c >> 3)) of the control bytes that
   are illegal in a document; consulted through ISILLBYTE. */
static const XMLCH illByte[32] = {
0xFF, 0xD9, 0xFF, 0xFF,
0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00
}; /* from \0x0 to \0x1f excluding whitespace chars 0x9, 0xA, 0xD */
/* Name-character tests for multibyte UTF-8 sequences. c points at the lead
   byte; the "2" variants look at a 2-byte sequence, the "3" variants at a
   3-byte sequence. isName* consult namePages, isNmstrt* consult nmstrtPages
   (characters allowed at the start of a name). The result is nonzero when
   the character is a member. c is evaluated several times. */
#define utf8_isName2(c) (UTF8_GET_NAMING2(namePages, (c)))
#define utf8_isName3(c) (UTF8_GET_NAMING3(namePages, (c)))
#define utf8_isNmstrt2(c) (UTF8_GET_NAMING2(nmstrtPages, (c)))
#define utf8_isNmstrt3(c) (UTF8_GET_NAMING3(nmstrtPages, (c)))
/* the rest is stolen from EXPAT: */
/* A 2 byte UTF-8 representation splits the characters 11 bits
between the bottom 5 and 6 bits of the bytes.
We need 8 bits to index into pages, 3 bits to add to that index and
5 bits to generate the mask.
The mask is built from an unsigned 1: the shift count can be 31, and
shifting a signed 1 into the sign bit is undefined behavior.
byte is evaluated several times; do not pass an expression with side
effects. */
#define UTF8_GET_NAMING2(pages, byte) \
(namingBitmap[((pages)[(((byte)[0]) >> 2) & 7] << 3) \
+ ((((byte)[0]) & 3) << 1) \
+ ((((byte)[1]) >> 5) & 1)] \
& (1u << (((byte)[1]) & 0x1F)))
/* A 3 byte UTF-8 representation splits the characters 16 bits
between the bottom 4, 6 and 6 bits of the bytes.
We need 8 bits to index into pages, 3 bits to add to that index and
5 bits to generate the mask.
The mask is built from an unsigned 1: the shift count can be 31, and
shifting a signed 1 into the sign bit is undefined behavior.
byte is evaluated several times; do not pass an expression with side
effects. */
#define UTF8_GET_NAMING3(pages, byte) \
(namingBitmap[((pages)[((((byte)[0]) & 0xF) << 4) \
+ ((((byte)[1]) >> 2) & 0xF)] \
<< 3) \
+ ((((byte)[1]) & 3) << 1) \
+ ((((byte)[2]) >> 5) & 1)] \
& (1u << (((byte)[2]) & 0x1F)))
/* Name-character bitmap shared by nmstrtPages and namePages.
   320 words of 32 bits, addressed in groups of 8 words: an entry of a pages
   table, multiplied by 8, selects the group; UTF8_GET_NAMING2/3 then add a
   3-bit word offset and test one bit (0..31) of that word. Group 0 is all
   zero (no character matches) and group 1 is all ones (every character
   matches). */
static const UINT32 namingBitmap[] = {
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
0x00000000, 0x04000000, 0x87FFFFFE, 0x07FFFFFE,
0x00000000, 0x00000000, 0xFF7FFFFF, 0xFF7FFFFF,
0xFFFFFFFF, 0x7FF3FFFF, 0xFFFFFDFE, 0x7FFFFFFF,
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFE00F, 0xFC31FFFF,
0x00FFFFFF, 0x00000000, 0xFFFF0000, 0xFFFFFFFF,
0xFFFFFFFF, 0xF80001FF, 0x00000003, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0xFFFFD740, 0xFFFFFFFB, 0x547F7FFF, 0x000FFFFD,
0xFFFFDFFE, 0xFFFFFFFF, 0xDFFEFFFF, 0xFFFFFFFF,
0xFFFF0003, 0xFFFFFFFF, 0xFFFF199F, 0x033FCFFF,
0x00000000, 0xFFFE0000, 0x027FFFFF, 0xFFFFFFFE,
0x0000007F, 0x00000000, 0xFFFF0000, 0x000707FF,
0x00000000, 0x07FFFFFE, 0x000007FE, 0xFFFE0000,
0xFFFFFFFF, 0x7CFFFFFF, 0x002F7FFF, 0x00000060,
0xFFFFFFE0, 0x23FFFFFF, 0xFF000000, 0x00000003,
0xFFF99FE0, 0x03C5FDFF, 0xB0000000, 0x00030003,
0xFFF987E0, 0x036DFDFF, 0x5E000000, 0x001C0000,
0xFFFBAFE0, 0x23EDFDFF, 0x00000000, 0x00000001,
0xFFF99FE0, 0x23CDFDFF, 0xB0000000, 0x00000003,
0xD63DC7E0, 0x03BFC718, 0x00000000, 0x00000000,
0xFFFDDFE0, 0x03EFFDFF, 0x00000000, 0x00000003,
0xFFFDDFE0, 0x03EFFDFF, 0x40000000, 0x00000003,
0xFFFDDFE0, 0x03FFFDFF, 0x00000000, 0x00000003,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0xFFFFFFFE, 0x000D7FFF, 0x0000003F, 0x00000000,
0xFEF02596, 0x200D6CAE, 0x0000001F, 0x00000000,
0x00000000, 0x00000000, 0xFFFFFEFF, 0x000003FF,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0xFFFFFFFF, 0xFFFF003F, 0x007FFFFF,
0x0007DAED, 0x50000000, 0x82315001, 0x002C62AB,
0x40000000, 0xF580C900, 0x00000007, 0x02010800,
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
0x0FFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x03FFFFFF,
0x3F3FFFFF, 0xFFFFFFFF, 0xAAFF3F3F, 0x3FFFFFFF,
0xFFFFFFFF, 0x5FDFFFFF, 0x0FCF1FDC, 0x1FDC1FFF,
0x00000000, 0x00004C40, 0x00000000, 0x00000000,
0x00000007, 0x00000000, 0x00000000, 0x00000000,
0x00000080, 0x000003FE, 0xFFFFFFFE, 0xFFFFFFFF,
0x001FFFFF, 0xFFFFFFFE, 0xFFFFFFFF, 0x07FFFFFF,
0xFFFFFFE0, 0x00001FFF, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
0xFFFFFFFF, 0x0000003F, 0x00000000, 0x00000000,
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
0xFFFFFFFF, 0x0000000F, 0x00000000, 0x00000000,
0x00000000, 0x07FF6000, 0x87FFFFFE, 0x07FFFFFE,
0x00000000, 0x00800000, 0xFF7FFFFF, 0xFF7FFFFF,
0x00FFFFFF, 0x00000000, 0xFFFF0000, 0xFFFFFFFF,
0xFFFFFFFF, 0xF80001FF, 0x00030003, 0x00000000,
0xFFFFFFFF, 0xFFFFFFFF, 0x0000003F, 0x00000003,
0xFFFFD7C0, 0xFFFFFFFB, 0x547F7FFF, 0x000FFFFD,
0xFFFFDFFE, 0xFFFFFFFF, 0xDFFEFFFF, 0xFFFFFFFF,
0xFFFF007B, 0xFFFFFFFF, 0xFFFF199F, 0x033FCFFF,
0x00000000, 0xFFFE0000, 0x027FFFFF, 0xFFFFFFFE,
0xFFFE007F, 0xBBFFFFFB, 0xFFFF0016, 0x000707FF,
0x00000000, 0x07FFFFFE, 0x0007FFFF, 0xFFFF03FF,
0xFFFFFFFF, 0x7CFFFFFF, 0xFFEF7FFF, 0x03FF3DFF,
0xFFFFFFEE, 0xF3FFFFFF, 0xFF1E3FFF, 0x0000FFCF,
0xFFF99FEE, 0xD3C5FDFF, 0xB080399F, 0x0003FFCF,
0xFFF987E4, 0xD36DFDFF, 0x5E003987, 0x001FFFC0,
0xFFFBAFEE, 0xF3EDFDFF, 0x00003BBF, 0x0000FFC1,
0xFFF99FEE, 0xF3CDFDFF, 0xB0C0398F, 0x0000FFC3,
0xD63DC7EC, 0xC3BFC718, 0x00803DC7, 0x0000FF80,
0xFFFDDFEE, 0xC3EFFDFF, 0x00603DDF, 0x0000FFC3,
0xFFFDDFEC, 0xC3EFFDFF, 0x40603DDF, 0x0000FFC3,
0xFFFDDFEC, 0xC3FFFDFF, 0x00803DCF, 0x0000FFC3,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0xFFFFFFFE, 0x07FF7FFF, 0x03FF7FFF, 0x00000000,
0xFEF02596, 0x3BFF6CAE, 0x03FF3F5F, 0x00000000,
0x03000000, 0xC2A003FF, 0xFFFFFEFF, 0xFFFE03FF,
0xFEBF0FDF, 0x02FE3FFF, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x1FFF0000, 0x00000002,
0x000000A0, 0x003EFFFE, 0xFFFFFFFE, 0xFFFFFFFF,
0x661FFFFF, 0xFFFFFFFE, 0xFFFFFFFF, 0x77FFFFFF,
};
/* Page table for name START characters: 256 entries. UTF8_GET_NAMING2/3
   derive the entry index from the upper bits of the character and use the
   entry value, times 8, as the base index into namingBitmap. Entry value
   0x00 selects the all-zero group (no character on that page matches),
   0x01 the all-ones group (every character matches). */
static const unsigned char nmstrtPages[] = {
0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x00,
0x00, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
0x10, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x12, 0x13,
0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x15, 0x16, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01,
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x17,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x01, 0x01,
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x18,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
};
/* Page table for name characters (any position): 256 entries with the same
   layout as nmstrtPages. The entry value, times 8, is the base index into
   namingBitmap; 0x00 selects the all-zero group and 0x01 the all-ones
   group. */
static const unsigned char namePages[] = {
0x19, 0x03, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x00,
0x00, 0x1F, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25,
0x10, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x12, 0x13,
0x26, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x27, 0x16, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01,
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x17,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x01, 0x01,
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x18,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
};
#endif /* NAMETAB__H */

127
public/parsifal/optcfg.h Normal file
View File

@@ -0,0 +1,127 @@
/*===========================================================================
optcfg.h
This header contains optimization settings for Parsifal library
TODO
A lot. Currently contains only inline versions of some routines to be
selectively replaced in the critical areas of code. And yes, these
critical areas are identified by running profiler ;-)
===========================================================================*/
/* INLINE: the strongest "always inline" specifier the compiler offers
   (MSVC first, then GCC-compatible compilers), falling back to plain
   inline elsewhere. */
#if defined(_MSC_VER)
#define INLINE __forceinline
#elif defined(__GNUC__)
#define INLINE __inline__ __attribute__((always_inline))
#else
#define INLINE inline
#endif
/* GROWSBUF_OPT(l)
   Makes sure the string buffer named sbuf in the enclosing function has
   room for l more characters; on allocation failure the enclosing function
   returns (XMLCH*)NULL.
   - While the buffer lives in its pool item and the new length no longer
     fits into the item, the content is moved to a malloc'ed block and the
     item is returned to the pool.
   - Otherwise the heap block is grown with realloc.
   The new capacity is a multiple of sbuf->blocksize (COUNTBUFSIZE).
   The new block is assigned only after the allocation succeeded, so a
   failed malloc/realloc leaves sbuf unchanged: the old storage stays
   reachable and is released by the normal cleanup path instead of being
   lost. */
#define GROWSBUF_OPT(l) \
if (sbuf->usePool) { \
	if ((sbuf->len + (l)) > sbuf->pool->itemSize) { \
		int newCap_ = COUNTBUFSIZE((sbuf->len + (l)), sbuf->blocksize); \
		XMLCH *newStr_ = (XMLCH*)malloc(newCap_ * sizeof(XMLCH)); \
		if (!newStr_) return ((XMLCH*)NULL); \
		memcpy(newStr_, sbuf->str, sbuf->len); \
		XMLPool_Free(sbuf->pool, sbuf->str); \
		sbuf->str = newStr_; \
		sbuf->capacity = newCap_; \
		sbuf->usePool = 0; \
	} \
} \
else if ((sbuf->len + (l)) > sbuf->capacity) { \
	int newCap_ = COUNTBUFSIZE((sbuf->len + (l)), sbuf->blocksize); \
	XMLCH *newStr_ = (XMLCH*)realloc(sbuf->str, newCap_ * sizeof(XMLCH)); \
	if (!newStr_) return ((XMLCH*)NULL); \
	sbuf->str = newStr_; \
	sbuf->capacity = newCap_; \
}
/* Appends len characters from str to sbuf, growing the buffer first when
   necessary (GROWSBUF_OPT returns NULL from this function on allocation
   failure). Returns the buffer's string pointer, which may have changed. */
static INLINE XMLCH *XMLStringbuf_Append_Opt(LPXMLSTRINGBUF sbuf, XMLCH *str, int len)
{
	GROWSBUF_OPT(len);
	if (len != 1) {
		memcpy(sbuf->str + sbuf->len, str, len);
		sbuf->len += len;
		return (sbuf->str);
	}
	/* single character: a plain store gives a slight performance gain */
	sbuf->str[sbuf->len] = *str;
	sbuf->len++;
	return (sbuf->str);
}
/* ReadCh_Opt: inline character reader.
   Returns a pointer to the next character in the current reader's buffer,
   stores its length in bytes in *chSize and advances the read position.
   Returns NULL with *chSize set to 0 when no more input is available, on a
   fatal input error, or when an illegal character is found (ErP_ is then
   called with ERR_XMLP_ILLEGAL_CHAR).
   A CR is overwritten with LF in the buffer and an LF directly following it
   is skipped, so CR and CR LF are both delivered as a single LF.
   The reader data's line/col counters are kept up to date; col advances by
   bytes, not by characters.
   With DTD_SUPPORT the function can instead hand over to ReadPERefEnd /
   ReadPERefStart (defined elsewhere). */
static INLINE XMLCH *ReadCh_Opt(LPXMLPARSER parser, int *chSize)
{
XMLCH *c;
int ret;
/* buffer missing or exhausted: ask the stream for at least one more byte */
if (!PREADER->buf || PREADER->pos >= PREADER->bytesavail) {
ret = PEEKINPUT((const BYTE*)NULL, 1);
/* fatal error (recorded by EINPUT) or any other nonzero status: nothing to read */
if (EINPUT(ret) || ret) {
#ifdef DTD_SUPPORT
/* no error so far and the exhausted reader is the one on top of the
   parameter entity stack: let ReadPERefEnd take over */
if (RT->dtd && !parser->ErrorCode &&
RT->dtd->peStack->length &&
((*((LPBUFFEREDISTREAM*)STACK_PEEK(RT->dtd->peStack)))) == PREADER)
return ReadPERefEnd(parser, chSize);
else {
#endif
*chSize = 0;
return (XMLCH*)NULL;
#ifdef DTD_SUPPORT
}
#endif
}
}
c = PREADER->buf+PREADER->pos;
/* sequence length is derived from the lead byte */
UTF8LEN(c,*chSize);
if (*chSize == 1) {
if (ISILLBYTE(*c)) {
*chSize = 0;
ErP_(parser, ERR_XMLP_ILLEGAL_CHAR, 1);
return (XMLCH*)NULL;
}
PREADER->pos++;
if (*c == 0xD) {
PREADERDATA->line++;
PREADERDATA->col=0;
/* normalize CR to LF in place */
PREADER->buf[PREADER->pos-1] = 0xA;
if (PREADER->pos >= PREADER->bytesavail) {
/* CR was the last buffered byte: fetch more to look for a following LF */
ret = PEEKINPUT((const BYTE*)NULL, 1);
if (EINPUT(ret)) {
*chSize = 0;
return (XMLCH*)NULL;
}
/* recompute c after the peek (NOTE(review): presumably because the peek
   may move the buffer -- confirm in bistream.c) */
c = PREADER->buf+(PREADER->pos-1);
/* nothing follows the CR: deliver the normalized LF */
if (ret) return(c);
}
/* NOTE(review): CURCHAR is defined elsewhere, presumably the byte at the
   current position; an LF following the CR is skipped */
if (CURCHAR == 0xA) PREADER->pos++;
}
else if (*c == 0xA) {
PREADERDATA->line++;
PREADERDATA->col=0;
}
#ifdef DTD_SUPPORT
/* '%' while parameter entity expansion is enabled: hand over to ReadPERefStart */
else if (*c == '%' && RT->dtd && RT->dtd->expandPEs) {
PREADERDATA->col++;
c = ReadPERefStart(parser, chSize);
}
#endif
else PREADERDATA->col++;
}
else {
/* multibyte sequence: reject the code points flagged by UTF8_ISILL3/4 */
if (*chSize == 3 && UTF8_ISILL3(c)) {
*chSize = 0;
ErP_(parser, ERR_XMLP_ILLEGAL_CHAR, 0);
return (XMLCH*)NULL;
}
else if (*chSize == 4 && UTF8_ISILL4(c)) {
*chSize = 0;
ErP_(parser, ERR_XMLP_ILLEGAL_CHAR, 0);
return (XMLCH*)NULL;
}
PREADER->pos += *chSize;
PREADERDATA->col += *chSize;
}
return(c);
}

253
public/parsifal/parsifal.h Normal file
View File

@@ -0,0 +1,253 @@
/*===========================================================================
Parsifal XML Parser
Copyright (c) 2002-2004 Toni Uusitalo
released to the public domain 2002-11-15
http://www.saunalahti.fi/~samiuus/toni/xmlproc/
Parsifal is free for both commercial and non-commercial use and
redistribution, provided that author's copyright and disclaimer are
retained intact. You are free to modify Parsifal for your own use and
to redistribute Parsifal with your modifications, provided that the
modifications are clearly documented.
DISCLAIMER
----------
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
Merchantability or fitness for a particular purpose. Please use it AT
YOUR OWN RISK.
===========================================================================*/
#ifndef PARSIFAL__H
#define PARSIFAL__H
#ifdef __cplusplus
extern "C" {
#endif
#include "bistream.h"
#include "xmlhash.h"
#include "xmlvect.h"
#include "xmlsbuf.h"
#include "xmlpool.h"
/* XMLCH: character unit of all parser strings (an unsigned byte). Guarded
   by XMLCH_DEFINED so that it is declared only once. */
#ifndef XMLCH_DEFINED
#define XMLCH_DEFINED
typedef unsigned char XMLCH;
#endif
/* Runtime record of one element tag.
   NOTE(review): field roles are inferred from their names (qualified name,
   namespace URI, local name, prefix, namespace scopes, backing buffer for
   the name) -- confirm in parsifal.c. */
typedef struct tagXMLRUNTIMETAG
{
XMLCH *qname;
XMLCH *uri;
XMLCH *localName;
XMLCH *prefix;
LPXMLHTABLE Scope;
LPXMLHTABLE prevScope;
XMLSTRINGBUF nameBuf;
} XMLRUNTIMETAG, *LPXMLRUNTIMETAG;
/* Runtime record of one attribute. _XMLParser_AttIsDefaulted treats an
   attribute whose nameBuf.str is NULL as defaulted.
   NOTE(review): the remaining field roles are inferred from their names --
   confirm in parsifal.c. */
typedef struct tagXMLRUNTIMEATT
{
XMLCH *qname;
XMLCH *value;
XMLCH *uri;
XMLCH *localName;
XMLCH *prefix;
XMLSTRINGBUF nameBuf;
XMLSTRINGBUF valBuf;
} XMLRUNTIMEATT, *LPXMLRUNTIMEATT;
/* Internal per-parser working state, reached through XMLPARSER.prt.
   NOTE(review): field roles are inferred from their names only (hash
   tables, vectors, buffers and pool used while parsing) -- confirm in
   parsifal.c. dtd points to struct tagDTD, which is declared in xmldef.h. */
typedef struct tagXMLPARSERRUNTIME
{
LPXMLHTABLE nsScope;
LPXMLHTABLE namedAtts;
LPXMLHTABLE entitiesTable;
LPXMLHTABLE declAttTable;
LPXMLVECTOR atts;
LPXMLVECTOR tagstack;
LPXMLVECTOR entities;
LPBUFFEREDISTREAM refReader;
XMLCH *doctypeName, *publicID, *systemID;
const XMLCH *nameStart;
XMLSTRINGBUF charsBuf;
LPXMLPOOL strPool;
struct tagDTD *dtd;
} XMLPARSERRUNTIME, *LPXMLPARSERRUNTIME;
/* Parser error codes (value stored in XMLPARSER.ErrorCode). The numbering
   starts at 1 and is contiguous; values are spelled out so that they can be
   matched against logged numbers. */
enum tagXMLERRCODE {
	ERR_XMLP_MEMORY_ALLOC         = 1,
	ERR_XMLP_READER_FATAL         = 2,
	ERR_XMLP_INVALID_TOKEN        = 3,
	ERR_XMLP_INVALID_NAME         = 4,
	ERR_XMLP_INVALID_END_TAG      = 5,
	ERR_XMLP_UNDEF_ENTITY         = 6,
	ERR_XMLP_WS_NOT_ALLOWED       = 7,
	ERR_XMLP_WS_REQUIRED          = 8,
	ERR_XMLP_UNCLOSED_TAG         = 9,
	ERR_XMLP_EXPECTED_FOUND       = 10,
	ERR_XMLP_EXPECTED_TOKEN       = 11,
	ERR_XMLP_MULTIPLE_TOP         = 12,
	ERR_XMLP_INVALID_AT_TOP       = 13,
	ERR_XMLP_UNDEF_NSPREFIX       = 14,
	ERR_XMLP_DUPL_ATTRIBUTE       = 15,
	ERR_XMLP_ENCODING             = 16,
	ERR_XMLP_UNSUP_ENCODING       = 17,
	ERR_XMLP_INVALID_DECL         = 18,
	ERR_XMLP_INVALID_ATT_VALUE    = 19,
	ERR_XMLP_ABORT                = 20,
	ERR_XMLP_ILLEGAL_CHAR         = 21,
	ERR_XMLP_RECURSIVE_ENTITY_REF = 22,
	ERR_XMLP_IO                   = 23,
	ERR_XMLP_SWITCH_ENCODING      = 24
};
typedef enum tagXMLERRCODE XMLERRCODE;
/* Kind of an entity (XMLENTITY.type); numbering starts at 1. */
enum tagXMLENTITYTYPE {
	XML_ENTITY_INT_PARAM = 1,
	XML_ENTITY_INT_GEN   = 2,
	XML_ENTITY_EXT_PARAM = 3,
	XML_ENTITY_EXT_GEN   = 4,
	XML_ENTITY_UNPARSED  = 5,
	XML_ENTITY_DOCTYPE   = 6
};
typedef enum tagXMLENTITYTYPE XMLENTITYTYPE;
/* Parser feature flags: single-bit values stored in XMLPARSER.XMLFlags;
   set or cleared with _XMLParser_SetFlag and queried with
   _XMLParser_GetFlag. The first three correspond to the SAX features named
   in their comments. */
#define XMLFLAG_NAMESPACES 0x1 /* http://xml.org/sax/features/namespaces */
#define XMLFLAG_NAMESPACE_PREFIXES 0x2 /* http://xml.org/sax/features/namespace-prefixes */
#define XMLFLAG_EXTERNAL_GENERAL_ENTITIES 0x4 /* http://xml.org/sax/features/external-general-entities */
#define XMLFLAG_PRESERVE_GENERAL_ENTITIES 0x8
#define XMLFLAG_UNDEF_GENERAL_ENTITIES 0x10
#define XMLFLAG_PRESERVE_WS_ATTRIBUTES 0x20
#define XMLFLAG_CONVERT_EOL 0x40
/* for XML_ATTRIBUTEDECL_HANDLER valueDef param (attribute default
   declaration: #FIXED, #REQUIRED, #IMPLIED): */
#define XMLATTDECL_DEF_FIXED 1
#define XMLATTDECL_DEF_REQUIRED 2
#define XMLATTDECL_DEF_IMPLIED 3
/* for XML_ATTRIBUTEDECL_HANDLER type param (declared attribute type): */
#define XMLATTDECL_TYPE_CDATA 1
#define XMLATTDECL_TYPE_ID 2
#define XMLATTDECL_TYPE_IDREF 3
#define XMLATTDECL_TYPE_IDREFS 4
#define XMLATTDECL_TYPE_ENTITY 5
#define XMLATTDECL_TYPE_ENTITIES 6
#define XMLATTDECL_TYPE_NMTOKEN 7
#define XMLATTDECL_TYPE_NMTOKENS 8
#define XMLATTDECL_TYPE_NOTATION 9
#define XMLATTDECL_TYPE_ENUMERATED 10
/* Description of one entity as passed to the entity related handlers.
   NOTE(review): presumably len is the length of value and open marks an
   entity that is currently being expanded (cf.
   ERR_XMLP_RECURSIVE_ENTITY_REF) -- confirm in parsifal.c. */
typedef struct tagXMLENTITY
{
XMLENTITYTYPE type;
int len;
int open;
XMLCH *name;
XMLCH *value;
XMLCH *publicID;
XMLCH *systemID;
XMLCH *notation;
} XMLENTITY, *LPXMLENTITY;
/* Callback types for the handler slots of XMLPARSER. Every handler receives
   the parser's UserData pointer as its first argument.
   NOTE(review): handlers presumably return XML_OK to continue and XML_ABORT
   to stop parsing -- confirm in parsifal.c. */
typedef int (*XML_EVENT_HANDLER)(void *UserData);
typedef int (*XML_START_ELEMENT_HANDLER)(void *UserData, const XMLCH *uri,
const XMLCH *localName, const XMLCH *qName,
LPXMLVECTOR atts);
typedef int (*XML_END_ELEMENT_HANDLER)(void *UserData, const XMLCH *uri,
const XMLCH *localName, const XMLCH *qName);
/* cbSize: NOTE(review): presumably the size of chars in bytes -- confirm */
typedef int (*XML_CHARACTERS_HANDLER)(void *UserData, const XMLCH *chars, int cbSize);
typedef int (*XML_PI_HANDLER)(void *UserData, const XMLCH *target, const XMLCH *data);
typedef int (*XML_START_DTD_HANDLER)(void *UserData, const XMLCH *name,
const XMLCH *publicId, const XMLCH *systemId,
int hasInternalSubset);
typedef int (*XML_XMLDECL_HANDLER)(void *UserData, const XMLCH *version,
const XMLCH *encoding, const XMLCH *standalone);
typedef int (*XML_RESOLVE_ENTITY_HANDLER)(void *UserData, LPXMLENTITY entity,
LPBUFFEREDISTREAM reader);
typedef int (*XML_SKIPPED_ENTITY_HANDLER)(void *UserData, const XMLCH *name);
typedef int (*XML_ENTITY_EVENT_HANDLER)(void *UserData, LPXMLENTITY entity);
/* type and valueDef take the XMLATTDECL_TYPE_* and XMLATTDECL_DEF_* values */
typedef int (*XML_ATTRIBUTEDECL_HANDLER)(void *UserData, const XMLCH *eName,
const XMLCH *aName, int type, const XMLCH *typeStr, int valueDef,
const XMLCH *def);
typedef int (*XML_ELEMENTDECL_HANDLER)(void *UserData, const XMLCH *name,
void *contentModel);
typedef int (*XML_NOTATIONDECL_HANDLER)(void *UserData, const XMLCH *name,
const XMLCH *publicID, const XMLCH *systemID);
/* The parser object: input reader, internal runtime state, error
   information, feature flags and the application's handler slots.
   NOTE(review): presumably a handler left NULL is simply not called --
   confirm in parsifal.c. */
typedef struct tagXMLPARSER
{
/* current input stream */
LPBUFFEREDISTREAM reader;
/* internal working state */
LPXMLPARSERRUNTIME prt;
XMLCH *DocumentElement;
/* error information */
XMLCH ErrorString[128];
int ErrorCode;
int ErrorLine;
int ErrorColumn;
/* application pointer passed to every handler */
void *UserData;
/* combination of XMLFLAG_* bits */
unsigned long XMLFlags;
XML_EVENT_HANDLER startDocumentHandler;
XML_EVENT_HANDLER endDocumentHandler;
XML_EVENT_HANDLER startCDATAHandler;
XML_EVENT_HANDLER endCDATAHandler;
XML_EVENT_HANDLER endDTDHandler;
XML_CHARACTERS_HANDLER charactersHandler;
XML_CHARACTERS_HANDLER ignorableWhitespaceHandler;
XML_CHARACTERS_HANDLER commentHandler;
XML_CHARACTERS_HANDLER defaultHandler;
XML_START_ELEMENT_HANDLER startElementHandler;
XML_END_ELEMENT_HANDLER endElementHandler;
XML_PI_HANDLER processingInstructionHandler;
XML_START_DTD_HANDLER startDTDHandler;
XML_XMLDECL_HANDLER xmlDeclHandler;
XML_SKIPPED_ENTITY_HANDLER skippedEntityHandler;
XML_ENTITY_EVENT_HANDLER startEntityHandler;
XML_ENTITY_EVENT_HANDLER endEntityHandler;
XML_RESOLVE_ENTITY_HANDLER resolveEntityHandler;
XML_RESOLVE_ENTITY_HANDLER externalEntityParsedHandler;
XML_ATTRIBUTEDECL_HANDLER attributeDeclHandler;
XML_ELEMENTDECL_HANDLER elementDeclHandler;
XML_ENTITY_EVENT_HANDLER entityDeclHandler;
XML_NOTATIONDECL_HANDLER notationDeclHandler;
/* spelled out because XML_ERROR_HANDLER needs the finished struct type */
/*XML_ERROR_HANDLER */ void (*errorHandler)(struct tagXMLPARSER *parser);
} XMLPARSER, *LPXMLPARSER;
/* type of the errorHandler slot above */
typedef void (*XML_ERROR_HANDLER)(LPXMLPARSER parser);
/* XMLAPI: decoration placed in every public function declaration; empty
   unless the build defines it (NOTE(review): presumably for a calling
   convention or DLL export attribute -- confirm in the build files). */
#ifndef XMLAPI
#define XMLAPI
#endif
/* NOTE(review): presumably the values handlers return to continue or to
   abort parsing -- confirm in parsifal.c. */
#define XML_OK 0
#define XML_ABORT 1
/* _XMLParser_SetFlag(parser, flag, valBool)
   Sets (valBool nonzero) or clears (valBool zero) the XMLFLAG_* bit(s) flag
   in parser->XMLFlags and yields the new flag word.
   - parser is parenthesized before the cast so pointer expressions work.
   - flag is widened to unsigned long before it is inverted; inverting an
     unsigned int flag first would also clear the upper half of XMLFlags
     where long is wider than int.
   parser and flag may be evaluated more than once. */
#define _XMLParser_SetFlag(parser,flag,valBool) \
	((valBool) ? (((LPXMLPARSER)(parser))->XMLFlags |= (flag)) : \
	(((LPXMLPARSER)(parser))->XMLFlags &= ~(unsigned long)(flag)) )
/* _XMLParser_GetFlag(parser, flag)
   Yields 1 when every bit of flag is set in parser->XMLFlags, else 0.
   parser is parenthesized before the cast so pointer expressions work;
   flag is evaluated twice. */
#define _XMLParser_GetFlag(parser,flag) \
	((((LPXMLPARSER)(parser))->XMLFlags & (flag)) == (flag))
/* _XMLParser_AttIsDefaulted(att)
   Nonzero when the attribute (LPXMLRUNTIMEATT) has no name buffer, which is
   how a defaulted attribute is recognized. att is parenthesized so pointer
   expressions such as (atts + i) are accepted. */
#define _XMLParser_AttIsDefaulted(att) (!((att)->nameBuf.str))
/* Public parser API. Typical use (see isrcmem.h): XMLParser_Create(&parser),
   set handlers, then XMLParser_Parse(parser, inputSrc, inputData, encoding)
   where encoding may be NULL, and finally XMLParser_Free(parser).
   NOTE(review): result conventions of the individual functions are not
   visible in this header -- confirm in parsifal.c before relying on them. */
LPXMLPARSER XMLAPI XMLParser_Create(LPXMLPARSER *parser);
int XMLAPI XMLParser_Parse(LPXMLPARSER parser, LPFNINPUTSRC inputSrc, void *inputData, const XMLCH *encoding);
void XMLAPI XMLParser_Free(LPXMLPARSER parser);
LPXMLRUNTIMEATT XMLAPI XMLParser_GetNamedItem(LPXMLPARSER parser, const XMLCH *name);
XMLCH XMLAPI *XMLParser_GetSystemID(LPXMLPARSER parser);
XMLCH XMLAPI *XMLParser_GetPublicID(LPXMLPARSER parser);
XMLCH XMLAPI *XMLParser_GetPrefixMapping(LPXMLPARSER parser, const XMLCH *prefix);
int XMLAPI XMLParser_GetCurrentLine(LPXMLPARSER parser);
int XMLAPI XMLParser_GetCurrentColumn(LPXMLPARSER parser);
LPXMLENTITY XMLAPI XMLParser_GetCurrentEntity(LPXMLPARSER parser);
/* (void): an empty parameter list is not a prototype in C, so calls with
   stray arguments would go unchecked */
XMLCH XMLAPI *XMLParser_GetVersionString(void);
int XMLAPI XMLNormalizeBuf(XMLCH *buf, int len);
#ifdef __cplusplus
}
#endif /* __cplusplus */
#endif /* PARSIFAL__H */

92
public/parsifal/xmlcfg.h Normal file
View File

@@ -0,0 +1,92 @@
/*===========================================================================
xmlcfg.h
Global/platform-specific definitions + configuration for
GNU libiconv support etc.
===========================================================================*/
#ifndef XMLCFG__H
#define XMLCFG__H
/* With iconv the errno values come from its header; without it <errno.h>
   is used and any of the three codes the platform lacks gets a fallback
   value below. */
#ifdef ICONV_SUPPORT
#include "iconv.h"
#else
/* definitions for BufferedIStream encoding callback's encerr error values
(same as errnos set by iconv function) */
#include <errno.h>
#ifndef E2BIG
/* there isn't enough space in outbuf for multibyte char -
request for enlarge the buffer */
#define E2BIG 7
#endif
#ifndef EINVAL
/* partial character in inbuf */
#define EINVAL 22
#endif
#ifndef EILSEQ
/* illegal character in inbuf */
#define EILSEQ 42
#endif
#endif /* ICONV_SUPPORT */
/* util routines: */
/* stricmp is declared here unless the build defines HAS_STRICMP.
   NOTE(review): presumably a case-insensitive strcmp supplied by the
   library for platforms that lack one -- confirm. */
#ifndef HAS_STRICMP
extern int stricmp(const char *s1, const char *s2);
#endif
/* NOTE(review): presumably returns a newly allocated copy of len bytes of
   buf that the caller must free -- confirm in the implementation. */
extern unsigned char* xmlMemdup(unsigned char* buf, int len);
/* <stdint.h> would do the trick... but: */
/* Fixed-width integer aliases (UINT16, INT16, UINT32, INT32).
   Nothing is defined when the build already supplies UINT16. */
/* Compilers/targets listed here have a 32-bit long: */
#if !defined(UINT16) && ( \
	(defined(__GNUC__) && defined(__i386__)) || \
	defined(_MSC_VER) || \
	defined(__BORLANDC__) || \
	defined(VAXC) || \
	defined(_IBMR2))
#define UINT16 unsigned short int
#define INT16 short int
#define UINT32 unsigned long int
#define INT32 long int
#endif
/* Targets where int is the 32-bit type: */
#if !defined(UINT16) && (defined(sparc) || defined(__mips))
#define UINT16 unsigned short int
#define INT16 short int
#define UINT32 unsigned int
#define INT32 int
#endif
/* Any other target built with a compiler that predefines the exact-width
   type macros (GCC and Clang do), e.g. x86-64 or ARM: use those, no header
   needed. */
#if !defined(UINT16) && !defined(UINT32) && \
	defined(__UINT16_TYPE__) && defined(__INT16_TYPE__) && \
	defined(__UINT32_TYPE__) && defined(__INT32_TYPE__)
#define UINT16 __UINT16_TYPE__
#define INT16 __INT16_TYPE__
#define UINT32 __UINT32_TYPE__
#define INT32 __INT32_TYPE__
#endif
#ifndef UINT32
#error "Error: UINT32 cannot be determined for your platform - You must define it manually."
#endif
#endif /* XMLCFG__H */

463
public/parsifal/xmldef.h Normal file
View File

@@ -0,0 +1,463 @@
/*===========================================================================
xmldef.h
definitions, macros and function prototypes for parsifal.c
see parsifal.h for copyright info
===========================================================================*/
#ifndef XMLDEF__H
#define XMLDEF__H
#include <assert.h>
#ifdef _MSC_VER
#ifdef _DEBUG
/* Debug heap: _CRTDBG_MAP_ALLOC has to be defined BEFORE <crtdbg.h> is
   included, otherwise malloc/free are not mapped to their _dbg versions
   and leak reports carry no file/line information. <stdlib.h> goes in
   between so its declarations are seen before the mapping macros. */
#ifndef _CRTDBG_MAP_ALLOC
#define _CRTDBG_MAP_ALLOC
#endif
#include <stdlib.h>
#include <crtdbg.h>
#endif
/* older MSVC runtimes only provide the underscore-prefixed name */
#define vsnprintf _vsnprintf
#endif
/* reserved name tokens */
#define TOK_XMLNS "xmlns"
#define TOK_XML "xml"
/* lead byte bit patterns of 2-, 3- and 4-byte UTF-8 sequences
   (110xxxxx, 1110xxxx, 11110xxx) */
#define UTF8_2BYTES 0xc0
#define UTF8_3BYTES 0xe0
#define UTF8_4BYTES 0xf0
/* UTF8LEN(c, o)
   Stores in o the length in bytes (1..6) of the UTF-8 sequence whose lead
   byte c points to. Only the lead byte is inspected; bytes 0x80..0xBF are
   reported as 2.
   Both arguments are parenthesized, so pointer expressions such as
   (p + 1) and lvalues such as *len are handled correctly. c is evaluated
   several times. The expansion is a complete statement including its
   semicolon. */
#define UTF8LEN(c,o) \
	(o) = (!(*(c) & 0x80)) ? 1 : \
	      ((unsigned int)*(c) <= 0xdf) ? 2 : \
	      ((unsigned int)*(c) <= 0xef) ? 3 : \
	      ((unsigned int)*(c) <= 0xf7) ? 4 : \
	      ((unsigned int)*(c) <= 0xfb) ? 5 : 6;
/* Attribute classification codes.
   NOTE(review): inferred from the names -- ordinary attribute, attribute
   with a namespace prefix, xmlns:prefix declaration, default xmlns
   declaration and declaration of the xml prefix; confirm in parsifal.c. */
#define XMLATT_NORMAL 0
#define XMLATT_WITHNS 1
#define XMLATT_PREFIXDECL 2
#define XMLATT_DEFAULTDECL 3
#define XMLATT_XMLPREFIXDECL 4
/* isspace gives us trouble trimming some utf-8 trailbytes... */
/* Replace any macro version of isspace with a lookup in the whitespace
   bitmap, so that only 0x9, 0xA, 0xD and 0x20 count as whitespace. */
#ifdef isspace
#undef isspace
#endif
#define isspace(c) (ISMAPCH(whitespace, (c)))
/* SAFE_FREESTR(s): frees the string s points to and resets s to NULL; does
   nothing when s is already NULL. s must be a modifiable XMLCH* lvalue. */
#define SAFE_FREESTR(s) if ((s) != NULL) { free((s)); (s) = (XMLCH*)NULL; }
/* BUFTOSTR. similar to BufferedIStream_ToString, but modifies
(nul terminates) actual buffer. There's no bounds checking like in _ToString,
also must be sure that returned string stays valid (no _Read operations)
Note that like in _ToString, last param is endPos, not length.
macro can be used when BufferedIStream is in pos ?> for example
and we can replace ? with \0 and use buffer as a string and
avoid memory/time consuming _ToString call. */
/* BUFTOSTR(buf, startPos, endPos): writes '\0' at buf[endPos] and yields a
   pointer to buf[startPos], i.e. the buffer content from startPos up to
   (not including) endPos as a C string. The buffer itself is modified. */
#define BUFTOSTR(buf,startPos,endPos) \
	((buf)[(endPos)] = '\0', (buf) + (startPos))
/* ISMAPCH macro for comparing ascii map char */
/* ISMAPCH(map, byte): nonzero when byte is a member of the 256-bit map
   (bit (byte & 7) of element (byte >> 3)). byte is evaluated twice. */
#define ISMAPCH(map, byte) ((map)[(byte) >> 3] & (1 << ((byte) & 7)))
/* ISILLBYTE(c): nonzero when c is one of the control bytes flagged in
   illByte. c is parenthesized so expressions such as (x & 0xFF) are
   compared as a whole. */
#define ISILLBYTE(c) ((c) < 32 && ISMAPCH(illByte, (c)))
/*
The code points U+D800 to U+DFFF will never be assigned to characters.
Other invalid sequences are code points 0xFFFF and 0xFFFE
(EF,BF,BF and EF,BF,BE)
5.1 Single UTF-16 surrogates
5.1.1 U+D800 = ed a0 80
5.1.2 U+DB7F = ed ad bf
5.1.3 U+DB80 = ed ae 80
5.1.4 U+DBFF = ed af bf
5.1.5 U+DC00 = ed b0 80
5.1.6 U+DF80 = ed be 80
5.1.7 U+DFFF = ed bf bf
5.3 Other illegal code positions
5.3.1 U+FFFE = ef bf be
5.3.2 U+FFFF = ef bf bf
note:
sequence bytes c[1]-c[3] can never be < 0x80 (10000000)
and last byte can never be > 0xBF (10111111)
these are ensured in UTF-8 conversion */
/* UTF8_ISILL3(c): nonzero when the 3-byte sequence at c encodes U+FFFE,
   U+FFFF or a surrogate (U+D800..U+DFFF). c is parenthesized everywhere,
   so pointer expressions such as (p + 1) are handled correctly; it is
   evaluated several times. */
#define UTF8_ISILL3(c) \
	(((c)[0] == 0xEF && (c)[1] == 0xBF) ? ((c)[2] > 0xBD) : \
	 ((c)[0] == 0xED && (c)[1] > 0x9F))
/* see http://www.unicode.org/unicode/reports/tr28/ table 3.1B */
/* UTF8_ISILL4(c): nonzero when the 4-byte sequence at c is outside the
   well-formed ranges F0 90..BF, F1..F3 80..BF, F4 80..8F:
   - lead byte above F4, or F4 followed by more than 8F (beyond U+10FFFF)
   - F0 followed by less than 90 (overlong form of a shorter sequence)
   c is parenthesized everywhere and evaluated several times. */
#define UTF8_ISILL4(c) \
	((c)[0] > 0xF4 || \
	 ((c)[0] == 0xF4 && (c)[1] > 0x8F) || \
	 ((c)[0] == 0xF0 && (c)[1] < 0x90))
/* BISREADERDATA is put into parser->reader->userdata.
Each parsed entity has its own reader and its own
BISREADERDATA. The "main parser"'s
BISREADERDATA is allocated in Parser_Create and
initialized in Parser_Parse (see also InitEntityReader) */
/* Per-reader parser data, stored in the reader's userdata pointer and
   reached through PREADERDATA. */
typedef struct tagBISREADERDATA
{
LPXMLPARSER parser;
LPXMLENTITY curEnt; /* current entity (NULL for doc entity and internal entities) */
int iCurPE;
/* position counters maintained by the character reader: line is bumped at
   every line end, col restarts at 0 there and advances by bytes */
int line;
int col;
int stackLevel;
int lEndian; /* flag to indicate that parser's using little-endian encoding, might
not be correct when byte order mark isn't present */
int context; /* reader specific context flags */
XMLCH EncodingName[20];
} BISREADERDATA, *LPBISREADERDATA;
/* bit for BISREADERDATA.context (NOTE(review): exact meaning not visible
   here -- confirm in parsifal.c) */
#define XMLREADERCTX_PE_SPACE 0x1
/* DECLATT represents DTD declared attribute. Array of DECLATTs
is stored in declAttTable hashtable for expansion of default attribute
values (value member !NULL) or special normalization rules; if type
isn't CDATA normalization follow rules specified in XMLSPEC
3.3.3 Attribute-Value Normalization */
/* One DTD declared attribute.
   NOTE(review): presumably type holds an XMLATTDECL_TYPE_* value and
   defaultDecl an XMLATTDECL_DEF_* value; the *Len members look like cached
   string lengths -- confirm in parsifal.c. */
struct tagDECLATT
{
XMLCH *name, *value;
int nameLen, prefixLen, valueLen, type, defaultDecl;
};
/* DTD parsing state, reached through the parser runtime's dtd pointer.
   expandPEs switches parameter entity expansion in the character reader on
   and off; DTDTOK_START saves it in expandPEsaved and DTDTOK_END restores
   it. peStack holds the readers of the parameter entities currently being
   read.
   NOTE(review): the remaining members are not described by visible code --
   confirm in the DTD parser. */
struct tagDTD
{
int expandPEs, expandPEsaved, inLiteral, inclSect;
LPXMLVECTOR peStack;
XMLSTRINGBUF sbuf;
XMLRUNTIMEATT att;
LPBUFFEREDISTREAM refReader;
LPXMLPOOL pePool;
};
/* STACK macro wrapper around Vector; can be used as tag stack in our
case, even though Vector is optimized for sequential index access.
Tag stack isn't likely to grow/shrink that much in xml parsing when
using CapacityIncrement 16 x RUNTIMETAG (tag nesting level).
STACK_POP always removes last item from Vector and that is efficient
too. Note: Vector handles bounds checking in _Get and in _Remove.

STACK_PUSH   appends item (XMLVector_Append).
STACK_PEEK   yields the last item (XMLVector_Get at length-1).
STACK_REMOVE removes the last item (XMLVector_Remove at length-1).
STACK_POP    when the stack is non-empty, copies the last item
             (itemSize bytes) into item, removes it and evaluates to
             item; evaluates to NULL when the stack is empty.
All of these evaluate the stack argument more than once, so pass a
side-effect free expression. STACK_POP needs memcpy (<string.h>). */
#define STACK_PUSH(stack,item) (XMLVector_Append((stack), (item)))
#define STACK_PEEK(stack) (XMLVector_Get((stack),(stack)->length-1))
#define STACK_REMOVE(stack) (XMLVector_Remove((stack), (stack)->length-1))
#define STACK_POP(stack,item) \
( ((stack)->length) ? (memcpy((item), STACK_PEEK((stack)), (stack)->itemSize), \
STACK_REMOVE((stack)), (item)) : NULL)
/* Encoder callbacks defined elsewhere. They share the parameter list of
LPFNENCODE from bistream.h (reader, input cursor + bytes left, output
cursor + bytes left). NOTE(review): going by their names they convert
Latin-1 to UTF-8, check/copy UTF-8, and wrap iconv - confirm in the
implementation. iconvWrapper is only declared when ICONV_SUPPORT is defined. */
extern size_t Latin1ToUtf8 (LPBUFFEREDISTREAM r, const BYTE **inbuf, size_t *inbytesleft, BYTE **outbuf, size_t *outbytesleft);
extern size_t Utf8ToUtf8 (LPBUFFEREDISTREAM r, const BYTE **inbuf, size_t *inbytesleft, BYTE **outbuf, size_t *outbytesleft);
#ifdef ICONV_SUPPORT
extern size_t iconvWrapper (LPBUFFEREDISTREAM r, const BYTE **inbuf, size_t *inbytesleft, BYTE **outbuf, size_t *outbytesleft);
#endif
static XMLCH EmptyStr[1] = {'\0'};
static XMLCH *uriXMLNS = "http://www.w3.org/2000/xmlns/";
static XMLCH *uriXML = "http://www.w3.org/XML/1998/namespace";
/* EINPUT(r): when r is below -2, calls SetReaderFatal(parser, r) and
evaluates to 1; otherwise evaluates to 0 without side effects. Relies on a
variable named parser being in scope. r is evaluated twice in the error
case. NOTE(review): the BIS_ERR_* codes (-40 and up) fall below -2;
-1 and -2 are presumably non-fatal reader results - confirm. */
#define EINPUT(r) (((r) < -2) ? (SetReaderFatal(((LPXMLPARSER)parser), (r)), 1) : 0)
/* ISXMLPREFIX(s): nonzero when s starts with "xml".
ISXMLNSPREFIX(s): nonzero when s starts with "xmlns".
Comparison stops at the first mismatch, so a shorter NUL-terminated
string is never read past its terminator. s is evaluated several times
(no side effects); every use is parenthesized so pointer expressions such
as p + 1 or casts can be passed. */
#define ISXMLPREFIX(s) ((*(s) == 'x' && (s)[1] == 'm' && (s)[2] == 'l'))
#define ISXMLNSPREFIX(s) ((ISXMLPREFIX((s)) && (s)[3] == 'n' && (s)[4] == 's'))
/* ISQUOTE(c): nonzero when c is a double or a single quote character
(c may be evaluated twice). */
#define ISQUOTE(c) ((c)=='\"' || (c)=='\'')
/* REQUIRE_WS(r): stores SkipWS(parser) in r (r must be an lvalue). When
that result is 0 the macro evaluates to ErP_(parser, ERR_XMLP_WS_REQUIRED, 0);
when it is -1 the macro evaluates to 0; otherwise it evaluates to r.
Relies on a variable named parser being in scope. */
#define REQUIRE_WS(r) (((r = SkipWS(parser))==0) ? \
ErP_(parser, ERR_XMLP_WS_REQUIRED, 0) : ((r==-1) ? 0 : r))
/* Position helpers; all rely on PREADER/PREADERDATA (and thus on a variable
named parser).
DPOS(bytes)    moves the reader position and the column back by bytes.
IPOS(bytes)    moves the reader position and the column forward by bytes.
DPOS_LF(bytes) like DPOS, but first decrements the line counter when the
               byte just before the current position is 0x0A.
Each macro is wrapped in do { } while (0) so that all of its statements stay
together under an unbraced if/else/loop; previously only the first statement
was governed by the condition. The trailing ';' of the original definitions
is kept on purpose so call sites written without their own ';' still
compile. bytes is evaluated twice. */
#define DPOS(bytes) \
do { \
    PREADER->pos-=(bytes); \
    PREADERDATA->col-=(bytes); \
} while (0);
#define IPOS(bytes) \
do { \
    PREADER->pos+=(bytes); \
    PREADERDATA->col+=(bytes); \
} while (0);
#define DPOS_LF(bytes) \
do { \
    if (PREADER->buf[PREADER->pos-1] == 0x0A) \
        PREADERDATA->line--; \
    DPOS(bytes) \
} while (0);
/* DTDTOK_START(PEs): saves RT->dtd->expandPEs into expandPEsaved and then
sets expandPEs to PEs; usable as an expression (yields the new value).
DTDTOK_END: restores expandPEs from expandPEsaved when they differ; must be
followed by ';'. It is wrapped in do { } while (0) so that an 'else'
following the macro cannot attach to its internal 'if'. Both rely on RT
(and thus on a variable named parser). */
#define DTDTOK_START(PEs) (\
RT->dtd->expandPEsaved = RT->dtd->expandPEs, \
RT->dtd->expandPEs = (PEs) \
)
#define DTDTOK_END \
do { \
    if (RT->dtd->expandPEs != RT->dtd->expandPEsaved) \
        RT->dtd->expandPEs = RT->dtd->expandPEsaved; \
} while (0)
/* some shortcuts: every macro below expects a variable named parser to be
in scope at the point of use.
RT           runtime data of the parser (parser->prt)
PREADER      parser->reader as LPBUFFEREDISTREAM
PREADERDATA  PREADER->userdata as LPBISREADERDATA
HANDLER(n)   the parser member named <n>Handler
PEEKINPUT    BufferedIStream_Peek on PREADER with offset 0
CURCHAR      byte at the current reader position; asserts that the
             position is below bytesavail first (needs <assert.h>; the
             check disappears when NDEBUG is defined) */
#define RT parser->prt
#define PREADER ((LPBUFFEREDISTREAM)parser->reader)
#define PREADERDATA ((LPBISREADERDATA)PREADER->userdata)
#define HANDLER(n) parser->n##Handler
#define PEEKINPUT(str,len) BufferedIStream_Peek(PREADER,(str),(len),0)
#define CURCHAR (assert(PREADER->pos<PREADER->bytesavail), PREADER->buf[PREADER->pos])
/* programmatically generated trie tables (reTRIEval algorithm). The trie
algorithm suits our needs very well - brute force strcmp can in fact
be more efficient for small dictionaries (usually strcmp is well optimized and
fast) but since we must use ReadCh (in TrieTok()) and memcmp (in TrieRaw()
via BufferedIStream_Peek) we're getting a performance gain. Especially DTD token
parsing benefits from tries - no excessive ReadCh calls and UTF-8 checks
anymore etc. See TrieTok and TrieRaw.

Each table is an array of nodes. In the tables a terminal node has c == '\0'
and stores its token number as a small integer cast to (struct trie*) in n. */
struct trie {
const char c; /* current char to test */
const struct trie *n; /* next char/trie or token number if c is '\0' */
const struct trie *sib; /* choice or NULL if there is none */
};
/* TRxmlTok: trie for markup tokens. Token numbers:
   "![CDATA[" -> 1, "?" -> 2, "!--" -> 3, "/" -> 4, "!DOCTYPE" -> 5.
   NOTE(review): presumably matched right after a '<' was consumed - confirm.
   T_N_ is a temporary alias for the table name while the table is defined. */
#define T_N_ TRxmlTok
static struct trie const T_N_[] = {
/* 0 */ {'!', T_N_+1, T_N_+9},
/* 1 */ {'[', T_N_+2, T_N_+11},
/* 2 */ {'C', T_N_+3, NULL},
/* 3 */ {'D', T_N_+4, NULL},
/* 4 */ {'A', T_N_+5, NULL},
/* 5 */ {'T', T_N_+6, NULL},
/* 6 */ {'A', T_N_+7, NULL},
/* 7 */ {'[', T_N_+8, NULL},
/* 8 */ {'\0', (struct trie*)1, NULL},
/* 9 */ {'?', T_N_+10, T_N_+14},
/* 10 */ {'\0', (struct trie*)2, NULL},
/* 11 */ {'-', T_N_+12, T_N_+16},
/* 12 */ {'-', T_N_+13, NULL},
/* 13 */ {'\0', (struct trie*)3, NULL},
/* 14 */ {'/', T_N_+15, NULL},
/* 15 */ {'\0', (struct trie*)4, NULL},
/* 16 */ {'D', T_N_+17, NULL},
/* 17 */ {'O', T_N_+18, NULL},
/* 18 */ {'C', T_N_+19, NULL},
/* 19 */ {'T', T_N_+20, NULL},
/* 20 */ {'Y', T_N_+21, NULL},
/* 21 */ {'P', T_N_+22, NULL},
/* 22 */ {'E', T_N_+23, NULL},
/* 23 */ {'\0', (struct trie*)5, NULL}
};
#undef T_N_
/* TRstdEnt: trie for the predefined entity names including the closing ';'.
   Token numbers: "gt;" -> 1, "lt;" -> 2, "amp;" -> 3, "apos;" -> 4,
   "quot;" -> 5.
   NOTE(review): presumably matched right after an '&' was consumed - confirm. */
#define T_N_ TRstdEnt
static struct trie const T_N_[] = {
/* 0 */ {'g', T_N_+1, T_N_+4},
/* 1 */ {'t', T_N_+2, NULL},
/* 2 */ {';', T_N_+3, NULL},
/* 3 */ {'\0', (struct trie*)1, NULL},
/* 4 */ {'l', T_N_+5, T_N_+8},
/* 5 */ {'t', T_N_+6, NULL},
/* 6 */ {';', T_N_+7, NULL},
/* 7 */ {'\0', (struct trie*)2, NULL},
/* 8 */ {'a', T_N_+9, T_N_+18},
/* 9 */ {'m', T_N_+10, T_N_+13},
/* 10 */ {'p', T_N_+11, NULL},
/* 11 */ {';', T_N_+12, NULL},
/* 12 */ {'\0', (struct trie*)3, NULL},
/* 13 */ {'p', T_N_+14, NULL},
/* 14 */ {'o', T_N_+15, NULL},
/* 15 */ {'s', T_N_+16, NULL},
/* 16 */ {';', T_N_+17, NULL},
/* 17 */ {'\0', (struct trie*)4, NULL},
/* 18 */ {'q', T_N_+19, NULL},
/* 19 */ {'u', T_N_+20, NULL},
/* 20 */ {'o', T_N_+21, NULL},
/* 21 */ {'t', T_N_+22, NULL},
/* 22 */ {';', T_N_+23, NULL},
/* 23 */ {'\0', (struct trie*)5, NULL}
};
#undef T_N_
/* TRxmlDecl: trie for the pseudo-attribute names of the XML declaration.
   Token numbers: "version" -> 1, "encoding" -> 2, "standalone" -> 3. */
#define T_N_ TRxmlDecl
static struct trie const T_N_[] = {
/* 0 */ {'v', T_N_+1, T_N_+8},
/* 1 */ {'e', T_N_+2, NULL},
/* 2 */ {'r', T_N_+3, NULL},
/* 3 */ {'s', T_N_+4, NULL},
/* 4 */ {'i', T_N_+5, NULL},
/* 5 */ {'o', T_N_+6, NULL},
/* 6 */ {'n', T_N_+7, NULL},
/* 7 */ {'\0', (struct trie*)1, NULL},
/* 8 */ {'e', T_N_+9, T_N_+17},
/* 9 */ {'n', T_N_+10, NULL},
/* 10 */ {'c', T_N_+11, NULL},
/* 11 */ {'o', T_N_+12, NULL},
/* 12 */ {'d', T_N_+13, NULL},
/* 13 */ {'i', T_N_+14, NULL},
/* 14 */ {'n', T_N_+15, NULL},
/* 15 */ {'g', T_N_+16, NULL},
/* 16 */ {'\0', (struct trie*)2, NULL},
/* 17 */ {'s', T_N_+18, NULL},
/* 18 */ {'t', T_N_+19, NULL},
/* 19 */ {'a', T_N_+20, NULL},
/* 20 */ {'n', T_N_+21, NULL},
/* 21 */ {'d', T_N_+22, NULL},
/* 22 */ {'a', T_N_+23, NULL},
/* 23 */ {'l', T_N_+24, NULL},
/* 24 */ {'o', T_N_+25, NULL},
/* 25 */ {'n', T_N_+26, NULL},
/* 26 */ {'e', T_N_+27, NULL},
/* 27 */ {'\0', (struct trie*)3, NULL}
};
#undef T_N_
/* TRexternalID: trie for the external ID keywords.
   Token numbers: "PUBLIC" -> 1, "SYSTEM" -> 2.
   Declared static const like every other trie table in this header: a
   non-static object definition in a header yields duplicate external
   definitions as soon as the header is included from more than one
   translation unit, and the table is never meant to be modified. */
#define T_N_ TRexternalID
static struct trie const T_N_[] = {
/* 0 */ {'P', T_N_+1, T_N_+7},
/* 1 */ {'U', T_N_+2, NULL},
/* 2 */ {'B', T_N_+3, NULL},
/* 3 */ {'L', T_N_+4, NULL},
/* 4 */ {'I', T_N_+5, NULL},
/* 5 */ {'C', T_N_+6, NULL},
/* 6 */ {'\0', (struct trie*)1, NULL},
/* 7 */ {'S', T_N_+8, NULL},
/* 8 */ {'Y', T_N_+9, NULL},
/* 9 */ {'S', T_N_+10, NULL},
/* 10 */ {'T', T_N_+11, NULL},
/* 11 */ {'E', T_N_+12, NULL},
/* 12 */ {'M', T_N_+13, NULL},
/* 13 */ {'\0', (struct trie*)2, NULL}
};
#undef T_N_
#ifdef DTD_SUPPORT
/* TRdtdTok: trie for DTD markup tokens (only compiled with DTD_SUPPORT).
   Token numbers: "<!--" -> 1, "<![" -> 2, "<?" -> 3, "<!ENTITY" -> 4,
   "<!ELEMENT" -> 5, "<!ATTLIST" -> 6, "<!NOTATION" -> 7. */
#define T_N_ TRdtdTok
static struct trie const T_N_[] = {
/* 0 */ {'<', T_N_+1, NULL},
/* 1 */ {'!', T_N_+2, T_N_+7},
/* 2 */ {'-', T_N_+3, T_N_+5},
/* 3 */ {'-', T_N_+4, NULL},
/* 4 */ {'\0', (struct trie*)1, NULL},
/* 5 */ {'[', T_N_+6, T_N_+9},
/* 6 */ {'\0', (struct trie*)2, NULL},
/* 7 */ {'?', T_N_+8, NULL},
/* 8 */ {'\0', (struct trie*)3, NULL},
/* 9 */ {'E', T_N_+10, T_N_+23},
/* 10 */ {'N', T_N_+11, T_N_+16},
/* 11 */ {'T', T_N_+12, NULL},
/* 12 */ {'I', T_N_+13, NULL},
/* 13 */ {'T', T_N_+14, NULL},
/* 14 */ {'Y', T_N_+15, NULL},
/* 15 */ {'\0', (struct trie*)4, NULL},
/* 16 */ {'L', T_N_+17, NULL},
/* 17 */ {'E', T_N_+18, NULL},
/* 18 */ {'M', T_N_+19, NULL},
/* 19 */ {'E', T_N_+20, NULL},
/* 20 */ {'N', T_N_+21, NULL},
/* 21 */ {'T', T_N_+22, NULL},
/* 22 */ {'\0', (struct trie*)5, NULL},
/* 23 */ {'A', T_N_+24, T_N_+31},
/* 24 */ {'T', T_N_+25, NULL},
/* 25 */ {'T', T_N_+26, NULL},
/* 26 */ {'L', T_N_+27, NULL},
/* 27 */ {'I', T_N_+28, NULL},
/* 28 */ {'S', T_N_+29, NULL},
/* 29 */ {'T', T_N_+30, NULL},
/* 30 */ {'\0', (struct trie*)6, NULL},
/* 31 */ {'N', T_N_+32, NULL},
/* 32 */ {'O', T_N_+33, NULL},
/* 33 */ {'T', T_N_+34, NULL},
/* 34 */ {'A', T_N_+35, NULL},
/* 35 */ {'T', T_N_+36, NULL},
/* 36 */ {'I', T_N_+37, NULL},
/* 37 */ {'O', T_N_+38, NULL},
/* 38 */ {'N', T_N_+39, NULL},
/* 39 */ {'\0', (struct trie*)7, NULL}
};
#undef T_N_
/* TRattType: trie for attribute type keywords. Token numbers:
   "CDATA" -> 1, "ID" -> 2, "IDREF" -> 3, "IDREFS" -> 4, "ENTITY" -> 5,
   "ENTITIES" -> 6, "NMTOKEN" -> 7, "NMTOKENS" -> 8, "NOTATION" -> 9.
   Terminal nodes with a sibling (8, 12, 33) mark keywords that are also a
   prefix of a longer keyword (ID/IDREF/IDREFS, NMTOKEN/NMTOKENS). */
#define T_N_ TRattType
static struct trie const T_N_[] = {
/* 0 */ {'C', T_N_+1, T_N_+6},
/* 1 */ {'D', T_N_+2, NULL},
/* 2 */ {'A', T_N_+3, NULL},
/* 3 */ {'T', T_N_+4, NULL},
/* 4 */ {'A', T_N_+5, NULL},
/* 5 */ {'\0', (struct trie*)1, NULL},
/* 6 */ {'I', T_N_+7, T_N_+15},
/* 7 */ {'D', T_N_+8, NULL},
/* 8 */ {'\0', (struct trie*)2, T_N_+9},
/* 9 */ {'R', T_N_+10, NULL},
/* 10 */ {'E', T_N_+11, NULL},
/* 11 */ {'F', T_N_+12, NULL},
/* 12 */ {'\0', (struct trie*)3, T_N_+13},
/* 13 */ {'S', T_N_+14, NULL},
/* 14 */ {'\0', (struct trie*)4, NULL},
/* 15 */ {'E', T_N_+16, T_N_+26},
/* 16 */ {'N', T_N_+17, NULL},
/* 17 */ {'T', T_N_+18, NULL},
/* 18 */ {'I', T_N_+19, NULL},
/* 19 */ {'T', T_N_+20, NULL},
/* 20 */ {'Y', T_N_+21, T_N_+22},
/* 21 */ {'\0', (struct trie*)5, NULL},
/* 22 */ {'I', T_N_+23, NULL},
/* 23 */ {'E', T_N_+24, NULL},
/* 24 */ {'S', T_N_+25, NULL},
/* 25 */ {'\0', (struct trie*)6, NULL},
/* 26 */ {'N', T_N_+27, NULL},
/* 27 */ {'M', T_N_+28, T_N_+36},
/* 28 */ {'T', T_N_+29, NULL},
/* 29 */ {'O', T_N_+30, NULL},
/* 30 */ {'K', T_N_+31, NULL},
/* 31 */ {'E', T_N_+32, NULL},
/* 32 */ {'N', T_N_+33, NULL},
/* 33 */ {'\0', (struct trie*)7, T_N_+34},
/* 34 */ {'S', T_N_+35, NULL},
/* 35 */ {'\0', (struct trie*)8, NULL},
/* 36 */ {'O', T_N_+37, NULL},
/* 37 */ {'T', T_N_+38, NULL},
/* 38 */ {'A', T_N_+39, NULL},
/* 39 */ {'T', T_N_+40, NULL},
/* 40 */ {'I', T_N_+41, NULL},
/* 41 */ {'O', T_N_+42, NULL},
/* 42 */ {'N', T_N_+43, NULL},
/* 43 */ {'\0', (struct trie*)9, NULL}
};
#undef T_N_
/* TRdefDecls: trie for attribute default declaration keywords.
   Token numbers: "#FIXED" -> 1, "#REQUIRED" -> 2, "#IMPLIED" -> 3. */
#define T_N_ TRdefDecls
static struct trie const T_N_[] = {
/* 0 */ {'#', T_N_+1, NULL},
/* 1 */ {'F', T_N_+2, T_N_+7},
/* 2 */ {'I', T_N_+3, NULL},
/* 3 */ {'X', T_N_+4, NULL},
/* 4 */ {'E', T_N_+5, NULL},
/* 5 */ {'D', T_N_+6, NULL},
/* 6 */ {'\0', (struct trie*)1, NULL},
/* 7 */ {'R', T_N_+8, T_N_+16},
/* 8 */ {'E', T_N_+9, NULL},
/* 9 */ {'Q', T_N_+10, NULL},
/* 10 */ {'U', T_N_+11, NULL},
/* 11 */ {'I', T_N_+12, NULL},
/* 12 */ {'R', T_N_+13, NULL},
/* 13 */ {'E', T_N_+14, NULL},
/* 14 */ {'D', T_N_+15, NULL},
/* 15 */ {'\0', (struct trie*)2, NULL},
/* 16 */ {'I', T_N_+17, NULL},
/* 17 */ {'M', T_N_+18, NULL},
/* 18 */ {'P', T_N_+19, NULL},
/* 19 */ {'L', T_N_+20, NULL},
/* 20 */ {'I', T_N_+21, NULL},
/* 21 */ {'E', T_N_+22, NULL},
/* 22 */ {'D', T_N_+23, NULL},
/* 23 */ {'\0', (struct trie*)3, NULL}
};
#undef T_N_
/* TRelTypes: trie for the start of an element content specification.
   Token numbers: "EMPTY" -> 1, "ANY" -> 2, "(" -> 3. */
#define T_N_ TRelTypes
static struct trie const T_N_[] = {
/* 0 */ {'E', T_N_+1, T_N_+6},
/* 1 */ {'M', T_N_+2, NULL},
/* 2 */ {'P', T_N_+3, NULL},
/* 3 */ {'T', T_N_+4, NULL},
/* 4 */ {'Y', T_N_+5, NULL},
/* 5 */ {'\0', (struct trie*)1, NULL},
/* 6 */ {'A', T_N_+7, T_N_+10},
/* 7 */ {'N', T_N_+8, NULL},
/* 8 */ {'Y', T_N_+9, NULL},
/* 9 */ {'\0', (struct trie*)2, NULL},
/* 10 */ {'(', T_N_+11, NULL},
/* 11 */ {'\0', (struct trie*)3, NULL}
};
#undef T_N_
#endif /* ifdef DTD_SUPPORT */
#endif /* XMLDEF__H */

97
public/parsifal/xmlhash.h Normal file
View File

@@ -0,0 +1,97 @@
/* See xmlhash.c for copyright info */
#ifndef XMLHASH__H
#define XMLHASH__H
#include <stddef.h> /* For size_t */
/*
** A hash table consists of an array of these buckets. Each bucket
** holds a copy of the key, a pointer to the data associated with the
** key, and a pointer to the next bucket that collided with this one,
** if there was one.
*/
typedef struct tagXMLHTABLEBUCKET {
char *key; /* the table's own copy of the key */
void *data; /* caller's data pointer; the data itself is not copied */
struct tagXMLHTABLEBUCKET *next; /* next colliding bucket, if there was one */
} XMLHTABLEBUCKET;
/*
** This is what you actually declare an instance of to create a table.
** You then call XMLHTable_Create with the address of this structure,
** and a guess at the size of the table. Note that more nodes than this
** can be inserted in the table, but performance degrades as this
** happens. Performance should still be quite adequate until 2 or 3
** times as many nodes have been inserted as the table was created with.
*/
typedef struct tagXMLHTABLE {
size_t size; /* size the table was created with; 0 if creation failed */
XMLHTABLEBUCKET **table; /* array of bucket chains; NULL if creation failed */
void *userdata; /* NOTE(review): presumably caller context handed to callbacks - confirm */
} XMLHTABLE, *LPXMLHTABLE;
#ifdef __cplusplus
extern "C" {
#endif
/*
** This is used to construct the table. If it doesn't succeed, it sets
** the table's size to 0, and the pointer to the table to NULL.
*/
LPXMLHTABLE XMLHTable_Create(LPXMLHTABLE table,size_t size);
/*
** Inserts a pointer to 'data' in the table, with a copy of 'key' as its
** key. Note that this makes a copy of the key, but NOT of the
** associated data.
*/
void *XMLHTable_Insert(LPXMLHTABLE table, char *key, void *data);
/*
** Returns a pointer to the data associated with a key. If the key has
** not been inserted in the table, returns NULL.
*/
void *XMLHTable_Lookup(LPXMLHTABLE table, char *key);
/*
** Deletes an entry from the table. Returns a pointer to the data that
** was associated with the key so the calling code can dispose of it
** properly.
*/
void *XMLHTable_Remove(LPXMLHTABLE table, char *key);
/*
** Goes through a hash table and calls the function passed to it
** for each node that has been inserted. The function is passed
** a pointer to the key, and a pointer to the data associated
** with it.
** NOTE(review): the callback takes a third void* argument that this
** comment does not describe - presumably table->userdata; confirm in
** xmlhash.c. The meaning of the int results is not visible here either.
*/
int XMLHTable_Enumerate(LPXMLHTABLE table, int (*func)(char *,void *,void *));
/*
** Frees a hash table. For each node that was inserted in the table,
** it calls the function whose address it was passed, with a pointer
** to the data that was in the table. The function is expected to
** free the data. Typical usage would be:
** XMLHTable_Destroy(&table, freeItem, 0);
** where freeItem is a callback of the declared three-argument form, if
** the data placed in the table was dynamically allocated, or:
** XMLHTable_Destroy(&table, NULL, 0);
** if not. ( If the parameter passed is NULL, it knows not to call
** any function with the data. )
** NOTE(review): FreeTable presumably selects whether the XMLHTABLE
** structure itself is freed as well - confirm in xmlhash.c.
*/
int XMLHTable_Destroy(LPXMLHTABLE table, int (*func)(char *, void *, void *), int FreeTable);
#ifdef __cplusplus
}
#endif /* __cplusplus */
#endif /* XMLHASH__H */

49
public/parsifal/xmlpool.h Normal file
View File

@@ -0,0 +1,49 @@
/*===========================================================================
xmlpool.h
Based heavily on alloc-pool.c in gcc (some old code)
TODO
- String interning (wraps xmlpool object?)
- Automatically free blocks when all items are unused (in XMLPool_Free)
Add itemsAllocatedThisBlock tracking var to headers?
===========================================================================*/
#ifndef XMLPOOL__H
#define XMLPOOL__H
#include <stddef.h>
/* XMLAPI decorates the exported functions; it expands to nothing unless it
was already defined before this header is included. */
#ifndef XMLAPI
#define XMLAPI
#endif
/* Singly linked list node; LPXMLPOOL uses it for both its freeList and its
blockList. Note that LPXMLPOOLLIST is a pointer type. */
typedef struct tagLPXMLPOOLLIST
{
struct tagLPXMLPOOLLIST *next;
} *LPXMLPOOLLIST;
/* Pool of fixed-size items handed out by XMLPool_Alloc. Note that LPXMLPOOL
is a pointer type. NOTE(review): member meanings below are taken from their
names only - confirm against xmlpool.c. */
typedef struct tagLPXMLPOOL
{
int itemSize; /* presumably the size of one item in bytes */
int itemsPerBlock; /* presumably how many items one block holds */
int itemsAllocated; /* presumably the number of items currently handed out */
int itemsFree; /* presumably the number of items on freeList */
int blocksAllocated; /* presumably the number of blocks on blockList */
int blockSize; /* presumably the size of one block in bytes */
LPXMLPOOLLIST freeList;
LPXMLPOOLLIST blockList;
} *LPXMLPOOL;
#ifdef __cplusplus
extern "C" {
#endif
/* Pool API. NOTE(review): behaviour is described from the names and
signatures only - confirm against xmlpool.c.
XMLPool_Create   presumably creates a pool for items of itemSize bytes,
                 growing itemsPerBlock items at a time.
XMLPool_FreePool presumably releases the pool and all of its blocks.
XMLPool_Alloc    presumably returns one item from the pool.
XMLPool_Free     presumably returns ptr to the pool for reuse. */
LPXMLPOOL XMLAPI XMLPool_Create(int itemSize, int itemsPerBlock);
void XMLAPI XMLPool_FreePool(LPXMLPOOL pool);
void XMLAPI *XMLPool_Alloc(LPXMLPOOL pool);
void XMLAPI XMLPool_Free(LPXMLPOOL pool, void *ptr);
#ifdef __cplusplus
}
#endif /* __cplusplus */
#endif /* XMLPOOL__H */

57
public/parsifal/xmlsbuf.h Normal file
View File

@@ -0,0 +1,57 @@
/*===========================================================================
xmlsbuf.h
see parsifal.h for copyright info
===========================================================================*/
#ifndef XMLSBUF__H
#define XMLSBUF__H
#include <stddef.h>
#include "xmlpool.h"
/* Fallback definitions so this header can be included on its own; each one
is skipped when an earlier header already provided it.
XMLAPI expands to nothing, BYTE is a macro for unsigned char and XMLCH is
the character type (unsigned char) used by the string buffer. */
#ifndef XMLAPI
#define XMLAPI
#endif
#ifndef BYTE
#define BYTE unsigned char
#endif
#ifndef XMLCH_DEFINED
#define XMLCH_DEFINED
typedef unsigned char XMLCH;
#endif
#ifndef COUNTBUFSIZE
/* COUNTBUFSIZE(cBytes, blocksize): buffer size needed for cBytes bytes when
   allocating in whole blocks. Zero bytes still take one block, an exact
   multiple is returned as is, anything else is rounded up to the next
   multiple of blocksize. Both arguments are evaluated more than once. */
#define COUNTBUFSIZE(cBytes, blocksize) \
((cBytes) \
    ? (((cBytes) % (blocksize)) \
        ? (int)( (((cBytes) / (blocksize)) + 1) * (blocksize) ) \
        : (int)(cBytes)) \
    : (blocksize))
#endif
/* Growable string buffer used by the XMLStringbuf_* functions.
NOTE(review): member meanings are taken from their names and from the
Init/InitUsePool signatures only - confirm against xmlsbuf.c. */
typedef struct tagXMLSTRINGBUF
{
int capacity; /* presumably the allocated size of str */
int blocksize; /* presumably the growth granularity (see COUNTBUFSIZE) */
int len; /* presumably the number of XMLCHs currently stored */
int usePool; /* presumably nonzero when str comes from pool */
XMLCH *str;
LPXMLPOOL pool;
} XMLSTRINGBUF, *LPXMLSTRINGBUF;
#ifdef __cplusplus
extern "C" {
#endif
/* String buffer API. NOTE(review): described from names and signatures
only - confirm details (NULL on failure, termination, ownership) in xmlsbuf.c.
Append      presumably appends len XMLCHs from str.
AppendCh    presumably appends the single character c.
Init        presumably prepares sbuf with the given block and initial size.
InitUsePool like Init, additionally taking a pool to allocate from.
ToString    presumably returns the buffer contents as a string.
SetLength   presumably sets the logical length to len.
Free        presumably releases the storage held by sbuf. */
XMLCH XMLAPI *XMLStringbuf_Append(LPXMLSTRINGBUF sbuf, XMLCH *str, int len);
XMLCH XMLAPI *XMLStringbuf_AppendCh(LPXMLSTRINGBUF sbuf, XMLCH c);
XMLCH XMLAPI *XMLStringbuf_Init(LPXMLSTRINGBUF sbuf, int blockSize, int initSize);
XMLCH XMLAPI *XMLStringbuf_InitUsePool(LPXMLSTRINGBUF sbuf, int blockSize, int initSize, LPXMLPOOL pool);
XMLCH XMLAPI *XMLStringbuf_ToString(LPXMLSTRINGBUF sbuf);
int XMLAPI XMLStringbuf_SetLength(LPXMLSTRINGBUF sbuf, int len);
void XMLAPI XMLStringbuf_Free(LPXMLSTRINGBUF sbuf);
#ifdef __cplusplus
}
#endif /* __cplusplus */
#endif /* XMLSBUF__H */

60
public/parsifal/xmlvect.h Normal file
View File

@@ -0,0 +1,60 @@
/*===========================================================================
xmlvect.h
see parsifal.h for copyright info
===========================================================================*/
#ifndef XMLVECTOR__H
#define XMLVECTOR__H
#include <stddef.h>
/* Fallback definitions so this header can be included on its own; each one
is skipped when an earlier header already provided it. XMLAPI expands to
nothing and BYTE is a macro for unsigned char. */
#ifndef XMLAPI
#define XMLAPI
#endif
#ifndef BYTE
#define BYTE unsigned char
#endif
#ifndef COUNTBUFSIZE
/* COUNTBUFSIZE(cBytes, blocksize): buffer size needed for cBytes bytes when
   allocating in whole blocks. Zero bytes still take one block, an exact
   multiple is returned as is, anything else is rounded up to the next
   multiple of blocksize. Both arguments are evaluated more than once. */
#define COUNTBUFSIZE(cBytes, blocksize) \
((cBytes) \
    ? (((cBytes) % (blocksize)) \
        ? (int)( (((cBytes) / (blocksize)) + 1) * (blocksize) ) \
        : (int)(cBytes)) \
    : (blocksize))
#endif
/* Vector of fixed-size items stored back to back in one byte array; item i
lives at array + i*itemSize (see _XMLVector_Get). */
typedef struct tagXMLVECTOR
{
int length; /* number of items in use; valid indexes are 0..length-1 */
int capacity; /* NOTE(review): presumably the number of items allocated - confirm */
int capacityIncrement; /* NOTE(review): presumably the growth step in items - confirm */
int itemSize; /* size of one item in bytes */
BYTE *array; /* item storage */
} XMLVECTOR, *LPXMLVECTOR;
#ifdef __cplusplus
extern "C" {
#endif
/* Vector API. NOTE(review): described from names and signatures only -
confirm return conventions (NULL / 0 on failure) and whether items are copied
in xmlvect.c.
Create       presumably allocates a vector for items of itemSize bytes and
             stores it through *vector.
Replace      presumably overwrites the item at index.
Remove       presumably removes the item at index.
Get          presumably returns a pointer to the item at index.
Resize       presumably changes the number of items (newsize 0 empties it,
             see _XMLVector_RemoveAll).
Append       presumably adds item at the end.
InsertBefore presumably inserts item in front of index.
Free         presumably releases the vector. */
LPXMLVECTOR XMLAPI XMLVector_Create(LPXMLVECTOR *vector, int initialCapacity, int itemSize);
void XMLAPI *XMLVector_Replace(LPXMLVECTOR vector, int index, void *item);
int XMLAPI XMLVector_Remove(LPXMLVECTOR vector, int index);
void XMLAPI *XMLVector_Get(LPXMLVECTOR vector, int index);
int XMLAPI XMLVector_Resize(LPXMLVECTOR vector, int newsize);
void XMLAPI *XMLVector_Append(LPXMLVECTOR vector, void *item);
void XMLAPI *XMLVector_InsertBefore(LPXMLVECTOR vector, int index, void *item);
void XMLAPI XMLVector_Free(LPXMLVECTOR vector);
/* _XMLVector_RemoveAll(v): resizes v to zero items via XMLVector_Resize. */
#define _XMLVector_RemoveAll(v) (XMLVector_Resize((v), 0))
/* _XMLVector_Get(v,index): macro form of an indexed lookup. Evaluates to
NULL when index is negative or beyond length-1, otherwise to the address
array + index*itemSize. Needs NULL to be defined; v and index are
evaluated more than once, so they must not have side effects. */
#define _XMLVector_Get(v,index) \
(((index) < 0 || (index) > ((v)->length - 1)) ? NULL : (((v)->array+((index)*(v)->itemSize))))
/* _XMLVector_GetP(vect,i,ptype): for a vector whose items are pointers to
ptype, fetches item i with XMLVector_Get and dereferences it, yielding the
stored ptype* itself.
The cast is written as (ptype **) without token pasting: pasting ptype with
'*' does not form a valid preprocessing token and is rejected by GCC and
Clang. vect and i are parenthesized so arbitrary expressions can be passed.
The result of XMLVector_Get is dereferenced unchecked, so i must be a valid
index. */
#define _XMLVector_GetP(vect,i,ptype) (*((ptype **)XMLVector_Get(((LPXMLVECTOR)(vect)), ((int)(i)))))
/* e.g. _XMLVector_GetP(v, 0, FILE); expands to *((FILE**)XMLVector_Get(v, 0)) */
/* _XMLVector_GetIterP(v, iterP): sets iterP to the start of v->array and
evaluates to the address one past the last item
(array + length*itemSize), for pointer-based iteration over the items.
v is parenthesized before being cast, so that an argument expression is
cast as a whole rather than only its first operand. v is evaluated several
times and must not have side effects. */
#define _XMLVector_GetIterP(v, iterP) ( (iterP) = (void*)((LPXMLVECTOR)(v))->array, \
((LPXMLVECTOR)(v))->array + (((LPXMLVECTOR)(v))->length*((LPXMLVECTOR)(v))->itemSize) )
/* GetIterP returns pointer to past the end of v->array, param 2 sets pointer to start */
#ifdef __cplusplus
}
#endif /* __cplusplus */
#endif /* XMLVECTOR__H */