/* name: one name start character followed by zero or more name characters */
name {name_start_character}{name_character}*
/* number: one or more digits */
number {Digit}+
/* number_token: one digit followed by zero or more name characters */
number_token {Digit}{name_character}*
/* name_token: one or more name characters (first character unrestricted) */
name_token {name_character}+
/* 6.2.1 Space */
/*
* s: exactly one separator character (SPACE, RE, RS or SEPCHAR).
* The alternation is parenthesized, like ps and ws below, so that {s}
* stays a single unit even when definitions are substituted textually
* (flex -l, traditional lex) instead of being grouped automatically.
*/
s ({SPACE}|{RE}|{RS}|{SEPCHAR})
/* ps: one or more separator characters */
ps ({SPACE}|{RE}|{RS}|{SEPCHAR})+
/* trailing white space */
/* ws: zero or more separator characters; may match the empty string */
ws ({SPACE}|{RE}|{RS}|{SEPCHAR})*
/* 9.4.5 Reference End */
/* reference_end: matches either REFC or RE */
reference_end ({REFC}|{RE})
/*
* 10.1.2 Parameter Literal
* 7.9.3 Attribute Value Literal
* (we leave recognition of character references and entity references,
* and whitespace compression to further processing)
*
* @# should split this into minimum literal, parameter literal,
* @# and attribute value literal.
*
* A literal is either {LIT} ... {LIT} or {LITA} ... {LITA}.  The body is
* any run of characters other than the double quote (first form) or the
* single quote (second form), so it ends at the first such quote and may
* contain newlines.  The negated classes hard-code those two quote
* characters; NOTE(review): this assumes {LIT} and {LITA} (defined
* outside this excerpt) are the double and single quote -- confirm.
*
* The whole alternation is parenthesized so that {literal} stays a
* single unit even when definitions are substituted textually
* (flex -l, traditional lex) instead of being grouped automatically.
*/
literal (({LIT}[^\"]*{LIT})|({LITA}[^\']*{LITA}))
/* 9.6.1 Recognition modes */
/*
* Recognition modes are represented here by start conditions.
* The default start condition, INITIAL, represents the
* CON recognition mode. This condition is used to detect markup
* while parsing normal data characters (mixed content).
*
* The CDATA start condition represents the CON recognition
* mode with the restriction that only end-tags are recognized,
* as in elements with CDATA declared content.
* (@# no way to activate it yet: need hook to parser.)
*
* The TAG recognition mode is split into two start conditions:
* ATTR, for recognizing attribute value list sub-tokens in
* start-tags, and TAG for recognizing the TAGC (">") delimiter
* in end-tags.
*
* The MD start condition is used in markup declarations. The COM
* start condition is used for comment declarations.
*
* The DS condition is an approximation of the declaration subset
* recognition mode in SGML. As we only use this condition after signalling
* an error, it is merely a recovery device.
*
* The CXT, LIT, PI, and REF recognition modes are not separated out
* as start conditions, but handled within the rules of other start
* conditions. The GRP mode is not represented here.
*/
/*
* Start-condition declarations.  Each %x line declares an exclusive
* start condition: while it is active, only rules explicitly prefixed
* with that condition are eligible to match.
* NOTE(review): ATTRVAL, NETDATA and ENDTAG are not described in the
* recognition-mode overview preceding these declarations -- confirm
* their roles against the rules that use them.
*/
/* EXCERPT ACTIONS: START */
/* %x CON == INITIAL */
%x CDATA
%x TAG
%x ATTR
%x ATTRVAL
%x NETDATA
%x ENDTAG
/* this is only to be permissive with bad end-tags: */
%x JUNKTAG
%x MD
%x COM
%x DS
/* EXCERPT ACTIONS: STOP */
%%
%{
  /*
   * Variables local to the scanning routine.  Text placed before the
   * first rule is copied into yylex() only when it is indented or
   * enclosed in %{ ... %}; the explicit delimiters make that independent
   * of leading whitespace.  Being locals with initializers, they are
   * reset on every call to yylex().
   *
   * NOTE(review): presumably parallel per-token arrays (type, text,
   * length) plus their element count, maintained by the macros named
   * below -- confirm in sgml_lex.c.
   */
  int *types = NULL;
  char **strings = NULL;
  size_t *lengths = NULL;
  int qty = 0;

  /*
   * See sgml_lex.c for description of
   * ADD, CALLBACK, ERROR, TOK macros.
   */
%}