Import new CSS parser from fishman-ctags

Some highlights:
* Fixes handling of comments
* Adds support for attribute and namespace selectors
* Adds support for @supports blocks
* Fixes tag type for many selectors
* Adds support for pseudo-classes with arguments
This commit is contained in:
Colomban Wendling 2014-11-11 02:01:41 +01:00
parent 2072797283
commit f765463af0
9 changed files with 269 additions and 253 deletions

View File

@ -1,224 +1,259 @@
/*************************************************************************** /***************************************************************************
* css.c * css.c
* Character-based parser for Css definitions * Token-based parser for CSS definitions
* Author - Iago Rubio <iagorubio(at)users.sourceforge.net> * Author - Colomban Wendling <colomban@geany.org>
* - Bronisław Białek <after89(at)gmail.com>
**************************************************************************/ **************************************************************************/
#include "general.h" #include "general.h"
#include <string.h> #include <string.h>
#include <ctype.h> #include <ctype.h>
#include "entry.h"
#include "parse.h" #include "parse.h"
#include "read.h" #include "read.h"
typedef enum eCssKinds { typedef enum eCssKinds {
K_NONE = -1, K_SELECTOR, K_ID, K_CLASS K_CLASS, K_SELECTOR, K_ID
} cssKind; } cssKind;
static kindOption CssKinds [] = { static kindOption CssKinds [] = {
{ TRUE, 'c', "class", "classes" },
{ TRUE, 's', "struct", "selectors" }, { TRUE, 's', "struct", "selectors" },
{ TRUE, 'v', "variable", "identities" }, { TRUE, 'v', "variable", "identities" }
{ TRUE, 'c', "class", "classes" }
}; };
typedef enum _CssParserState { /* state of parsing */ typedef enum {
P_STATE_NONE, /* default state */ /* any ASCII */
P_STATE_IN_COMMENT, /* into a comment, only multi line in CSS */ TOKEN_EOF = 257,
P_STATE_IN_SINGLE_STRING, /* into a single quoted string */ TOKEN_SELECTOR,
P_STATE_IN_DOUBLE_STRING, /* into a double quoted string */ TOKEN_STRING
P_STATE_IN_DEFINITION, /* on the body of the style definition, nothing for us */ } tokenType;
P_STATE_IN_MEDIA, /* on a @media declaration, can be multi-line */
P_STATE_IN_IMPORT, /* on a @import declaration, can be multi-line */
P_STATE_IN_NAMESPACE, /* on a @namespace declaration */
P_STATE_IN_PAGE, /* on a @page declaration */
P_STATE_IN_FONTFACE, /* on a @font-face declaration */
P_STATE_AT_END /* end of parsing */
} CssParserState;
static void makeCssSimpleTag( vString *name, cssKind kind, boolean delete ) typedef struct {
tokenType type;
vString *string;
} tokenInfo;
static boolean isSelectorChar (const int c)
{ {
vStringTerminate (name); /* attribute selectors are handled separately */
makeSimpleTag (name, CssKinds, kind); return (isalnum (c) ||
vStringClear (name); c == '_' || // allowed char
if( delete ) c == '-' || // allowed char
vStringDelete (name); c == '+' || // allow all sibling in a single tag
c == '>' || // allow all child in a single tag
c == '|' || // allow namespace separator
c == '(' || // allow pseudo-class arguments
c == ')' ||
c == '.' || // allow classes and selectors
c == ':' || // allow pseudo classes
c == '*' || // allow globs as P + *
c == '#'); // allow ids
} }
static boolean isCssDeclarationAllowedChar( const unsigned char *cp ) static void parseSelector (vString *const string, const int firstChar)
{ {
return isalnum ((int) *cp) || int c = firstChar;
isspace ((int) *cp) || do
*cp == '_' || /* allowed char */ {
*cp == '-' || /* allowed char */ vStringPut (string, (char) c);
*cp == '+' || /* allow all sibling in a single tag */ c = fileGetc ();
*cp == '>' || /* allow all child in a single tag */ } while (isSelectorChar (c));
*cp == '{' || /* allow the start of the declaration */ fileUngetc (c);
*cp == '.' || /* allow classes and selectors */ vStringTerminate (string);
*cp == ',' || /* allow multiple declarations */
*cp == ':' || /* allow pseudo classes */
*cp == '*' || /* allow globs as P + * */
*cp == '#'; /* allow ids */
} }
static CssParserState parseCssDeclaration( const unsigned char **position, cssKind kind, const char *aname) static void readToken (tokenInfo *const token)
{ {
const unsigned char *cp = *position; int c;
vString *name = vStringNew ();
vStringCopyS(name, aname);
/* pick to the end of line including children and sibling vStringClear (token->string);
* if declaration is multiline go for the next line */
while ( isCssDeclarationAllowedChar(cp) || getNextChar:
*cp == '\0' ) /* track the end of line into the loop */
c = fileGetc ();
while (isspace (c))
c = fileGetc ();
token->type = c;
switch (c)
{ {
if( *cp == ',' ) case EOF: token->type = TOKEN_EOF; break;
case '\'':
case '"':
{ {
makeCssSimpleTag(name, kind, TRUE); const int delimiter = c;
*position = cp; do
return P_STATE_NONE; {
vStringPut (token->string, c);
c = fileGetc ();
if (c == '\\')
c = fileGetc ();
} }
else if( *cp == '{' || *cp == '\0' ) while (c != EOF && c != delimiter);
{ /* assume that line end is the same as a starting definition (i.e. the { is on the next line */ if (c != EOF)
makeCssSimpleTag(name, kind, TRUE); vStringPut (token->string, c);
*position = cp; token->type = TOKEN_STRING;
return P_STATE_IN_DEFINITION; break;
} }
vStringPut (name, (int) *cp); case '/': /* maybe comment start */
++cp; {
int d = fileGetc ();
if (d != '*')
{
fileUngetc (d);
vStringPut (token->string, c);
token->type = c;
}
else
{
d = fileGetc ();
do
{
c = d;
d = fileGetc ();
}
while (d != EOF && ! (c == '*' && d == '/'));
goto getNextChar;
}
break;
} }
makeCssSimpleTag(name, kind, TRUE); default:
*position = cp; if (! isSelectorChar (c))
{
return P_STATE_NONE; vStringPut (token->string, c);
token->type = c;
}
else
{
parseSelector (token->string, c);
token->type = TOKEN_SELECTOR;
}
break;
}
} }
static CssParserState parseCssLine( const unsigned char *line, CssParserState state ) /* returns the kind of @p selector: K_CLASS or K_ID for the last '.' or '#' it contains, K_SELECTOR otherwise */
static cssKind classifySelector (const vString *const selector)
{ {
vString *aux; size_t i;
vString *stack = vStringNew ();
while( *line != '\0' ) /* fileReadLine returns NULL terminated strings */ for (i = vStringLength (selector); i > 0; --i)
{ {
vStringClear (stack); char c = vStringItem (selector, i - 1);
while (state == P_STATE_NONE && if (c == '.')
(isspace ((int) *line) || isalnum ((int) *line) || ( *line == '*' && *(line-1) != '/' ))) return K_CLASS;
{ else if (c == '#')
if ((stack->length > 0 && isspace((int) *line)) || isalnum ((int) *line) || *line == '*') { return K_ID;
vStringPut(stack, (int) *line);
} }
return K_SELECTOR;
++line;
}
vStringTerminate (stack);
switch( state )
{
case P_STATE_NONE:
if( *line == '.' ) /* a class */
state = parseCssDeclaration( &line, K_CLASS, vStringValue(stack) );
else if( *line == '#' ) /* an id */
state = parseCssDeclaration( &line, K_ID, vStringValue(stack) );
else if( *line == '@' ) /* at-rules, we'll ignore them */
{
++line;
aux = vStringNew();
while( !isspace((int) *line) )
{
vStringPut (aux, (int) *line);
++line;
}
vStringTerminate (aux);
if( strcmp( aux->buffer, "media" ) == 0 )
state = P_STATE_IN_MEDIA;
else if ( strcmp( aux->buffer, "import" ) == 0 )
state = P_STATE_IN_IMPORT;
else if ( strcmp( aux->buffer, "namespace" ) == 0 )
state = P_STATE_IN_NAMESPACE;
else if ( strcmp( aux->buffer, "page" ) == 0 )
state = P_STATE_IN_PAGE;
else if ( strcmp( aux->buffer, "font-face" ) == 0 )
state = P_STATE_IN_FONTFACE;
vStringDelete (aux);
}
else if( *line == '*' && *(line-1) == '/' ) /* multi-line comment */
state = P_STATE_IN_COMMENT;
else if ( stack->length > 0 )
state = parseCssDeclaration( &line, K_SELECTOR, vStringValue(stack) );
break;
case P_STATE_IN_COMMENT:
if( *line == '/' && *(line-1) == '*')
state = P_STATE_NONE;
break;
case P_STATE_IN_SINGLE_STRING:
if( *line == '\'' && *(line-1) != '\\' )
state = P_STATE_IN_DEFINITION; /* PAGE, FONTFACE and DEFINITION are treated the same way */
break;
case P_STATE_IN_DOUBLE_STRING:
if( *line=='"' && *(line-1) != '\\' )
state = P_STATE_IN_DEFINITION; /* PAGE, FONTFACE and DEFINITION are treated the same way */
break;
case P_STATE_IN_MEDIA:
/* skip to start of media body or line end */
while( *line != '{' )
{
if( *line == '\0' )
break;
++line;
}
if( *line == '{' )
state = P_STATE_NONE;
break;
case P_STATE_IN_IMPORT:
case P_STATE_IN_NAMESPACE:
/* skip to end of declaration or line end */
while( *line != ';' )
{
if( *line == '\0' )
break;
++line;
}
if( *line == ';' )
state = P_STATE_NONE;
break;
case P_STATE_IN_PAGE:
case P_STATE_IN_FONTFACE:
case P_STATE_IN_DEFINITION:
if( *line == '\0' )
line = fileReadLine ();
if( *line == '}' )
state = P_STATE_NONE;
else if( *line == '\'' )
state = P_STATE_IN_SINGLE_STRING;
else if( *line == '"' )
state = P_STATE_IN_DOUBLE_STRING;
break;
case P_STATE_AT_END:
return state;
break;
}
if (line == NULL) return P_STATE_AT_END;
line++;
}
vStringDelete (stack);
return state;
} }
static void findCssTags (void) static void findCssTags (void)
{ {
const unsigned char *line; boolean readNextToken = TRUE;
CssParserState state = P_STATE_NONE; tokenInfo token;
while ( (line = fileReadLine ()) != NULL ) token.string = vStringNew ();
do
{ {
state = parseCssLine( line, state ); if (readNextToken)
if( state==P_STATE_AT_END ) return; readToken (&token);
readNextToken = TRUE;
if (token.type == '@')
{ /* At-rules, from the "@" to the next block or semicolon */
boolean useContents;
readToken (&token);
useContents = (strcmp (vStringValue (token.string), "media") == 0 ||
strcmp (vStringValue (token.string), "supports") == 0);
while (token.type != TOKEN_EOF &&
token.type != ';' && token.type != '{')
{
readToken (&token);
} }
/* HACK: we *eat* the opening '{' for medias and the like so that
* the content is parsed as if it was at the root */
readNextToken = useContents && token.type == '{';
}
else if (token.type == TOKEN_SELECTOR)
{ /* collect selectors and make a tag */
cssKind kind = K_SELECTOR;
MIOPos filePosition;
unsigned long lineNumber;
vString *selector = vStringNew ();
do
{
if (vStringLength (selector) > 0)
vStringPut (selector, ' ');
vStringCat (selector, token.string);
kind = classifySelector (token.string);
lineNumber = getSourceLineNumber ();
filePosition = getInputFilePosition ();
readToken (&token);
/* handle attribute selectors */
if (token.type == '[')
{
int depth = 1;
while (depth > 0 && token.type != TOKEN_EOF)
{
vStringCat (selector, token.string);
readToken (&token);
if (token.type == '[')
depth++;
else if (token.type == ']')
depth--;
}
if (token.type != TOKEN_EOF)
vStringCat (selector, token.string);
readToken (&token);
}
}
while (token.type == TOKEN_SELECTOR);
/* we already consumed the next token, don't read it twice */
readNextToken = FALSE;
vStringTerminate (selector);
if (CssKinds[kind].enabled)
{
tagEntryInfo e;
initTagEntry (&e, vStringValue (selector));
e.lineNumber = lineNumber;
e.filePosition = filePosition;
e.kindName = CssKinds[kind].name;
e.kind = (char) CssKinds[kind].letter;
makeTagEntry (&e);
}
vStringDelete (selector);
}
else if (token.type == '{')
{ /* skip over { ... } */
int depth = 1;
while (depth > 0 && token.type != TOKEN_EOF)
{
readToken (&token);
if (token.type == '{')
depth++;
else if (token.type == '}')
depth--;
}
}
}
while (token.type != TOKEN_EOF);
vStringDelete (token.string);
} }
/* parser definition */ /* parser definition */
@ -232,3 +267,4 @@ extern parserDefinition* CssParser (void)
def->parser = findCssTags; def->parser = findCssTags;
return def; return def;
} }

View File

@ -1,3 +1,3 @@
# format=tagmanager # format=tagmanager
body Ì2048Ö0 bodyÌ2048Ö0
html Ì2048Ö0 htmlÌ2048Ö0

View File

@ -1,5 +1,2 @@
# format=tagmanager # format=tagmanager
html Ì2048Ö0 htmlÌ2048Ö0
or Ì2048Ö0
test-property2: 42Ì2048Ö0
test-property: 42Ì2048Ö0

View File

@ -1,12 +1,6 @@
# format=tagmanager # format=tagmanager
aÌ2048Ö0 a[href^="http://"]Ì2048Ö0
background-color: light-greenÌ2048Ö0 a[href^="https://"]Ì2048Ö0
body Ì2048Ö0 a[lang~=en]Ì2048Ö0
color: greenÌ2048Ö0 bodyÌ2048Ö0
color: redÌ2048Ö0 pÌ2048Ö0
enÌ2048Ö0
hrefÌ2048Ö0
http:Ì2048Ö0
https:Ì2048Ö0
langÌ2048Ö0
p Ì2048Ö0

View File

@ -1,5 +1,4 @@
# format=tagmanager # format=tagmanager
*Ì2048Ö0 *|divÌ2048Ö0
a Ì2048Ö0 svg|aÌ2048Ö0
div Ì2048Ö0 |aÌ2048Ö0
svgÌ2048Ö0

View File

@ -1,23 +1,12 @@
# format=tagmanager # format=tagmanager
0n+1Ì2048Ö0 a:lang(en):afterÌ2048Ö0
1Ì2048Ö0 body :not(span)Ì2048Ö0
10n+0Ì2048Ö0 span:not(:first-child)Ì2048Ö0
2nÌ2048Ö0 span:nth-child(-n+3)Ì2048Ö0
2n+1Ì2048Ö0 span:nth-child(0n+1)Ì2048Ö0
a:langÌ2048Ö0 span:nth-child(1)Ì2048Ö0
after Ì2048Ö0 tr:nth-child(10n+0)Ì2048Ö0
background-color: gray Ì2048Ö0 tr:nth-child(2n)Ì2048Ö0
background-color: light-gray Ì2048Ö0 tr:nth-child(2n+1)Ì2048Ö0
body :notÌ2048Ö0 tr:nth-child(even)Ì2048Ö0
color: greenÌ2048Ö0 tr:nth-child(odd)Ì2048Ö0
color: limeÌ2048Ö0
color: redÌ2048Ö0
enÌ2048Ö0
evenÌ2048Ö0
first-childÌ2048Ö0
n+3Ì2048Ö0
oddÌ2048Ö0
spanÌ2048Ö0
span:notÌ2048Ö0
span:nth-childÌ2048Ö0
tr:nth-childÌ2048Ö0

View File

@ -1,11 +1,11 @@
# format=tagmanager # format=tagmanager
#footer Ì16384Ö0 #footerÌ16384Ö0
* Ì2048Ö0 *Ì2048Ö0
.foo aÌ1Ö0 .foo aÌ2048Ö0
.foo b Ì1Ö0 .foo bÌ2048Ö0
.header Ì1Ö0 .headerÌ1Ö0
.red Ì1Ö0 .redÌ1Ö0
div.magic Ì1Ö0 div.magicÌ1Ö0
htmlÌ2048Ö0 htmlÌ2048Ö0
ul > li > a Ì2048Ö0 ul > li > aÌ2048Ö0
ul li Ì2048Ö0 ul liÌ2048Ö0

View File

@ -1,2 +1,3 @@
# format=tagmanager # format=tagmanager
aÌ2048Ö0 aÌ2048Ö0
bÌ2048Ö0

View File

@ -1,36 +1,36 @@
# format=tagmanager # format=tagmanager
#a #fooÌ16384Ö0 #a #fooÌ16384Ö0
#a #foo #barÌ16384Ö0 #a #foo #barÌ16384Ö0
#a #foo .barÌ16384Ö0 #a #foo .barÌ1Ö0
#a #foo barÌ16384Ö0 #a #foo barÌ2048Ö0
#a .fooÌ16384Ö0 #a .fooÌ1Ö0
#a .foo #barÌ16384Ö0 #a .foo #barÌ16384Ö0
#a .foo .barÌ16384Ö0 #a .foo .barÌ1Ö0
#a .foo barÌ16384Ö0 #a .foo barÌ2048Ö0
#a fooÌ16384Ö0 #a fooÌ2048Ö0
#a foo #barÌ16384Ö0 #a foo #barÌ16384Ö0
#a foo .barÌ16384Ö0 #a foo .barÌ1Ö0
#a foo barÌ16384Ö0 #a foo barÌ2048Ö0
.a #fooÌ1Ö0 .a #fooÌ16384Ö0
.a #foo #barÌ1Ö0 .a #foo #barÌ16384Ö0
.a #foo .barÌ1Ö0 .a #foo .barÌ1Ö0
.a #foo barÌ1Ö0 .a #foo barÌ2048Ö0
.a .fooÌ1Ö0 .a .fooÌ1Ö0
.a .foo #barÌ1Ö0 .a .foo #barÌ16384Ö0
.a .foo .barÌ1Ö0 .a .foo .barÌ1Ö0
.a .foo barÌ1Ö0 .a .foo barÌ2048Ö0
.a fooÌ1Ö0 .a fooÌ2048Ö0
.a foo #barÌ1Ö0 .a foo #barÌ16384Ö0
.a foo .barÌ1Ö0 .a foo .barÌ1Ö0
.a foo barÌ1Ö0 .a foo barÌ2048Ö0
a #fooÌ16384Ö0 a #fooÌ16384Ö0
a #foo #barÌ16384Ö0 a #foo #barÌ16384Ö0
a #foo .barÌ16384Ö0 a #foo .barÌ1Ö0
a #foo barÌ16384Ö0 a #foo barÌ2048Ö0
a .fooÌ1Ö0 a .fooÌ1Ö0
a .foo #barÌ1Ö0 a .foo #barÌ16384Ö0
a .foo .barÌ1Ö0 a .foo .barÌ1Ö0
a .foo barÌ1Ö0 a .foo barÌ2048Ö0
a fooÌ2048Ö0 a fooÌ2048Ö0
a foo #barÌ16384Ö0 a foo #barÌ16384Ö0
a foo .barÌ1Ö0 a foo .barÌ1Ö0