webdump

Unnamed repository; edit this file 'description' to name the repository.
git clone git://git.z3bra.org/webdump.git
Log | Files | Refs | README | LICENSE

commit 0a87ef4d7cdee5b3b0fc5b5430edd21eb9dba8d4
parent de7e902fa925618e4cfb24b044a18b5db2118b03
Author: Hiltjo Posthuma <hiltjo@codemadness.org>
Date:   Sat, 26 Aug 2017 12:49:21 +0200

simplify ignore tags parsing

Diffstat:
main.c | 94++++++++++++++++++++++++++++++++-----------------------------------------------
1 file changed, 38 insertions(+), 56 deletions(-)

diff --git a/main.c b/main.c @@ -19,7 +19,6 @@ static XMLParser parser; struct node { char tag[256]; - int isignore; int ispre; int isinline; int isblock; @@ -40,12 +39,6 @@ static char src[4096]; /* src or href attribute */ static struct node nodes[MAX_DEPTH]; static int curnode; -/* ignored tag, all text between this is interpreted literally and ignored */ -static char *ignoretags[] = { - "style", - "script", -}; - static char *pretags[] = { "pre", "code", @@ -200,12 +193,7 @@ xmldataend(XMLParser *p) static void xmldata(XMLParser *p, const char *data, size_t datalen) { - struct node *cur; - - cur = &nodes[curnode]; string_append(&htmldata, data, datalen); - if (cur->isignore) - return; } static void @@ -227,39 +215,63 @@ xmldataentity(XMLParser *p, const char *data, size_t datalen) } static void -xmltagstart(XMLParser *p, const char *tag, size_t taglen) +xmltagstart(XMLParser *x, const char *t, size_t tl) { - struct node *cur = &nodes[curnode]; - int i; + char tmp[64]; + struct node *cur; + int c, i; + cur = &nodes[curnode]; memset(cur, 0, sizeof(*cur)); src[0] = '\0'; /* src, href */ - strlcpy(cur->tag, tag, sizeof(cur->tag)); + strlcpy(cur->tag, t, sizeof(cur->tag)); - for (i = 0; i < sizeof(ignoretags) / sizeof(*ignoretags); i++) { - if (!strcasecmp(ignoretags[i], tag)) { - cur->isignore = 1; - break; - } - } for (i = 0; i < sizeof(pretags) / sizeof(*pretags); i++) { - if (!strcasecmp(pretags[i], tag)) { + if (!strcasecmp(pretags[i], t)) { cur->ispre = 1; break; } } for (i = 0; i < sizeof(blocktags) / sizeof(*blocktags); i++) { - if (!strcasecmp(blocktags[i], tag)) { + if (!strcasecmp(blocktags[i], t)) { cur->isblock = 1; break; } } for (i = 0; i < sizeof(inlinetags) / sizeof(*inlinetags); i++) { - if (!strcasecmp(inlinetags[i], tag)) { + if (!strcasecmp(inlinetags[i], t)) { cur->isinline = 1; break; } } + + /* HACK: ignored tag is parsed, hook into reader and read raw data + until literal end tag (without using the normal parser). + process (buffered) as xml[c]data (no entity) */ + if (strcasecmp(t, "script") && strcasecmp(t, "style")) + return; + +startignore: + while ((c = x->getnext()) != EOF) { + if (c == '<') + break; + } + if (c == EOF) + return; + if ((c = x->getnext()) != '/') + goto startignore; + for (i = 0; (c = x->getnext()) != EOF; i++) { + if (c == '>') + break; + if (i + 1 >= sizeof(tmp)) + goto startignore; + tmp[i] = c; + } + tmp[i] = '\0'; + + /* compare against current ignored tag */ + if (strcasecmp(t, tmp)) + goto startignore; } static void @@ -271,8 +283,6 @@ xmltagend(XMLParser *p, const char *tag, size_t taglen, int isshort) if (curnode) curnode--; cur = &nodes[curnode]; - if (cur->isignore) - return; #if 0 if (src[0]) @@ -301,37 +311,9 @@ static void xmltagstartparsed(XMLParser *p, const char *tag, size_t taglen, int isshort) { struct node *cur; - char tmp[64]; - int c, i; + int i; cur = &nodes[curnode]; - if (cur->isignore) { - /* HACK: ignored tag is parsed, hook into reader and read raw data - until literal end tag (without using the normal parser). - process (buffered) as xml[c]data (no entity) */ -startignore: - while ((c = p->getnext()) != EOF) { - if (c == '<') - break; - } - if (c == EOF) - return; - if ((c = p->getnext()) != '/') - goto startignore; - for (i = 0; (c = p->getnext()) != EOF; i++) { - if (c == '>') - break; - if (i + 1 >= sizeof(tmp)) - goto startignore; - tmp[i] = c; - } - tmp[i] = '\0'; - - /* compare against current ignored tag */ - if (!strcasecmp(tag, tmp)) - cur->isignore = 0; - return; - } if (cur->isblock) fputs("\n", stdout);