webdump

Unnamed repository; edit this file 'description' to name the repository.
git clone git://git.z3bra.org/webdump.git
Log | Files | Refs | README | LICENSE

commit b0fd3fce528a98b283ee135d2a09da04191223c3
parent 0a87ef4d7cdee5b3b0fc5b5430edd21eb9dba8d4
Author: Hiltjo Posthuma <hiltjo@codemadness.org>
Date:   Sat, 26 Aug 2017 15:33:55 +0200

improve ignore tag handling, cleanup a bit

Diffstat:
main.c | 71 ++++++++++++++++++++++++++++++++---------------------------------------
1 file changed, 32 insertions(+), 39 deletions(-)

diff --git a/main.c b/main.c
@@ -217,9 +217,8 @@ xmldataentity(XMLParser *p, const char *data, size_t datalen)
 static void
 xmltagstart(XMLParser *x, const char *t, size_t tl)
 {
-	char tmp[64];
 	struct node *cur;
-	int c, i;
+	int i;
 
 	cur = &nodes[curnode];
 	memset(cur, 0, sizeof(*cur));
@@ -244,38 +243,10 @@ xmltagstart(XMLParser *x, const char *t, size_t tl)
 			break;
 		}
 	}
-
-	/* HACK: ignored tag is parsed, hook into reader and read raw data
-	   until literal end tag (without using the normal parser).
-	   process (buffered) as xml[c]data (no entity) */
-	if (strcasecmp(t, "script") && strcasecmp(t, "style"))
-		return;
-
-startignore:
-	while ((c = x->getnext()) != EOF) {
-		if (c == '<')
-			break;
-	}
-	if (c == EOF)
-		return;
-	if ((c = x->getnext()) != '/')
-		goto startignore;
-	for (i = 0; (c = x->getnext()) != EOF; i++) {
-		if (c == '>')
-			break;
-		if (i + 1 >= sizeof(tmp))
-			goto startignore;
-		tmp[i] = c;
-	}
-	tmp[i] = '\0';
-
-	/* compare against current ignored tag */
-	if (strcasecmp(t, tmp))
-		goto startignore;
 }
 
 static void
-xmltagend(XMLParser *p, const char *tag, size_t taglen, int isshort)
+xmltagend(XMLParser *p, const char *t, size_t tl, int isshort)
 {
 	struct node *cur;
 	int i;
@@ -290,38 +261,60 @@ xmltagend(XMLParser *p, const char *tag, size_t taglen, int isshort)
 	src[0] = '\0';
 #endif
 
-	if (!strcasecmp(tag, "tr"))
+	if (!strcasecmp(t, "tr"))
 		fputs(" | ", stdout); /* HACK */
 
 	if (cur->isblock)
 		fputs("\n", stdout);
 
-	if (taglen == 2 && tag[0] == 'h' && tag[1] >= '1' && tag[1] <= '6') {
-		if (tag[1] >= '3')
+	if (tl == 2 && t[0] == 'h' && t[1] >= '1' && t[1] <= '6') {
+		if (t[1] >= '3')
 			for (i = 0; i < 36; i++)
 				putchar('-');
-		else if (tag[1] >= '1')
+		else if (t[1] >= '1')
 			for (i = 0; i < 36; i++)
 				putchar('=');
 		putchar('\n');
 	}
 }
 
+static char ignoretag[8];
+static XMLParser xo; /* old context */
+
 static void
-xmltagstartparsed(XMLParser *p, const char *tag, size_t taglen, int isshort)
+xmlignoretagend(XMLParser *p, const char *t, size_t tl, int isshort)
+{
+	if (!strcasecmp(t, ignoretag))
+		memcpy(p, &xo, sizeof(*p)); /* restore context */
+}
+
+static void
+xmltagstartparsed(XMLParser *p, const char *t, size_t tl, int isshort)
 {
 	struct node *cur;
 	int i;
 
+	/* temporary replace the callback except the reader and end of tag
+	   restore the context once we receive the same ignored tag in the
+	   end tag handler */
+	if (!strcasecmp(t, "script") || !strcasecmp(t, "style")) {
+		strlcpy(ignoretag, t, sizeof(ignoretag));
+		memcpy(&xo, p, sizeof(xo)); /* store old context */
+		memset(p, 0, sizeof(*p));
+		p->xmltagend = xmlignoretagend;
+		p->getnext = xo.getnext;
+		return;
+	}
+
 	cur = &nodes[curnode];
 
 	if (cur->isblock)
 		fputs("\n", stdout);
 
-	if (!strcasecmp(tag, "td") || !strcasecmp(tag, "th"))
+	if (!strcasecmp(t, "td") || !strcasecmp(t, "th"))
 		fputs(" | ", stdout); /* HACK */
 
-	if (!strcasecmp(cur->tag, "li")) {
+	if (!strcasecmp(t, "li")) {
 		/* indent nested list items */
 		for (i = curnode; i; i--) {
 			if (!strcasecmp(nodes[i].tag, "li"))
@@ -333,7 +326,7 @@ xmltagstartparsed(XMLParser *p, const char *tag, size_t taglen, int isshort)
 		/* TODO: for <ol>, keep list counter on ol element (parent),
 		   support ordered number type only */
 		fputs("* ", stdout);
-	} else if (!strcasecmp(cur->tag, "hr")) {
+	} else if (!strcasecmp(t, "hr")) {
 		for (i = 0; i < 36; i++)
 			putchar('-');
 	}