webdump

Unnamed repository; edit this file 'description' to name the repository.
git clone git://git.z3bra.org/webdump.git
Log | Files | Refs | README | LICENSE

commit ac91a742d386618a025609433e3e43b303272b3e
parent 54f38abd3722c07e900820343e7c5288c6b0fdce
Author: Hiltjo Posthuma <hiltjo@codemadness.org>
Date:   Thu, 24 Aug 2017 17:17:59 +0200

initial patch to ignore <script> and <style>

Diffstat:
main.c | 43 +++++++++++++++++++++++--------------------
1 file changed, 23 insertions(+), 20 deletions(-)

diff --git a/main.c b/main.c
@@ -40,7 +40,7 @@ static char src[4096]; /* src or href attribute */
 static struct node nodes[MAX_DEPTH];
 static int curnode;
 
-/* TODO: support literal text in script somehow? < > */
+/* ignored tag, all text between this is interpreted literally and ignored */
 static char *ignoretags[] = {
 	"style",
 	"script",
@@ -232,14 +232,12 @@ xmltagstart(XMLParser *p, const char *tag, size_t taglen)
 	src[0] = '\0'; /* src, href */
 	strlcpy(cur->tag, tag, sizeof(cur->tag));
 
-#if 1
 	for (i = 0; i < sizeof(ignoretags) / sizeof(*ignoretags); i++) {
 		if (!strcasecmp(ignoretags[i], tag)) {
 			cur->isignore = 1;
 			break;
 		}
 	}
-#endif
 	for (i = 0; i < sizeof(pretags) / sizeof(*pretags); i++) {
 		if (!strcasecmp(pretags[i], tag)) {
 			cur->ispre = 1;
@@ -299,25 +297,36 @@ static void
 xmltagstartparsed(XMLParser *p, const char *tag, size_t taglen, int isshort)
 {
 	struct node *cur;
+	char tmp[64];
 	int c, i;
 
 	cur = &nodes[curnode];
 	if (cur->isignore) {
-#if 0
 		/* HACK: ignored tag is parsed, hook into reader and read raw data
-		   until literal end tag (without using the normal parser). */
-
-		/* TODO: process (buffered) as xml[c]data (no entity) */
-		while ((c = getchar()) != EOF) {
-			if (c == '<') {
-				/* TODO: check /endtag */
+		   until literal end tag (without using the normal parser).
+		   process (buffered) as xml[c]data (no entity) */
+startignore:
+		while ((c = p->getnext()) != EOF) {
+			if (c == '<')
 				break;
-			}
 		}
-		if (c == EOF) {
+		if (c == EOF)
+			return;
+		if ((c = p->getnext()) != '/')
+			goto startignore;
+		i = 0;
+		while ((c = p->getnext()) != EOF) {
+			if (c == '>')
+				break;
+			if (i + 1 >= sizeof(tmp))
+				goto startignore;
+			tmp[i++] = c;
 		}
+		tmp[i] = '\0';
 
-#endif
+		/* compare against current ignored tag */
+		if (!strcasecmp(cur->tag, tmp))
+			cur->isignore = 0;
 		return;
 	}
 
@@ -363,12 +372,6 @@ xmlattr(XMLParser *p, const char *tag, size_t taglen, const char *name,
 }
 
 int
-readchar(void)
-{
-	return getchar();
-}
-
-int
 main(void)
 {
 	if (pledge("stdio", NULL) < 0)
@@ -383,7 +386,7 @@ main(void)
 	parser.xmltagend = xmltagend;
 	parser.xmltagstartparsed = xmltagstartparsed;
 
-	parser.getnext = readchar;
+	parser.getnext = getchar;
 	xml_parse(&parser);
 
 	putchar('\n');