webdump

Unnamed repository; edit this file 'description' to name the repository.
git clone git://git.z3bra.org/webdump.git
Log | Files | Refs | README | LICENSE

commit dacc8c21011cdd6f6c9dc4ebd177478b2151a2c1
parent 114efd43e79a417abbda2e8c427d9dd57b482bce
Author: Hiltjo Posthuma <hiltjo@codemadness.org>
Date:   Sun, 20 Aug 2017 20:19:56 +0200

support th (hack), disable ignore tags for now (requires parser change)

Diffstat:
main.c | 30 +++++++++++++++++++-----------
1 file changed, 19 insertions(+), 11 deletions(-)

diff --git a/main.c b/main.c @@ -19,7 +19,7 @@ static XMLParser parser; struct node { char tag[256]; - int isignore; +/* int isignore;*/ int ispre; int isinline; int isblock; @@ -61,6 +61,7 @@ static char *inlinetags[] = { "span", "img", "td", + "th", }; static char *blocktags[] = { @@ -178,7 +179,7 @@ xmldataend(XMLParser *p) if (cur->ispre) { fwrite(s, 1, e - s, stdout); } else { -#if 0 +#if 1 for (; s < e; s++) { if (isspace(*s)) { if (s != start && !isspace(s[-1])) @@ -190,7 +191,7 @@ xmldataend(XMLParser *p) if (s != start && e != start && !isspace(s[-1]) && isspace(e[-1])) putchar(' '); #endif - printf("DEBUG: |%s|\n", start); +/* printf("DEBUG: |%s|\n", start);*/ } string_clear(&htmldata); @@ -203,8 +204,8 @@ xmldata(XMLParser *p, const char *data, size_t datalen) cur = &nodes[curnode]; string_append(&htmldata, data, datalen); - if (cur->isignore) - return; +/* if (cur->isignore) + return;*/ } static void @@ -232,12 +233,14 @@ xmltagstart(XMLParser *p, const char *tag, size_t taglen) src[0] = '\0'; /* src, href */ strlcpy(cur->tag, tag, sizeof(cur->tag)); +#if 0 for (i = 0; i < sizeof(ignoretags) / sizeof(*ignoretags); i++) { if (!strcmp(ignoretags[i], tag)) { cur->isignore = 1; break; } } +#endif for (i = 0; i < sizeof(pretags) / sizeof(*pretags); i++) { if (!strcmp(pretags[i], tag)) { cur->ispre = 1; @@ -267,8 +270,8 @@ xmltagend(XMLParser *p, const char *tag, size_t taglen, int isshort) if (curnode) curnode--; cur = &nodes[curnode]; - if (cur->isignore) - return; +/* if (cur->isignore) + return;*/ #if 0 if (src[0]) @@ -300,13 +303,13 @@ xmltagstartparsed(XMLParser *p, const char *tag, size_t taglen, int isshort) int i; cur = &nodes[curnode]; - if (cur->isignore) - return; +/* if (cur->isignore) + return;*/ if (cur->isblock) fputs("\n", stdout); - if (!strcmp(tag, "td")) + if (!strcmp(tag, "td") || !strcmp(tag, "th")) fputs(" | ", stdout); /* HACK */ if (!strcmp(cur->tag, "li")) { @@ -343,11 +346,13 @@ xmlattr(XMLParser *p, const char *tag, size_t taglen, 
const char *name, strlcpy(src, value, sizeof(src)); } -static size_t read_offset; +/*static size_t readoffset;*/ int readchar(void) { + return getchar(); +#if 0 size_t i, j; int c; @@ -366,6 +371,7 @@ readchar(void) return preprocess.data[read_offset++]; } return EOF; +#endif } /* TODO: preprocess data, strip <script>, <style> etc */ @@ -379,6 +385,7 @@ main(void) if (pledge("stdio", NULL) < 0) err(1, "pledge"); +#if 0 /* TODO: optimize later */ while (1) { /* TODO: check read error */ @@ -388,6 +395,7 @@ main(void) buf[n] = '\0'; string_append(&preprocess, buf, n); } +#endif parser.xmlattr = xmlattr; parser.xmlcdata = xmlcdata;