webdump

Unnamed repository; edit this file 'description' to name the repository.
git clone git://git.z3bra.org/webdump.git
Log | Files | Refs | README | LICENSE

commit db8bb6f1a0e1ca29e8cdfd8a6c098fc1076fea80
parent 6437b1c9d5dd27a1e29e10bda42264127383281e
Author: Hiltjo Posthuma <hiltjo@codemadness.org>
Date:   Sat, 22 Jul 2017 15:29:22 +0200

simplify pre-like tags parsing, support some basic entities

remove comment handling

Diffstat:
main.c | 105++++++++++++++++++++++++++++---------------------------------------------------
1 file changed, 37 insertions(+), 68 deletions(-)

diff --git a/main.c b/main.c
@@ -27,6 +27,11 @@ struct node {
 static struct node nodes[MAX_DEPTH];
 static int curnode;
 
+static char *pretags[] = {
+	"pre",
+	"code",
+};
+
 static char *inlinetags[] = {
 	"b",
 	"i",
@@ -36,6 +41,7 @@ static char *inlinetags[] = {
 	"a",
 	"span",
 	"img",
+	"td",
 };
 
 static char *blocktags[] = {
@@ -57,35 +63,9 @@ static char *blocktags[] = {
 };
 
 static void
-printindent(int count)
-{
-/*	while (count--)
-		putchar('\t');*/
-}
-
-static void
-xmlcommentstart(XMLParser *p)
-{
-	/*printf("<!--");*/
-}
-
-static void
-xmlcomment(XMLParser *p, const char *data, size_t datalen)
-{
-	/*printf("%s", data);*/
-}
-
-static void
-xmlcommentend(XMLParser *p)
-{
-	/*printf("-->");*/
-}
-
-static void
 xmlcdatastart(XMLParser *p)
 {
 	iscdatastart = 1;
-/*	printf("<![CDATA[");*/
 }
 
 static void
@@ -99,7 +79,6 @@ xmlcdata(XMLParser *p, const char *data, size_t datalen)
 static void
 xmlcdataend(XMLParser *p)
 {
-/*	printf("]]>");*/
 	iscdatastart = 0;
 }
 
@@ -152,10 +131,16 @@ xmldata(XMLParser *p, const char *data, size_t datalen)
 static void
 xmldataentity(XMLParser *p, const char *data, size_t datalen)
 {
-	/* TODO: convert HTML entity */
-	/*printf("%s", data);*/
-
-	xmldata(p, data, datalen);
+	char buf[16];
+	int n;
+
+	/* convert basic XML entities */
+	/* TODO: support some more HTML entities */
+	n = xml_entitytostr(data, buf, sizeof(buf));
+	if (n <= 0)
+		xmldata(p, data, datalen);
+	else
+		fputs(buf, stdout);
 }
 
 static void
@@ -167,27 +152,24 @@ xmltagstart(XMLParser *p, const char *tag, size_t taglen)
 	memset(cur, 0, sizeof(*cur));
 	strlcpy(cur->tag, tag, sizeof(cur->tag));
 
-	if (!strcmp(tag, "pre")) {
-		cur->ispre = 1;
-	} else {
-		for (i = 0; i < sizeof(blocktags) / sizeof(*blocktags); i++) {
-			if (!strcmp(blocktags[i], tag)) {
-				cur->isblock = 1;
-				break;
-			}
+	for (i = 0; i < sizeof(pretags) / sizeof(*pretags); i++) {
+		if (!strcmp(pretags[i], tag)) {
+			cur->ispre = 1;
+			break;
 		}
-		for (i = 0; i < sizeof(inlinetags) / sizeof(*inlinetags); i++) {
-			if (!strcmp(inlinetags[i], tag)) {
-				cur->isinline = 1;
-				break;
-			}
+	}
+	for (i = 0; i < sizeof(blocktags) / sizeof(*blocktags); i++) {
+		if (!strcmp(blocktags[i], tag)) {
+			cur->isblock = 1;
+			break;
+		}
+	}
+	for (i = 0; i < sizeof(inlinetags) / sizeof(*inlinetags); i++) {
+		if (!strcmp(inlinetags[i], tag)) {
+			cur->isinline = 1;
+			break;
 		}
 	}
-
-	if (!cur->isinline)
-		printindent(curnode);
-
-/*	printf("<%s", tag);*/
 }
 
 static void
@@ -199,15 +181,7 @@ xmltagend(XMLParser *p, const char *tag, size_t taglen, int isshort)
 	if (curnode)
 		curnode--;
 
-	if (isshort) {
-/*		printf("/>");*/
-		return;
-	}
 	cur = &nodes[curnode];
-	if (!cur->isinline)
-		printindent(curnode);
-/*	printf("</%s>", tag);*/
-
 	if (cur->isblock)
 		fputs("\n", stdout);
 
@@ -238,22 +212,20 @@ xmltagstartparsed(XMLParser *p, const char *tag, size_t taglen, int isshort)
 			if (!strcmp(nodes[i].tag, "li"))
 				continue;
 			if (!strcmp(nodes[i].tag, "ul") ||
-			    !strcmp(nodes[i].tag, "ol")) {
+			    !strcmp(nodes[i].tag, "ol"))
 				fputs(" ", stdout);
-			}
 		}
+		/* TODO: for <ol>, keep list counter on ol element (parent),
+		   support ordered number type only */
 		fputs("* ", stdout);
 	} else if (!strcmp(cur->tag, "hr")) {
 		for (i = 0; i < 36; i++)
 			putchar('-');
 	}
 
-	if (isshort)
-		return;
-
+	if (curnode >= MAX_DEPTH - 2)
+		errx(1, "max depth reached: %d\n", curnode);
 	curnode++;
-
-/*	printf(">");*/
 }
 
 static void
@@ -272,7 +244,7 @@ xmlattr(XMLParser *p, const char *tag, size_t taglen, const char *name,
 }
 
 int
-main(int argc, char *argv[])
+main(void)
 {
 	if (pledge("stdio", NULL) < 0)
 		err(1, "pledge");
@@ -281,9 +253,6 @@ main(int argc, char *argv[])
 	parser.xmlcdatastart = xmlcdatastart;
 	parser.xmlcdata = xmlcdata;
 	parser.xmlcdataend = xmlcdataend;
-	parser.xmlcommentstart = xmlcommentstart;
-	parser.xmlcomment = xmlcomment;
-	parser.xmlcommentend = xmlcommentend;
 	parser.xmldatastart = xmldatastart;
 	parser.xmldata = xmldata;
 	parser.xmldataend = xmldataend;