webdump

Unnamed repository; edit this file 'description' to name the repository.
git clone git://git.z3bra.org/webdump.git
Log | Files | Refs | README | LICENSE

commit 6437b1c9d5dd27a1e29e10bda42264127383281e
parent dcc69463abb4a70f95b6126629e5d6ab57e393e3
Author: Hiltjo Posthuma <hiltjo@codemadness.org>
Date:   Sat, 22 Jul 2017 15:14:02 +0200

simplify tag type matching, add nested list-item support...

... indicate page headers (will be improved).

Diffstat:
main.c | 90 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-----------
1 file changed, 78 insertions(+), 12 deletions(-)

diff --git a/main.c b/main.c @@ -27,6 +27,35 @@ struct node { static struct node nodes[MAX_DEPTH]; static int curnode; +static char *inlinetags[] = { + "b", + "i", + "u", + "strong", + "em", + "a", + "span", + "img", +}; + +static char *blocktags[] = { + "h1", + "h2", + "h3", + "h4", + "h5", + "h6", + "p", + "ul", + "lo", + "li", + "hr", + "br", + "title", + "tr", + "table", +}; + static void printindent(int count) { @@ -133,20 +162,27 @@ static void xmltagstart(XMLParser *p, const char *tag, size_t taglen) { struct node *cur = &nodes[curnode]; + int i; memset(cur, 0, sizeof(*cur)); strlcpy(cur->tag, tag, sizeof(cur->tag)); - if (!strcmp(tag, "pre")) + if (!strcmp(tag, "pre")) { cur->ispre = 1; - else if (tag[0] == 'h' && tag[1] >= '1' && tag[1] <= '6' && tag[2] == '\0' || - !strcmp(tag, "p") || !strcmp(tag, "ul") || !strcmp(tag, "ol") || - !strcmp(tag, "li") || !strcmp(tag, "hr") || - !strcmp(tag, "br") || !strcmp(tag, "title") || !strcmp(tag, "tr") || - !strcmp(tag, "table")) - cur->isblock = 1; - else if (!strcmp(tag, "a") || !strcmp(tag, "span") || !strcmp(tag, "img")) - cur->isinline = 1; + } else { + for (i = 0; i < sizeof(blocktags) / sizeof(*blocktags); i++) { + if (!strcmp(blocktags[i], tag)) { + cur->isblock = 1; + break; + } + } + for (i = 0; i < sizeof(inlinetags) / sizeof(*inlinetags); i++) { + if (!strcmp(inlinetags[i], tag)) { + cur->isinline = 1; + break; + } + } + } if (!cur->isinline) printindent(curnode); @@ -158,6 +194,7 @@ static void xmltagend(XMLParser *p, const char *tag, size_t taglen, int isshort) { struct node *cur; + int i; if (curnode) curnode--; @@ -170,22 +207,46 @@ xmltagend(XMLParser *p, const char *tag, size_t taglen, int isshort) if (!cur->isinline) printindent(curnode); /* printf("</%s>", tag);*/ + + if (cur->isblock) fputs("\n", stdout); + + if (taglen == 2 && tag[0] == 'h' && tag[1] >= '1' && tag[1] <= '6') { + if (tag[1] >= '3') + for (i = 0; i < 36; i++) + putchar('-'); + else if (tag[1] >= '1') + for (i = 0; i < 36; i++) + 
putchar('='); + putchar('\n'); + } } static void xmltagstartparsed(XMLParser *p, const char *tag, size_t taglen, int isshort) { struct node *cur; + int i; cur = &nodes[curnode]; if (cur->isblock) putchar('\n'); - if (!strcmp(cur->tag, "li")) + if (!strcmp(cur->tag, "li")) { + /* indent nested list items */ + for (i = curnode; i; i--) { + if (!strcmp(nodes[i].tag, "li")) + continue; + if (!strcmp(nodes[i].tag, "ul") || + !strcmp(nodes[i].tag, "ol")) { + fputs(" ", stdout); + } + } fputs("* ", stdout); - else if (!strcmp(cur->tag, "hr")) - fputs("----------", stdout); + } else if (!strcmp(cur->tag, "hr")) { + for (i = 0; i < 36; i++) + putchar('-'); + } if (isshort) return; @@ -202,6 +263,11 @@ xmlattr(XMLParser *p, const char *tag, size_t taglen, const char *name, /* if (!strcmp(tag, "a") && !strcmp(name, "href") && valuelen) printf(" [%s]", value);*/ + /* TODO: check alt and title attr also? */ +/* if (!strcmp(tag, "img") && !strcmp(name, "src") && valuelen) { + printf(" [%s]", value); + }*/ + /* printf(" %s=\"%s\"", name, value);*/ }