webdump

Unnamed repository; edit this file 'description' to name the repository.
git clone git://git.z3bra.org/webdump.git
Log | Files | Refs | README | LICENSE

commit d87d026a246edadd201b607c15881172ac2564f1
parent 9ac2648a64f0b2d125da2a39ed8e8f4ff2e234b4
Author: Hiltjo Posthuma <hiltjo@codemadness.org>
Date:   Sat, 21 Sep 2019 15:41:34 +0200

refactor display type handling

Diffstat:
main.c | 130 +++++++++++++++++++++++++++++++++++++++----------------------------------------
1 file changed, 64 insertions(+), 66 deletions(-)

diff --git a/main.c b/main.c
@@ -32,11 +32,18 @@ static struct linkref *links_cur;
 static int linkcount;
 #endif
 
+enum DisplayType {
+	DisplayInline = 1,
+	DisplayPre = 2,
+	DisplayInlineBlock = 4,
+	DisplayBlock = 8,
+	DisplayListItem = 16,
+	DisplayTableCell = 32,
+};
+
 struct node {
 	char tag[256];
-	int ispre;
-	int isinline;
-	int isblock;
+	enum DisplayType displaytype;
 };
 
 typedef struct node Node;
@@ -63,43 +70,45 @@ static int curnode;
    atleast: inline-block, inline, block, pre */
 static int ignoredata;
 
-static char *pretags[] = {
-	"pre",
-	"code",
-};
-
-static char *inlinetags[] = {
-	"b",
-	"i",
-	"u",
-	"strong",
-	"em",
-	"a",
-	"span",
-	"img",
-	"td",
-	"th",
-	"label",
-};
-
-static char *blocktags[] = {
-	"h1",
-	"h2",
-	"h3",
-	"h4",
-	"h5",
-	"h6",
-	"p",
-	"ul",
-	"lo",
-	"li",
-	"hr",
-	"br",
-	"title",
-	"tr",
-	"table",
-	"blockquote",
-	"div",
+static struct {
+	char *tag;
+	enum DisplayType displaytype;
+} tags[] = {
+	/* pre */
+	{ "pre", DisplayPre },
+	{ "code", DisplayPre },
+	/* inline */
+	{ "b", DisplayInline },
+	{ "i", DisplayInline },
+	{ "u", DisplayInline },
+	{ "strong", DisplayInline },
+	{ "em", DisplayInline },
+	{ "a", DisplayInline },
+	{ "span", DisplayInline },
+	{ "img", DisplayInline },
+	{ "label", DisplayInline },
+	/* table-cell */
+	{ "td", DisplayTableCell },
+	{ "th", DisplayTableCell },
+	/* list-item */
+	{ "li", DisplayListItem },
+	/* block */
+	{ "h1", DisplayBlock },
+	{ "h2", DisplayBlock },
+	{ "h3", DisplayBlock },
+	{ "h4", DisplayBlock },
+	{ "h5", DisplayBlock },
+	{ "h6", DisplayBlock },
+	{ "p", DisplayBlock },
+	{ "ul", DisplayBlock },
+	{ "lo", DisplayBlock },
+	{ "hr", DisplayBlock },
+	{ "br", DisplayBlock },
+	{ "title", DisplayBlock },
+	{ "tr", DisplayBlock },
+	{ "table", DisplayBlock },
+	{ "blockquote", DisplayBlock },
+	{ "div", DisplayBlock },
 };
 
 static String htmldata;
@@ -389,6 +398,7 @@ xmldataend(XMLParser *p)
 	s = start;
 	e = s + strlen(s);
 #else
+	/* TODO: white-space handling */
 	for (s = start; *s; s++) {
 		if (*s != '\r' && *s != '\n')
 			break;
@@ -400,10 +410,9 @@ xmldataend(XMLParser *p)
 	}
 #endif
 
-	if (cur->ispre) {
+	if (cur->displaytype & DisplayPre) {
 		fwrite(s, 1, e - s, stdout);
 	} else {
-#if 1
 		for (; s < e; s++) {
 			if (isspace((unsigned char)*s)) {
 				if (s != start && !isspace((unsigned char)s[-1]))
@@ -416,8 +425,6 @@ xmldataend(XMLParser *p)
 		if (s != start && e != start &&
 		    !isspace((unsigned char)s[-1]) && isspace((unsigned char)e[-1]))
 			putchar(' ');
-#endif
-/*		printf("DEBUG: |%s|\n", start);*/
 	}
 
 	string_clear(&htmldata);
@@ -469,21 +476,9 @@ xmltagstart(XMLParser *x, const char *t, size_t tl)
 	else if (!strcasecmp(t, "td") || !strcasecmp(t, "th"))
 		ignoredata = 0;
 
-	for (i = 0; i < sizeof(pretags) / sizeof(*pretags); i++) {
-		if (!strcasecmp(pretags[i], t)) {
-			cur->ispre = 1;
-			break;
-		}
-	}
-	for (i = 0; i < sizeof(blocktags) / sizeof(*blocktags); i++) {
-		if (!strcasecmp(blocktags[i], t)) {
-			cur->isblock = 1;
-			break;
-		}
-	}
-	for (i = 0; i < sizeof(inlinetags) / sizeof(*inlinetags); i++) {
-		if (!strcasecmp(inlinetags[i], t)) {
-			cur->isinline = 1;
+	for (i = 0; i < sizeof(tags) / sizeof(*tags); i++) {
+		if (!strcasecmp(tags[i].tag, t)) {
+			cur->displaytype |= tags[i].displaytype;
 			break;
 		}
 	}
@@ -507,7 +502,7 @@ xmltagend(XMLParser *p, const char *t, size_t tl, int isshort)
 		ignoredata = 0;
 	}
 
-	if (cur->isblock)
+	if (cur->displaytype & DisplayBlock)
 		fputs("\n", stdout);
 
 	if (tl == 2 && t[0] == 'h' && t[1] >= '1' && t[1] <= '6') {
@@ -546,6 +541,8 @@ xmltagstartparsed(XMLParser *p, const char *t, size_t tl, int isshort)
 
 	cur = &nodes[curnode];
 
+	if (cur->displaytype & DisplayBlock)
+		fputs("\n", stdout);
 #if 0
 	/* show links as reference at the bottom */
 	if (src[0]) {
@@ -569,15 +566,16 @@ xmltagstartparsed(XMLParser *p, const char *t, size_t tl, int isshort)
 		char absurl[1024];
 		if (absuri(absurl, sizeof(absurl), src, basehref) != -1) {
 			if (!strcasecmp(t, "img") || !strcasecmp(t, "video") ||
-			    !strcasecmp(t, "audio")) {
-				printf("[%s](%s) ", t, absurl);
-			} else {
-				printf("[%s](%s) ", "link", absurl);
-			}
+			    !strcasecmp(t, "audio"))
+				printf("[%s](", t);
+			else
+				printf("[%s](", "link");
+			printsafe(absurl);
+			putchar(')');
 		}
 	}
 
-	if (cur->isblock)
+	if (cur->displaytype & DisplayBlock)
 		fputs("\n", stdout);
 
 	if (!strcasecmp(t, "td") || !strcasecmp(t, "th"))