webdump

Fork of git://git.codemadness.org/webdump
git clone git://git.z3bra.org/webdump.git
Log | Files | Refs | README | LICENSE

commit 0f33b39502a125c0761ecbec28e478392cd33a43
parent 3d97a618cb1a5740c8356fda3060f95d9610430d
Author: Willy Goiffon <dev@z3bra.org>
Date:   Wed,  9 Oct 2019 12:28:07 +0200

Ignore whitespace following <br> tags

Diffstat:
webdump.c | 14 +++++++++++---
1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/webdump.c b/webdump.c
@@ -81,7 +81,7 @@ static char src[4096]; /* src or href attribute */
 
 #define MAX_DEPTH 256
 static struct node nodes[MAX_DEPTH];
-static int curnode;
+static int curnode, prvnode;
 
 static struct {
 	char *tag;
@@ -416,7 +416,7 @@ xmlcdata(XMLParser *p, const char *data, size_t datalen)
 static void
 xmldataend(XMLParser *p)
 {
-	struct node *cur;
+	struct node *cur, *prv;
 	char *start, *s, *e;
 
 //	printf("DEBUG: %s\n", __func__);
@@ -425,6 +425,7 @@ xmldataend(XMLParser *p)
 		return;
 
 	cur = &nodes[curnode];
+	prv = &nodes[prvnode];
 
 //	printf("DEBUG: node: %s, type: %d\n", cur->tag, cur->displaytype);
 
@@ -436,6 +437,11 @@ xmldataend(XMLParser *p)
 	start = htmldata.data;
 	e = htmldata.data + htmldata.len;
 
+	/* ignore whitespaces following a <br> tag */
+	if (!strcasecmp(prv->tag, "br"))
+		while (isspace(*start) && start != e)
+			start++;
+
 	/* TODO: better white-space handling, for example if there is only
 	   white-space between 2 block elements then it can be ignored. */
 	for (s = start; s < e; s++) {
@@ -522,6 +528,7 @@ xmltagstart(XMLParser *x, const char *t, size_t tl)
 	if (!strcasecmp(cur->tag, "em")) printf("\033[3m");
 	if (!strcasecmp(cur->tag, "u")) printf("\033[4m");
 	if (!strcasecmp(cur->tag, "br")) {
+		prvnode = curnode;
 		curnode--;
 		printf("\n");
 	}
@@ -567,10 +574,11 @@ xmltagend(XMLParser *p, const char *t, size_t tl, int isshort)
 		if (!strcasecmp(t, "em")) fputs("\033[0m", stdout);
 		if (!strcasecmp(t, "u")) fputs("\033[0m", stdout);
 	} else if (!strcasecmp(t, "br")) {
+		prvnode = curnode;
+		curnode--;
 		fputs("\n", stdout);
 	}
 
-	curnode--;
 }
 
 static void