webdump

Unnamed repository; edit this file 'description' to name the repository.
git clone git://git.z3bra.org/webdump.git
Log | Files | Refs | README | LICENSE

commit e4a9e2404be2db1687430631e912f1809992a23b
parent 4fea38d503fe668601fef6ffc9b21c94e24c542e
Author: Hiltjo Posthuma <hiltjo@codemadness.org>
Date:   Sat, 21 Sep 2019 19:14:40 +0200

hide tag if DisplayNone, separate white-space handling

Diffstat:
TODO | 2+-
main.c | 19+++++++++----------
2 files changed, 10 insertions(+), 11 deletions(-)

diff --git a/TODO b/TODO
@@ -1,10 +1,10 @@
 - base href. specify and parse relative url, allow to specify base and also parse <base href="">
+- handle whitespace, and tag types properly: atleast: inline-block, inline, block, pre.
 - detect <link /> to RSS/Atom feed, show as link. example: <link rel="alternate" href="atom.xml" type="application/atom+xml" title="Codemadness Atom Feed" /> or <link rel="alternate" title="Tweakers Mixed RSS feed" type="application/rss+xml" href="https://tweakers.net/feeds/mixed.xml">
-- handle whitespace, and tag types properly: atleast: inline-block, inline, block, pre
 - print safe (not certain control chars, except newline, TAB etc).
 - improve/remove duplicate white-space/newlines?
 - <code> should not be treated as a block (<pre> does?)
diff --git a/main.c b/main.c
@@ -390,7 +390,6 @@ absuri(char *buf, size_t bufsiz, const char *link, const char *base)
 	return encodeuri(buf, bufsiz, tmp);
 }
 
-
 static void
 xmlcdata(XMLParser *p, const char *data, size_t datalen)
 {
@@ -409,19 +408,19 @@ xmldataend(XMLParser *p)
 	struct node *cur;
 	char *start, *s, *e;
 
-	cur = &nodes[curnode];
 	if (!htmldata.data || !htmldata.len)
 		return;
 
-	start = htmldata.data;
-
-	/* TODO: white-space handling */
-	s = start;
-	e = s + strlen(s);
-
-	if (cur->displaytype & DisplayPre) {
-		fwrite(s, 1, e - s, stdout);
+	cur = &nodes[curnode];
+	if (cur->displaytype & DisplayNone) {
+		/* nothing */
+	} else if (cur->displaytype & DisplayPre) {
+		fwrite(htmldata.data, 1, htmldata.len, stdout);
 	} else {
+		start = htmldata.data;
+		s = start;
+		e = s + htmldata.len;
+		/* TODO: better white-space handling */
 		for (; s < e; s++) {
 			if (isspace((unsigned char)*s)) {
 				if (s != start && !isspace((unsigned char)s[-1]))