webdump

Unnamed repository; edit this file 'description' to name the repository.
git clone git://git.z3bra.org/webdump.git
Log | Files | Refs | README | LICENSE

commit 26361ccd0ab0f19276d7727b8f589b1109cfbfd1
parent d22cedcf1a4d6a4066489e029ee2888d76308318
Author: Hiltjo Posthuma <hiltjo@codemadness.org>
Date:   Wed, 12 Sep 2018 22:29:07 +0200

add work-in-progress code, listing of goals/scope and TODO

Diffstat:
LICENSE | 15+++++++++++++++
README | 15+++++++++++++++
TODO | 7+++++++
main.c | 54++++++++++++++++++++++++++++++++++++++++++++----------
4 files changed, 81 insertions(+), 10 deletions(-)

diff --git a/LICENSE b/LICENSE @@ -0,0 +1,15 @@ +ISC License + +Copyright (c) 2017-2018 Hiltjo Posthuma <hiltjo@codemadness.org> + +Permission to use, copy, modify, and/or distribute this software for any +purpose with or without fee is hereby granted, provided that the above +copyright notice and this permission notice appear in all copies. + +THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. diff --git a/README b/README @@ -0,0 +1,15 @@ +Work in progress: + + +text-based webpage viewer + + +Goals: + +- The tool will render a webpage only to stdout, similarly like links -dump or + lynx -dump. +- It will be usable and secure for rendering HTML mails. +- Without the user consent no remote resources or Javascript code will be executed. +- No filesystem access will be required. + +Will use OpenBSD pledge(2). diff --git a/TODO b/TODO @@ -1,3 +1,10 @@ +- improve/remove duplicate white-space/newlines? +- cleanup code. + +=== + - <code> should not be treated as a block (<pre> does?) ? xml.c: make sure to always call xmldata handler even if datalen == 0 ? + +- add links as reference, for example on page: http://absmagazin.de/2018 the MP3 urls. diff --git a/main.c b/main.c @@ -1,5 +1,5 @@ /* TODO: escape control characters */ -/* TODO: specify and parse relative url */ +/* TODO: specify and parse relative url, allow to specify base and also parse <base href=""> ? */ #include <ctype.h> #include <err.h> @@ -16,6 +16,16 @@ static XMLParser parser; +struct linkref { + char *type; + char *url; + struct linkref *next; +}; + +static struct linkref *links_head; +static struct linkref *links_cur; +static int linkcount; + struct node { char tag[256]; int ispre; @@ -173,14 +183,15 @@ xmldataend(XMLParser *p) } else { #if 1 for (; s < e; s++) { - if (isspace(*s)) { - if (s != start && !isspace(s[-1])) + if (isspace((unsigned char)*s)) { + if (s != start && !isspace((unsigned char)s[-1])) putchar(' '); } else { putchar(*s); } } - if (s != start && e != start && !isspace(s[-1]) && isspace(e[-1])) + if (s != start && e != start && !isspace((unsigned char)s[-1]) && + isspace((unsigned char)e[-1])) putchar(' '); #endif /* printf("DEBUG: |%s|\n", start);*/ @@ -254,12 +265,6 @@ xmltagend(XMLParser *p, const char *t, size_t tl, int isshort) curnode--; cur = &nodes[curnode]; -#if 0 - if (src[0]) - printf(" [%s]", src); - src[0] = '\0'; -#endif - if (!strcasecmp(t, "tr")) fputs(" | ", stdout); /* HACK */ @@ -307,6 +312,23 @@ xmltagstartparsed(XMLParser *p, const char *t, size_t tl, int isshort) cur = &nodes[curnode]; +#if 1 + if (src[0]) { + printf(" [%d]", ++linkcount); + if (!strcasecmp(t, "img") || !strcasecmp(t, "video") || + !strcasecmp(t, "audio")) + printf("[%s]", t); + /* TODO: check allocation */ + if (!links_head) + links_cur = links_head = calloc(1, sizeof(*links_head)); + else + links_cur = links_cur->next = calloc(1, sizeof(*links_head)); + links_cur->type = strdup(t); + links_cur->url = strdup(src); + } + src[0] = '\0'; +#endif + if (cur->isblock) fputs("\n", stdout); @@ -348,6 +370,16 @@ xmlattr(XMLParser *p, const char *tag, size_t taglen, const char *name, strlcpy(src, value, sizeof(src)); } +void +printlinkrefs(void) +{ + size_t i; + + /* TODO: add title attribute or some basic description? */ + for (i = 1, links_cur = links_head; links_cur; links_cur = links_cur->next, i++) + printf("[%zu] - %s (%s)\n", i, links_cur->url, links_cur->type); +} + int main(void) { @@ -365,6 +397,8 @@ main(void) parser.getnext = getchar; xml_parse(&parser); + + printlinkrefs(); putchar('\n'); return 0;