phroxy

Gopher to HTTP proxy
git clone git://git.z3bra.org/phroxy.git
Log | Files | Refs | LICENSE

phroxy.c (9212B)


      1 #include <err.h>
      2 #include <errno.h>
      3 #include <limits.h>
      4 #include <netdb.h>
      5 #include <signal.h>
      6 #include <stdio.h>
      7 #include <stdlib.h>
      8 #include <string.h>
      9 #include <time.h>
     10 #include <unistd.h>
     11 
     12 #include <sys/types.h>
     13 #include <sys/socket.h>
     14 #include <sys/socket.h>
     15 
     /*
      * Supported item types.
      *
      * Each constant is the slot used with the items[] name table (see
      * itemname(), which maps the item type character to its slot).  The
      * table itself is not defined in this file; presumably config.h
      * provides it.
      */
     enum {
     	ITEM_0 = 0,	/* type '0' */
     	ITEM_1 = 1,	/* type '1' */
     	ITEM_2 = 2,	/* type '2' */
     	ITEM_3 = 3,	/* type '3' */
     	ITEM_4 = 4,	/* type '4' */
     	ITEM_5 = 5,	/* type '5' */
     	ITEM_6 = 6,	/* type '6' */
     	ITEM_7 = 7,	/* type '7' */
     	ITEM_8 = 8,	/* type '8' */
     	ITEM_9 = 9,	/* type '9' */
     	ITEM_I = 10,	/* type 'I' */
     	ITEM_g = 11,	/* type 'g' */
     	ITEM_h = 12,	/* type 'h' */
     	ITEM_s = 13,	/* type 's' */
     };
     33 
     34 #include "config.h"
     35 
     /*
      * Resize the array `m` so that it holds `n` elements of `s` bytes each.
      *
      * A request for zero bytes (n == 0 or s == 0) releases `m` and yields
      * NULL.  The function never returns on failure: it terminates the
      * program with exit status 1 when n * s would overflow or when
      * realloc() fails.
      *
      * Returns the (possibly moved) array; the caller owns it.
      */
     void *
     xreallocarray(void *m, const size_t n, const size_t s)
     {
     	void *grown;

     	/* an empty array is represented by NULL */
     	if (!n || !s) {
     		free(m);
     		return NULL;
     	}

     	/* s is non-zero here, so the division is safe */
     	if (n > (size_t)-1 / s)
     		errx(1, "realloc: overflow");

     	grown = realloc(m, n * s);
     	if (grown == NULL)
     		errx(1, "realloc: %s", strerror(errno));

     	return grown;
     }
     52 
     53 
     /*
      * Open a TCP connection to `host` on `port` (service name or number).
      *
      * Every address returned by getaddrinfo() is tried in order until one
      * connects.  Diagnostics go to stderr.  SIGWINCH is blocked for the
      * duration of the call and the previous signal mask is restored before
      * returning (NOTE(review): the reason for blocking SIGWINCH is not
      * visible in this file).
      *
      * Returns the connected socket, or -1 on failure.  The caller closes
      * the socket.
      */
     static int
     connectto(const char *host, const char *port)
     {
     	static const struct addrinfo hints = {
     	    .ai_family = AF_UNSPEC,
     	    .ai_socktype = SOCK_STREAM,
     	    .ai_protocol = IPPROTO_TCP,
     	};
     	struct addrinfo *addrs, *addr;
     	sigset_t set, oset;
     	int r, sock = -1;
     	int connfailed = 0;	/* did the last attempt fail in connect()? */
     	int saved = 0;		/* errno of the last failed attempt */

     	sigemptyset(&set);
     	sigaddset(&set, SIGWINCH);
     	sigprocmask(SIG_BLOCK, &set, &oset);

     	if ((r = getaddrinfo(host, port, &hints, &addrs))) {
     		fprintf(stderr, "Can't resolve hostname \"%s\": %s\n", host, gai_strerror(r));
     		goto out;
     	}

     	for (addr = addrs; addr; addr = addr->ai_next) {
     		sock = socket(addr->ai_family, addr->ai_socktype,
     		              addr->ai_protocol);
     		if (sock < 0) {
     			saved = errno;
     			connfailed = 0;
     			continue;
     		}
     		if (connect(sock, addr->ai_addr, addr->ai_addrlen) == 0)
     			break;

     		/* keep the reason before close() gets a chance to change it */
     		saved = errno;
     		connfailed = 1;
     		close(sock);
     		sock = -1;
     	}

     	freeaddrinfo(addrs);

     	if (sock < 0) {
     		if (connfailed)
     			fprintf(stderr, "Can't connect to: %s:%s: %s\n", host, port, strerror(saved));
     		else
     			fprintf(stderr, "Can't open socket: %s\n", strerror(saved));
     	}

     out:
     	sigprocmask(SIG_SETMASK, &oset, NULL);
     	return sock;
     }
    104 
    /*
     * Send a gopher request on `sock`.
     *
     * The request is "<selector>\r\n", or "<selector>\t<search>\r\n" when
     * `search` is not NULL.  Partial writes are retried until the whole
     * request has been sent.
     *
     * Returns 0 on success, -1 on failure (a message is printed to stderr).
     */
    int
    sendselector(int sock, const char *selector, const char *search)
    {
    	char *msg, *p;
    	size_t ln;
    	ssize_t n;
    	int saved;

    	/* selector + CRLF + NUL, plus TAB + search for a query */
    	ln = strlen(selector) + 3;
    	if (search)
    		ln += strlen(search) + 1;

    	if (!(msg = malloc(ln))) {
    		fprintf(stderr, "Can't send message: %s\n", strerror(errno));
    		return -1;
    	}

    	if (search)
    		snprintf(msg, ln, "%s\t%s\r\n", selector, search);
    	else
    		snprintf(msg, ln, "%s\r\n", selector);

    	/* the terminating NUL is not part of the request */
    	ln--;

    	p = msg;
    	while (ln > 0) {
    		n = write(sock, p, ln);
    		if (n <= 0)
    			break;
    		p += n;
    		ln -= (size_t)n;
    	}

    	/* remember why write() stopped before free() can touch errno */
    	saved = errno;
    	free(msg);

    	if (ln > 0) {
    		fprintf(stderr, "Can't send message: %s\n", strerror(saved));
    		return -1;
    	}

    	return 0;
    }
    133 
    /* Value of the hexadecimal digit `c`, or -1 when `c` is not one. */
    static int
    hexval(unsigned char c)
    {
    	if ('0' <= c && c <= '9')
    		return c - '0';
    	if ('A' <= c && c <= 'F')
    		return c - 'A' + 10;
    	if ('a' <= c && c <= 'f')
    		return c - 'a' + 10;
    	return -1;
    }

    /*
     * Convert two hexadecimal digits (upper or lower case) to the byte they
     * encode.  A leading '%' is skipped.  A character that is not a hex
     * digit counts as 0, and the second character is not read when the
     * first one is the end of the string.
     */
    static char
    hex2bin(const unsigned char *in)
    {
    	int hi, lo;

    	if (*in == '%')
    		in++;

    	hi = hexval(in[0]);
    	lo = in[0] ? hexval(in[1]) : -1;

    	return (char)(16 * (hi < 0 ? 0 : hi) + (lo < 0 ? 0 : lo));
    }

    /*
     * Decode a URL-encoded query string in place.
     *
     * '+' becomes a space and "%XX" becomes the byte with hexadecimal value
     * XX.  A '%' that is not followed by two hex digits is kept as a literal
     * '%', so a truncated escape at the end of the string is never read
     * past.
     *
     * Returns `search` (NULL when `search` is NULL).
     */
    char *
    urldec(char *search)
    {
    	char *dst, *src;

    	if (!search)
    		return NULL;

    	/* the decoded text is never longer than the input */
    	for (dst = src = search; *src != '\0'; dst++, src++) {
    		switch (*src) {
    		case '+':
    			*dst = ' ';
    			break;
    		case '%':
    			/* && stops at the first non-digit, including the NUL */
    			if (hexval(src[1]) >= 0 && hexval(src[2]) >= 0) {
    				*dst = hex2bin((unsigned char *)src);
    				src += 2;
    			} else {
    				*dst = '%';
    			}
    			break;
    		default:
    			*dst = *src;
    		}
    	}
    	*dst = '\0';

    	return search;
    }
    177 
    178 
    /*
     * Read from `sock` until end of file.
     *
     * The buffer grows BUFSIZ bytes at a time and always keeps room for a
     * terminating NUL, which is appended after the data.  When `sz` is not
     * NULL it receives the number of bytes read (NUL excluded), or 0 on
     * failure.
     *
     * Returns the buffer, to be released with free(), or NULL when read()
     * fails (a message is printed to stderr).
     */
    char *
    getrawitem(int sock, size_t *sz)
    {
    	char *raw = NULL;
    	size_t blocks = 0, cap = 0, used = 0;
    	ssize_t n;
    	int saved;

    	do {
    		/* grow before reading so one spare byte always remains */
    		if (used == cap) {
    			raw = xreallocarray(raw, ++blocks, BUFSIZ);
    			cap = blocks * BUFSIZ;
    		}
    		n = read(sock, raw + used, cap - used);
    		if (n > 0)
    			used += (size_t)n;
    	} while (n > 0);

    	if (n < 0) {
    		saved = errno;
    		free(raw);
    		if (sz)
    			*sz = 0;
    		fprintf(stderr, "Can't read socket: %s\n", strerror(saved));
    		/* never hand the released buffer back to the caller */
    		return NULL;
    	}

    	raw[used] = '\0';
    	if (sz)
    		*sz = used;

    	return raw;
    }
    211 
    /*
     * Print a complete, header-less HTTP error response for `code` on
     * stdout: the status line followed by the empty line that ends the
     * header section.
     *
     * Codes without a dedicated reason phrase still get a valid status
     * line; values outside 100-599 are reported as 500.
     */
    void
    printhttp(int code)
    {
    	const char *reason;

    	if (code < 100 || code > 599)
    		code = 500;

    	switch (code) {
    	case 400: reason = "That's Illegal"; break;
    	case 404: reason = "Google Broke The Web"; break;
    	case 405: reason = "Don't Do That"; break;
    	case 415: reason = "Gopher Type Not Handled"; break;
    	case 500: reason = "You Broke The Web"; break;
    	default:  reason = "Error"; break;
    	}

    	printf("HTTP/1.1 %d %s\r\n", code, reason);
    	printf("\r\n");
    }
    224 
    /*
     * Map the gopher item type `i` to the Content-Type used to serve it.
     *
     * For type 'I' the image subtype is taken from the extension of `path`
     * (at most 4 characters are kept).  The generic "image/*" is used when
     * there is no usable extension: none at all, an empty one, or one that
     * contains anything but ASCII letters and digits.  That last rule also
     * covers a '.' that belongs to a directory name and keeps stray bytes
     * out of the header.
     *
     * Returns NULL for item types that cannot be served.  The result is
     * either a string constant or a static buffer overwritten by the next
     * call; it must not be freed.
     */
    char *
    contenttype(char i, char *path)
    {
    	/* "image/" plus an extension of at most 4 characters */
    	static char type[11];
    	const char *ext, *c;

    	switch (i) {
    	case '0':
    	case '1':
    	case '7':
    	case 'h':
    		return "text/html; charset=utf-8";
    	case '6':
    		return "text/x-uuencode";
    	case '4':
    	case '5':
    	case '9':
    		return "application/octet-stream";
    	case 'g':
    		return "image/gif";
    	case 'I':
    		break;
    	default:
    		return NULL;
    	}

    	ext = path ? strrchr(path, '.') : NULL;
    	if (!ext || *++ext == '\0')
    		return "image/*";

    	for (c = ext; *c != '\0'; c++) {
    		if (!(('0' <= *c && *c <= '9') ||
    		      ('A' <= *c && *c <= 'Z') ||
    		      ('a' <= *c && *c <= 'z')))
    			return "image/*";
    	}

    	snprintf(type, sizeof(type), "image/%s", ext);
    	return type;
    }
    260 
    261 const char *
    262 itemname(char i)
    263 {
    264 	switch(i) {
    265 	case '0': return items[ITEM_0];
    266 	case '1': return items[ITEM_1];
    267 	case '2': return items[ITEM_2];
    268 	case '3': return items[ITEM_3];
    269 	case '4': return items[ITEM_4];
    270 	case '5': return items[ITEM_5];
    271 	case '6': return items[ITEM_6];
    272 	case '7': return items[ITEM_7];
    273 	case '8': return items[ITEM_8];
    274 	case '9': return items[ITEM_9];
    275 	case 'I': return items[ITEM_I];
    276 	case 'g': return items[ITEM_g];
    277 	case 'h': return items[ITEM_h];
    278 	case 's': return items[ITEM_s];
    279 	}
    280 
    281 	return NULL;
    282 }
    283 
    284 
    285 void
    286 printheaders(char *ctype)
    287 {
    288 	time_t t;
    289 
    290 	t = time(NULL);
    291 	if (t > 0)
    292 		printf("Date: %s", asctime(gmtime(&t)));
    293 	if (ctype)
    294 		printf("Content-Type: %s\r\n", ctype);
    295 	printf("Server: phroxy\r\n");
    296 	printf("Host: %s\r\n", http_host);
    297 	printf("Connection: close\r\n");
    298 }
    299 
    300 int
    301 printmenu(int fd, char *data)
    302 {
    303 	char i, *p, a[LINE_MAX], *f[4];
    304 	char *ifmt = "<div class='item'><span> </span><code>%s</code></div>\n";
    305 	char *afmt = "<div class='item'><span>%s</span><a href='http://%s/%s:%s/%c%s'>%s</a></div>\n";
    306 	char *sfmt = "<div class='item'><span>%s</span><details><summary>%s</summary><form method='get' action='http://%s/%s:%s/%c%s'><input type='text' name='q'></form></details></div>\n";
    307 
    308 	p = data;
    309 
    310 	while((p = strsep(&data, "\n"))) {
    311 		i = *p++;
    312 		if (i == '.')
    313 			break;
    314 
    315 		f[0] = strsep(&p, "\t");
    316 		f[1] = strsep(&p, "\t");
    317 		f[2] = strsep(&p, "\t");
    318 		f[3] = strsep(&p, "\r");
    319 		if (!f[1])
    320 			continue;
    321 
    322 		switch(i) {
    323 		case 'i':
    324 			snprintf(a, sizeof(a), ifmt, f[0]);
    325 			break;
    326 		case '7':
    327 			snprintf(a, sizeof(a), sfmt, itemname(i), f[0], http_host, f[2], f[3], i, f[1]);
    328 			break;
    329 		default:
    330 			snprintf(a, sizeof(a), afmt, itemname(i), http_host, f[2], f[3], i, f[1], f[0]);
    331 		}
    332 
    333 		write(fd, a, strlen(a));
    334 	}
    335 
    336 	return 0;
    337 }
    338 
    /* Write all `len` bytes of `buf` to `fd`; 0 on success, -1 on failure. */
    static int
    htmlwrite(int fd, const char *buf, size_t len)
    {
    	ssize_t n;

    	while (len > 0) {
    		if ((n = write(fd, buf, len)) <= 0)
    			return -1;
    		buf += n;
    		len -= (size_t)n;
    	}

    	return 0;
    }

    /*
     * Write the `len` bytes at `data` to `fd` as preformatted HTML: the
     * text is wrapped in <pre>...</pre> and the characters & < > are
     * replaced by entities.
     *
     * The scan is bounded by `len` and does not stop at NUL bytes, so text
     * that follows an embedded NUL is escaped like everything else.
     *
     * Returns 0 on success, -1 when writing to `fd` fails.
     */
    int
    printhtml(int fd, const char *data, size_t len)
    {
    	const char *end, *run, *p, *esc;

    	if (!data)
    		len = 0;

    	if (htmlwrite(fd, "<pre>", 5) < 0)
    		return -1;

    	/* `run` is the start of the pending stretch of ordinary bytes */
    	run = data;
    	end = len ? data + len : data;
    	for (p = data; p < end; p++) {
    		switch (*p) {
    		case '&': esc = "&amp;"; break;
    		case '<': esc = "&lt;";  break;
    		case '>': esc = "&gt;";  break;
    		default:
    			continue;
    		}

    		if (htmlwrite(fd, run, (size_t)(p - run)) < 0 ||
    		    htmlwrite(fd, esc, strlen(esc)) < 0)
    			return -1;
    		run = p + 1;
    	}

    	if (htmlwrite(fd, run, (size_t)(end - run)) < 0)
    		return -1;

    	return htmlwrite(fd, "</pre>\n", 7);
    }
    371 
    372 int
    373 serveitem(char item, char *path, char *data, size_t len)
    374 {
    375 	char *send;
    376 	int sent;
    377 
    378 
    379 	if (!contenttype(item, path)) {
    380 		printhttp(415);
    381 		return 1;
    382 	}
    383 
    384 	printf("HTTP/1.1 200 OK\r\n");
    385 	printheaders(contenttype(item, path));
    386 
    387 	switch(item) {
    388 	case '7': // search
    389 	case '1': // menu
    390 	case '0': // text
    391 		printf("\r\n");
    392 		fflush(stdout);
    393 		write(1, head, strlen(head));
    394 		if (item == '1' || item == '7') printmenu(1, data);
    395 		if (item == '0') printhtml(1, data, len);
    396 		write(1, foot, strlen(foot));
    397 		break;
    398 
    399 	case '4': // BinHexed Macintosh file
    400 	case '5': // DOS binary archive of some sort
    401 	case '6': // uuencoded
    402 	case '9': // binary
    403 	case 'g': // gif
    404 	case 'I': // image
    405 	case 'h': // http redirect
    406 		printf("Content-Length: %ld\r\n", len);
    407 		printf("\r\n");
    408 		fflush(stdout);
    409 		send = data;
    410 		while (len > 0) {
    411 			if ((sent = write(1, send, len)) < 0)
    412 				return 1;
    413 			len -= sent;
    414 			send += sent;
    415 		}
    416 		break;
    417 
    418 	case '2': // CSO phone-book server
    419 	case '3': // Error
    420 	case '8': // telnet session.
    421 	case 'T': // tn3270 session.
    422 	case '+': // mirror link
    423 	default:
    424 		/* IGNORE */
    425 		break;
    426 	}
    427 
    428 	free(data);
    429 	fflush(stdout);
    430 
    431 	return 0;
    432 }
    433 
    434 int
    435 phroxy(char *url)
    436 {
    437 	int sock;
    438 	size_t len;
    439 	char item = 0;
    440 	char *hole, *path, *host, *port;
    441 	char *data = NULL, *srch = NULL;
    442 
    443 	url++;
    444 	hole = strsep(&url, "/");
    445 	if (!hole || !strnlen(hole, 1))
    446 		hole = default_hole;
    447 
    448 	host = strsep(&hole, ":");
    449 	port = strsep(&hole, "\0");
    450 	if (!port)
    451 		port = "70";
    452 
    453 	if (url)
    454 		item = *url++;
    455 
    456 	if (!item)
    457 		item = '1';
    458 
    459 	path = strsep(&url, "\0");
    460 	if (!path || *path == '\0')
    461 		path = "/";
    462 
    463 	if((srch = strchr(path, '?'))) {
    464 		*srch = '\0';
    465 		srch += 3; /* go past "?q=" in URL, to fetch actual query */
    466 	}
    467 
    468 	if ((sock = connectto(host, port)) < 0) {
    469 		printhttp(500);
    470 		return 1;
    471 	}
    472 
    473 	if (!sendselector(sock, path, urldec(srch)))
    474 		data = getrawitem(sock, &len);
    475 
    476 	close(sock);
    477 
    478 	if (!data) {
    479 		printhttp(444);
    480 		return 1;
    481 	}
    482 
    483 	serveitem(item, path, data, len);
    484 
    485 	return 0;
    486 }
    487 
    /*
     * Read one HTTP request from stdin and answer it on stdout.
     *
     * Only the request line is looked at and only GET is accepted (405
     * otherwise).  A request line without a target is answered with 400.
     *
     * Returns 0 on success, 1 on failure.
     */
    int
    main(void)
    {
    	ssize_t rlen;
    	char request[512], *url;

    	rlen = read(0, request, sizeof(request) - 1);
    	if (rlen < 0)
    		return 1;

    	request[rlen] = '\0';

    	if (strncmp(request, "GET ", 4)) {
    		printhttp(405);
    		return 1;
    	}

    	/* the target ends at the next blank or at the end of the line */
    	url = strtok(request + 4, " \r\n");
    	if (!url) {
    		printhttp(400);
    		return 1;
    	}

    	return phroxy(url);
    }
    508 }