view src/html.c @ 312:3069be4c291e

[gaim-migrate @ 322] I don't know why I did this. I have homework due in 15 hours that I haven't started yet, and it's in a language I don't know and it's a project I don't understand. If my teacher knew about this, he would be pissed. He looks pissed all the time, even when he's not. When he smiles he looks devilish. Maybe I only think that because literally half the class flunked the midterm. I am not joking about that. More people got F's than A, B, and C combined. It's 2 am and the homework's due at 5 tomorrow so what do I do? Get chat to work. Wow. That's going to look good on my resume. "Why did you flunk this class?" "Because I was getting chat in Instant Messenger to work." Not that that's not something to be proud of, but I wonder which is more important to employers. The big battle, experience versus education. Just because you got good grades in college doesn't mean you're smarter than someone who flunked, it just means you put in the effort necessary to get a better grade and the other person didn't. Maybe the person who flunked was working on real honest-to-god actually *used* software, as opposed to some stupid tree that only gets used for a fringe branch of computer science that doesn't offer much more than a normal heap or binary search tree offers. Maybe the person was out there reverse-engineering protocols and allowing cross- platform communication to occur, creating interoperability and causing a greater demand not only for the product, but for the platform it runs on! Given the choices, who would you pick? Someone who was told how to code a tree and managed to get it to work, or someone who increases your userbase and marketability? Enough of my rant for a while. I've had waaaaay too much sugar (gummy candy is deadly). committer: Tailor Script <tailor@pidgin.im>
author Eric Warmenhoven <eric@warmenhoven.org>
date Fri, 02 Jun 2000 09:11:48 +0000
parents 29e1669b006b
children b402a23f35df
line wrap: on
line source

/*
 * gaim
 *
 * Copyright (C) 1998-1999, Mark Spencer <markster@marko.net>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 *
 */

#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>
#include <unistd.h>
#include <gtk/gtk.h>
#include <gdk/gdkprivate.h>
#include <gdk/gdkx.h>
#include "gaim.h"
#include <sys/types.h>
#include <sys/socket.h>
#include <netdb.h>
#include <netinet/in.h>
#include <fcntl.h>
#include <errno.h>

gchar * strip_html(gchar * text)
{
	int i, j;
	int visible = 1;
	gchar *text2 = g_malloc(strlen(text) + 1);
	
	strcpy(text2, text);
	for (i = 0, j = 0;text2[i]; i++)
	{
		if(text2[i]=='<')
		{	
			visible = 0;
			continue;
		}
		else if(text2[i]=='>')
		{
			visible = 1;
			continue;
		}
		if(visible)
		{
			text2[j++] = text2[i];
		}
	}
	text2[j] = '\0';
	return text2;
}

struct g_url parse_url(char *url)
{
	struct g_url test;
	char scan_info[255];
	char port[5];
	int f;

	if (strstr(url, "http://"))
		g_snprintf(scan_info, sizeof(scan_info), "http://%%[A-Za-z0-9.]:%%[0-9]/%%[A-Za-z0-9.~_-/&%%?]");
	else
		g_snprintf(scan_info, sizeof(scan_info), "%%[A-Za-z0-9.]:%%[0-9]/%%[A-Za-z0-9.~_-/&%%?]");
	f = sscanf(url, scan_info, test.address, port, test.page);
	if (f == 1) {
		if (strstr(url, "http://"))
			g_snprintf(scan_info, sizeof(scan_info), "http://%%[A-Za-z0-9.]/%%[A-Za-z0-9.~_-/&%%?]");
		else
			g_snprintf(scan_info, sizeof(scan_info), "%%[A-Za-z0-9.]/%%[A-Za-z0-9.~_-/&%%?]");
        f = sscanf(url, scan_info, test.address, test.page);
        g_snprintf(port, sizeof(test.port), "80");
        port[2] = 0;
	}
	if (f == 1) {
		if (strstr(url, "http://"))
			g_snprintf(scan_info, sizeof(scan_info), "http://%%[A-Za-z0-9.]");
        else
		g_snprintf(scan_info, sizeof(scan_info), "%%[A-Za-z0-9.]");
		f = sscanf(url, scan_info, test.address);
		g_snprintf(test.page, sizeof(test.page), "%c", '\0');
	}

	sscanf(port, "%d", &test.port);
	return test;
}

char *grab_url(char *url)
{
	struct g_url website;
	char *webdata = NULL;
        int sock;
        int len;
	int read_rv;
	int datalen = 0;
	struct in_addr *host;
	char buf[256];
	char data;
        int startsaving = 0;
        GtkWidget *pw = NULL, *pbar = NULL, *label;

        website = parse_url(url);

	/*
	host = gethostbyname(website.address);
	if (!host) { return g_strdup("g001: Error resolving host\n"); }

	if ((sock = connect_address(inet_addr(host->h_addr), website.port)) <= -1)
		return g_strdup("g003: Error opening connection.\n");
	*/
	host = (struct in_addr *)get_address(website.address);
	if (!host) { return g_strdup("g001: Error resolving host\n"); }
	if ((sock = connect_address(host->s_addr, website.port)) < 0)
		return g_strdup("g003: Error opening connection.\n");

	g_snprintf(buf, sizeof(buf), "GET /%s HTTP/1.0\n\n", website.page);
	g_snprintf(debug_buff, sizeof(debug_buff), "Request: %s\n", buf);
	debug_print(debug_buff);
	write(sock, buf, strlen(buf));
	fcntl(sock, F_SETFL, O_NONBLOCK);

        webdata = NULL;
        len = 0;
	
	/*
	 * avoid fgetc(), it causes problems on solaris
	while ((data = fgetc(sockfile)) != EOF) {
	*/
	/* read_rv will be 0 on EOF and < 0 on error, so this should be fine */
	while ((read_rv = read(sock, &data, 1)) > 0 || errno == EWOULDBLOCK) {
		if (errno == EWOULDBLOCK) {
			errno = 0;
			continue;
		}

		if (!data)
			continue;
		
		if (!startsaving && data == '<') {
#ifdef HAVE_STRSTR
			char *cs = strstr(webdata, "Content-Length");
			if (cs) {
				char tmpbuf[1024];
				sscanf(cs, "Content-Length: %d", &datalen);

                                g_snprintf(tmpbuf, 1024, "Getting %d bytes from %s", datalen, url);
                                pw = gtk_dialog_new();

				label = gtk_label_new(tmpbuf);
				gtk_widget_show(label);
				gtk_box_pack_start(GTK_BOX(GTK_DIALOG(pw)->vbox),
						   label, FALSE, FALSE, 5);
				
				pbar = gtk_progress_bar_new();
				gtk_box_pack_start(GTK_BOX(GTK_DIALOG(pw)->action_area),
						   pbar, FALSE, FALSE, 5);
                                gtk_widget_show(pbar);
                                
                                gtk_window_set_title(GTK_WINDOW(pw), "Getting Data");
                                
                                gtk_widget_realize(pw);
                                aol_icon(pw->window);

                                gtk_widget_show(pw);
                        } else
				datalen = 0;
#else
			datalen = 0;
#endif
			g_free(webdata);
			webdata = NULL;
			len = 0;
			startsaving = 1;
		}

		len++;
		webdata = g_realloc(webdata, len);
		webdata[len - 1] = data;

		if (pbar)
			gtk_progress_bar_update(GTK_PROGRESS_BAR(pbar),
                                                ((100 * len) / datalen) / 100.0);
		
		while (gtk_events_pending())
			gtk_main_iteration();
	}

        webdata = g_realloc(webdata, len+1);
        webdata[len] = 0;


        g_snprintf(debug_buff, sizeof(debug_buff), "Receieved: '%s'\n", webdata);
        debug_print(debug_buff);

        if (pw)
                gtk_widget_destroy(pw);

	close(sock);
	return webdata;
}

char *fix_url(gchar *buf)
{
	char *new,*tmp;
	int size;

	size=8;
	size+=strlen(quad_addr);
	tmp=strchr(strchr(buf,':')+1,':');
	size+=strlen(tmp);
	new=g_malloc(size);
	strcpy(new,"http://");
	strcat(new,quad_addr);
	strcat(new,tmp);
	return(new);
}