comparison src/html.c @ 7094:2343c3aa1dec

[gaim-migrate @ 7659] grab_url() and parse_url() are gone, replaced with gaim_url_fetch() and gaim_url_parse(). They were also moved to util.[ch]. committer: Tailor Script <tailor@pidgin.im>
author Christian Hammond <chipx86@chipx86.com>
date Wed, 01 Oct 2003 03:01:25 +0000
parents 67c4e9d39242
children
comparison
equal deleted inserted replaced
7093:3650612c7daa 7094:2343c3aa1dec
67 text2[j++] = text2[i]; 67 text2[j++] = text2[i];
68 } 68 }
69 } 69 }
70 text2[j] = '\0'; 70 text2[j] = '\0';
71 return text2; 71 return text2;
72 }
73
74 struct g_url *parse_url(char *url)
75 {
76 struct g_url *test = g_new0(struct g_url, 1);
77 char scan_info[255];
78 char port[5];
79 int f;
80 char* turl;
81 /* hyphen at end includes it in control set */
82 char addr_ctrl[] = "A-Za-z0-9.-";
83 char port_ctrl[] = "0-9";
84 char page_ctrl[] = "A-Za-z0-9.~_/:*!@&%%?=+^-";
85
86 if((turl=strstr(url, "http://")) || (turl=strstr(url, "HTTP://")))
87 url=turl+=7;
88
89 snprintf(scan_info, sizeof(scan_info),
90 "%%[%s]:%%[%s]/%%[%s]",
91 addr_ctrl, port_ctrl, page_ctrl);
92
93 f = sscanf(url, scan_info, test->address, port, test->page);
94 if (f == 1) {
95 snprintf(scan_info, sizeof(scan_info),
96 "%%[%s]/%%[%s]",
97 addr_ctrl, page_ctrl);
98 f = sscanf(url, scan_info, test->address, test->page);
99 snprintf(port, sizeof(port), "80");
100 }
101 if (f == 1)
102 test->page[0] = '\0';
103
104 sscanf(port, "%d", &test->port);
105 return test;
106 }
107
108 struct grab_url_data {
109 void (* callback)(gpointer, char *, unsigned long);
110 gpointer data;
111 struct g_url *website;
112 char *url;
113 gboolean full;
114 char *user_agent;
115 int http11;
116
117 int inpa;
118
119 gboolean sentreq;
120 gboolean newline;
121 gboolean startsaving;
122 char *webdata;
123 unsigned long len;
124 unsigned long data_len;
125 };
126
127 static gboolean
128 parse_redirect(const char *data, size_t data_len, gint sock,
129 struct grab_url_data *gunk)
130 {
131 gchar *s;
132
133 if ((s = g_strstr_len(data, data_len, "Location: ")) != NULL) {
134 gchar *new_url, *temp_url, *end;
135 gboolean full;
136 int len;
137
138 s += strlen("Location: ");
139 end = strchr(s, '\r');
140
141 /* Just in case :) */
142 if (end == NULL)
143 end = strchr(s, '\n');
144
145 len = end - s;
146
147 new_url = g_malloc(len + 1);
148 strncpy(new_url, s, len);
149 new_url[len] = '\0';
150
151 full = gunk->full;
152
153 if (*new_url == '/' || g_strstr_len(new_url, len, "://") == NULL) {
154 temp_url = new_url;
155
156 new_url = g_strdup_printf("%s:%d%s", gunk->website->address,
157 gunk->website->port, temp_url);
158
159 g_free(temp_url);
160
161 full = FALSE;
162 }
163
164 /* Close the existing stuff. */
165 gaim_input_remove(gunk->inpa);
166 close(sock);
167
168 gaim_debug(GAIM_DEBUG_INFO, "grab_url",
169 "Redirecting to %s\n", new_url);
170
171 /* Try again, with this new location. */
172 grab_url(new_url, full, gunk->callback, gunk->data, gunk->user_agent, gunk->http11);
173
174 /* Free up. */
175 g_free(new_url);
176 g_free(gunk->webdata);
177 g_free(gunk->website);
178 g_free(gunk->url);
179 g_free(gunk->user_agent);
180 g_free(gunk);
181
182 return TRUE;
183 }
184
185 return FALSE;
186 }
187
/**
 * Extracts the value of the Content-Length header from raw HTTP headers.
 *
 * The header name is only recognised at the start of the buffer or directly
 * after a newline, so names that merely end in "Content-Length" are ignored.
 *
 * @param data      Header bytes; does not need to be NUL-terminated.
 * @param data_len  Number of valid bytes in data.
 *
 * @return The announced length, or 0 if the header is absent, has no digits,
 *         or does not fit in a size_t.
 */
static size_t
parse_content_len(const char *data, size_t data_len)
{
	static const char header[] = "Content-Length:";
	const size_t header_len = sizeof(header) - 1;
	const size_t size_max = (size_t)-1;
	size_t i;

	if (data == NULL)
		return 0;

	for (i = 0; i + header_len <= data_len; i++) {
		size_t pos;
		size_t content_len = 0;

		/* Header names start a line. */
		if (i > 0 && data[i - 1] != '\n')
			continue;
		if (memcmp(data + i, header, header_len) != 0)
			continue;

		/* Optional blanks between the colon and the value. */
		pos = i + header_len;
		while (pos < data_len && (data[pos] == ' ' || data[pos] == '\t'))
			pos++;

		for (; pos < data_len && data[pos] >= '0' && data[pos] <= '9'; pos++) {
			size_t digit = (size_t)(data[pos] - '0');

			/* Treat an overflowing value as "unknown". */
			if (content_len > (size_max - digit) / 10)
				return 0;

			content_len = content_len * 10 + digit;
		}

		return content_len;
	}

	return 0;
}
197
198 static void grab_url_callback(gpointer dat, gint sock, GaimInputCondition cond)
199 {
200 struct grab_url_data *gunk = dat;
201 char data;
202
203 if (sock == -1) {
204 gunk->callback(gunk->data, NULL, 0);
205 g_free(gunk->website);
206 g_free(gunk->url);
207 g_free(gunk->user_agent);
208 g_free(gunk);
209 return;
210 }
211
212 if (!gunk->sentreq) {
213 char buf[1024];
214
215 if(gunk->user_agent) {
216 if(gunk->http11)
217 g_snprintf(buf, sizeof(buf), "GET %s%s HTTP/1.1\r\nUser-Agent: \"%s\"\r\nHost: %s\r\n\r\n", gunk->full ? "" : "/",
218 gunk->full ? gunk->url : gunk->website->page, gunk->user_agent, gunk->website->address);
219 else
220 g_snprintf(buf, sizeof(buf), "GET %s%s HTTP/1.0\r\nUser-Agent: \"%s\"\r\n\r\n", gunk->full ? "" : "/",
221 gunk->full ? gunk->url : gunk->website->page, gunk->user_agent);
222 }
223 else {
224 if(gunk->http11)
225 g_snprintf(buf, sizeof(buf), "GET %s%s HTTP/1.1\r\nHost: %s\r\n\r\n", gunk->full ? "" : "/",
226 gunk->full ? gunk->url : gunk->website->page, gunk->website->address);
227 else
228 g_snprintf(buf, sizeof(buf), "GET %s%s HTTP/1.0\r\n\r\n", gunk->full ? "" : "/",
229 gunk->full ? gunk->url : gunk->website->page);
230
231 }
232 gaim_debug(GAIM_DEBUG_MISC, "grab_url_callback",
233 "Request: %s\n", buf);
234
235 write(sock, buf, strlen(buf));
236 fcntl(sock, F_SETFL, O_NONBLOCK);
237 gunk->sentreq = TRUE;
238 gunk->inpa = gaim_input_add(sock, GAIM_INPUT_READ, grab_url_callback, dat);
239 gunk->data_len = 4096;
240 gunk->webdata = g_malloc(gunk->data_len);
241 return;
242 }
243
244 if (read(sock, &data, 1) > 0 || errno == EWOULDBLOCK) {
245 if (errno == EWOULDBLOCK) {
246 errno = 0;
247 return;
248 }
249
250 gunk->len++;
251
252 if (gunk->len == gunk->data_len + 1) {
253 gunk->data_len += (gunk->data_len) / 2;
254
255 gunk->webdata = g_realloc(gunk->webdata, gunk->data_len);
256 }
257
258 gunk->webdata[gunk->len - 1] = data;
259
260 if (!gunk->startsaving) {
261 if (data == '\r')
262 return;
263 if (data == '\n') {
264 if (gunk->newline) {
265 size_t content_len;
266 gunk->startsaving = TRUE;
267
268 /* See if we can find a redirect. */
269 if (parse_redirect(gunk->webdata, gunk->len, sock, gunk))
270 return;
271
272 /* No redirect. See if we can find a content length. */
273 content_len = parse_content_len(gunk->webdata, gunk->len);
274
275 if (content_len == 0) {
276 /* We'll stick with an initial 8192 */
277 content_len = 8192;
278 }
279
280 /* Out with the old... */
281 gunk->len = 0;
282 g_free(gunk->webdata);
283 gunk->webdata = NULL;
284
285 /* In with the new. */
286 gunk->data_len = content_len;
287 gunk->webdata = g_malloc(gunk->data_len);
288 }
289 else
290 gunk->newline = TRUE;
291 return;
292 }
293 gunk->newline = FALSE;
294 }
295 } else if (errno != ETIMEDOUT) {
296 gunk->webdata = g_realloc(gunk->webdata, gunk->len + 1);
297 gunk->webdata[gunk->len] = 0;
298
299 gaim_debug(GAIM_DEBUG_MISC, "grab_url_callback",
300 "Received: '%s'\n", gunk->webdata);
301
302 gaim_input_remove(gunk->inpa);
303 close(sock);
304 gunk->callback(gunk->data, gunk->webdata, gunk->len);
305 if (gunk->webdata)
306 g_free(gunk->webdata);
307 g_free(gunk->website);
308 g_free(gunk->url);
309 g_free(gunk->user_agent);
310 g_free(gunk);
311 } else {
312 gaim_input_remove(gunk->inpa);
313 close(sock);
314 gunk->callback(gunk->data, NULL, 0);
315 if (gunk->webdata)
316 g_free(gunk->webdata);
317 g_free(gunk->website);
318 g_free(gunk->url);
319 g_free(gunk->user_agent);
320 g_free(gunk);
321 }
322 }
323
324 void grab_url(char *url, gboolean full, void callback(gpointer, char *, unsigned long),
325 gpointer data, char *user_agent, int http11)
326 {
327 int sock;
328 struct grab_url_data *gunk = g_new0(struct grab_url_data, 1);
329
330 gunk->callback = callback;
331 gunk->data = data;
332 gunk->url = g_strdup(url);
333 gunk->user_agent = (user_agent) ? g_strdup(user_agent) : NULL;
334 gunk->http11 = http11;
335 gunk->website = parse_url(url);
336 gunk->full = full;
337
338 if ((sock = gaim_proxy_connect(NULL, gunk->website->address,
339 gunk->website->port, grab_url_callback,
340 gunk)) < 0) {
341 g_free(gunk->website);
342 g_free(gunk->url);
343 g_free(gunk->user_agent);
344 g_free(gunk);
345 callback(data, g_strdup(_("g003: Error opening connection.\n")), 0);
346 }
347 } 72 }
348 73
349 struct gaim_parse_tag { 74 struct gaim_parse_tag {
350 char *src_tag; 75 char *src_tag;
351 char *dest_tag; 76 char *dest_tag;
653 *plain_out = g_strdup(plain->str); 378 *plain_out = g_strdup(plain->str);
654 g_string_free(xhtml, TRUE); 379 g_string_free(xhtml, TRUE);
655 g_string_free(plain, TRUE); 380 g_string_free(plain, TRUE);
656 } 381 }
657 382
658 int info_extract_field(char *original, char *add_to, char *start_tok, 383 int info_extract_field(const char *original, char *add_to, char *start_tok,
659 int skip, char *end_tok, char check_value, char *no_value_tok, 384 int skip, char *end_tok, char check_value, char *no_value_tok,
660 char *display_name, int islink, char *link_prefix) 385 char *display_name, int islink, char *link_prefix)
661 { 386 {
662 char *p, *q; 387 char *p, *q;
663 char buf[1024]; 388 char buf[1024];