Mercurial > pidgin
comparison src/html.c @ 7094:2343c3aa1dec
[gaim-migrate @ 7659]
grab_url() and parse_url() are gone, replaced with gaim_url_fetch() and
gaim_url_parse(). They were also moved to util.[ch].
committer: Tailor Script <tailor@pidgin.im>
| author | Christian Hammond <chipx86@chipx86.com> |
|---|---|
| date | Wed, 01 Oct 2003 03:01:25 +0000 |
| parents | 67c4e9d39242 |
| children | |
comparison
equal
deleted
inserted
replaced
| 7093:3650612c7daa | 7094:2343c3aa1dec |
|---|---|
| 67 text2[j++] = text2[i]; | 67 text2[j++] = text2[i]; |
| 68 } | 68 } |
| 69 } | 69 } |
| 70 text2[j] = '\0'; | 70 text2[j] = '\0'; |
| 71 return text2; | 71 return text2; |
| 72 } | |
| 73 | |
| 74 struct g_url *parse_url(char *url) | |
| 75 { | |
| 76 struct g_url *test = g_new0(struct g_url, 1); | |
| 77 char scan_info[255]; | |
| 78 char port[5]; | |
| 79 int f; | |
| 80 char* turl; | |
| 81 /* hyphen at end includes it in control set */ | |
| 82 char addr_ctrl[] = "A-Za-z0-9.-"; | |
| 83 char port_ctrl[] = "0-9"; | |
| 84 char page_ctrl[] = "A-Za-z0-9.~_/:*!@&%%?=+^-"; | |
| 85 | |
| 86 if((turl=strstr(url, "http://")) || (turl=strstr(url, "HTTP://"))) | |
| 87 url=turl+=7; | |
| 88 | |
| 89 snprintf(scan_info, sizeof(scan_info), | |
| 90 "%%[%s]:%%[%s]/%%[%s]", | |
| 91 addr_ctrl, port_ctrl, page_ctrl); | |
| 92 | |
| 93 f = sscanf(url, scan_info, test->address, port, test->page); | |
| 94 if (f == 1) { | |
| 95 snprintf(scan_info, sizeof(scan_info), | |
| 96 "%%[%s]/%%[%s]", | |
| 97 addr_ctrl, page_ctrl); | |
| 98 f = sscanf(url, scan_info, test->address, test->page); | |
| 99 snprintf(port, sizeof(port), "80"); | |
| 100 } | |
| 101 if (f == 1) | |
| 102 test->page[0] = '\0'; | |
| 103 | |
| 104 sscanf(port, "%d", &test->port); | |
| 105 return test; | |
| 106 } | |
| 107 | |
| 108 struct grab_url_data { | |
| 109 void (* callback)(gpointer, char *, unsigned long); | |
| 110 gpointer data; | |
| 111 struct g_url *website; | |
| 112 char *url; | |
| 113 gboolean full; | |
| 114 char *user_agent; | |
| 115 int http11; | |
| 116 | |
| 117 int inpa; | |
| 118 | |
| 119 gboolean sentreq; | |
| 120 gboolean newline; | |
| 121 gboolean startsaving; | |
| 122 char *webdata; | |
| 123 unsigned long len; | |
| 124 unsigned long data_len; | |
| 125 }; | |
| 126 | |
| 127 static gboolean | |
| 128 parse_redirect(const char *data, size_t data_len, gint sock, | |
| 129 struct grab_url_data *gunk) | |
| 130 { | |
| 131 gchar *s; | |
| 132 | |
| 133 if ((s = g_strstr_len(data, data_len, "Location: ")) != NULL) { | |
| 134 gchar *new_url, *temp_url, *end; | |
| 135 gboolean full; | |
| 136 int len; | |
| 137 | |
| 138 s += strlen("Location: "); | |
| 139 end = strchr(s, '\r'); | |
| 140 | |
| 141 /* Just in case :) */ | |
| 142 if (end == NULL) | |
| 143 end = strchr(s, '\n'); | |
| 144 | |
| 145 len = end - s; | |
| 146 | |
| 147 new_url = g_malloc(len + 1); | |
| 148 strncpy(new_url, s, len); | |
| 149 new_url[len] = '\0'; | |
| 150 | |
| 151 full = gunk->full; | |
| 152 | |
| 153 if (*new_url == '/' || g_strstr_len(new_url, len, "://") == NULL) { | |
| 154 temp_url = new_url; | |
| 155 | |
| 156 new_url = g_strdup_printf("%s:%d%s", gunk->website->address, | |
| 157 gunk->website->port, temp_url); | |
| 158 | |
| 159 g_free(temp_url); | |
| 160 | |
| 161 full = FALSE; | |
| 162 } | |
| 163 | |
| 164 /* Close the existing stuff. */ | |
| 165 gaim_input_remove(gunk->inpa); | |
| 166 close(sock); | |
| 167 | |
| 168 gaim_debug(GAIM_DEBUG_INFO, "grab_url", | |
| 169 "Redirecting to %s\n", new_url); | |
| 170 | |
| 171 /* Try again, with this new location. */ | |
| 172 grab_url(new_url, full, gunk->callback, gunk->data, gunk->user_agent, gunk->http11); | |
| 173 | |
| 174 /* Free up. */ | |
| 175 g_free(new_url); | |
| 176 g_free(gunk->webdata); | |
| 177 g_free(gunk->website); | |
| 178 g_free(gunk->url); | |
| 179 g_free(gunk->user_agent); | |
| 180 g_free(gunk); | |
| 181 | |
| 182 return TRUE; | |
| 183 } | |
| 184 | |
| 185 return FALSE; | |
| 186 } | |
| 187 | |
| 188 static size_t | |
| 189 parse_content_len(const char *data, size_t data_len) | |
| 190 { | |
| 191 size_t content_len = 0; | |
| 192 | |
| 193 sscanf(data, "Content-Length: %d", &content_len); | |
| 194 | |
| 195 return content_len; | |
| 196 } | |
| 197 | |
| 198 static void grab_url_callback(gpointer dat, gint sock, GaimInputCondition cond) | |
| 199 { | |
| 200 struct grab_url_data *gunk = dat; | |
| 201 char data; | |
| 202 | |
| 203 if (sock == -1) { | |
| 204 gunk->callback(gunk->data, NULL, 0); | |
| 205 g_free(gunk->website); | |
| 206 g_free(gunk->url); | |
| 207 g_free(gunk->user_agent); | |
| 208 g_free(gunk); | |
| 209 return; | |
| 210 } | |
| 211 | |
| 212 if (!gunk->sentreq) { | |
| 213 char buf[1024]; | |
| 214 | |
| 215 if(gunk->user_agent) { | |
| 216 if(gunk->http11) | |
| 217 g_snprintf(buf, sizeof(buf), "GET %s%s HTTP/1.1\r\nUser-Agent: \"%s\"\r\nHost: %s\r\n\r\n", gunk->full ? "" : "/", | |
| 218 gunk->full ? gunk->url : gunk->website->page, gunk->user_agent, gunk->website->address); | |
| 219 else | |
| 220 g_snprintf(buf, sizeof(buf), "GET %s%s HTTP/1.0\r\nUser-Agent: \"%s\"\r\n\r\n", gunk->full ? "" : "/", | |
| 221 gunk->full ? gunk->url : gunk->website->page, gunk->user_agent); | |
| 222 } | |
| 223 else { | |
| 224 if(gunk->http11) | |
| 225 g_snprintf(buf, sizeof(buf), "GET %s%s HTTP/1.1\r\nHost: %s\r\n\r\n", gunk->full ? "" : "/", | |
| 226 gunk->full ? gunk->url : gunk->website->page, gunk->website->address); | |
| 227 else | |
| 228 g_snprintf(buf, sizeof(buf), "GET %s%s HTTP/1.0\r\n\r\n", gunk->full ? "" : "/", | |
| 229 gunk->full ? gunk->url : gunk->website->page); | |
| 230 | |
| 231 } | |
| 232 gaim_debug(GAIM_DEBUG_MISC, "grab_url_callback", | |
| 233 "Request: %s\n", buf); | |
| 234 | |
| 235 write(sock, buf, strlen(buf)); | |
| 236 fcntl(sock, F_SETFL, O_NONBLOCK); | |
| 237 gunk->sentreq = TRUE; | |
| 238 gunk->inpa = gaim_input_add(sock, GAIM_INPUT_READ, grab_url_callback, dat); | |
| 239 gunk->data_len = 4096; | |
| 240 gunk->webdata = g_malloc(gunk->data_len); | |
| 241 return; | |
| 242 } | |
| 243 | |
| 244 if (read(sock, &data, 1) > 0 || errno == EWOULDBLOCK) { | |
| 245 if (errno == EWOULDBLOCK) { | |
| 246 errno = 0; | |
| 247 return; | |
| 248 } | |
| 249 | |
| 250 gunk->len++; | |
| 251 | |
| 252 if (gunk->len == gunk->data_len + 1) { | |
| 253 gunk->data_len += (gunk->data_len) / 2; | |
| 254 | |
| 255 gunk->webdata = g_realloc(gunk->webdata, gunk->data_len); | |
| 256 } | |
| 257 | |
| 258 gunk->webdata[gunk->len - 1] = data; | |
| 259 | |
| 260 if (!gunk->startsaving) { | |
| 261 if (data == '\r') | |
| 262 return; | |
| 263 if (data == '\n') { | |
| 264 if (gunk->newline) { | |
| 265 size_t content_len; | |
| 266 gunk->startsaving = TRUE; | |
| 267 | |
| 268 /* See if we can find a redirect. */ | |
| 269 if (parse_redirect(gunk->webdata, gunk->len, sock, gunk)) | |
| 270 return; | |
| 271 | |
| 272 /* No redirect. See if we can find a content length. */ | |
| 273 content_len = parse_content_len(gunk->webdata, gunk->len); | |
| 274 | |
| 275 if (content_len == 0) { | |
| 276 /* We'll stick with an initial 8192 */ | |
| 277 content_len = 8192; | |
| 278 } | |
| 279 | |
| 280 /* Out with the old... */ | |
| 281 gunk->len = 0; | |
| 282 g_free(gunk->webdata); | |
| 283 gunk->webdata = NULL; | |
| 284 | |
| 285 /* In with the new. */ | |
| 286 gunk->data_len = content_len; | |
| 287 gunk->webdata = g_malloc(gunk->data_len); | |
| 288 } | |
| 289 else | |
| 290 gunk->newline = TRUE; | |
| 291 return; | |
| 292 } | |
| 293 gunk->newline = FALSE; | |
| 294 } | |
| 295 } else if (errno != ETIMEDOUT) { | |
| 296 gunk->webdata = g_realloc(gunk->webdata, gunk->len + 1); | |
| 297 gunk->webdata[gunk->len] = 0; | |
| 298 | |
| 299 gaim_debug(GAIM_DEBUG_MISC, "grab_url_callback", | |
| 300 "Received: '%s'\n", gunk->webdata); | |
| 301 | |
| 302 gaim_input_remove(gunk->inpa); | |
| 303 close(sock); | |
| 304 gunk->callback(gunk->data, gunk->webdata, gunk->len); | |
| 305 if (gunk->webdata) | |
| 306 g_free(gunk->webdata); | |
| 307 g_free(gunk->website); | |
| 308 g_free(gunk->url); | |
| 309 g_free(gunk->user_agent); | |
| 310 g_free(gunk); | |
| 311 } else { | |
| 312 gaim_input_remove(gunk->inpa); | |
| 313 close(sock); | |
| 314 gunk->callback(gunk->data, NULL, 0); | |
| 315 if (gunk->webdata) | |
| 316 g_free(gunk->webdata); | |
| 317 g_free(gunk->website); | |
| 318 g_free(gunk->url); | |
| 319 g_free(gunk->user_agent); | |
| 320 g_free(gunk); | |
| 321 } | |
| 322 } | |
| 323 | |
| 324 void grab_url(char *url, gboolean full, void callback(gpointer, char *, unsigned long), | |
| 325 gpointer data, char *user_agent, int http11) | |
| 326 { | |
| 327 int sock; | |
| 328 struct grab_url_data *gunk = g_new0(struct grab_url_data, 1); | |
| 329 | |
| 330 gunk->callback = callback; | |
| 331 gunk->data = data; | |
| 332 gunk->url = g_strdup(url); | |
| 333 gunk->user_agent = (user_agent) ? g_strdup(user_agent) : NULL; | |
| 334 gunk->http11 = http11; | |
| 335 gunk->website = parse_url(url); | |
| 336 gunk->full = full; | |
| 337 | |
| 338 if ((sock = gaim_proxy_connect(NULL, gunk->website->address, | |
| 339 gunk->website->port, grab_url_callback, | |
| 340 gunk)) < 0) { | |
| 341 g_free(gunk->website); | |
| 342 g_free(gunk->url); | |
| 343 g_free(gunk->user_agent); | |
| 344 g_free(gunk); | |
| 345 callback(data, g_strdup(_("g003: Error opening connection.\n")), 0); | |
| 346 } | |
| 347 } | 72 } |
| 348 | 73 |
| 349 struct gaim_parse_tag { | 74 struct gaim_parse_tag { |
| 350 char *src_tag; | 75 char *src_tag; |
| 351 char *dest_tag; | 76 char *dest_tag; |
| 653 *plain_out = g_strdup(plain->str); | 378 *plain_out = g_strdup(plain->str); |
| 654 g_string_free(xhtml, TRUE); | 379 g_string_free(xhtml, TRUE); |
| 655 g_string_free(plain, TRUE); | 380 g_string_free(plain, TRUE); |
| 656 } | 381 } |
| 657 | 382 |
| 658 int info_extract_field(char *original, char *add_to, char *start_tok, | 383 int info_extract_field(const char *original, char *add_to, char *start_tok, |
| 659 int skip, char *end_tok, char check_value, char *no_value_tok, | 384 int skip, char *end_tok, char check_value, char *no_value_tok, |
| 660 char *display_name, int islink, char *link_prefix) | 385 char *display_name, int islink, char *link_prefix) |
| 661 { | 386 { |
| 662 char *p, *q; | 387 char *p, *q; |
| 663 char buf[1024]; | 388 char buf[1024]; |
