urlapi: fix redirect from file:// with query, and simplify

- fix redirect from file:// URL with query part
- simplify find_host_sep()
- simplify urlencode_str()
- simplify redirect_url()
- make more pointers const char *
- add more redirect URL test cases to test 1560

Closes #16498
This commit is contained in:
Daniel Stenberg 2025-02-26 11:13:36 +01:00
parent c028a243f2
commit bc24c60512
No known key found for this signature in database
GPG Key ID: 5CC908FDB71E12C2
3 changed files with 97 additions and 106 deletions

View File

@ -34,15 +34,9 @@
#endif #endif
#else /* HAVE_MEMRCHR */ #else /* HAVE_MEMRCHR */
#if (!defined(CURL_DISABLE_HTTP) && !defined(CURL_DISABLE_COOKIES)) || \
defined(USE_OPENSSL) || \
defined(USE_SCHANNEL)
void *Curl_memrchr(const void *s, int c, size_t n); void *Curl_memrchr(const void *s, int c, size_t n);
#define memrchr(x,y,z) Curl_memrchr((x),(y),(z)) #define memrchr(x,y,z) Curl_memrchr((x),(y),(z))
#endif
#endif /* HAVE_MEMRCHR */ #endif /* HAVE_MEMRCHR */
#endif /* HEADER_CURL_MEMRCHR_H */ #endif /* HEADER_CURL_MEMRCHR_H */

View File

@ -35,6 +35,7 @@
#include "strdup.h" #include "strdup.h"
#include "idn.h" #include "idn.h"
#include "strparse.h" #include "strparse.h"
#include "curl_memrchr.h"
/* The last 3 #include files should be in this order */ /* The last 3 #include files should be in this order */
#include "curl_printf.h" #include "curl_printf.h"
@ -110,26 +111,18 @@ static void free_urlhandle(struct Curl_URL *u)
*/ */
static const char *find_host_sep(const char *url) static const char *find_host_sep(const char *url)
{ {
const char *sep;
const char *query;
/* Find the start of the hostname */ /* Find the start of the hostname */
sep = strstr(url, "//"); const char *sep = strstr(url, "//");
if(!sep) if(!sep)
sep = url; sep = url;
else else
sep += 2; sep += 2;
query = strchr(sep, '?'); /* Find first / or ? */
sep = strchr(sep, '/'); while(*sep && *sep != '/' && *sep != '?')
sep++;
if(!sep) return sep;
sep = url + strlen(url);
if(!query)
query = url + strlen(url);
return sep < query ? sep : query;
} }
/* convert CURLcode to CURLUcode */ /* convert CURLcode to CURLUcode */
@ -155,46 +148,40 @@ static CURLUcode urlencode_str(struct dynbuf *o, const char *url,
bool left = !query; bool left = !query;
const unsigned char *iptr; const unsigned char *iptr;
const unsigned char *host_sep = (const unsigned char *) url; const unsigned char *host_sep = (const unsigned char *) url;
CURLcode result; CURLcode result = CURLE_OK;
if(!relative) if(!relative) {
size_t n;
host_sep = (const unsigned char *) find_host_sep(url); host_sep = (const unsigned char *) find_host_sep(url);
for(iptr = (unsigned char *)url; /* read from here */ /* output the first piece as-is */
len; iptr++, len--) { n = (const char *)host_sep - url;
result = Curl_dyn_addn(o, url, n);
if(iptr < host_sep) { len -= n;
result = Curl_dyn_addn(o, iptr, 1);
if(result)
return cc2cu(result);
continue;
} }
for(iptr = host_sep; len && !result; iptr++, len--) {
if(*iptr == ' ') { if(*iptr == ' ') {
if(left) if(left)
result = Curl_dyn_addn(o, "%20", 3); result = Curl_dyn_addn(o, "%20", 3);
else else
result = Curl_dyn_addn(o, "+", 1); result = Curl_dyn_addn(o, "+", 1);
if(result)
return cc2cu(result);
continue;
} }
else if(urlchar_needs_escaping(*iptr)) {
if(*iptr == '?')
left = FALSE;
if(urlchar_needs_escaping(*iptr)) {
char out[3]={'%'}; char out[3]={'%'};
out[1] = hexdigits[*iptr >> 4]; out[1] = hexdigits[*iptr >> 4];
out[2] = hexdigits[*iptr & 0xf]; out[2] = hexdigits[*iptr & 0xf];
result = Curl_dyn_addn(o, out, 3); result = Curl_dyn_addn(o, out, 3);
} }
else else {
result = Curl_dyn_addn(o, iptr, 1); result = Curl_dyn_addn(o, iptr, 1);
if(result) if(*iptr == '?')
return cc2cu(result); left = FALSE;
}
} }
if(result)
return cc2cu(result);
return CURLUE_OK; return CURLUE_OK;
} }
@ -247,87 +234,76 @@ size_t Curl_is_absolute_url(const char *url, char *buf, size_t buflen,
} }
/* /*
* Concatenate a relative URL to a base URL making it absolute. * Concatenate a relative URL onto a base URL making it absolute.
*
* Note that this function destroys the 'base' string.
*/ */
static CURLUcode redirect_url(char *base, const char *relurl, static CURLUcode redirect_url(const char *base, const char *relurl,
CURLU *u, unsigned int flags) CURLU *u, unsigned int flags)
{ {
struct dynbuf urlbuf; struct dynbuf urlbuf;
bool host_changed = FALSE; bool host_changed = FALSE;
const char *useurl = relurl; const char *useurl = relurl;
CURLcode result = CURLE_OK; const char *cutoff = NULL;
size_t prelen;
CURLUcode uc; CURLUcode uc;
/* protsep points to the start of the hostname */
char *protsep = strstr(base, "//");
DEBUGASSERT(protsep);
if(!protsep)
protsep = base;
else
protsep += 2; /* pass the slashes */
if(('/' != relurl[0]) && ('#' != relurl[0])) { /* protsep points to the start of the hostname, after [scheme]:// */
/* First we need to find out if there is a ?-letter in the original URL, const char *protsep = base + strlen(u->scheme) + 3;
and cut it and the right-side of that off */ DEBUGASSERT(base && relurl && u); /* all set here */
char *pathsep = strchr(protsep, '?'); if(!base)
if(pathsep) return CURLUE_MALFORMED_INPUT; /* should never happen */
*pathsep = 0;
else {
/* if not, cut off the potential fragment */
pathsep = strchr(protsep, '#');
if(pathsep)
*pathsep = 0;
}
/* if the redirect-to piece is not just a query, cut the path after the
last slash */
if(useurl[0] != '?') {
pathsep = strrchr(protsep, '/');
if(pathsep)
pathsep[1] = 0; /* leave the slash */
}
}
else if('/' == relurl[0]) {
/* We got a new absolute path for this server */
/* handle different relative URL types */
switch(relurl[0]) {
case '/':
if(relurl[1] == '/') { if(relurl[1] == '/') {
/* the new URL starts with //, just keep the protocol part from the /* protocol-relative URL: //example.com/path */
original one */ cutoff = protsep;
*protsep = 0; useurl = &relurl[2];
useurl = &relurl[2]; /* we keep the slashes from the original, so we
skip the new ones */
host_changed = TRUE; host_changed = TRUE;
} }
else { else
/* cut the original URL at first slash */ /* absolute /path */
char *pathsep = strchr(protsep, '/'); cutoff = strchr(protsep, '/');
if(pathsep) break;
*pathsep = 0;
}
}
else {
/* the relative piece starts with '#' */
/* If there is a fragment in the original URL, cut it off */ case '#':
char *pathsep = strchr(protsep, '#'); /* fragment-only change */
if(pathsep) if(u->fragment)
*pathsep = 0; cutoff = strchr(protsep, '#');
break;
default:
/* path or query-only change */
if(u->query && u->query[0])
/* remove existing query */
cutoff = strchr(protsep, '?');
else if(u->fragment && u->fragment[0])
/* Remove existing fragment */
cutoff = strchr(protsep, '#');
if(relurl[0] != '?') {
/* append a relative path after the last slash */
cutoff = memrchr(protsep, '/',
cutoff ? (size_t)(cutoff - protsep) : strlen(protsep));
if(cutoff)
cutoff++; /* truncate after last slash */
}
break;
} }
prelen = cutoff ? (size_t)(cutoff - base) : strlen(base);
/* build new URL */
Curl_dyn_init(&urlbuf, CURL_MAX_INPUT_LENGTH); Curl_dyn_init(&urlbuf, CURL_MAX_INPUT_LENGTH);
/* copy over the root URL part */ if(!Curl_dyn_addn(&urlbuf, base, prelen) &&
result = Curl_dyn_add(&urlbuf, base); !urlencode_str(&urlbuf, useurl, strlen(useurl), !host_changed, FALSE)) {
if(result)
return cc2cu(result);
/* then append the new piece on the right side */
uc = urlencode_str(&urlbuf, useurl, strlen(useurl), !host_changed,
FALSE);
if(!uc)
uc = parseurl_and_replace(Curl_dyn_ptr(&urlbuf), u, uc = parseurl_and_replace(Curl_dyn_ptr(&urlbuf), u,
flags & ~CURLU_PATH_AS_IS); flags & ~CURLU_PATH_AS_IS);
}
else
uc = CURLUE_OUT_OF_MEMORY;
Curl_dyn_free(&urlbuf); Curl_dyn_free(&urlbuf);
return uc; return uc;
} }
@ -1440,8 +1416,10 @@ CURLUcode curl_url_get(const CURLU *u, CURLUPart what,
punycode = (flags & CURLU_PUNYCODE) ? 1 : 0; punycode = (flags & CURLU_PUNYCODE) ? 1 : 0;
depunyfy = (flags & CURLU_PUNY2IDN) ? 1 : 0; depunyfy = (flags & CURLU_PUNY2IDN) ? 1 : 0;
if(u->scheme && strcasecompare("file", u->scheme)) { if(u->scheme && strcasecompare("file", u->scheme)) {
url = aprintf("file://%s%s%s", url = aprintf("file://%s%s%s%s%s",
u->path, u->path,
show_query ? "?": "",
u->query ? u->query : "",
show_fragment ? "#": "", show_fragment ? "#": "",
u->fragment ? u->fragment : ""); u->fragment ? u->fragment : "");
} }
@ -1795,7 +1773,7 @@ CURLUcode curl_url_set(CURLU *u, CURLUPart what,
|| curl_url_get(u, CURLUPART_URL, &oldurl, flags)) { || curl_url_get(u, CURLUPART_URL, &oldurl, flags)) {
return parseurl_and_replace(part, u, flags); return parseurl_and_replace(part, u, flags);
} }
DEBUGASSERT(oldurl); /* it is set here */
/* apply the relative part to create a new URL */ /* apply the relative part to create a new URL */
uc = redirect_url(oldurl, part, u, flags); uc = redirect_url(oldurl, part, u, flags);
free(oldurl); free(oldurl);

View File

@ -1143,6 +1143,25 @@ static CURLUcode updateurl(CURLU *u, const char *cmd, unsigned int setflags)
} }
static const struct redircase set_url_list[] = { static const struct redircase set_url_list[] = {
{"http://firstplace.example.com/want/1314",
"//somewhere.example.com/reply/1314",
"http://somewhere.example.com/reply/1314",
0, 0, CURLUE_OK },
{"http://127.0.0.1:46383/want?uri=http://anything/276?secondq/276",
"data/2760002.txt?coolsite=http://anotherurl/?a_second/2760002",
"http://127.0.0.1:46383/"
"data/2760002.txt?coolsite=http://anotherurl/?a_second/2760002",
0, 0, CURLUE_OK },
{"file:///basic#", "#yay",
"file:///basic#yay", 0, 0, CURLUE_OK},
{"file:///basic", "?yay",
"file:///basic?yay", 0, 0, CURLUE_OK},
{"file:///basic?", "?yay",
"file:///basic?yay", 0, 0, CURLUE_OK},
{"file:///basic?hello", "#frag",
"file:///basic?hello#frag", 0, 0, CURLUE_OK},
{"file:///basic?hello", "?q",
"file:///basic?q", 0, 0, CURLUE_OK},
{"http://example.org#withs/ash", "/moo#frag", {"http://example.org#withs/ash", "/moo#frag",
"http://example.org/moo#frag", "http://example.org/moo#frag",
0, 0, CURLUE_OK}, 0, 0, CURLUE_OK},