urlapi: leaner with fewer allocs

Slightly faster with more robust code. Uses fewer and smaller mallocs.

- remove two fields from the URL handle struct
- reduce copies and allocs
- use dynbuf buffers more instead of custom malloc + copies
- use dynbuf to build the host name, which reduces serial alloc+free within
  the same function
- move dedotdotify into urlapi.c and make it static, not strdup the input
  and optimize it by checking for . and / before using strncmp
- remove a few strlen() calls
- add Curl_dyn_setlen() that can "trim" an existing dynbuf

Closes #9408
This commit is contained in:
Daniel Stenberg 2022-09-01 10:16:24 +02:00
parent 2ae81e680b
commit f703cf971c
No known key found for this signature in database
GPG Key ID: 5CC908FDB71E12C2
11 changed files with 565 additions and 596 deletions

View File

@ -76,7 +76,8 @@ CURLcode Curl_dyn_tail(struct dynbuf *s, size_t length);
Keep `length` bytes of the buffer tail (the last `length` bytes of the
buffer). The rest of the buffer is dropped. The specified `length` must not be
larger than the buffer length.
larger than the buffer length. To instead keep the leading part, see
`Curl_dyn_setlen()`.
## ptr
@ -106,3 +107,13 @@ size_t Curl_dyn_len(const struct dynbuf *s);
Returns the length of the buffer in bytes. Does not include the terminating
zero byte.
## setlen
```c
CURLcode Curl_dyn_setlen(struct dynbuf *s, size_t len);
```
Sets a new, shorter length for the buffer, in bytes. Keeps the leftmost `len`
bytes and discards the rest. `len` must not be larger than the current buffer
length. To instead keep the tail part of the buffer, see `Curl_dyn_tail()`.

View File

@ -128,7 +128,6 @@ LIB_CFILES = \
curl_threads.c \
dict.c \
doh.c \
dotdot.c \
dynbuf.c \
easy.c \
easygetopt.c \
@ -262,7 +261,6 @@ LIB_HFILES = \
curlx.h \
dict.h \
doh.h \
dotdot.h \
dynbuf.h \
easy_lock.h \
easyif.h \

View File

@ -1,184 +0,0 @@
/***************************************************************************
* _ _ ____ _
* Project ___| | | | _ \| |
* / __| | | | |_) | |
* | (__| |_| | _ <| |___
* \___|\___/|_| \_\_____|
*
* Copyright (C) 1998 - 2022, Daniel Stenberg, <daniel@haxx.se>, et al.
*
* This software is licensed as described in the file COPYING, which
* you should have received as part of this distribution. The terms
* are also available at https://curl.se/docs/copyright.html.
*
* You may opt to use, copy, modify, merge, publish, distribute and/or sell
* copies of the Software, and permit persons to whom the Software is
* furnished to do so, under the terms of the COPYING file.
*
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
* KIND, either express or implied.
*
* SPDX-License-Identifier: curl
*
***************************************************************************/
#include "curl_setup.h"
#include <curl/curl.h>
#include "dotdot.h"
#include "curl_memory.h"
/* The last #include file should be: */
#include "memdebug.h"
/*
* "Remove Dot Segments"
* https://datatracker.ietf.org/doc/html/rfc3986#section-5.2.4
*/
/*
 * Curl_dedotdotify()
 * @unittest: 1395
 *
 * Takes a null-terminated path and removes its "." and ".." segments
 * following the rules in RFC 3986 section 5.2.4.
 *
 * A query part ('?' + stuff) at the end of the input is left untouched and
 * is appended verbatim to the output. Fragments ('#' + stuff) are expected
 * to have been cut off already.
 *
 * The input is only read: the path part is walked with a pointer that stops
 * at the first '?' (or at the end of the string), so no private copy of the
 * input is needed and an input that starts with '?' is handled as an empty
 * path followed by a query.
 *
 * RETURNS
 *
 * an allocated dedotdotified output string that the caller frees, or NULL
 * when out of memory
 */

/* Remove the last segment and its preceding "/" (if any) from the output
   that starts at 'out' and currently ends at 'outptr'. Null-terminates the
   output at the new end and returns that position. */
static char *dedot_pop_segment(char *out, char *outptr)
{
  while(outptr > out) {
    outptr--;
    if(*outptr == '/')
      break;
  }
  *outptr = 0; /* null-terminate where it stops */
  return outptr;
}

char *Curl_dedotdotify(const char *input)
{
  size_t inlen = strlen(input);
  /* the query part, if present, is excluded from the dotdot-operation and
     appended unmodified at the end */
  const char *queryp = strchr(input, '?');
  const char *end = queryp ? queryp : input + inlen; /* end of path part */
  const char *in = input;
  /* the output never gets longer than the input */
  char *out = malloc(inlen + 1);
  char *outptr = out;

  if(!out)
    return NULL; /* out of memory */

  *out = 0; /* null-terminates, for inputs like "./" and "" */

  while(in < end) {
    size_t left = (size_t)(end - in); /* path bytes not yet consumed */

    /* A. If the input buffer begins with a prefix of "../" or "./", then
       remove that prefix from the input buffer; otherwise, */
    if((left >= 2) && !strncmp("./", in, 2))
      in += 2;
    else if((left >= 3) && !strncmp("../", in, 3))
      in += 3;

    /* B. if the input buffer begins with a prefix of "/./" or "/.", where
       "." is a complete path segment, then replace that prefix with "/" in
       the input buffer; otherwise, */
    else if((left >= 3) && !strncmp("/./", in, 3))
      in += 2;
    else if((left == 2) && !strncmp("/.", in, 2)) {
      /* a trailing "/." is replaced by "/", which is also the final
         segment so it goes straight to the output */
      *outptr++ = '/';
      *outptr = 0;
      in = end;
    }

    /* C. if the input buffer begins with a prefix of "/../" or "/..", where
       ".." is a complete path segment, then replace that prefix with "/" in
       the input buffer and remove the last segment and its preceding "/" (if
       any) from the output buffer; otherwise, */
    else if((left >= 4) && !strncmp("/../", in, 4)) {
      in += 3;
      outptr = dedot_pop_segment(out, outptr);
    }
    else if((left == 3) && !strncmp("/..", in, 3)) {
      /* a trailing "/.." drops the last output segment and leaves a "/" as
         the final segment */
      outptr = dedot_pop_segment(out, outptr);
      *outptr++ = '/';
      *outptr = 0;
      in = end;
    }

    /* D. if the input buffer consists only of "." or "..", then remove
       that from the input buffer; otherwise, */
    else if(((left == 1) && (in[0] == '.')) ||
            ((left == 2) && (in[0] == '.') && (in[1] == '.'))) {
      in = end;
      *out = 0;
    }

    else {
      /* E. move the first path segment in the input buffer to the end of
         the output buffer, including the initial "/" character (if any) and
         any subsequent characters up to, but not including, the next "/"
         character or the end of the input buffer. */
      do {
        *outptr++ = *in++;
      } while((in < end) && (*in != '/'));
      *outptr = 0;
    }
  }

  if(queryp)
    /* There was a query part, append it (with its zero terminator) to the
       output */
    memcpy(outptr, queryp, strlen(queryp) + 1);

  return out;
}

View File

@ -1,27 +0,0 @@
#ifndef HEADER_CURL_DOTDOT_H
#define HEADER_CURL_DOTDOT_H
/***************************************************************************
* _ _ ____ _
* Project ___| | | | _ \| |
* / __| | | | |_) | |
* | (__| |_| | _ <| |___
* \___|\___/|_| \_\_____|
*
* Copyright (C) 1998 - 2022, Daniel Stenberg, <daniel@haxx.se>, et al.
*
* This software is licensed as described in the file COPYING, which
* you should have received as part of this distribution. The terms
* are also available at https://curl.se/docs/copyright.html.
*
* You may opt to use, copy, modify, merge, publish, distribute and/or sell
* copies of the Software, and permit persons to whom the Software is
* furnished to do so, under the terms of the COPYING file.
*
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
* KIND, either express or implied.
*
* SPDX-License-Identifier: curl
*
***************************************************************************/
char *Curl_dedotdotify(const char *input);
#endif /* HEADER_CURL_DOTDOT_H */

View File

@ -128,7 +128,6 @@ void Curl_dyn_reset(struct dynbuf *s)
s->leng = 0;
}
#ifdef USE_NGTCP2
/*
* Specify the size of the tail to keep (number of bytes from the end of the
* buffer). The rest will be dropped.
@ -153,7 +152,6 @@ CURLcode Curl_dyn_tail(struct dynbuf *s, size_t trail)
return CURLE_OK;
}
#endif
/*
* Appends a buffer with length.
@ -255,3 +253,18 @@ size_t Curl_dyn_len(const struct dynbuf *s)
DEBUGASSERT(!s->leng || s->bufr);
return s->leng;
}
/*
 * Set a new (smaller) length.
 *
 * Keeps the first 'set' bytes of the buffer and null-terminates it there.
 * Returns CURLE_BAD_FUNCTION_ARGUMENT and leaves the buffer untouched if
 * 'set' is larger than the current length, otherwise CURLE_OK.
 */
CURLcode Curl_dyn_setlen(struct dynbuf *s, size_t set)
{
  DEBUGASSERT(s);
  DEBUGASSERT(s->init == DYNINIT);
  DEBUGASSERT(!s->leng || s->bufr);
  if(set > s->leng)
    return CURLE_BAD_FUNCTION_ARGUMENT;
  s->leng = set;
  /* the assert above accepts a NULL buffer pointer while the length is zero;
     in that case there is nothing to terminate and a write would dereference
     NULL */
  if(s->bufr)
    s->bufr[s->leng] = 0;
  return CURLE_OK;
}

View File

@ -38,6 +38,7 @@
#define Curl_dyn_len(a) curlx_dyn_len(a)
#define Curl_dyn_reset(a) curlx_dyn_reset(a)
#define Curl_dyn_tail(a,b) curlx_dyn_tail(a,b)
#define Curl_dyn_setlen(a,b) curlx_dyn_setlen(a,b)
#define curlx_dynbuf dynbuf /* for the struct name */
#endif
@ -63,6 +64,7 @@ CURLcode Curl_dyn_vaddf(struct dynbuf *s, const char *fmt, va_list ap)
WARN_UNUSED_RESULT;
void Curl_dyn_reset(struct dynbuf *s);
CURLcode Curl_dyn_tail(struct dynbuf *s, size_t trail);
CURLcode Curl_dyn_setlen(struct dynbuf *s, size_t set);
char *Curl_dyn_ptr(const struct dynbuf *s);
unsigned char *Curl_dyn_uptr(const struct dynbuf *s);
size_t Curl_dyn_len(const struct dynbuf *s);

View File

@ -128,7 +128,6 @@ bool curl_win32_idn_to_ascii(const char *in, char **out);
#include "http_proxy.h"
#include "conncache.h"
#include "multihandle.h"
#include "dotdot.h"
#include "strdup.h"
#include "setopt.h"
#include "altsvc.h"

View File

@ -25,10 +25,11 @@
***************************************************************************/
#include "curl_setup.h"
bool Curl_is_absolute_url(const char *url, char *scheme, size_t buflen);
size_t Curl_is_absolute_url(const char *url, char *buf, size_t buflen);
#ifdef DEBUGBUILD
CURLUcode Curl_parse_port(struct Curl_URL *u, char *hostname, bool);
CURLUcode Curl_parse_port(struct Curl_URL *u, struct dynbuf *host,
bool has_scheme);
#endif
#endif /* HEADER_CURL_URLAPI_INT_H */

File diff suppressed because it is too large Load Diff

View File

@ -23,7 +23,8 @@
***************************************************************************/
#include "curlcheck.h"
#include "dotdot.h"
/* copied from urlapi.c */
extern char *dedotdotify(const char *input, size_t clen);
#include "memdebug.h"
@ -77,7 +78,7 @@ UNITTEST_START
};
for(i = 0; i < sizeof(pairs)/sizeof(pairs[0]); i++) {
char *out = Curl_dedotdotify(pairs[i].input);
char *out = dedotdotify(pairs[i].input, strlen(pairs[i].input));
abort_unless(out != NULL, "returned NULL!");
if(strcmp(out, pairs[i].output)) {

View File

@ -44,6 +44,19 @@ unit_stop(void)
#define free_and_clear(x) free(x); x = NULL
/* Test helper: copies the host string 'h' into a temporary dynbuf, passes
   that buffer to Curl_parse_port() and returns its result. Returns
   CURLUE_OUT_OF_MEMORY if the string cannot be added to the dynbuf. */
static CURLUcode parse_port(CURLU *url,
                            char *h, bool has_scheme)
{
  struct dynbuf hostbuf;
  CURLUcode result = CURLUE_OUT_OF_MEMORY;

  Curl_dyn_init(&hostbuf, 10000);
  if(!Curl_dyn_add(&hostbuf, h)) {
    result = Curl_parse_port(url, &hostbuf, has_scheme);
    Curl_dyn_free(&hostbuf);
  }
  return result;
}
UNITTEST_START
{
CURLUcode ret;
@ -57,8 +70,8 @@ UNITTEST_START
ipv6port = strdup("[fe80::250:56ff:fea7:da15]");
if(!ipv6port)
goto fail;
ret = Curl_parse_port(u, ipv6port, FALSE);
fail_unless(ret == CURLUE_OK, "Curl_parse_port returned error");
ret = parse_port(u, ipv6port, FALSE);
fail_unless(ret == CURLUE_OK, "parse_port returned error");
ret = curl_url_get(u, CURLUPART_PORT, &portnum, CURLU_NO_DEFAULT_PORT);
fail_unless(ret != CURLUE_OK, "curl_url_get portnum returned something");
free_and_clear(ipv6port);
@ -71,8 +84,8 @@ UNITTEST_START
ipv6port = strdup("[fe80::250:56ff:fea7:da15|");
if(!ipv6port)
goto fail;
ret = Curl_parse_port(u, ipv6port, FALSE);
fail_unless(ret != CURLUE_OK, "Curl_parse_port true on error");
ret = parse_port(u, ipv6port, FALSE);
fail_unless(ret != CURLUE_OK, "parse_port true on error");
free_and_clear(ipv6port);
curl_url_cleanup(u);
@ -82,8 +95,8 @@ UNITTEST_START
ipv6port = strdup("[fe80::250:56ff;fea7:da15]:80");
if(!ipv6port)
goto fail;
ret = Curl_parse_port(u, ipv6port, FALSE);
fail_unless(ret != CURLUE_OK, "Curl_parse_port true on error");
ret = parse_port(u, ipv6port, FALSE);
fail_unless(ret != CURLUE_OK, "parse_port true on error");
free_and_clear(ipv6port);
curl_url_cleanup(u);
@ -94,8 +107,8 @@ UNITTEST_START
ipv6port = strdup("[fe80::250:56ff:fea7:da15%25eth3]:80");
if(!ipv6port)
goto fail;
ret = Curl_parse_port(u, ipv6port, FALSE);
fail_unless(ret == CURLUE_OK, "Curl_parse_port returned error");
ret = parse_port(u, ipv6port, FALSE);
fail_unless(ret == CURLUE_OK, "parse_port returned error");
ret = curl_url_get(u, CURLUPART_PORT, &portnum, 0);
fail_unless(ret == CURLUE_OK, "curl_url_get portnum returned error");
fail_unless(portnum && !strcmp(portnum, "80"), "Check portnumber");
@ -110,8 +123,8 @@ UNITTEST_START
ipv6port = strdup("[fe80::250:56ff:fea7:da15%25eth3]");
if(!ipv6port)
goto fail;
ret = Curl_parse_port(u, ipv6port, FALSE);
fail_unless(ret == CURLUE_OK, "Curl_parse_port returned error");
ret = parse_port(u, ipv6port, FALSE);
fail_unless(ret == CURLUE_OK, "parse_port returned error");
free_and_clear(ipv6port);
curl_url_cleanup(u);
@ -122,8 +135,8 @@ UNITTEST_START
ipv6port = strdup("[fe80::250:56ff:fea7:da15]:81");
if(!ipv6port)
goto fail;
ret = Curl_parse_port(u, ipv6port, FALSE);
fail_unless(ret == CURLUE_OK, "Curl_parse_port returned error");
ret = parse_port(u, ipv6port, FALSE);
fail_unless(ret == CURLUE_OK, "parse_port returned error");
ret = curl_url_get(u, CURLUPART_PORT, &portnum, 0);
fail_unless(ret == CURLUE_OK, "curl_url_get portnum returned error");
fail_unless(portnum && !strcmp(portnum, "81"), "Check portnumber");
@ -138,8 +151,8 @@ UNITTEST_START
ipv6port = strdup("[fe80::250:56ff:fea7:da15];81");
if(!ipv6port)
goto fail;
ret = Curl_parse_port(u, ipv6port, FALSE);
fail_unless(ret != CURLUE_OK, "Curl_parse_port true on error");
ret = parse_port(u, ipv6port, FALSE);
fail_unless(ret != CURLUE_OK, "parse_port true on error");
free_and_clear(ipv6port);
curl_url_cleanup(u);
@ -149,8 +162,8 @@ UNITTEST_START
ipv6port = strdup("[fe80::250:56ff:fea7:da15]80");
if(!ipv6port)
goto fail;
ret = Curl_parse_port(u, ipv6port, FALSE);
fail_unless(ret != CURLUE_OK, "Curl_parse_port true on error");
ret = parse_port(u, ipv6port, FALSE);
fail_unless(ret != CURLUE_OK, "parse_port true on error");
free_and_clear(ipv6port);
curl_url_cleanup(u);
@ -162,8 +175,8 @@ UNITTEST_START
ipv6port = strdup("[fe80::250:56ff:fea7:da15]:");
if(!ipv6port)
goto fail;
ret = Curl_parse_port(u, ipv6port, TRUE);
fail_unless(ret == CURLUE_OK, "Curl_parse_port returned error");
ret = parse_port(u, ipv6port, TRUE);
fail_unless(ret == CURLUE_OK, "parse_port returned error");
free_and_clear(ipv6port);
curl_url_cleanup(u);
@ -174,8 +187,8 @@ UNITTEST_START
ipv6port = strdup("[fe80::250:56ff:fea7:da15!25eth3]:80");
if(!ipv6port)
goto fail;
ret = Curl_parse_port(u, ipv6port, FALSE);
fail_unless(ret != CURLUE_OK, "Curl_parse_port returned non-error");
ret = parse_port(u, ipv6port, FALSE);
fail_unless(ret != CURLUE_OK, "parse_port returned non-error");
free_and_clear(ipv6port);
curl_url_cleanup(u);
@ -186,8 +199,8 @@ UNITTEST_START
ipv6port = strdup("[fe80::250:56ff:fea7:da15%eth3]:80");
if(!ipv6port)
goto fail;
ret = Curl_parse_port(u, ipv6port, FALSE);
fail_unless(ret == CURLUE_OK, "Curl_parse_port returned error");
ret = parse_port(u, ipv6port, FALSE);
fail_unless(ret == CURLUE_OK, "parse_port returned error");
free_and_clear(ipv6port);
curl_url_cleanup(u);
@ -200,8 +213,8 @@ UNITTEST_START
"aaaaaaaaaaaaaaaaaaaaaa:");
if(!ipv6port)
goto fail;
ret = Curl_parse_port(u, ipv6port, FALSE);
fail_unless(ret == CURLUE_BAD_PORT_NUMBER, "Curl_parse_port did wrong");
ret = parse_port(u, ipv6port, FALSE);
fail_unless(ret == CURLUE_BAD_PORT_NUMBER, "parse_port did wrong");
fail:
free(ipv6port);
curl_url_cleanup(u);