urlapi: leaner with fewer allocs

Slightly faster with more robust code. Uses fewer and smaller mallocs.

- remove two fields from the URL handle struct
- reduce copies and allocs
- use dynbuf buffers more instead of custom malloc + copies
- use dynbuf to build the host name, which reduces serial alloc+free within the same function
- move dedotdotify into urlapi.c and make it static, do not strdup the input, and optimize it by checking for . and / before using strncmp
- remove a few strlen() calls
- add Curl_dyn_setlen() that can "trim" an existing dynbuf

Closes #9408
2025-09-17 01:22:41 +03:00 · 2022-09-01 10:16:24 +02:00 · 2022-09-01 10:16:24 +02:00 · f703cf971c
commit f703cf971c
parent 2ae81e680b
11 changed files with 565 additions and 596 deletions
--- a/docs/DYNBUF.md
+++ b/docs/DYNBUF.md
@ -76,7 +76,8 @@ CURLcode Curl_dyn_tail(struct dynbuf *s, size_t length);

 Keep `length` bytes of the buffer tail (the last `length` bytes of the
 buffer). The rest of the buffer is dropped. The specified `length` must not be
-larger than the buffer length.
+larger than the buffer length. To instead keep the leading part, see
+`Curl_dyn_setlen()`.

 ## ptr

@ -106,3 +107,13 @@ size_t Curl_dyn_len(const struct dynbuf *s);

 Returns the length of the buffer in bytes. Does not include the terminating
 zero byte.
+
+## setlen
+
+```c
+CURLcode Curl_dyn_setlen(struct dynbuf *s, size_t len);
+```
+
+Shortens the buffer to `len` bytes. Keeps the leftmost `len` bytes and
+discards the rest. `len` must not be larger than the current buffer length.
+To instead keep the tail part of the buffer, see `Curl_dyn_tail()`.
--- a/lib/Makefile.inc
+++ b/lib/Makefile.inc
@ -128,7 +128,6 @@ LIB_CFILES =         \
  curl_threads.c     \
  dict.c             \
  doh.c              \
-  dotdot.c           \
  dynbuf.c           \
  easy.c             \
  easygetopt.c       \
@ -262,7 +261,6 @@ LIB_HFILES =         \
  curlx.h            \
  dict.h             \
  doh.h              \
-  dotdot.h           \
  dynbuf.h           \
  easy_lock.h        \
  easyif.h           \
--- a/lib/dotdot.c
+++ b/lib/dotdot.c
@ -1,184 +0,0 @@
-/***************************************************************************
- *                                  _   _ ____  _
- *  Project                     ___| | | |  _ \| |
- *                             / __| | | | |_) | |
- *                            | (__| |_| |  _ <| |___
- *                             \___|\___/|_| \_\_____|
- *
- * Copyright (C) 1998 - 2022, Daniel Stenberg, <daniel@haxx.se>, et al.
- *
- * This software is licensed as described in the file COPYING, which
- * you should have received as part of this distribution. The terms
- * are also available at https://curl.se/docs/copyright.html.
- *
- * You may opt to use, copy, modify, merge, publish, distribute and/or sell
- * copies of the Software, and permit persons to whom the Software is
- * furnished to do so, under the terms of the COPYING file.
- *
- * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
- * KIND, either express or implied.
- *
- * SPDX-License-Identifier: curl
- *
- ***************************************************************************/
-
-#include "curl_setup.h"
-
-#include <curl/curl.h>
-
-#include "dotdot.h"
-#include "curl_memory.h"
-
-/* The last #include file should be: */
-#include "memdebug.h"
-
-/*
- * "Remove Dot Segments"
- * https://datatracker.ietf.org/doc/html/rfc3986#section-5.2.4
- */
-
-/*
- * Curl_dedotdotify()
- * @unittest: 1395
- *
- * This function gets a null-terminated path with dot and dotdot sequences
- * passed in and strips them off according to the rules in RFC 3986 section
- * 5.2.4.
- *
- * The function handles a query part ('?' + stuff) appended but it expects
- * that fragments ('#' + stuff) have already been cut off.
- *
- * RETURNS
- *
- * an allocated dedotdotified output string
- */
-char *Curl_dedotdotify(const char *input)
-{
-  size_t inlen = strlen(input);
-  char *clone;
-  size_t clen = inlen; /* the length of the cloned input */
-  char *out = malloc(inlen + 1);
-  char *outptr;
-  char *orgclone;
-  char *queryp;
-  if(!out)
-    return NULL; /* out of memory */
-
-  *out = 0; /* null-terminates, for inputs like "./" */
-
-  /* get a cloned copy of the input */
-  clone = strdup(input);
-  if(!clone) {
-    free(out);
-    return NULL;
-  }
-  orgclone = clone;
-  outptr = out;
-
-  if(!*clone) {
-    /* zero length string, return that */
-    free(out);
-    return clone;
-  }
-
-  /*
-   * To handle query-parts properly, we must find it and remove it during the
-   * dotdot-operation and then append it again at the end to the output
-   * string.
-   */
-  queryp = strchr(clone, '?');
-  if(queryp)
-    *queryp = 0;
-
-  do {
-
-    /*  A.  If the input buffer begins with a prefix of "../" or "./", then
-        remove that prefix from the input buffer; otherwise, */
-
-    if(!strncmp("./", clone, 2)) {
-      clone += 2;
-      clen -= 2;
-    }
-    else if(!strncmp("../", clone, 3)) {
-      clone += 3;
-      clen -= 3;
-    }
-
-    /*  B.  if the input buffer begins with a prefix of "/./" or "/.", where
-        "."  is a complete path segment, then replace that prefix with "/" in
-        the input buffer; otherwise, */
-    else if(!strncmp("/./", clone, 3)) {
-      clone += 2;
-      clen -= 2;
-    }
-    else if(!strcmp("/.", clone)) {
-      clone[1]='/';
-      clone++;
-      clen -= 1;
-    }
-
-    /*  C.  if the input buffer begins with a prefix of "/../" or "/..", where
-        ".." is a complete path segment, then replace that prefix with "/" in
-        the input buffer and remove the last segment and its preceding "/" (if
-        any) from the output buffer; otherwise, */
-
-    else if(!strncmp("/../", clone, 4)) {
-      clone += 3;
-      clen -= 3;
-      /* remove the last segment from the output buffer */
-      while(outptr > out) {
-        outptr--;
-        if(*outptr == '/')
-          break;
-      }
-      *outptr = 0; /* null-terminate where it stops */
-    }
-    else if(!strcmp("/..", clone)) {
-      clone[2]='/';
-      clone += 2;
-      clen -= 2;
-      /* remove the last segment from the output buffer */
-      while(outptr > out) {
-        outptr--;
-        if(*outptr == '/')
-          break;
-      }
-      *outptr = 0; /* null-terminate where it stops */
-    }
-
-    /*  D.  if the input buffer consists only of "." or "..", then remove
-        that from the input buffer; otherwise, */
-
-    else if(!strcmp(".", clone) || !strcmp("..", clone)) {
-      *clone = 0;
-      *out = 0;
-    }
-
-    else {
-      /*  E.  move the first path segment in the input buffer to the end of
-          the output buffer, including the initial "/" character (if any) and
-          any subsequent characters up to, but not including, the next "/"
-          character or the end of the input buffer. */
-
-      do {
-        *outptr++ = *clone++;
-        clen--;
-      } while(*clone && (*clone != '/'));
-      *outptr = 0;
-    }
-
-  } while(*clone);
-
-  if(queryp) {
-    size_t qlen;
-    /* There was a query part, append that to the output. The 'clone' string
-       may now have been altered so we copy from the original input string
-       from the correct index. */
-    size_t oindex = queryp - orgclone;
-    qlen = strlen(&input[oindex]);
-    memcpy(outptr, &input[oindex], qlen + 1); /* include the end zero byte */
-  }
-
-  free(orgclone);
-  return out;
-}
--- a/lib/dotdot.h
+++ b/lib/dotdot.h
@ -1,27 +0,0 @@
-#ifndef HEADER_CURL_DOTDOT_H
-#define HEADER_CURL_DOTDOT_H
-/***************************************************************************
- *                                  _   _ ____  _
- *  Project                     ___| | | |  _ \| |
- *                             / __| | | | |_) | |
- *                            | (__| |_| |  _ <| |___
- *                             \___|\___/|_| \_\_____|
- *
- * Copyright (C) 1998 - 2022, Daniel Stenberg, <daniel@haxx.se>, et al.
- *
- * This software is licensed as described in the file COPYING, which
- * you should have received as part of this distribution. The terms
- * are also available at https://curl.se/docs/copyright.html.
- *
- * You may opt to use, copy, modify, merge, publish, distribute and/or sell
- * copies of the Software, and permit persons to whom the Software is
- * furnished to do so, under the terms of the COPYING file.
- *
- * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
- * KIND, either express or implied.
- *
- * SPDX-License-Identifier: curl
- *
- ***************************************************************************/
-char *Curl_dedotdotify(const char *input);
-#endif /* HEADER_CURL_DOTDOT_H */
--- a/lib/dynbuf.c
+++ b/lib/dynbuf.c
@ -128,7 +128,6 @@ void Curl_dyn_reset(struct dynbuf *s)
  s->leng = 0;
 }

-#ifdef USE_NGTCP2
 /*
 * Specify the size of the tail to keep (number of bytes from the end of the
 * buffer). The rest will be dropped.
@ -153,7 +152,6 @@ CURLcode Curl_dyn_tail(struct dynbuf *s, size_t trail)
  return CURLE_OK;

 }
-#endif

 /*
 * Appends a buffer with length.
@ -255,3 +253,18 @@ size_t Curl_dyn_len(const struct dynbuf *s)
  DEBUGASSERT(!s->leng || s->bufr);
  return s->leng;
 }
+
+/*
+ * Curl_dyn_setlen() trims the buffer so that only its first 'set' bytes
+ * remain, and null-terminates it there.
+ *
+ * Returns CURLE_BAD_FUNCTION_ARGUMENT, leaving the buffer untouched, if 'set'
+ * is larger than the current length; this function never grows a buffer.
+ * Otherwise returns CURLE_OK.
+ */
+CURLcode Curl_dyn_setlen(struct dynbuf *s, size_t set)
+{
+  DEBUGASSERT(s);
+  DEBUGASSERT(s->init == DYNINIT);
+  DEBUGASSERT(!s->leng || s->bufr);
+  if(set > s->leng)
+    return CURLE_BAD_FUNCTION_ARGUMENT;
+  s->leng = set;
+  /* a zero-length dynbuf may have no allocation at all (the assert above
+     allows it), so only write the terminator when there is a buffer */
+  if(s->bufr)
+    s->bufr[s->leng] = 0;
+  return CURLE_OK;
+}
--- a/lib/dynbuf.h
+++ b/lib/dynbuf.h
@ -38,6 +38,7 @@
 #define Curl_dyn_len(a) curlx_dyn_len(a)
 #define Curl_dyn_reset(a) curlx_dyn_reset(a)
 #define Curl_dyn_tail(a,b) curlx_dyn_tail(a,b)
+#define Curl_dyn_setlen(a,b) curlx_dyn_setlen(a,b)
 #define curlx_dynbuf dynbuf /* for the struct name */
 #endif

@ -63,6 +64,7 @@ CURLcode Curl_dyn_vaddf(struct dynbuf *s, const char *fmt, va_list ap)
  WARN_UNUSED_RESULT;
 void Curl_dyn_reset(struct dynbuf *s);
 CURLcode Curl_dyn_tail(struct dynbuf *s, size_t trail);
+CURLcode Curl_dyn_setlen(struct dynbuf *s, size_t set);
 char *Curl_dyn_ptr(const struct dynbuf *s);
 unsigned char *Curl_dyn_uptr(const struct dynbuf *s);
 size_t Curl_dyn_len(const struct dynbuf *s);
--- a/lib/url.c
+++ b/lib/url.c
@ -128,7 +128,6 @@ bool curl_win32_idn_to_ascii(const char *in, char **out);
 #include "http_proxy.h"
 #include "conncache.h"
 #include "multihandle.h"
-#include "dotdot.h"
 #include "strdup.h"
 #include "setopt.h"
 #include "altsvc.h"
--- a/lib/urlapi-int.h
+++ b/lib/urlapi-int.h
@ -25,10 +25,11 @@
 ***************************************************************************/
 #include "curl_setup.h"

-bool Curl_is_absolute_url(const char *url, char *scheme, size_t buflen);
+size_t Curl_is_absolute_url(const char *url, char *buf, size_t buflen);

 #ifdef DEBUGBUILD
-CURLUcode Curl_parse_port(struct Curl_URL *u, char *hostname, bool);
+CURLUcode Curl_parse_port(struct Curl_URL *u, struct dynbuf *host,
+                          bool has_scheme);
 #endif

 #endif /* HEADER_CURL_URLAPI_INT_H */
--- a/lib/urlapi.c
+++ b/lib/urlapi.c
--- a/tests/unit/unit1395.c
+++ b/tests/unit/unit1395.c
@ -23,7 +23,8 @@
 ***************************************************************************/
 #include "curlcheck.h"

-#include "dotdot.h"
+/* copied from urlapi.c */
+extern char *dedotdotify(const char *input, size_t clen);

 #include "memdebug.h"

@ -77,7 +78,7 @@ UNITTEST_START
  };

  for(i = 0; i < sizeof(pairs)/sizeof(pairs[0]); i++) {
-    char *out = Curl_dedotdotify(pairs[i].input);
+    char *out = dedotdotify(pairs[i].input, strlen(pairs[i].input));
    abort_unless(out != NULL, "returned NULL!");

    if(strcmp(out, pairs[i].output)) {
--- a/tests/unit/unit1653.c
+++ b/tests/unit/unit1653.c
@ -44,6 +44,19 @@ unit_stop(void)

 #define free_and_clear(x) free(x); x = NULL

+/*
+ * Test helper: adds the host string 'h' to a temporary dynbuf, hands that to
+ * Curl_parse_port() and returns its result. Returns CURLUE_OUT_OF_MEMORY if
+ * the string cannot be added to the dynbuf.
+ */
+static CURLUcode parse_port(CURLU *url, char *h, bool has_scheme)
+{
+  CURLUcode rc;
+  struct dynbuf hostbuf;
+
+  Curl_dyn_init(&hostbuf, 10000);
+  if(Curl_dyn_add(&hostbuf, h))
+    rc = CURLUE_OUT_OF_MEMORY;
+  else {
+    rc = Curl_parse_port(url, &hostbuf, has_scheme);
+    Curl_dyn_free(&hostbuf);
+  }
+  return rc;
+}
+
 UNITTEST_START
 {
  CURLUcode ret;
@ -57,8 +70,8 @@ UNITTEST_START
  ipv6port = strdup("[fe80::250:56ff:fea7:da15]");
  if(!ipv6port)
    goto fail;
-  ret = Curl_parse_port(u, ipv6port, FALSE);
-  fail_unless(ret == CURLUE_OK, "Curl_parse_port returned error");
+  ret = parse_port(u, ipv6port, FALSE);
+  fail_unless(ret == CURLUE_OK, "parse_port returned error");
  ret = curl_url_get(u, CURLUPART_PORT, &portnum, CURLU_NO_DEFAULT_PORT);
  fail_unless(ret != CURLUE_OK, "curl_url_get portnum returned something");
  free_and_clear(ipv6port);
@ -71,8 +84,8 @@ UNITTEST_START
  ipv6port = strdup("[fe80::250:56ff:fea7:da15|");
  if(!ipv6port)
    goto fail;
-  ret = Curl_parse_port(u, ipv6port, FALSE);
-  fail_unless(ret != CURLUE_OK, "Curl_parse_port true on error");
+  ret = parse_port(u, ipv6port, FALSE);
+  fail_unless(ret != CURLUE_OK, "parse_port true on error");
  free_and_clear(ipv6port);
  curl_url_cleanup(u);

@ -82,8 +95,8 @@ UNITTEST_START
  ipv6port = strdup("[fe80::250:56ff;fea7:da15]:80");
  if(!ipv6port)
    goto fail;
-  ret = Curl_parse_port(u, ipv6port, FALSE);
-  fail_unless(ret != CURLUE_OK, "Curl_parse_port true on error");
+  ret = parse_port(u, ipv6port, FALSE);
+  fail_unless(ret != CURLUE_OK, "parse_port true on error");
  free_and_clear(ipv6port);
  curl_url_cleanup(u);

@ -94,8 +107,8 @@ UNITTEST_START
  ipv6port = strdup("[fe80::250:56ff:fea7:da15%25eth3]:80");
  if(!ipv6port)
    goto fail;
-  ret = Curl_parse_port(u, ipv6port, FALSE);
-  fail_unless(ret == CURLUE_OK, "Curl_parse_port returned error");
+  ret = parse_port(u, ipv6port, FALSE);
+  fail_unless(ret == CURLUE_OK, "parse_port returned error");
  ret = curl_url_get(u, CURLUPART_PORT, &portnum, 0);
  fail_unless(ret == CURLUE_OK, "curl_url_get portnum returned error");
  fail_unless(portnum && !strcmp(portnum, "80"), "Check portnumber");
@ -110,8 +123,8 @@ UNITTEST_START
  ipv6port = strdup("[fe80::250:56ff:fea7:da15%25eth3]");
  if(!ipv6port)
    goto fail;
-  ret = Curl_parse_port(u, ipv6port, FALSE);
-  fail_unless(ret == CURLUE_OK, "Curl_parse_port returned error");
+  ret = parse_port(u, ipv6port, FALSE);
+  fail_unless(ret == CURLUE_OK, "parse_port returned error");
  free_and_clear(ipv6port);
  curl_url_cleanup(u);

@ -122,8 +135,8 @@ UNITTEST_START
  ipv6port = strdup("[fe80::250:56ff:fea7:da15]:81");
  if(!ipv6port)
    goto fail;
-  ret = Curl_parse_port(u, ipv6port, FALSE);
-  fail_unless(ret == CURLUE_OK, "Curl_parse_port returned error");
+  ret = parse_port(u, ipv6port, FALSE);
+  fail_unless(ret == CURLUE_OK, "parse_port returned error");
  ret = curl_url_get(u, CURLUPART_PORT, &portnum, 0);
  fail_unless(ret == CURLUE_OK, "curl_url_get portnum returned error");
  fail_unless(portnum && !strcmp(portnum, "81"), "Check portnumber");
@ -138,8 +151,8 @@ UNITTEST_START
  ipv6port = strdup("[fe80::250:56ff:fea7:da15];81");
  if(!ipv6port)
    goto fail;
-  ret = Curl_parse_port(u, ipv6port, FALSE);
-  fail_unless(ret != CURLUE_OK, "Curl_parse_port true on error");
+  ret = parse_port(u, ipv6port, FALSE);
+  fail_unless(ret != CURLUE_OK, "parse_port true on error");
  free_and_clear(ipv6port);
  curl_url_cleanup(u);

@ -149,8 +162,8 @@ UNITTEST_START
  ipv6port = strdup("[fe80::250:56ff:fea7:da15]80");
  if(!ipv6port)
    goto fail;
-  ret = Curl_parse_port(u, ipv6port, FALSE);
-  fail_unless(ret != CURLUE_OK, "Curl_parse_port true on error");
+  ret = parse_port(u, ipv6port, FALSE);
+  fail_unless(ret != CURLUE_OK, "parse_port true on error");
  free_and_clear(ipv6port);
  curl_url_cleanup(u);

@ -162,8 +175,8 @@ UNITTEST_START
  ipv6port = strdup("[fe80::250:56ff:fea7:da15]:");
  if(!ipv6port)
    goto fail;
-  ret = Curl_parse_port(u, ipv6port, TRUE);
-  fail_unless(ret == CURLUE_OK, "Curl_parse_port returned error");
+  ret = parse_port(u, ipv6port, TRUE);
+  fail_unless(ret == CURLUE_OK, "parse_port returned error");
  free_and_clear(ipv6port);
  curl_url_cleanup(u);

@ -174,8 +187,8 @@ UNITTEST_START
  ipv6port = strdup("[fe80::250:56ff:fea7:da15!25eth3]:80");
  if(!ipv6port)
    goto fail;
-  ret = Curl_parse_port(u, ipv6port, FALSE);
-  fail_unless(ret != CURLUE_OK, "Curl_parse_port returned non-error");
+  ret = parse_port(u, ipv6port, FALSE);
+  fail_unless(ret != CURLUE_OK, "parse_port returned non-error");
  free_and_clear(ipv6port);
  curl_url_cleanup(u);

@ -186,8 +199,8 @@ UNITTEST_START
  ipv6port = strdup("[fe80::250:56ff:fea7:da15%eth3]:80");
  if(!ipv6port)
    goto fail;
-  ret = Curl_parse_port(u, ipv6port, FALSE);
-  fail_unless(ret == CURLUE_OK, "Curl_parse_port returned error");
+  ret = parse_port(u, ipv6port, FALSE);
+  fail_unless(ret == CURLUE_OK, "parse_port returned error");
  free_and_clear(ipv6port);
  curl_url_cleanup(u);

@ -200,8 +213,8 @@ UNITTEST_START
                    "aaaaaaaaaaaaaaaaaaaaaa:");
  if(!ipv6port)
    goto fail;
-  ret = Curl_parse_port(u, ipv6port, FALSE);
-  fail_unless(ret == CURLUE_BAD_PORT_NUMBER, "Curl_parse_port did wrong");
+  ret = parse_port(u, ipv6port, FALSE);
+  fail_unless(ret == CURLUE_BAD_PORT_NUMBER, "parse_port did wrong");
  fail:
  free(ipv6port);
  curl_url_cleanup(u);