[hackers] [quark] Ignore queries and fragments in URIs || Laslo Hunhold

From: <git_AT_suckless.org>
Date: Sat, 30 Jan 2021 13:11:17 +0100 (CET)

commit 319ba7083fdde836d6614c6b8b228bf3a9849e95
Author: Laslo Hunhold <dev_AT_frign.de>
AuthorDate: Sat Jan 30 12:53:00 2021 +0100
Commit: Laslo Hunhold <dev_AT_frign.de>
CommitDate: Sat Jan 30 13:10:32 2021 +0100

    Ignore queries and fragments in URIs
    
    Previously, a request for "/index.html" would yield a 200, while a
    request for "/index.html?foo=bar" would yield a 404, as quark would
    look for the file "index.html?foo=bar" in the serve directory.
    
    To accomodate this behaviour, it's no longer sufficient to just compare
    realuri and req->uri. Instead, we set a "dirty" flag every time we
    change the URI in such a way that it requires a redirect.
    
    According to RFC 3986 section 3, queries and fragments are there
    to (further) "identify a resource within the scope of the URI's scheme
    and naming authority (if any)". However, it's perfectly legitimate to
    just ignore this further specification when the URI itself is already
    pointing at a unique resource (i.e. "/index.html").
    
    This behaviour is consistent with dynamic web applications which usually
    ignore parameters they don't care about. Quark is too much Zen to care
    about any parameters. This has the added bonus that you can now clone
    repositories (read-only) via the "dumb" HTTP git-protocol, so
    
            git clone https://example.org/git/project.git
    
    is now possible (provided you run update-server-info during the
    post-update-hook). This wouldn't work previously because git, when
    asked to clone via HTTP, would first probe the server with a request for
    
            project.git/info/refs?service=git-upload-pack
    
    (i.e. asking for the "smart" HTTP git-protocol to confirm). Quark would
    return a 404, though, while git only gracefully "downgrades" to the
    "dumb" HTTP git-protocol if the request succeeds but only yields a basic
    200 response without special git-headers.
    
    This way, it is now trivial to also share git-repositories (and other
    gracefully-downgrading protocols). While the "dumb" HTTP git-protocol
    only supports read-only-access, I don't think that's much of an overall
    loss (to the contrary!).
    
    HTTP authentication is broken and it makes much more sense to enable
    ssh-access to contributors and make them push changes via ssh. The key
    advantage of HTTP-cloning over git://-cloning is the fact that the git
    protocol can be tampered with, while the HTTP-protocol can be encapsulated
    into a secure TLS connection.
    
    Signed-off-by: Laslo Hunhold <dev_AT_frign.de>

diff --git a/http.c b/http.c
index b74c61d..f52da8a 100644
--- a/http.c
+++ b/http.c
_AT_@ -368,12 +368,12 @@ static int
 normabspath(char *path)
 {
         size_t len;
- int last = 0;
+ int dirty = 0, last = 0;
         char *p, *q;
 
         /* require and skip first slash */
         if (path[0] != '/') {
- return 1;
+ return -1;
         }
         p = path + 1;
 
_AT_@ -387,7 +387,9 @@ normabspath(char *path)
                         last = 1;
                 }
 
- if (p == q || (q - p == 1 && p[0] == '.')) {
+ if (*p == '\0') {
+ break;
+ } else if (p == q || (q - p == 1 && p[0] == '.')) {
                         /* "/" or "./" */
                         goto squash;
                 } else if (q - p == 2 && p[0] == '.' && p[1] == '.') {
_AT_@ -412,9 +414,10 @@ squash:
                         memmove(p, q + 1, len - ((q + 1) - path) + 2);
                         len -= (q + 1) - p;
                 }
+ dirty = 1;
         }
 
- return 0;
+ return dirty;
 }
 
 static enum status
_AT_@ -562,7 +565,7 @@ http_prepare_response(const struct request *req, struct response *res,
         struct tm tm = { 0 };
         struct vhost *vhost;
         size_t len, i;
- int hasport, ipv6host;
+ int dirty = 0, hasport, ipv6host;
         static char realuri[PATH_MAX], tmpuri[PATH_MAX];
         char *p, *mime;
         const char *targethost;
_AT_@ -570,11 +573,29 @@ http_prepare_response(const struct request *req, struct response *res,
         /* empty all response fields */
         memset(res, 0, sizeof(*res));
 
- /* make a working copy of the URI and normalize it */
+ /*
+ * make a working copy of the URI, strip queries and fragments
+ * (ignorable according to RFC 3986 section 3) and normalize it
+ */
         memcpy(realuri, req->uri, sizeof(realuri));
- if (normabspath(realuri)) {
+
+ if ((p = strchr(realuri, '?'))) {
+ *p = '\0';
+ } else if ((p = strchr(realuri, '#'))) {
+ *p = '\0';
+ }
+
+ switch (normabspath(realuri)) {
+ case -1:
                 s = S_BAD_REQUEST;
                 goto err;
+ case 0:
+ /* string is unchanged */
+ break;
+ case 1:
+ /* string was changed */
+ dirty = 1;
+ break;
         }
 
         /* match vhost */
_AT_@ -594,10 +615,12 @@ http_prepare_response(const struct request *req, struct response *res,
                 }
 
                 /* if we have a vhost prefix, prepend it to the URI */
- if (vhost->prefix &&
- prepend(realuri, LEN(realuri), vhost->prefix)) {
- s = S_REQUEST_TOO_LARGE;
- goto err;
+ if (vhost->prefix) {
+ if (prepend(realuri, LEN(realuri), vhost->prefix)) {
+ s = S_REQUEST_TOO_LARGE;
+ goto err;
+ }
+ dirty = 1;
                 }
         }
 
_AT_@ -618,14 +641,23 @@ http_prepare_response(const struct request *req, struct response *res,
                                 s = S_REQUEST_TOO_LARGE;
                                 goto err;
                         }
+ dirty = 1;
                         break;
                 }
         }
 
         /* normalize URI again, in case we introduced dirt */
- if (normabspath(realuri)) {
+ switch (normabspath(realuri)) {
+ case -1:
                 s = S_BAD_REQUEST;
                 goto err;
+ case 0:
+ /* string is unchanged */
+ break;
+ case 1:
+ /* string was changed */
+ dirty = 1;
+ break;
         }
 
         /* stat the relative path derived from the URI */
_AT_@ -644,6 +676,7 @@ http_prepare_response(const struct request *req, struct response *res,
                 if (len > 0 && realuri[len - 1] != '/') {
                         realuri[len] = '/';
                         realuri[len + 1] = '\0';
+ dirty = 1;
                 }
         }
 
_AT_@ -658,10 +691,10 @@ http_prepare_response(const struct request *req, struct response *res,
         }
 
         /*
- * redirect if the original URI and the "real" URI differ or if
- * the requested host is non-canonical
+ * redirect if the URI needs to be redirected or the requested
+ * host is non-canonical
          */
- if (strcmp(req->uri, realuri) || (srv->vhost && vhost &&
+ if (dirty || (srv->vhost && vhost &&
             strcmp(req->field[REQ_HOST], vhost->chost))) {
                 res->status = S_MOVED_PERMANENTLY;
 
_AT_@ -716,12 +749,12 @@ http_prepare_response(const struct request *req, struct response *res,
                  * (optionally including the vhost servedir as a prefix)
                  * into the actual response-path
                  */
- if (esnprintf(res->uri, sizeof(res->uri), "%s", req->uri)) {
+ if (esnprintf(res->uri, sizeof(res->uri), "%s", realuri)) {
                         s = S_REQUEST_TOO_LARGE;
                         goto err;
                 }
                 if (esnprintf(res->path, sizeof(res->path), "%s%s",
- vhost ? vhost->dir : "", RELPATH(req->uri))) {
+ vhost ? vhost->dir : "", RELPATH(realuri))) {
                         s = S_REQUEST_TOO_LARGE;
                         goto err;
                 }
_AT_@ -733,7 +766,7 @@ http_prepare_response(const struct request *req, struct response *res,
                  * the URI
                  */
                 if (esnprintf(tmpuri, sizeof(tmpuri), "%s%s",
- req->uri, srv->docindex)) {
+ realuri, srv->docindex)) {
                         s = S_REQUEST_TOO_LARGE;
                         goto err;
                 }
Received on Sat Jan 30 2021 - 13:11:17 CET

This archive was generated by hypermail 2.3.0 : Sat Jan 30 2021 - 13:12:36 CET