Re: [hackers] [quark] Ignore queries and fragments in URIs || Laslo Hunhold

From: Hiltjo Posthuma <hiltjo_AT_codemadness.org>
Date: Sat, 30 Jan 2021 14:30:12 +0100

On Sat, Jan 30, 2021 at 01:11:17PM +0100, git_AT_suckless.org wrote:
> commit 319ba7083fdde836d6614c6b8b228bf3a9849e95
> Author: Laslo Hunhold <dev_AT_frign.de>
> AuthorDate: Sat Jan 30 12:53:00 2021 +0100
> Commit: Laslo Hunhold <dev_AT_frign.de>
> CommitDate: Sat Jan 30 13:10:32 2021 +0100
>
> Ignore queries and fragments in URIs
>
> Previously, a request for "/index.html" would yield a 200, while a
> request for "/index.html?foo=bar" would yield a 404, as quark would
> look for the file "index.html?foo=bar" in the serve directory.
>
> To accomodate this behaviour, it's no longer sufficient to just compare
> realuri and req->uri. Instead, we set a "dirty" flag every time we
> change the URI in such a way that it requires a redirect.
>
> According to RFC 3986 section 3, queries and fragments are there
> to (further) "identify a resource within the scope of the URI's scheme
> and naming authority (if any)". However, it's perfectly legitimate to
> just ignore this further specification when the URI itself is already
> pointing at a unique resource (i.e. "/index.html").
>
> This behaviour is consistent with dynamic web applications which usually
> ignore parameters they don't care about. Quark is too much Zen to care
> about any parameters. This has the added bonus that you can now clone
> repositories (read-only) via the "dumb" HTTP git-protocol, so
>
> git clone https://example.org/git/project.git
>
> is now possible (provided you run update-server-info during the
> post-update-hook). This wouldn't work previously because git, when
> asked to clone via HTTP, would first probe the server with a request for
>
> project.git/info/refs?service=git-upload-pack
>
> (i.e. asking for the "smart" HTTP git-protocol to confirm). Quark would
> return a 404, though, while git only gracefully "downgrades" to the
> "dumb" HTTP git-protocol if the request succeeds but only yields a basic
> 200 response without special git-headers.
>
> This way, it is now trivial to also share git-repositories (and other
> gracefully-downgrading protocols). While the "dumb" HTTP git-protocol
> only supports read-only-access, I don't think that's much of an overall
> loss (to the contrary!).
>
> HTTP authentication is broken and it makes much more sense to enable
> ssh-access to contributors and make them push changes via ssh. The key
> advantage of HTTP-cloning over git://-cloning is the fact that the git
> protocol can be tampered with, while the HTTP-protocol can be encapsulated
> into a secure TLS connection.
>
> Signed-off-by: Laslo Hunhold <dev_AT_frign.de>
>

Cool story, bro.

> diff --git a/http.c b/http.c
> index b74c61d..f52da8a 100644
> --- a/http.c
> +++ b/http.c
> _AT_@ -368,12 +368,12 @@ static int
> normabspath(char *path)
> {
> size_t len;
> - int last = 0;
> + int dirty = 0, last = 0;
> char *p, *q;
>
> /* require and skip first slash */
> if (path[0] != '/') {
> - return 1;
> + return -1;
> }
> p = path + 1;
>
> _AT_@ -387,7 +387,9 @@ normabspath(char *path)
> last = 1;
> }
>
> - if (p == q || (q - p == 1 && p[0] == '.')) {
> + if (*p == '\0') {
> + break;
> + } else if (p == q || (q - p == 1 && p[0] == '.')) {
> /* "/" or "./" */
> goto squash;
> } else if (q - p == 2 && p[0] == '.' && p[1] == '.') {
> _AT_@ -412,9 +414,10 @@ squash:
> memmove(p, q + 1, len - ((q + 1) - path) + 2);
> len -= (q + 1) - p;
> }
> + dirty = 1;
> }
>
> - return 0;
> + return dirty;
> }
>
> static enum status
> _AT_@ -562,7 +565,7 @@ http_prepare_response(const struct request *req, struct response *res,
> struct tm tm = { 0 };
> struct vhost *vhost;
> size_t len, i;
> - int hasport, ipv6host;
> + int dirty = 0, hasport, ipv6host;
> static char realuri[PATH_MAX], tmpuri[PATH_MAX];
> char *p, *mime;
> const char *targethost;
> _AT_@ -570,11 +573,29 @@ http_prepare_response(const struct request *req, struct response *res,
> /* empty all response fields */
> memset(res, 0, sizeof(*res));
>
> - /* make a working copy of the URI and normalize it */
> + /*
> + * make a working copy of the URI, strip queries and fragments
> + * (ignorable according to RFC 3986 section 3) and normalize it
> + */
> memcpy(realuri, req->uri, sizeof(realuri));
> - if (normabspath(realuri)) {
> +
> + if ((p = strchr(realuri, '?'))) {
> + *p = '\0';
> + } else if ((p = strchr(realuri, '#'))) {
> + *p = '\0';
> + }
> +
> + switch (normabspath(realuri)) {
> + case -1:
> s = S_BAD_REQUEST;
> goto err;
> + case 0:
> + /* string is unchanged */
> + break;
> + case 1:
> + /* string was changed */
> + dirty = 1;
> + break;
> }
>
> /* match vhost */
> _AT_@ -594,10 +615,12 @@ http_prepare_response(const struct request *req, struct response *res,
> }
>
> /* if we have a vhost prefix, prepend it to the URI */
> - if (vhost->prefix &&
> - prepend(realuri, LEN(realuri), vhost->prefix)) {
> - s = S_REQUEST_TOO_LARGE;
> - goto err;
> + if (vhost->prefix) {
> + if (prepend(realuri, LEN(realuri), vhost->prefix)) {
> + s = S_REQUEST_TOO_LARGE;
> + goto err;
> + }
> + dirty = 1;
> }
> }
>
> _AT_@ -618,14 +641,23 @@ http_prepare_response(const struct request *req, struct response *res,
> s = S_REQUEST_TOO_LARGE;
> goto err;
> }
> + dirty = 1;
> break;
> }
> }
>
> /* normalize URI again, in case we introduced dirt */
> - if (normabspath(realuri)) {
> + switch (normabspath(realuri)) {
> + case -1:
> s = S_BAD_REQUEST;
> goto err;
> + case 0:
> + /* string is unchanged */
> + break;
> + case 1:
> + /* string was changed */
> + dirty = 1;
> + break;
> }
>
> /* stat the relative path derived from the URI */
> _AT_@ -644,6 +676,7 @@ http_prepare_response(const struct request *req, struct response *res,
> if (len > 0 && realuri[len - 1] != '/') {
> realuri[len] = '/';
> realuri[len + 1] = '\0';
> + dirty = 1;
> }
> }
>
> _AT_@ -658,10 +691,10 @@ http_prepare_response(const struct request *req, struct response *res,
> }
>
> /*
> - * redirect if the original URI and the "real" URI differ or if
> - * the requested host is non-canonical
> + * redirect if the URI needs to be redirected or the requested
> + * host is non-canonical
> */
> - if (strcmp(req->uri, realuri) || (srv->vhost && vhost &&
> + if (dirty || (srv->vhost && vhost &&
> strcmp(req->field[REQ_HOST], vhost->chost))) {
> res->status = S_MOVED_PERMANENTLY;
>
> _AT_@ -716,12 +749,12 @@ http_prepare_response(const struct request *req, struct response *res,
> * (optionally including the vhost servedir as a prefix)
> * into the actual response-path
> */
> - if (esnprintf(res->uri, sizeof(res->uri), "%s", req->uri)) {
> + if (esnprintf(res->uri, sizeof(res->uri), "%s", realuri)) {
> s = S_REQUEST_TOO_LARGE;
> goto err;
> }
> if (esnprintf(res->path, sizeof(res->path), "%s%s",
> - vhost ? vhost->dir : "", RELPATH(req->uri))) {
> + vhost ? vhost->dir : "", RELPATH(realuri))) {
> s = S_REQUEST_TOO_LARGE;
> goto err;
> }
> _AT_@ -733,7 +766,7 @@ http_prepare_response(const struct request *req, struct response *res,
> * the URI
> */
> if (esnprintf(tmpuri, sizeof(tmpuri), "%s%s",
> - req->uri, srv->docindex)) {
> + realuri, srv->docindex)) {
> s = S_REQUEST_TOO_LARGE;
> goto err;
> }
>

-- 
Kind regards,
Hiltjo
Received on Sat Jan 30 2021 - 14:30:12 CET

This archive was generated by hypermail 2.3.0 : Sat Jan 30 2021 - 14:36:36 CET