[hackers] [sbase][PATCH] tar: support hardlinks during compression

From: Andrea Calligaris <ac89.hk.public_AT_gmail.com>
Date: Thu, 7 Mar 2024 19:02:19 +0100

This patch depends on:
"tar: fix long names crashing tar archiving"
https://lists.suckless.org/hackers/2402/19071.html

I just did the simplest possible implementation, in order to
have at least something (it's a feature of 'tar' that I do
actually use in my projects, so I needed it).
Insights for more interesting approaches can be found here:
https://lists.suckless.org/dev/2301/35084.html

This should finally cover the specifications, although one
might then want to support even longer paths (more than 155
(prefix) + 100 (name) for regular files, and more than 100
for links); but I think that should be discussed on dev.
---
 tar.c | 101 +++++++++++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 90 insertions(+), 11 deletions(-)
diff --git a/tar.c b/tar.c
index a2dea4d..a54d66d 100644
--- a/tar.c
+++ b/tar.c
_AT_@ -52,6 +52,16 @@ struct header {
 	char prefix[155];
 };
 
+/* Files with several links seen so far, in order of first appearance. */
+struct hlink {
+	struct hlink *next;
+	dev_t dev;          /* st_dev of the first archived link */
+	ino_t ino;          /* st_ino of the first archived link */
+	char linkname[100]; /* header name under which it was archived */
+};
+static struct hlink *hlinklist = NULL;
+static struct hlink *hlinktail = NULL;
+
 static struct dirtime {
 	char *name;
 	time_t mtime;
_AT_@ -182,14 +192,15 @@ archive(const char *path)
 	char b[BLKSIZ];
 	struct group *gr;
 	struct header *h;
+	struct hlink *hlp;
 	struct passwd *pw;
 	struct stat st;
 	size_t chksum, i;
 	ssize_t l, r;
 	int fd = -1;
-	size_t path_len;
 	char tmp_prefix[PATH_MAX];
 	char *bsname;
+	int found_hlink = 0;
 
 	if (lstat(path, &st) < 0) {
 		weprintf("lstat %s:", path);
_AT_@ -205,14 +216,13 @@ archive(const char *path)
 	h = (struct header *)b;
 	memset(b, 0, sizeof(b));
 
-	path_len = strlen(path);
-	if (path_len > 100 - 1) {
+	if (strlen(path) >= 100) {
 		// Cover case where path name is too long (in which case we need
 		// to split it to prefix and name).
 		bsname = basename((char *)path);
-		strncpy(tmp_prefix, path, PATH_MAX);
+		estrlcpy(tmp_prefix, path, PATH_MAX);
 		dirname(tmp_prefix);
-		// Could still be too long to fit in the struct.
+		// Could still be too long to fit in the fields.
 		if (strlen(bsname) >= sizeof(h->name) ||
 		    strlen(tmp_prefix) >= sizeof(h->prefix)) {
 			eprintf("filename too long: %s\n", path);
_AT_@ -234,11 +244,55 @@ archive(const char *path)
 	estrlcpy(h->gname,   gr ? gr->gr_name : "",       sizeof(h->gname));
 
 	if (S_ISREG(st.st_mode)) {
-		h->type = REG;
-		putoctal(h->size, (unsigned)st.st_size,  sizeof(h->size));
-		fd = open(path, O_RDONLY);
-		if (fd < 0)
-			eprintf("open %s:", path);
+		if (st.st_nlink > 1) {
+			/* The file has more than one link (hardlink). */
+			for (hlp = hlinklist; hlp; hlp = hlp->next) {
+				if (hlp->ino == st.st_ino &&
+				    hlp->dev == st.st_dev) {
+					/* Found in our list. */
+					found_hlink = 1;
+					h->type = HARDLINK;
+					putoctal(h->size, 0, sizeof(h->size));
+					estrlcpy(
+					    h->linkname, hlp->linkname,
+					    sizeof(h->linkname));
+					break;
+				}
+			}
+			if (!found_hlink) {
+				/* Never encountered this hardlink before. Let's
+				 * store it in our list. */
+				if (strlen(h->prefix) > 0)
+					eprintf(
+					    "filename too long to be able to "
+					    "store it as a hardlink: %s\n",
+					    path);
+				struct hlink *new_hlink =
+				    ecalloc(1, sizeof(struct hlink));
+				new_hlink->next = NULL;
+				new_hlink->dev = st.st_dev;
+				new_hlink->ino = st.st_ino;
+				estrlcpy(
+				    new_hlink->linkname, h->name,
+				    sizeof(new_hlink->linkname));
+				if (hlinklist == NULL)
+					hlinklist = new_hlink;
+				else
+					hlinktail->next = new_hlink;
+				hlinktail = new_hlink;
+			}
+		}
+		/* If it's a regular file OR if it is a hardlink but it's the
+		   first time we encounter it, we need to dump the file content.
+		*/
+		if (!found_hlink) {
+			h->type = REG;
+			putoctal(
+			    h->size, (unsigned)st.st_size, sizeof(h->size));
+			fd = open(path, O_RDONLY);
+			if (fd < 0)
+				eprintf("open %s:", path);
+		}
 	} else if (S_ISDIR(st.st_mode)) {
 		h->type = DIRECTORY;
 	} else if (S_ISLNK(st.st_mode)) {
_AT_@ -272,6 +326,20 @@ archive(const char *path)
 	return 0;
 }
 
+/* Free every node of the hardlink list and reset its head and tail. */
+static void
+freehlinklist(void)
+{
+	struct hlink *hlp, *next;
+
+	for (hlp = hlinklist; hlp != NULL; hlp = next) {
+		next = hlp->next; /* save the successor before freeing hlp */
+		free(hlp);
+	}
+	hlinklist = NULL;
+	hlinktail = NULL;
+}
+
 static int
 unarchive(char *fname, ssize_t l, char b[BLKSIZ])
 {
_AT_@ -383,8 +451,16 @@ skipblk(ssize_t l)
 static int
 print(char *fname, ssize_t l, char b[BLKSIZ])
 {
-	puts(fname);
+	struct header *h = (struct header *)b; /* view the block as a header */
+
+	fputs(fname, stdout);
+	if (vflag && h->linkname[0]) {
+		/* linkname need not be NUL-terminated: bound the read. */
+		printf(" link to %.*s", (int)sizeof(h->linkname), h->linkname);
+	}
+	fputs("\n", stdout);
 	skipblk(l);
+
 	return 0;
 }
 
_AT_@ -602,6 +678,9 @@ main(int argc, char *argv[])
 			eprintf("chdir %s:", dir);
 		for (; *argv; argc--, argv++)
 			recurse(AT_FDCWD, *argv, NULL, &r);
+
+		freehlinklist();
+
 		break;
 	case 't':
 	case 'x':
-- 
2.34.1
Received on Thu Mar 07 2024 - 19:02:19 CET

This archive was generated by hypermail 2.3.0 : Thu Mar 07 2024 - 23:12:40 CET