Version in base suite: 0.33.2-1~deb12u1 Base version: bup_0.33.2-1~deb12u1 Target version: bup_0.33.7-1~deb12u1 Base file: /srv/ftp-master.debian.org/ftp/pool/main/b/bup/bup_0.33.2-1~deb12u1.dsc Target file: /srv/ftp-master.debian.org/policy/pool/main/b/bup/bup_0.33.7-1~deb12u1.dsc .cirrus.yml | 10 .gitignore | 11 + Documentation/bup-bloom.1.md | 54 +++++ Documentation/bup-bloom.md | 54 ----- Documentation/bup-cat-file.1.md | 51 ++++ Documentation/bup-cat-file.md | 51 ---- Documentation/bup-config.5.md | 28 ++ Documentation/bup-daemon.1.md | 28 ++ Documentation/bup-daemon.md | 28 -- Documentation/bup-damage.1.md | 99 +++++++++ Documentation/bup-damage.md | 93 -------- Documentation/bup-drecurse.1.md | 71 ++++++ Documentation/bup-drecurse.md | 71 ------ Documentation/bup-features.1.md | 36 +++ Documentation/bup-features.md | 36 --- Documentation/bup-fsck.1.md | 116 ++++++++++ Documentation/bup-fsck.md | 116 ---------- Documentation/bup-ftp.1.md | 90 ++++++++ Documentation/bup-ftp.md | 90 -------- Documentation/bup-fuse.1.md | 68 ++++++ Documentation/bup-fuse.md | 68 ------ Documentation/bup-gc.1.md | 73 ++++++ Documentation/bup-gc.md | 64 ------ Documentation/bup-get.1.md | 194 ++++++++++++++++++ Documentation/bup-get.md | 190 ----------------- Documentation/bup-help.1.md | 28 ++ Documentation/bup-help.md | 28 -- Documentation/bup-import-duplicity.1.md | 52 ++++ Documentation/bup-import-duplicity.md | 52 ---- Documentation/bup-import-rdiff-backup.1.md | 30 ++ Documentation/bup-import-rdiff-backup.md | 30 -- Documentation/bup-import-rsnapshot.1.md | 35 +++ Documentation/bup-import-rsnapshot.md | 35 --- Documentation/bup-index.1.md | 208 +++++++++++++++++++ Documentation/bup-index.md | 208 ------------------- Documentation/bup-init.1.md | 40 +++ Documentation/bup-init.md | 40 --- Documentation/bup-join.1.md | 55 +++++ Documentation/bup-join.md | 55 ----- Documentation/bup-ls.1.md | 90 ++++++++ Documentation/bup-ls.md | 90 -------- Documentation/bup-margin.1.md | 79 
+++++++ Documentation/bup-margin.md | 79 ------- Documentation/bup-memtest.1.md | 130 ++++++++++++ Documentation/bup-memtest.md | 130 ------------ Documentation/bup-meta.1.md | 153 ++++++++++++++ Documentation/bup-meta.md | 153 -------------- Documentation/bup-midx.1.md | 104 +++++++++ Documentation/bup-midx.md | 104 --------- Documentation/bup-mux.1.md | 31 ++ Documentation/bup-mux.md | 31 -- Documentation/bup-on.1.md | 85 +++++++ Documentation/bup-on.md | 85 ------- Documentation/bup-prune-older.1.md | 125 +++++++++++ Documentation/bup-prune-older.md | 125 ----------- Documentation/bup-random.1.md | 80 +++++++ Documentation/bup-random.md | 80 ------- Documentation/bup-restore.1.md | 272 +++++++++++++++++++++++++ Documentation/bup-restore.md | 272 ------------------------- Documentation/bup-rm.1.md | 49 ++++ Documentation/bup-rm.md | 49 ---- Documentation/bup-save.1.md | 179 ++++++++++++++++ Documentation/bup-save.md | 179 ---------------- Documentation/bup-server.1.md | 52 ++++ Documentation/bup-server.md | 52 ---- Documentation/bup-split.1.md | 179 ++++++++++++++++ Documentation/bup-split.md | 179 ---------------- Documentation/bup-tag.1.md | 61 +++++ Documentation/bup-tag.md | 61 ----- Documentation/bup-tick.1.md | 32 +++ Documentation/bup-tick.md | 32 --- Documentation/bup-validate-object-links.1.md | 38 +++ Documentation/bup-validate-ref-links.1.md | 49 ++++ Documentation/bup-web.1.md | 76 +++++++ Documentation/bup-web.md | 76 ------- Documentation/bup.1.md | 125 +++++++++++ Documentation/bup.md | 126 ----------- GNUmakefile | 75 +++++-- README | 49 ++-- README.md | 49 ++-- config/configure.inc | 46 ++-- debian/changelog | 59 +++++ debian/docs | 5 debian/patches/debian-changes | 32 +-- debian/rules | 6 dev/cleanup-mounts-under | 2 dev/groups | 15 + dev/lib.sh | 19 - dev/path-fs | 20 + dev/perforate-repo | 92 ++++++++ dev/prep-for-freebsd-build | 2 dev/prep-for-macos-build | 5 dev/root-status | 5 dev/sparse-size | 85 +++++++ dev/sparse-test-data | 5 
dev/update-checkout-info | 6 dev/update-doc-branches | 8 issue/missing-objects-fig-bloom-get.dot | 23 ++ issue/missing-objects-fig-bloom-set.dot | 27 ++ issue/missing-objects-fig-bup-model-2.dot | 18 + issue/missing-objects-fig-bup-model.dot | 22 ++ issue/missing-objects-fig-gc-dangling.dot | 28 ++ issue/missing-objects-fig-get-bug-save.dot | 16 + issue/missing-objects-fig-git-model.dot | 18 + issue/missing-objects-fig-rm-after-gc.dot | 19 + issue/missing-objects-fig-rm-after.dot | 32 +++ issue/missing-objects-fig-rm-before.dot | 28 ++ issue/missing-objects.md | 287 +++++++++++++++++++++++++++ lib/bup/client.py | 19 - lib/bup/cmd/bloom.py | 20 + lib/bup/cmd/damage.py | 4 lib/bup/cmd/fsck.py | 148 ++++++++++--- lib/bup/cmd/gc.py | 12 - lib/bup/cmd/get.py | 85 +++++-- lib/bup/cmd/list_idx.py | 5 lib/bup/cmd/midx.py | 51 ++++ lib/bup/cmd/on__server.py | 9 lib/bup/cmd/split.py | 19 + lib/bup/cmd/validate_object_links.py | 125 +++++++++++ lib/bup/cmd/validate_ref_links.py | 63 +++++ lib/bup/cmd/web.py | 2 lib/bup/compat.py | 26 +- lib/bup/gc.py | 214 +++++++++++++------- lib/bup/git.py | 287 +++++++++++++++++---------- lib/bup/helpers.py | 82 ++++++- lib/bup/io.py | 6 lib/bup/main.py | 103 +++------ lib/bup/metadata.py | 10 lib/bup/midx.py | 154 +++++++++----- lib/bup/source_info.py | 4 lib/bup/version.py | 4 lib/bup/vfs.py | 4 lib/cmd/bup.c | 62 ++--- note/0.32.1-from-0.32.md | 38 +++ note/0.32.2-from-0.32.1.md | 26 ++ note/0.33.3-from-0.33.2.md | 57 +++++ note/0.33.4-from-0.33.3.md | 44 ++++ note/0.33.5-from-0.33.4.md | 156 ++++++++++++++ note/0.33.6-from-0.33.5.md | 22 ++ note/0.33.7-from-0.33.6.md | 18 + pytest | 3 test/ext/conftest.py | 36 +-- test/ext/test-fsck | 46 ++++ test/ext/test-gc | 81 +++---- test/ext/test-gc-removes-incomplete-trees | 75 +++++++ test/ext/test-get-missing | 49 ++++ test/ext/test-help | 8 test/ext/test-meta | 9 test/ext/test-misc | 2 test/ext/test-packsizelimit | 43 +++- test/ext/test-save-data-race | 2 test/ext/test-save-symlink-race | 2 
test/ext/test-sparse-files | 34 +-- test/ext/test-validate-object-links | 41 +++ test/ext/test-validate-ref-links | 64 ++++++ test/ext/test-walk-object-order | 51 ++++ test/int/test_git.py | 11 - test/int/test_midx.py | 38 +++ test/lib/wvpytest.py | 6 wvtest.sh | 4 160 files changed, 6270 insertions(+), 3895 deletions(-) diff: /srv/release.debian.org/tmp/oha51BrRjK/bup-0.33.2/bin/bup: No such file or directory diff: /srv/release.debian.org/tmp/rcSec11Vpf/bup-0.33.7/bin/bup: No such file or directory diff: /srv/release.debian.org/tmp/oha51BrRjK/bup-0.33.2/bup: No such file or directory diff: /srv/release.debian.org/tmp/rcSec11Vpf/bup-0.33.7/bup: No such file or directory diff -Nru bup-0.33.2/.cirrus.yml bup-0.33.7/.cirrus.yml --- bup-0.33.2/.cirrus.yml 2023-07-01 20:08:43.000000000 +0000 +++ bup-0.33.7/.cirrus.yml 2025-01-08 20:04:11.000000000 +0000 @@ -28,12 +28,12 @@ adduser --disabled-password --gecos '' bup chown -R bup:bup . printf "make -j6 -C %q BUP_PYTHON_CONFIG=python3-config long-check" \ - "$(pwd)" | su -l bup + "$(pwd)" | su -l -w BUP_TEST_OTHER_BUP bup task: name: debian check container: - image: debian:bullseye + image: debian:buster cpu: 4 memory: 2 script: | @@ -48,20 +48,20 @@ task: name: freebsd check freebsd_instance: - image: freebsd-12-4-release-amd64 + image: freebsd-13-2-release-amd64 cpu: 4 memory: 4 script: | set -xe dev/prep-for-freebsd-build python3 dev/system-info - BUP_PYTHON_CONFIG=python3.9-config make -j6 check + gmake -j6 check task: name: macos check macos_instance: # https://cirrus-ci.org/guide/macOS/ - image: ghcr.io/cirruslabs/macos-monterey-base:latest + image: ghcr.io/cirruslabs/macos-runner:sonoma script: | set -xe dev/prep-for-macos-build python3 diff -Nru bup-0.33.2/.gitignore bup-0.33.7/.gitignore --- bup-0.33.2/.gitignore 2023-07-01 20:08:43.000000000 +0000 +++ bup-0.33.7/.gitignore 2025-01-08 20:04:11.000000000 +0000 @@ -10,6 +10,17 @@ /dev/python /dev/python-proposed /dev/python-proposed.d 
+/issue/missing-objects-fig-bloom-get.svg +/issue/missing-objects-fig-bloom-set.svg +/issue/missing-objects-fig-bup-model-2.svg +/issue/missing-objects-fig-bup-model.svg +/issue/missing-objects-fig-gc-dangling.svg +/issue/missing-objects-fig-get-bug-save.svg +/issue/missing-objects-fig-git-model.svg +/issue/missing-objects-fig-rm-after-gc.svg +/issue/missing-objects-fig-rm-after.svg +/issue/missing-objects-fig-rm-before.svg +/issue/missing-objects.html /lib/bup/_helpers.d /lib/bup/_helpers.dll /lib/bup/_helpers.so diff -Nru bup-0.33.2/Documentation/bup-bloom.1.md bup-0.33.7/Documentation/bup-bloom.1.md --- bup-0.33.2/Documentation/bup-bloom.1.md 1970-01-01 00:00:00.000000000 +0000 +++ bup-0.33.7/Documentation/bup-bloom.1.md 2025-01-08 20:04:11.000000000 +0000 @@ -0,0 +1,54 @@ +% bup-bloom(1) Bup %BUP_VERSION% +% Brandon Low +% %BUP_DATE% + +# NAME + +bup-bloom - generates, regenerates, updates bloom filters + +# SYNOPSIS + +bup bloom [-d dir] [-o outfile] [-k hashes] [-c idxfile] [-f] [\--ruin] + +# DESCRIPTION + +`bup bloom` builds a bloom filter file for a bup +repository. If one already exists, it checks the filter and +updates or regenerates it as needed. + +# OPTIONS + +\--ruin +: destroy bloom filters by setting the whole bitmask to + zeros. you really want to know what you are doing if + you run this, and you want to delete the resulting bloom + when you are done with it. + +-f, \--force +: don't update the existing bloom file; generate a new + one from scratch. + +-d, \--dir=*directory* +: the directory, containing `.idx` files, to process. + Defaults to $BUP_DIR/objects/pack + +-o, \--outfile=*outfile* +: the file to write the bloom filter to. defaults to + $dir/bup.bloom + +-k, \--hashes=*hashes* +: number of hash functions to use; only 4 and 5 are valid. + defaults to 5 for repositories < 2 TiB, or 4 otherwise. + See comments in git.py for more on this value. 
+ +-c, \--check=*idxfile* +: checks the bloom file (counterintuitively outfile) + against the specified `.idx` file, first checks that the + bloom filter is claiming to contain the `.idx`, then + checks that it does actually contain all of the objects + in the `.idx`. Does not write anything and ignores the + `-k` option. + +# BUP + +Part of the `bup`(1) suite. diff -Nru bup-0.33.2/Documentation/bup-bloom.md bup-0.33.7/Documentation/bup-bloom.md --- bup-0.33.2/Documentation/bup-bloom.md 2023-07-01 20:08:43.000000000 +0000 +++ bup-0.33.7/Documentation/bup-bloom.md 1970-01-01 00:00:00.000000000 +0000 @@ -1,54 +0,0 @@ -% bup-bloom(1) Bup %BUP_VERSION% -% Brandon Low -% %BUP_DATE% - -# NAME - -bup-bloom - generates, regenerates, updates bloom filters - -# SYNOPSIS - -bup bloom [-d dir] [-o outfile] [-k hashes] [-c idxfile] [-f] [\--ruin] - -# DESCRIPTION - -`bup bloom` builds a bloom filter file for a bup -repository. If one already exists, it checks the filter and -updates or regenerates it as needed. - -# OPTIONS - -\--ruin -: destroy bloom filters by setting the whole bitmask to - zeros. you really want to know what you are doing if - run this and you want to delete the resulting bloom - when you are done with it. - --f, \--force -: don't update the existing bloom file; generate a new - one from scratch. - --d, \--dir=*directory* -: the directory, containing `.idx` files, to process. - Defaults to $BUP_DIR/objects/pack - --o, \--outfile=*outfile* -: the file to write the bloom filter to. defaults to - $dir/bup.bloom - --k, \--hashes=*hashes* -: number of hash functions to use only 4 and 5 are valid. - defaults to 5 for repositories < 2 TiB, or 4 otherwise. - See comments in git.py for more on this value. 
- --c, \--check=*idxfile* -: checks the bloom file (counterintuitively outfile) - against the specified `.idx` file, first checks that the - bloom filter is claiming to contain the `.idx`, then - checks that it does actually contain all of the objects - in the `.idx`. Does not write anything and ignores the - `-k` option. - -# BUP - -Part of the `bup`(1) suite. diff -Nru bup-0.33.2/Documentation/bup-cat-file.1.md bup-0.33.7/Documentation/bup-cat-file.1.md --- bup-0.33.2/Documentation/bup-cat-file.1.md 1970-01-01 00:00:00.000000000 +0000 +++ bup-0.33.7/Documentation/bup-cat-file.1.md 2025-01-08 20:04:11.000000000 +0000 @@ -0,0 +1,51 @@ +% bup-cat-file(1) Bup %BUP_VERSION% +% Rob Browning +% %BUP_DATE% + +# NAME + +bup-cat-file - extract archive content (low-level) + +# SYNOPSIS + +bup cat-file [\--meta|\--bupm] <*path*> + +# DESCRIPTION + +`bup cat-file` extracts content associated with *path* from the +archive and dumps it to standard output. If nothing special is +requested, the actual data contained by *path* (which must be a +regular file) will be dumped. + +# OPTIONS + +\--meta +: retrieve the metadata entry associated with *path*. Note that + currently this does not return the raw bytes for the entry + recorded in the relevant .bupm in the archive, but rather a + decoded and then re-encoded version. When that matters, it should + be possible (though awkward) to use `--bupm` on the parent + directory and then find the relevant entry in the output. + +\--bupm +: retrieve the .bupm file associated with *path*, which must be a + directory. + +# EXAMPLES + + # Retrieve the content of somefile. + $ bup cat-file /foo/latest/somefile > somefile-content + + # Examine the metadata associated with something. + $ bup cat-file --meta /foo/latest/something | bup meta -tvvf - + + # Examine the metadata for somedir, including the items it contains. 
+ $ bup cat-file --bupm /foo/latest/somedir | bup meta -tvvf - + +# SEE ALSO + +`bup-join`(1), `bup-meta`(1) + +# BUP + +Part of the `bup`(1) suite. diff -Nru bup-0.33.2/Documentation/bup-cat-file.md bup-0.33.7/Documentation/bup-cat-file.md --- bup-0.33.2/Documentation/bup-cat-file.md 2023-07-01 20:08:43.000000000 +0000 +++ bup-0.33.7/Documentation/bup-cat-file.md 1970-01-01 00:00:00.000000000 +0000 @@ -1,51 +0,0 @@ -% bup-cat-file(1) Bup %BUP_VERSION% -% Rob Browning -% %BUP_DATE% - -# NAME - -bup-cat-file - extract archive content (low-level) - -# SYNOPSIS - -bup cat-file [\--meta|\--bupm] <*path*> - -# DESCRIPTION - -`bup cat-file` extracts content associated with *path* from the -archive and dumps it to standard output. If nothing special is -requested, the actual data contained by *path* (which must be a -regular file) will be dumped. - -# OPTIONS - -\--meta -: retrieve the metadata entry associated with *path*. Note that - currently this does not return the raw bytes for the entry - recorded in the relevant .bupm in the archive, but rather a - decoded and then re-encoded version. When that matters, it should - be possible (though awkward) to use `--bupm` on the parent - directory and then find the relevant entry in the output. - -\--bupm -: retrieve the .bupm file associated with *path*, which must be a - directory. - -# EXAMPLES - - # Retrieve the content of somefile. - $ bup cat-file /foo/latest/somefile > somefile-content - - # Examine the metadata associated with something. - $ bup cat-file --meta /foo/latest/something | bup meta -tvvf - - - # Examine the metadata for somedir, including the items it contains. - $ bup cat-file --bupm /foo/latest/somedir | bup meta -tvvf - - -# SEE ALSO - -`bup-join`(1), `bup-meta`(1) - -# BUP - -Part of the `bup`(1) suite. 
diff -Nru bup-0.33.2/Documentation/bup-config.5.md bup-0.33.7/Documentation/bup-config.5.md --- bup-0.33.2/Documentation/bup-config.5.md 1970-01-01 00:00:00.000000000 +0000 +++ bup-0.33.7/Documentation/bup-config.5.md 2025-01-08 20:04:11.000000000 +0000 @@ -0,0 +1,28 @@ +% bup-config(5) Bup %BUP_VERSION% +% Rob Browning +% %BUP_DATE% + +# NAME + +bup-config - bup configuration options + +# DESCRIPTION + +The following options may be set in the relevant `git` config +(`git-config(1)`). + +# OPTIONS + +pack.packSizeLimit +: Respected when writing pack files (e.g. via `bup save ...`). + Currently read from the repository to which the pack files are + being written, excepting `bup on REMOTE...` which incorrectly + reads the value from the `REMOTE` repository. + +# SEE ALSO + +`git-config`(1) + +# BUP + +Part of the `bup`(1) suite. diff -Nru bup-0.33.2/Documentation/bup-daemon.1.md bup-0.33.7/Documentation/bup-daemon.1.md --- bup-0.33.2/Documentation/bup-daemon.1.md 1970-01-01 00:00:00.000000000 +0000 +++ bup-0.33.7/Documentation/bup-daemon.1.md 2025-01-08 20:04:11.000000000 +0000 @@ -0,0 +1,28 @@ +% bup-daemon(1) Bup %BUP_VERSION% +% Brandon Low +% %BUP_DATE% + +# NAME + +bup-daemon - listens for connections and runs `bup server` + +# SYNOPSIS + +bup daemon [-l address] [-p port] + +# DESCRIPTION + +`bup daemon` is a simple bup server which listens on a +socket and forks connections to `bup mux server` children. + +# OPTIONS + +-l, \--listen=*address* +: the address or hostname to listen on + +-p, \--port=*port* +: the port to listen on + +# BUP + +Part of the `bup`(1) suite. 
diff -Nru bup-0.33.2/Documentation/bup-daemon.md bup-0.33.7/Documentation/bup-daemon.md --- bup-0.33.2/Documentation/bup-daemon.md 2023-07-01 20:08:43.000000000 +0000 +++ bup-0.33.7/Documentation/bup-daemon.md 1970-01-01 00:00:00.000000000 +0000 @@ -1,28 +0,0 @@ -% bup-daemon(1) Bup %BUP_VERSION% -% Brandon Low -% %BUP_DATE% - -# NAME - -bup-daemon - listens for connections and runs `bup server` - -# SYNOPSIS - -bup daemon [-l address] [-p port] - -# DESCRIPTION - -`bup daemon` is a simple bup server which listens on a -socket and forks connections to `bup mux server` children. - -# OPTIONS - --l, \--listen=*address* -: the address or hostname to listen on - --p, \--port=*port* -: the port to listen on - -# BUP - -Part of the `bup`(1) suite. diff -Nru bup-0.33.2/Documentation/bup-damage.1.md bup-0.33.7/Documentation/bup-damage.1.md --- bup-0.33.2/Documentation/bup-damage.1.md 1970-01-01 00:00:00.000000000 +0000 +++ bup-0.33.7/Documentation/bup-damage.1.md 2025-01-08 20:04:11.000000000 +0000 @@ -0,0 +1,99 @@ +% bup-damage(1) Bup %BUP_VERSION% +% Avery Pennarun +% %BUP_DATE% + +# NAME + +bup-damage - randomly destroy blocks of a file + +# SYNOPSIS + +bup damage [-n count] [-s maxsize] [\--percent pct] [-S seed] +[\--equal] \<filenames...\> + +# DESCRIPTION + +Use `bup damage` to deliberately destroy blocks in a +`.pack` or `.idx` file (from `.bup/objects/pack`) to test +the recovery features of `bup-fsck`(1) or other programs. + +*THIS PROGRAM IS EXTREMELY DANGEROUS AND WILL DESTROY YOUR +DATA* + +`bup damage` is primarily useful for automated or manual tests +of data recovery tools, to reassure yourself that the tools +actually work. + +Note that the details of the current behavior may change (particularly +the details not documented here). For example, at the moment the damage +is strictly probabilistic, and so may or may not actually alter any +given block. With a block size of 1, there should be a 1/256 chance +that the block won't actually change. This behavior may change. 
+ +# OPTIONS + +-n, \--num=*numblocks* +: the number of separate blocks to damage in each file + (default 10). + Note that it's possible for more than one damaged + segment to fall in the same `bup-fsck`(1) recovery block, + so you might not damage as many recovery blocks as you + expect. If this is a problem, use `--equal`. + +-s, \--size=*maxblocksize* +: the maximum size, in bytes, of each damaged block + (default 1 unless `--percent` is specified). Note that + because of the way `bup-fsck`(1) works, a multi-byte + block could fall on the boundary between two recovery + blocks, and thus damage two separate recovery blocks. + In small files, it's also possible for a damaged block + to be larger than a recovery block. If these issues + might be a problem, you should use the default damage + size of one byte. + +\--percent=*maxblockpercent* +: the maximum size, in percent of the original file, of + each damaged block. If both `--size` and `--percent` + are given, the maximum block size is the minimum of the + two restrictions. You can use this to ensure that a + given block will never damage more than one or two + `bup-fsck`(1) recovery blocks. + +-S, \--seed=*randomseed* +: seed the random number generator with the given value. + If you use this option, your tests will be repeatable, + since the damaged block offsets, sizes, and contents + will be the same every time. By default, the random + numbers are different every time (so you can run tests + in a loop and repeatedly test with different + damage each time). + +\--equal +: instead of choosing random offsets for each damaged + block, space the blocks equally throughout the file, + starting at offset 0. If you also choose a correct + maximum block size, this can guarantee that any given + damage block never damages more than one `bup-fsck`(1) + recovery block. (This is also guaranteed if you use + `-s 1`.) 
+ +# EXAMPLES + # make a backup in case things go horribly wrong + cp -pPR ~/.bup/objects/pack ~/bup-packs.bak + + # generate recovery blocks for all packs + bup fsck -g + + # deliberately damage the packs + bup damage -n 10 -s 1 -S 0 ~/.bup/objects/pack/*.{pack,idx} + + # recover from the damage + bup fsck -r + +# SEE ALSO + +`bup-fsck`(1), `par2`(1) + +# BUP + +Part of the `bup`(1) suite. diff -Nru bup-0.33.2/Documentation/bup-damage.md bup-0.33.7/Documentation/bup-damage.md --- bup-0.33.2/Documentation/bup-damage.md 2023-07-01 20:08:43.000000000 +0000 +++ bup-0.33.7/Documentation/bup-damage.md 1970-01-01 00:00:00.000000000 +0000 @@ -1,93 +0,0 @@ -% bup-damage(1) Bup %BUP_VERSION% -% Avery Pennarun -% %BUP_DATE% - -# NAME - -bup-damage - randomly destroy blocks of a file - -# SYNOPSIS - -bup damage [-n count] [-s maxsize] [\--percent pct] [-S seed] -[\--equal] \ - -# DESCRIPTION - -Use `bup damage` to deliberately destroy blocks in a -`.pack` or `.idx` file (from `.bup/objects/pack`) to test -the recovery features of `bup-fsck`(1) or other programs. - -*THIS PROGRAM IS EXTREMELY DANGEROUS AND WILL DESTROY YOUR -DATA* - -`bup damage` is primarily useful for automated or manual tests -of data recovery tools, to reassure yourself that the tools -actually work. - -# OPTIONS - --n, \--num=*numblocks* -: the number of separate blocks to damage in each file - (default 10). - Note that it's possible for more than one damaged - segment to fall in the same `bup-fsck`(1) recovery block, - so you might not damage as many recovery blocks as you - expect. If this is a problem, use `--equal`. - --s, \--size=*maxblocksize* -: the maximum size, in bytes, of each damaged block - (default 1 unless `--percent` is specified). Note that - because of the way `bup-fsck`(1) works, a multi-byte - block could fall on the boundary between two recovery - blocks, and thus damaging two separate recovery blocks. 
- In small files, it's also possible for a damaged block - to be larger than a recovery block. If these issues - might be a problem, you should use the default damage - size of one byte. - -\--percent=*maxblockpercent* -: the maximum size, in percent of the original file, of - each damaged block. If both `--size` and `--percent` - are given, the maximum block size is the minimum of the - two restrictions. You can use this to ensure that a - given block will never damage more than one or two - `git-fsck`(1) recovery blocks. - --S, \--seed=*randomseed* -: seed the random number generator with the given value. - If you use this option, your tests will be repeatable, - since the damaged block offsets, sizes, and contents - will be the same every time. By default, the random - numbers are different every time (so you can run tests - in a loop and repeatedly test with different - damage each time). - -\--equal -: instead of choosing random offsets for each damaged - block, space the blocks equally throughout the file, - starting at offset 0. If you also choose a correct - maximum block size, this can guarantee that any given - damage block never damages more than one `git-fsck`(1) - recovery block. (This is also guaranteed if you use - `-s 1`.) - -# EXAMPLES - # make a backup in case things go horribly wrong - cp -pPR ~/.bup/objects/pack ~/bup-packs.bak - - # generate recovery blocks for all packs - bup fsck -g - - # deliberately damage the packs - bup damage -n 10 -s 1 -S 0 ~/.bup/objects/pack/*.{pack,idx} - - # recover from the damage - bup fsck -r - -# SEE ALSO - -`bup-fsck`(1), `par2`(1) - -# BUP - -Part of the `bup`(1) suite. 
diff -Nru bup-0.33.2/Documentation/bup-drecurse.1.md bup-0.33.7/Documentation/bup-drecurse.1.md --- bup-0.33.2/Documentation/bup-drecurse.1.md 1970-01-01 00:00:00.000000000 +0000 +++ bup-0.33.7/Documentation/bup-drecurse.1.md 2025-01-08 20:04:11.000000000 +0000 @@ -0,0 +1,71 @@ +% bup-drecurse(1) Bup %BUP_VERSION% +% Avery Pennarun +% %BUP_DATE% + +# NAME + +bup-drecurse - recursively list files in your filesystem + +# SYNOPSIS + +bup drecurse [-x] [-q] [\--exclude *path*] +\ [\--exclude-from *filename*] [\--exclude-rx *pattern*] +\ [\--exclude-rx-from *filename*] [\--profile] \ + +# DESCRIPTION + +`bup drecurse` traverses files in the filesystem in a way +similar to `find`(1). In most cases, you should use +`find`(1) instead. + +This program is useful mainly for testing the file +traversal algorithm used in `bup-index`(1). + +Note that filenames are returned in reverse alphabetical +order, as in `bup-index`(1). This is important because you +can't generate the hash of a parent directory until you +have generated the hashes of all its children. When +listing files in reverse order, the parent directory will +come after its children, making this easy. + +# OPTIONS + +-x, \--xdev, \--one-file-system +: don't cross filesystem boundaries -- though as with tar and rsync, + the mount points themselves will still be reported. + +-q, \--quiet +: don't print filenames as they are encountered. Useful + when testing performance of the traversal algorithms. + +\--exclude=*path* +: exclude *path* from the backup (may be repeated). + +\--exclude-from=*filename* +: read --exclude paths from *filename*, one path per-line (may be + repeated). Ignore completely empty lines. + +\--exclude-rx=*pattern* +: exclude any path matching *pattern*. See `bup-index`(1) for + details, but note that unlike index, drecurse will produce + relative paths if the drecurse target is a relative path. (may be + repeated). 
+ +\--exclude-rx-from=*filename* +: read --exclude-rx patterns from *filename*, one pattern per-line + (may be repeated). Ignore completely empty lines. + +\--profile +: print profiling information upon completion. Useful + when testing performance of the traversal algorithms. + +# EXAMPLES + bup drecurse -x / + +# SEE ALSO + +`bup-index`(1) + +# BUP + +Part of the `bup`(1) suite. diff -Nru bup-0.33.2/Documentation/bup-drecurse.md bup-0.33.7/Documentation/bup-drecurse.md --- bup-0.33.2/Documentation/bup-drecurse.md 2023-07-01 20:08:43.000000000 +0000 +++ bup-0.33.7/Documentation/bup-drecurse.md 1970-01-01 00:00:00.000000000 +0000 @@ -1,71 +0,0 @@ -% bup-drecurse(1) Bup %BUP_VERSION% -% Avery Pennarun -% %BUP_DATE% - -# NAME - -bup-drecurse - recursively list files in your filesystem - -# SYNOPSIS - -bup drecurse [-x] [-q] [\--exclude *path*] -\ [\--exclude-from *filename*] [\--exclude-rx *pattern*] -\ [\--exclude-rx-from *filename*] [\--profile] \ - -# DESCRIPTION - -`bup drecurse` traverses files in the filesystem in a way -similar to `find`(1). In most cases, you should use -`find`(1) instead. - -This program is useful mainly for testing the file -traversal algorithm used in `bup-index`(1). - -Note that filenames are returned in reverse alphabetical -order, as in `bup-index`(1). This is important because you -can't generate the hash of a parent directory until you -have generated the hashes of all its children. When -listing files in reverse order, the parent directory will -come after its children, making this easy. - -# OPTIONS - --x, \--xdev, \--one-file-system -: don't cross filesystem boundaries -- though as with tar and rsync, - the mount points themselves will still be reported. - --q, \--quiet -: don't print filenames as they are encountered. Useful - when testing performance of the traversal algorithms. - -\--exclude=*path* -: exclude *path* from the backup (may be repeated). 
- -\--exclude-from=*filename* -: read --exclude paths from *filename*, one path per-line (may be - repeated). Ignore completely empty lines. - -\--exclude-rx=*pattern* -: exclude any path matching *pattern*. See `bup-index`(1) for - details, but note that unlike index, drecurse will produce - relative paths if the drecurse target is a relative path. (may be - repeated). - -\--exclude-rx-from=*filename* -: read --exclude-rx patterns from *filename*, one pattern per-line - (may be repeated). Ignore completely empty lines. - -\--profile -: print profiling information upon completion. Useful - when testing performance of the traversal algorithms. - -# EXAMPLES - bup drecurse -x / - -# SEE ALSO - -`bup-index`(1) - -# BUP - -Part of the `bup`(1) suite. diff -Nru bup-0.33.2/Documentation/bup-features.1.md bup-0.33.7/Documentation/bup-features.1.md --- bup-0.33.2/Documentation/bup-features.1.md 1970-01-01 00:00:00.000000000 +0000 +++ bup-0.33.7/Documentation/bup-features.1.md 2025-01-08 20:04:11.000000000 +0000 @@ -0,0 +1,36 @@ +% bup-features(1) Bup %BUP_VERSION% +% Rob Browning +% %BUP_DATE% + +# NAME + +bup-features - report the current status and capabilities of bup itself + +# SYNOPSIS + +bup features + +# DESCRIPTION + +`bup features` reports information about the current bup installation, +for example, which version of the Python interpreter is used, whether command +line editing is supported by `bup ftp`, or POSIX ACLs can be saved and +restored. + +# EXAMPLES + + $ bup features + bup 0.31~a7ff2d5b8c12b24b97858aad1251d28c18f8c1e1 + source a7ff2d5b8c12b24b97858aad1251d28c18f8c1e1 2020-07-05 14:54:06 -0500 + Python: 3.7.3 + Command line editing (e.g. bup ftp): yes + Saving and restoring POSIX ACLs: yes + .... + +# SEE ALSO + +`bup-version`(1) + +# BUP + +Part of the `bup`(1) suite. 
diff -Nru bup-0.33.2/Documentation/bup-features.md bup-0.33.7/Documentation/bup-features.md --- bup-0.33.2/Documentation/bup-features.md 2023-07-01 20:08:43.000000000 +0000 +++ bup-0.33.7/Documentation/bup-features.md 1970-01-01 00:00:00.000000000 +0000 @@ -1,36 +0,0 @@ -% bup-features(1) Bup %BUP_VERSION% -% Rob Browning -% %BUP_DATE% - -# NAME - -bup-features - report the current status and capabilities of bup itself - -# SYNOPSIS - -bup features - -# DESCRIPTION - -`bup features` reports information about the current bup installation, -for example, which version of the Python interpreter is used, whether command -line editing is supported by `bup ftp`, or POSIX ACLs can be saved and -restored. - -# EXAMPLES - - $ bup features - bup 0.31~a7ff2d5b8c12b24b97858aad1251d28c18f8c1e1 - source a7ff2d5b8c12b24b97858aad1251d28c18f8c1e1 2020-07-05 14:54:06 -0500 - Python: 3.7.3 - Command line editing (e.g. bup ftp): yes - Saving and restoring POSIX ACLs: yes - .... - -# SEE ALSO - -`bup-version`(1) - -# BUP - -Part of the `bup`(1) suite. diff -Nru bup-0.33.2/Documentation/bup-fsck.1.md bup-0.33.7/Documentation/bup-fsck.1.md --- bup-0.33.2/Documentation/bup-fsck.1.md 1970-01-01 00:00:00.000000000 +0000 +++ bup-0.33.7/Documentation/bup-fsck.1.md 2025-01-08 20:04:11.000000000 +0000 @@ -0,0 +1,116 @@ +% bup-fsck(1) Bup %BUP_VERSION% +% Avery Pennarun +% %BUP_DATE% + +# NAME + +bup-fsck - verify or repair a bup repository + +# SYNOPSIS + +bup fsck [-r] [-g] [-v] [\--quick] [-j *jobs*] [\--par2-ok] +[\--disable-par2] [filenames...] + +# DESCRIPTION + +`bup fsck` is a tool for validating bup repositories in the +same way that `git fsck` validates git repositories. + +It can also generate and/or use "recovery blocks" using the +`par2`(1) tool (if you have it installed). This allows you +to recover from damaged blocks covering up to 5% of your +`.pack` files. 
+ +In a normal backup system, damaged blocks are less +important, because there tends to be enough data duplicated +between backup sets that a single damaged backup set is +non-critical. In a deduplicating backup system like bup, +however, no block is ever stored more than once, even if it +is used in every single backup. If that block were to be +unrecoverable, *all* your backup sets would be +damaged at once. Thus, it's important to be able to verify +the integrity of your backups and recover from disk errors +if they occur. + +*WARNING*: bup fsck's recovery features are not available +unless you have the free `par2`(1) package installed on +your bup server. + +*WARNING*: bup fsck obviously cannot recover from a +complete disk failure. If your backups are important, you +need to carefully consider redundancy (such as using RAID +for multi-disk redundancy, or making off-site backups for +site redundancy). + +# OPTIONS + +-r, \--repair +: attempt to repair any damaged packs using + existing recovery blocks. (Requires `par2`(1).) + +-g, \--generate +: generate recovery blocks for any packs that don't + already have them. (Requires `par2`(1).) + +-v, \--verbose +: increase verbosity (can be used more than once). + +\--quick +: don't run a full `git verify-pack` on each pack file; + instead just check the final checksum. This can cause + a significant speedup with no obvious decrease in + reliability. However, you may want to avoid this + option if you're paranoid. Has no effect on packs that + already have recovery information. + +-j, \--jobs=*numjobs* +: maximum number of pack verifications to run at a time. + The optimal value for this option depends how fast your + CPU can verify packs vs. your disk throughput. If you + run too many jobs at once, your disk will get saturated + by seeking back and forth between files and performance + will actually decrease, even if *numjobs* is less than + the number of CPU cores on your system. 
You can + experiment with this option to find the optimal value. + +\--par2-ok +: immediately return 0 if `par2`(1) is installed and + working, or 1 otherwise. Do not actually check + anything. + +\--disable-par2 +: pretend that `par2`(1) is not installed, and ignore all + recovery blocks. + + +# EXAMPLES + # generate recovery blocks for all packs that don't + # have them + bup fsck -g + + # generate recovery blocks for a particular pack + bup fsck -g ~/.bup/objects/pack/153a1420cb1c8*.pack + + # check all packs for correctness (can be very slow!) + bup fsck + + # check all packs for correctness and recover any + # damaged ones + bup fsck -r + + # check a particular pack for correctness and recover + # it if damaged + bup fsck -r ~/.bup/objects/pack/153a1420cb1c8*.pack + + # check if recovery blocks are available on this system + if bup fsck --par2-ok; then + echo "par2 is ok" + fi + +# SEE ALSO + +`bup-damage`(1), `fsck`(1), `git-fsck`(1) + +# BUP + +Part of the `bup`(1) suite. diff -Nru bup-0.33.2/Documentation/bup-fsck.md bup-0.33.7/Documentation/bup-fsck.md --- bup-0.33.2/Documentation/bup-fsck.md 2023-07-01 20:08:43.000000000 +0000 +++ bup-0.33.7/Documentation/bup-fsck.md 1970-01-01 00:00:00.000000000 +0000 @@ -1,116 +0,0 @@ -% bup-fsck(1) Bup %BUP_VERSION% -% Avery Pennarun -% %BUP_DATE% - -# NAME - -bup-fsck - verify or repair a bup repository - -# SYNOPSIS - -bup fsck [-r] [-g] [-v] [\--quick] [-j *jobs*] [\--par2-ok] -[\--disable-par2] [filenames...] - -# DESCRIPTION - -`bup fsck` is a tool for validating bup repositories in the -same way that `git fsck` validates git repositories. - -It can also generate and/or use "recovery blocks" using the -`par2`(1) tool (if you have it installed). This allows you -to recover from damaged blocks covering up to 5% of your -`.pack` files. 
- -In a normal backup system, damaged blocks are less -important, because there tends to be enough data duplicated -between backup sets that a single damaged backup set is -non-critical. In a deduplicating backup system like bup, -however, no block is ever stored more than once, even if it -is used in every single backup. If that block were to be -unrecoverable, *all* your backup sets would be -damaged at once. Thus, it's important to be able to verify -the integrity of your backups and recover from disk errors -if they occur. - -*WARNING*: bup fsck's recovery features are not available -unless you have the free `par2`(1) package installed on -your bup server. - -*WARNING*: bup fsck obviously cannot recover from a -complete disk failure. If your backups are important, you -need to carefully consider redundancy (such as using RAID -for multi-disk redundancy, or making off-site backups for -site redundancy). - -# OPTIONS - --r, \--repair -: attempt to repair any damaged packs using - existing recovery blocks. (Requires `par2`(1).) - --g, \--generate -: generate recovery blocks for any packs that don't - already have them. (Requires `par2`(1).) - --v, \--verbose -: increase verbosity (can be used more than once). - -\--quick -: don't run a full `git verify-pack` on each pack file; - instead just check the final checksum. This can cause - a significant speedup with no obvious decrease in - reliability. However, you may want to avoid this - option if you're paranoid. Has no effect on packs that - already have recovery information. - --j, \--jobs=*numjobs* -: maximum number of pack verifications to run at a time. - The optimal value for this option depends how fast your - CPU can verify packs vs. your disk throughput. If you - run too many jobs at once, your disk will get saturated - by seeking back and forth between files and performance - will actually decrease, even if *numjobs* is less than - the number of CPU cores on your system. 
You can - experiment with this option to find the optimal value. - -\--par2-ok -: immediately return 0 if `par2`(1) is installed and - working, or 1 otherwise. Do not actually check - anything. - -\--disable-par2 -: pretend that `par2`(1) is not installed, and ignore all - recovery blocks. - - -# EXAMPLES - # generate recovery blocks for all packs that don't - # have them - bup fsck -g - - # generate recovery blocks for a particular pack - bup fsck -g ~/.bup/objects/pack/153a1420cb1c8*.pack - - # check all packs for correctness (can be very slow!) - bup fsck - - # check all packs for correctness and recover any - # damaged ones - bup fsck -r - - # check a particular pack for correctness and recover - # it if damaged - bup fsck -r ~/.bup/objects/pack/153a1420cb1c8*.pack - - # check if recovery blocks are available on this system - if bup fsck --par2-ok; then - echo "par2 is ok" - fi - -# SEE ALSO - -`bup-damage`(1), `fsck`(1), `git-fsck`(1) - -# BUP - -Part of the `bup`(1) suite. diff -Nru bup-0.33.2/Documentation/bup-ftp.1.md bup-0.33.7/Documentation/bup-ftp.1.md --- bup-0.33.2/Documentation/bup-ftp.1.md 1970-01-01 00:00:00.000000000 +0000 +++ bup-0.33.7/Documentation/bup-ftp.1.md 2025-01-08 20:04:11.000000000 +0000 @@ -0,0 +1,90 @@ +% bup-ftp(1) Bup %BUP_VERSION% +% Avery Pennarun +% %BUP_DATE% + +# NAME + +bup-ftp - ftp-like client for navigating bup repositories + +# SYNOPSIS + +bup ftp + +# DESCRIPTION + +`bup ftp` is a command-line tool for navigating bup +repositories. It has commands similar to the Unix `ftp`(1) +command. The file hierarchy is the same as that shown by +`bup-fuse`(1) and `bup-ls`(1). + +Note: if your system has the python-readline library +installed, you can use the \<tab\> key to complete filenames +while navigating your backup data. This will save you a +lot of typing. + + +# COMMANDS + +The following commands are available inside `bup ftp`: + +ls [-s] [-a] [*path*] +: print the contents of a directory.
If no path argument + is given, the current directory's contents are listed. + If -a is given, also include hidden files (files which + start with a `.` character). If -s is given, each file + is displayed with its hash from the bup archive to its + left. + +cd *dirname* +: change to a different working directory + +pwd +: print the path of the current working directory + +cat *filenames...* +: print the contents of one or more files to stdout + +get *filename* *localname* +: download the contents of *filename* and save it to disk + as *localname*. If *localname* is omitted, uses + *filename* as the local name. + +mget *filenames...* +: download the contents of the given *filenames* and + stores them to disk under the same names. The + filenames may contain Unix filename globs (`*`, `?`, + etc.) + +help +: print a list of available commands + +quit +: exit the `bup ftp` client + + +# EXAMPLES + $ bup ftp + bup> ls + mybackup/ yourbackup/ + + bup> cd mybackup/ + bup> ls + 2010-02-05-185507@ 2010-02-05-185508@ latest@ + + bup> cd latest/ + bup> ls + (...etc...) + + bup> get myfile + Saving 'myfile' + bup> quit + + +# SEE ALSO + +`bup-fuse`(1), `bup-ls`(1), `bup-save`(1), `bup-restore`(1) + + +# BUP + +Part of the `bup`(1) suite. diff -Nru bup-0.33.2/Documentation/bup-ftp.md bup-0.33.7/Documentation/bup-ftp.md --- bup-0.33.2/Documentation/bup-ftp.md 2023-07-01 20:08:43.000000000 +0000 +++ bup-0.33.7/Documentation/bup-ftp.md 1970-01-01 00:00:00.000000000 +0000 @@ -1,90 +0,0 @@ -% bup-ftp(1) Bup %BUP_VERSION% -% Avery Pennarun -% %BUP_DATE% - -# NAME - -bup-ftp - ftp-like client for navigating bup repositories - -# SYNOPSIS - -bup ftp - -# DESCRIPTION - -`bup ftp` is a command-line tool for navigating bup -repositories. It has commands similar to the Unix `ftp`(1) -command. The file hierarchy is the same as that shown by -`bup-fuse`(1) and `bup-ls`(1). 
- -Note: if your system has the python-readline library -installed, you can use the \<tab\> key to complete filenames -while navigating your backup data. This will save you a -lot of typing. - - -# COMMANDS - -The following commands are available inside `bup ftp`: - -ls [-s] [-a] [*path*] -: print the contents of a directory. If no path argument - is given, the current directory's contents are listed. - If -a is given, also include hidden files (files which - start with a `.` character). If -s is given, each file - is displayed with its hash from the bup archive to its - left. - -cd *dirname* -: change to a different working directory - -pwd -: print the path of the current working directory - -cat *filenames...* -: print the contents of one or more files to stdout - -get *filename* *localname* -: download the contents of *filename* and save it to disk - as *localname*. If *localname* is omitted, uses - *filename* as the local name. - -mget *filenames...* -: download the contents of the given *filenames* and - stores them to disk under the same names. The - filenames may contain Unix filename globs (`*`, `?`, - etc.) - -help -: print a list of available commands - -quit -: exit the `bup ftp` client - - -# EXAMPLES - $ bup ftp - bup> ls - mybackup/ yourbackup/ - - bup> cd mybackup/ - bup> ls - 2010-02-05-185507@ 2010-02-05-185508@ latest@ - - bup> cd latest/ - bup> ls - (...etc...) - - bup> get myfile - Saving 'myfile' - bup> quit - - -# SEE ALSO - -`bup-fuse`(1), `bup-ls`(1), `bup-save`(1), `bup-restore`(1) - - -# BUP - -Part of the `bup`(1) suite.
diff -Nru bup-0.33.2/Documentation/bup-fuse.1.md bup-0.33.7/Documentation/bup-fuse.1.md --- bup-0.33.2/Documentation/bup-fuse.1.md 1970-01-01 00:00:00.000000000 +0000 +++ bup-0.33.7/Documentation/bup-fuse.1.md 2025-01-08 20:04:11.000000000 +0000 @@ -0,0 +1,68 @@ +% bup-fuse(1) Bup %BUP_VERSION% +% Avery Pennarun +% %BUP_DATE% + +# NAME + +bup-fuse - mount a bup repository as a filesystem + +# SYNOPSIS + +bup fuse [-d] [-f] [-o] \<mountpoint\> + +# DESCRIPTION + +`bup fuse` opens a bup repository and exports it as a +`fuse`(7) userspace filesystem. + +This feature is only available on systems (such as Linux) +which support FUSE. + +**WARNING**: bup fuse is still experimental and does not +enforce any file permissions! All files will be readable +by all users. + +When you're done accessing the mounted fuse filesystem, you +should unmount it with `umount`(8). + +# OPTIONS + +-d, \--debug +: run in the foreground and print FUSE debug information + for each request. + +-f, \--foreground +: run in the foreground and exit only when the filesystem + is unmounted. + +-o, \--allow-other +: permit other users to access the filesystem. Necessary for + exporting the filesystem via Samba, for example. + +\--meta +: report some of the original metadata (when available) for the + mounted paths (currently the uid, gid, mode, and timestamps). + Without this, only generic values will be presented. This option + is not yet enabled by default because it may negatively affect + performance, and note that any timestamps before 1970-01-01 UTC + (i.e. before the Unix epoch) will be presented as 1970-01-01 UTC. + +-v, \--verbose +: increase verbosity (can be used more than once). + +# EXAMPLES + rm -rf /tmp/buptest + mkdir /tmp/buptest + sudo bup fuse -d /tmp/buptest + ls /tmp/buptest/*/latest + ... + umount /tmp/buptest + +# SEE ALSO + +`fuse`(7), `fusermount`(1), `bup-ls`(1), `bup-ftp`(1), +`bup-restore`(1), `bup-web`(1) + +# BUP + +Part of the `bup`(1) suite.
diff -Nru bup-0.33.2/Documentation/bup-fuse.md bup-0.33.7/Documentation/bup-fuse.md --- bup-0.33.2/Documentation/bup-fuse.md 2023-07-01 20:08:43.000000000 +0000 +++ bup-0.33.7/Documentation/bup-fuse.md 1970-01-01 00:00:00.000000000 +0000 @@ -1,68 +0,0 @@ -% bup-fuse(1) Bup %BUP_VERSION% -% Avery Pennarun -% %BUP_DATE% - -# NAME - -bup-fuse - mount a bup repository as a filesystem - -# SYNOPSIS - -bup fuse [-d] [-f] [-o] \<mountpoint\> - -# DESCRIPTION - -`bup fuse` opens a bup repository and exports it as a -`fuse`(7) userspace filesystem. - -This feature is only available on systems (such as Linux) -which support FUSE. - -**WARNING**: bup fuse is still experimental and does not -enforce any file permissions! All files will be readable -by all users. - -When you're done accessing the mounted fuse filesystem, you -should unmount it with `umount`(8). - -# OPTIONS - --d, \--debug -: run in the foreground and print FUSE debug information - for each request. - --f, \--foreground -: run in the foreground and exit only when the filesystem - is unmounted. - --o, \--allow-other -: permit other users to access the filesystem. Necessary for - exporting the filesystem via Samba, for example. - -\--meta -: report some of the original metadata (when available) for the - mounted paths (currently the uid, gid, mode, and timestamps). - Without this, only generic values will be presented. This option - is not yet enabled by default because it may negatively affect - performance, and note that any timestamps before 1970-01-01 UTC - (i.e. before the Unix epoch) will be presented as 1970-01-01 UTC. - --v, \--verbose -: increase verbosity (can be used more than once). - -# EXAMPLES - rm -rf /tmp/buptest - mkdir /tmp/buptest - sudo bup fuse -d /tmp/buptest - ls /tmp/buptest/*/latest - ... - umount /tmp/buptest - -# SEE ALSO - -`fuse`(7), `fusermount`(1), `bup-ls`(1), `bup-ftp`(1), -`bup-restore`(1), `bup-web`(1) - -# BUP - -Part of the `bup`(1) suite.
diff -Nru bup-0.33.2/Documentation/bup-gc.1.md bup-0.33.7/Documentation/bup-gc.1.md --- bup-0.33.2/Documentation/bup-gc.1.md 1970-01-01 00:00:00.000000000 +0000 +++ bup-0.33.7/Documentation/bup-gc.1.md 2025-01-08 20:04:11.000000000 +0000 @@ -0,0 +1,73 @@ +% bup-gc(1) Bup %BUP_VERSION% +% Rob Browning +% %BUP_DATE% + +# NAME + +bup-gc - remove unreferenced, unneeded data + +# SYNOPSIS + +bup gc [-#|\--verbose] <*branch*|*save*...> + +# DESCRIPTION + +`bup gc` removes (permanently deletes) unreachable data from the +repository, data that isn't referred to directly or indirectly by the +current set of branches (backup sets) and tags. But bear in mind that +given deduplication, deleting a save and running the garbage collector +might or might not actually delete anything (or reclaim any space). + +With the current, proababilistic implementation, some fraction of the +unreachable data may be retained. In exchange, the garbage collection +should require less RAM than might be required by some more precise +approaches. + +Typically, the garbage collector would be invoked after some set of +invocations of `bup rm`. + +WARNING: This is one of the few bup commands that modifies your +archive in intentionally destructive ways. Though if an attempt to +`join` or `restore` the data you still care about after a `gc` +succeeds, that's a fairly encouraging sign that the commands worked +correctly. (The `dev/compare-trees` command in the source tree can be +used to help test before/after results.) + +# OPTIONS + +\--threshold=N +: only rewrite a packfile if it's over N percent garbage and + contains no unreachable trees or commits. The default threshold + is 10%. + +-v, \--verbose +: increase verbosity (can be used more than once). With one -v, bup + prints every directory name as it gets backed up. With two -v, + it also prints every filename. + +-*#*, \--compress=*#* +: set the compression level to # (a value from 0-9, where + 9 is the highest and 0 is no compression). 
The default + is 1 (fast, loose compression). + +\--ignore-missing +: report missing objects, but don't stop the collection. + +# EXIT STATUS + +The exit status will be nonzero if there were any errors. +Encountering any missing object is considered an error. + +# EXAMPLES + + # Remove all saves of "home" and most of the otherwise unreferenced data. + $ bup rm home + $ bup gc + +# SEE ALSO + +`bup-rm`(1) and `bup-fsck`(1) + +# BUP + +Part of the `bup`(1) suite. diff -Nru bup-0.33.2/Documentation/bup-gc.md bup-0.33.7/Documentation/bup-gc.md --- bup-0.33.2/Documentation/bup-gc.md 2023-07-01 20:08:43.000000000 +0000 +++ bup-0.33.7/Documentation/bup-gc.md 1970-01-01 00:00:00.000000000 +0000 @@ -1,64 +0,0 @@ -% bup-gc(1) Bup %BUP_VERSION% -% Rob Browning -% %BUP_DATE% - -# NAME - -bup-gc - remove unreferenced, unneeded data - -# SYNOPSIS - -bup gc [-#|\--verbose] <*branch*|*save*...> - -# DESCRIPTION - -`bup gc` removes (permanently deletes) unreachable data from the -repository, data that isn't referred to directly or indirectly by the -current set of branches (backup sets) and tags. But bear in mind that -given deduplication, deleting a save and running the garbage collector -might or might not actually delete anything (or reclaim any space). - -With the current, proababilistic implementation, some fraction of the -unreachable data may be retained. In exchange, the garbage collection -should require much less RAM than might by some more precise -approaches. - -Typically, the garbage collector would be invoked after some set of -invocations of `bup rm`. - -WARNING: This is one of the few bup commands that modifies your -archive in intentionally destructive ways. Though if an attempt to -`join` or `restore` the data you still care about after a `gc` -succeeds, that's a fairly encouraging sign that the commands worked -correctly. (The `dev/compare-trees` command in the source tree can be -used to help test before/after results.) 
- -# OPTIONS - -\--threshold=N -: only rewrite a packfile if it's over N percent garbage; otherwise - leave it alone. The default threshold is 10%. - --v, \--verbose -: increase verbosity (can be used more than once). With one -v, bup - prints every directory name as it gets backed up. With two -v, - it also prints every filename. - --*#*, \--compress=*#* -: set the compression level to # (a value from 0-9, where - 9 is the highest and 0 is no compression). The default - is 1 (fast, loose compression). - -# EXAMPLES - - # Remove all saves of "home" and most of the otherwise unreferenced data. - $ bup rm home - $ bup gc - -# SEE ALSO - -`bup-rm`(1) and `bup-fsck`(1) - -# BUP - -Part of the `bup`(1) suite. diff -Nru bup-0.33.2/Documentation/bup-get.1.md bup-0.33.7/Documentation/bup-get.1.md --- bup-0.33.2/Documentation/bup-get.1.md 1970-01-01 00:00:00.000000000 +0000 +++ bup-0.33.7/Documentation/bup-get.1.md 2025-01-08 20:04:11.000000000 +0000 @@ -0,0 +1,194 @@ +% bup-get(1) Bup %BUP_VERSION% +% Rob Browning +% %BUP_DATE% + +# NAME + +bup-get - copy repository items (CAUTION: EXPERIMENTAL) + +# SYNOPSIS + +bup get \[-s *source-path*\] \[-r *host*:*path*\] OPTIONS \<(METHOD *ref* [*dest*])\>... + +# DESCRIPTION + +`bup get` copies the indicated *ref*s from the source repository to +the destination repository (respecting `--bup-dir` and `BUP_DIR`), +according to the specified METHOD, which may be one of `--ff`, +`--ff:`, `--append`, `--append:`, `--pick`, `--pick:`, `--force-pick`, +`--force-pick:`, `--new-tag`, `--new-tag:`, `--replace`, `--replace:`, +or `--unnamed`. See the EXAMPLES below for a quick introduction. + +The *ref* is the source repository reference of the object to be +fetched, and the *dest* is the optional destination reference. A +*dest* may only be specified for a METHOD whose name ends in a colon. 
+For example: + + bup get -s /source/repo --ff foo + bup get -s /source/repo --ff: foo/latest bar + bup get -s /source/repo --pick: foo/2010-10-10-101010 .tag/bar + +As a special case, if *ref* names the "latest" save symlink, then bup +will act exactly as if the save that "latest" points to had been +specified, rather than the "latest" symlink itself, so `bup get +foo/latest` will actually be interpreted as something like `bup get +foo/2013-01-01-030405`. + +In some situations `bup get` will evaluate a branch operation +according to whether or not it will be a "fast-forward" (which +requires that any existing destination branch be an ancestor of the +source). + +An existing destination tag can only be overwritten by a `--replace` +or `--force-pick`. + +When a new commit is created (i.e. via `--append`, `--pick`, etc.), it +will have the same author, author date, and message as the original, +but a committer and committer date corresponding to the current user +and time. + +If requested by the appropriate options, bup will print the commit, +tree, or tag hash for each destination reference updated. When +relevant, the tree hash will be printed before the commit hash. + +Local *ref*s can be pushed to a remote repository with the `--remote` +option, and remote *ref*s can be pulled into a local repository via +"bup on HOST get ...". See `bup-on`(1) and the EXAMPLES below for +further information. + +WARNING: This is one of the few bup commands that can modify your +archives in intentionally destructive ways. Though if an attempt to +join or restore the data you still care about succeeds after you've +run this command, then that's a fairly encouraging sign that it worked +correctly. (The dev/compare-trees command in the source tree can be +used to help test before/after results.) + +# METHODS + +\--ff *ref*, \--ff: *ref* *dest* +: fast-forward *dest* to match *ref*. If *dest* is not specified + and *ref* names a save, set *dest* to the save's branch. 
If + *dest* is not specified and *ref* names a branch or a tag, use the + same name for *dest*. + +\--append *ref*, \--append: *ref* *dest* +: append all of the commits represented by *ref* to *dest* as new + commits. If *ref* names a directory/tree, append a new commit for + that tree. If *dest* is not specified and *ref* names a save or + branch, set *dest* to the *ref* branch name. If *dest* is not + specified and *ref* names a tag, use the same name for *dest*. + +\--pick *ref*, \--pick: *ref* *dest* +: append the single commit named by *ref* to *dest* as a new commit. + If *dest* is not specified and *ref* names a save, set *dest* to + the *ref* branch name. If *dest* is not specified and *ref* names + a tag, use the same name for *dest*. + +\--force-pick *ref*, \--force-pick: *ref* *dest* +: do the same thing as `--pick`, but don't refuse to overwrite an + existing tag. + +\--new-tag *ref*, \--new-tag: *ref* *dest* +: create a *dest* tag for *ref*, but refuse to overwrite an existing + tag. If *dest* is not specified and *ref* names a tag, use the + same name for *dest*. + +\--replace *ref*, \--replace: *ref* *dest* +: clobber *dest* with *ref*, overwriting any existing tag, or + replacing any existing branch. If *dest* is not specified and + *ref* names a branch or tag, use the same name for *dest*. + +\--unnamed *ref* +: copy *ref* into the destination repository, without any name, + leaving a potentially dangling reference until/unless the object + named by *ref* is referred to some other way (cf. `bup tag`). + +# OPTIONS + +-s, \--source=*path* +: use *path* as the source repository, instead of the default. + +-r, \--remote=*host*:*path* +: store the indicated items on the given remote server. If *path* + is omitted, uses the default path on the remote server (you still + need to include the ':'). The connection to the remote server is + made with SSH. 
If you'd like to specify which port, user or + private key to use for the SSH connection, we recommend you use + the `~/.ssh/config` file. + +-c, \--print-commits +: for each updated branch, print the new git commit id. + +-t, \--print-trees +: for each updated branch, print the new git tree id of the + filesystem root. + +\--print-tags +: for each updated tag, print the new git id. + +-v, \--verbose +: increase verbosity (can be used more than once). With + `-v`, print the name of every item fetched, with `-vv` add + directory names, and with `-vvv` add every filename. + +\--bwlimit=*bytes/sec* +: don't transmit more than *bytes/sec* bytes per second to the + server. This can help avoid sucking up all your network + bandwidth. Use a suffix like k, M, or G to specify multiples of + 1024, 1024\*1024, 1024\*1024\*1024 respectively. + +-*#*, \--compress=*#* +: set the compression level to # (a value from 0-9, where + 9 is the highest and 0 is no compression). The default + is 1 (fast, loose compression) + +\--ignore-missing +: ignore missing objects encountered during a transfer. Currently + only supported by `--unnamed`, and potentially *dangerous*. + +# EXAMPLES + + # Update or copy the archives branch in src-repo to the local repository. + $ bup get -s src-repo --ff archives + + # Append a particular archives save to the pruned-archives branch. + $ bup get -s src-repo --pick: archives/2013-01-01-030405 pruned-archives + + # Update or copy the archives branch on remotehost to the local + # repository. + $ bup on remotehost get --ff archives + + # Update or copy the local branch archives to remotehost. + $ bup get -r remotehost: --ff archives + + # Update or copy the archives branch in src-repo to remotehost. + $ bup get -s src-repo -r remotehost: --ff archives + + # Update the archives-2 branch on remotehost to match archives. + # If archives-2 exists and is not an ancestor of archives, bup + # will refuse. 
+ $ bup get -r remotehost: --ff: archives archives-2 + + # Replace the contents of branch y with those of x. + $ bup get --replace: x y + + # Copy the latest local save from the archives branch to the + # remote tag foo. + $ bup get -r remotehost: --pick: archives/latest .tag/foo + + # Or if foo already exists: + $ bup get -r remotehost: --force-pick: archives/latest .tag/foo + + # Append foo (from above) to the local other-archives branch. + $ bup on remotehost get --append: .tag/foo other-archives + + # Append only the /home directory from archives/latest to only-home. + $ bup get -s "$BUP_DIR" --append: archives/latest/home only-home + +# SEE ALSO + +`bup-on`(1), `bup-tag`(1), `ssh_config`(5) + +# BUP + +Part of the `bup`(1) suite. diff -Nru bup-0.33.2/Documentation/bup-get.md bup-0.33.7/Documentation/bup-get.md --- bup-0.33.2/Documentation/bup-get.md 2023-07-01 20:08:43.000000000 +0000 +++ bup-0.33.7/Documentation/bup-get.md 1970-01-01 00:00:00.000000000 +0000 @@ -1,190 +0,0 @@ -% bup-get(1) Bup %BUP_VERSION% -% Rob Browning -% %BUP_DATE% - -# NAME - -bup-get - copy repository items (CAUTION: EXPERIMENTAL) - -# SYNOPSIS - -bup get \[-s *source-path*\] \[-r *host*:*path*\] OPTIONS \<(METHOD *ref* [*dest*])\>... - -# DESCRIPTION - -`bup get` copies the indicated *ref*s from the source repository to -the destination repository (respecting `--bup-dir` and `BUP_DIR`), -according to the specified METHOD, which may be one of `--ff`, -`--ff:`, `--append`, `--append:`, `--pick`, `--pick:`, `--force-pick`, -`--force-pick:`, `--new-tag`, `--new-tag:`, `--replace`, `--replace:`, -or `--unnamed`. See the EXAMPLES below for a quick introduction. - -The *ref* is the source repository reference of the object to be -fetched, and the *dest* is the optional destination reference. A -*dest* may only be specified for a METHOD whose name ends in a colon. 
-For example: - - bup get -s /source/repo --ff foo - bup get -s /source/repo --ff: foo/latest bar - bup get -s /source/repo --pick: foo/2010-10-10-101010 .tag/bar - -As a special case, if *ref* names the "latest" save symlink, then bup -will act exactly as if the save that "latest" points to had been -specified, rather than the "latest" symlink itself, so `bup get -foo/latest` will actually be interpreted as something like `bup get -foo/2013-01-01-030405`. - -In some situations `bup get` will evaluate a branch operation -according to whether or not it will be a "fast-forward" (which -requires that any existing destination branch be an ancestor of the -source). - -An existing destination tag can only be overwritten by a `--replace` -or `--force-pick`. - -When a new commit is created (i.e. via `--append`, `--pick`, etc.), it -will have the same author, author date, and message as the original, -but a committer and committer date corresponding to the current user -and time. - -If requested by the appropriate options, bup will print the commit, -tree, or tag hash for each destination reference updated. When -relevant, the tree hash will be printed before the commit hash. - -Local *ref*s can be pushed to a remote repository with the `--remote` -option, and remote *ref*s can be pulled into a local repository via -"bup on HOST get ...". See `bup-on`(1) and the EXAMPLES below for -further information. - -WARNING: This is one of the few bup commands that can modify your -archives in intentionally destructive ways. Though if an attempt to -join or restore the data you still care about succeeds after you've -run this command, then that's a fairly encouraging sign that it worked -correctly. (The dev/compare-trees command in the source tree can be -used to help test before/after results.) - -# METHODS - -\--ff *ref*, \--ff: *ref* *dest* -: fast-forward *dest* to match *ref*. If *dest* is not specified - and *ref* names a save, set *dest* to the save's branch. 
If - *dest* is not specified and *ref* names a branch or a tag, use the - same name for *dest*. - -\--append *ref*, \--append: *ref* *dest* -: append all of the commits represented by *ref* to *dest* as new - commits. If *ref* names a directory/tree, append a new commit for - that tree. If *dest* is not specified and *ref* names a save or - branch, set *dest* to the *ref* branch name. If *dest* is not - specified and *ref* names a tag, use the same name for *dest*. - -\--pick *ref*, \--pick: *ref* *dest* -: append the single commit named by *ref* to *dest* as a new commit. - If *dest* is not specified and *ref* names a save, set *dest* to - the *ref* branch name. If *dest* is not specified and *ref* names - a tag, use the same name for *dest*. - -\--force-pick *ref*, \--force-pick: *ref* *dest* -: do the same thing as `--pick`, but don't refuse to overwrite an - existing tag. - -\--new-tag *ref*, \--new-tag: *ref* *dest* -: create a *dest* tag for *ref*, but refuse to overwrite an existing - tag. If *dest* is not specified and *ref* names a tag, use the - same name for *dest*. - -\--replace *ref*, \--replace: *ref* *dest* -: clobber *dest* with *ref*, overwriting any existing tag, or - replacing any existing branch. If *dest* is not specified and - *ref* names a branch or tag, use the same name for *dest*. - -\--unnamed *ref* -: copy *ref* into the destination repository, without any name, - leaving a potentially dangling reference until/unless the object - named by *ref* is referred to some other way (cf. `bup tag`). - -# OPTIONS - --s, \--source=*path* -: use *path* as the source repository, instead of the default. - --r, \--remote=*host*:*path* -: store the indicated items on the given remote server. If *path* - is omitted, uses the default path on the remote server (you still - need to include the ':'). The connection to the remote server is - made with SSH. 
If you'd like to specify which port, user or - private key to use for the SSH connection, we recommend you use - the `~/.ssh/config` file. - --c, \--print-commits -: for each updated branch, print the new git commit id. - --t, \--print-trees -: for each updated branch, print the new git tree id of the - filesystem root. - -\--print-tags -: for each updated tag, print the new git id. - --v, \--verbose -: increase verbosity (can be used more than once). With - `-v`, print the name of every item fetched, with `-vv` add - directory names, and with `-vvv` add every filename. - -\--bwlimit=*bytes/sec* -: don't transmit more than *bytes/sec* bytes per second to the - server. This can help avoid sucking up all your network - bandwidth. Use a suffix like k, M, or G to specify multiples of - 1024, 1024\*1024, 1024\*1024\*1024 respectively. - --*#*, \--compress=*#* -: set the compression level to # (a value from 0-9, where - 9 is the highest and 0 is no compression). The default - is 1 (fast, loose compression) - -# EXAMPLES - - # Update or copy the archives branch in src-repo to the local repository. - $ bup get -s src-repo --ff archives - - # Append a particular archives save to the pruned-archives branch. - $ bup get -s src-repo --pick: archives/2013-01-01-030405 pruned-archives - - # Update or copy the archives branch on remotehost to the local - # repository. - $ bup on remotehost get --ff archives - - # Update or copy the local branch archives to remotehost. - $ bup get -r remotehost: --ff archives - - # Update or copy the archives branch in src-repo to remotehost. - $ bup get -s src-repo -r remotehost: --ff archives - - # Update the archives-2 branch on remotehost to match archives. - # If archives-2 exists and is not an ancestor of archives, bup - # will refuse. - $ bup get -r remotehost: --ff: archives archives-2 - - # Replace the contents of branch y with those of x. 
- $ bup get --replace: x y - - # Copy the latest local save from the archives branch to the - # remote tag foo. - $ bup get -r remotehost: --pick: archives/latest .tag/foo - - # Or if foo already exists: - $ bup get -r remotehost: --force-pick: archives/latest .tag/foo - - # Append foo (from above) to the local other-archives branch. - $ bup on remotehost get --append: .tag/foo other-archives - - # Append only the /home directory from archives/latest to only-home. - $ bup get -s "$BUP_DIR" --append: archives/latest/home only-home - -# SEE ALSO - -`bup-on`(1), `bup-tag`(1), `ssh_config`(5) - -# BUP - -Part of the `bup`(1) suite. diff -Nru bup-0.33.2/Documentation/bup-help.1.md bup-0.33.7/Documentation/bup-help.1.md --- bup-0.33.2/Documentation/bup-help.1.md 1970-01-01 00:00:00.000000000 +0000 +++ bup-0.33.7/Documentation/bup-help.1.md 2025-01-08 20:04:11.000000000 +0000 @@ -0,0 +1,28 @@ +% bup-help(1) Bup %BUP_VERSION% +% Avery Pennarun +% %BUP_DATE% + +# NAME + +bup-help - open the documentation for a given bup command + +# SYNOPSIS + +bup help \<command\> + +# DESCRIPTION + +`bup help <command>` opens the documentation for the given command. +This is currently equivalent to typing `man bup-<command>`. + + +# EXAMPLES + + $ bup help help + (Imagine that this man page was pasted below, + recursively. Since that would cause an endless loop + we include this silly remark instead. Chicken.) + +# BUP + +Part of the `bup`(1) suite. diff -Nru bup-0.33.2/Documentation/bup-help.md bup-0.33.7/Documentation/bup-help.md --- bup-0.33.2/Documentation/bup-help.md 2023-07-01 20:08:43.000000000 +0000 +++ bup-0.33.7/Documentation/bup-help.md 1970-01-01 00:00:00.000000000 +0000 @@ -1,28 +0,0 @@ -% bup-help(1) Bup %BUP_VERSION% -% Avery Pennarun -% %BUP_DATE% - -# NAME - -bup-help - open the documentation for a given bup command - -# SYNOPSIS - -bup help \<command\> - -# DESCRIPTION - -`bup help <command>` opens the documentation for the given command. -This is currently equivalent to typing `man bup-<command>`.
- - -# EXAMPLES - - $ bup help help - (Imagine that this man page was pasted below, - recursively. Since that would cause an endless loop - we include this silly remark instead. Chicken.) - -# BUP - -Part of the `bup`(1) suite. diff -Nru bup-0.33.2/Documentation/bup-import-duplicity.1.md bup-0.33.7/Documentation/bup-import-duplicity.1.md --- bup-0.33.2/Documentation/bup-import-duplicity.1.md 1970-01-01 00:00:00.000000000 +0000 +++ bup-0.33.7/Documentation/bup-import-duplicity.1.md 2025-01-08 20:04:11.000000000 +0000 @@ -0,0 +1,52 @@ +% bup-import-duplicity(1) Bup %BUP_VERSION% +% Zoran Zaric , Rob Browning +% %BUP_DATE% + +# NAME + +bup-import-duplicity - import duplicity backups + +# WARNING + +bup-import-duplicity is **EXPERIMENTAL** (proceed with caution) + +# SYNOPSIS + +bup import-duplicity [-n] \<source-url\> \<save-name\> + +# DESCRIPTION + +`bup import-duplicity` imports all of the duplicity backups at +`source-url` into `bup` via `bup save -n save-name`. The bup saves +will have the same timestamps (via `bup save --date`) as the original +backups. + +Because this command operates by restoring each duplicity backup to a +temporary directory, the extent to which the metadata is preserved +will depend on the characteristics of the underlying filesystem, +whether or not you run `import-duplicity` as root (or under +`fakeroot`(1)), etc. + +Note that this command will use [`mkdtemp`][mkdtemp] to create +temporary directories, which means that it should respect any +`TMPDIR`, `TEMP`, or `TMP` environment variable settings. Make sure +that the relevant filesystem has enough space for the largest +duplicity backup being imported. + +Since all invocations of duplicity use a temporary `--archive-dir`, +`import-duplicity` should not affect ~/.cache/duplicity. + +# OPTIONS + +-n, \--dry-run +: don't do anything; just print out what would be done + +# EXAMPLES + + $ bup import-duplicity file:///duplicity/src/ legacy-duplicity + +# BUP + +Part of the `bup`(1) suite.
+ +[mkdtemp]: https://docs.python.org/3/library/tempfile.html#tempfile.mkdtemp diff -Nru bup-0.33.2/Documentation/bup-import-duplicity.md bup-0.33.7/Documentation/bup-import-duplicity.md --- bup-0.33.2/Documentation/bup-import-duplicity.md 2023-07-01 20:08:43.000000000 +0000 +++ bup-0.33.7/Documentation/bup-import-duplicity.md 1970-01-01 00:00:00.000000000 +0000 @@ -1,52 +0,0 @@ -% bup-import-duplicity(1) Bup %BUP_VERSION% -% Zoran Zaric , Rob Browning -% %BUP_DATE% - -# NAME - -bup-import-duplicity - import duplicity backups - -# WARNING - -bup-import-duplicity is **EXPERIMENTAL** (proceed with caution) - -# SYNOPSIS - -bup import-duplicity [-n] \<source-url\> \<save-name\> - -# DESCRIPTION - -`bup import-duplicity` imports all of the duplicity backups at -`source-url` into `bup` via `bup save -n save-name`. The bup saves -will have the same timestamps (via `bup save --date`) as the original -backups. - -Because this command operates by restoring each duplicity backup to a -temporary directory, the extent to which the metadata is preserved -will depend on the characteristics of the underlying filesystem, -whether or not you run `import-duplicity` as root (or under -`fakeroot`(1)), etc. - -Note that this command will use [`mkdtemp`][mkdtemp] to create -temporary directories, which means that it should respect any -`TEMPDIR`, `TEMP`, or `TMP` environment variable settings. Make sure -that the relevant filesystem has enough space for the largest -duplicity backup being imported. - -Since all invocations of duplicity use a temporary `--archive-dir`, -`import-duplicity` should not affect ~/.cache/duplicity. - -# OPTIONS - --n, \--dry-run -: don't do anything; just print out what would be done - -# EXAMPLES - - $ bup import-duplicity file:///duplicity/src/ legacy-duplicity - -# BUP - -Part of the `bup`(1) suite.
- -[mkdtemp]: https://docs.python.org/3/library/tempfile.html#tempfile.mkdtemp diff -Nru bup-0.33.2/Documentation/bup-import-rdiff-backup.1.md bup-0.33.7/Documentation/bup-import-rdiff-backup.1.md --- bup-0.33.2/Documentation/bup-import-rdiff-backup.1.md 1970-01-01 00:00:00.000000000 +0000 +++ bup-0.33.7/Documentation/bup-import-rdiff-backup.1.md 2025-01-08 20:04:11.000000000 +0000 @@ -0,0 +1,30 @@ +% bup-import-rdiff-backup(1) Bup %BUP_VERSION% +% Zoran Zaric +% %BUP_DATE% + +# NAME + +bup-import-rdiff-backup - import a rdiff-backup archive + +# SYNOPSIS + +bup import-rdiff-backup [-n] + +# DESCRIPTION + +`bup import-rdiff-backup` imports a rdiff-backup archive. The +timestamps for the backups are preserved and the path to +the rdiff-backup archive is stripped from the paths. + +# OPTIONS + +-n, \--dry-run +: don't do anything just print out what would be done + +# EXAMPLES + + $ bup import-rdiff-backup /.snapshots legacy-rdiff-backup + +# BUP + +Part of the `bup`(1) suite. diff -Nru bup-0.33.2/Documentation/bup-import-rdiff-backup.md bup-0.33.7/Documentation/bup-import-rdiff-backup.md --- bup-0.33.2/Documentation/bup-import-rdiff-backup.md 2023-07-01 20:08:43.000000000 +0000 +++ bup-0.33.7/Documentation/bup-import-rdiff-backup.md 1970-01-01 00:00:00.000000000 +0000 @@ -1,30 +0,0 @@ -% bup-import-rdiff-backup(1) Bup %BUP_VERSION% -% Zoran Zaric -% %BUP_DATE% - -# NAME - -bup-import-rdiff-backup - import a rdiff-backup archive - -# SYNOPSIS - -bup import-rdiff-backup [-n] - -# DESCRIPTION - -`bup import-rdiff-backup` imports a rdiff-backup archive. The -timestamps for the backups are preserved and the path to -the rdiff-backup archive is stripped from the paths. - -# OPTIONS - --n, \--dry-run -: don't do anything just print out what would be done - -# EXAMPLES - - $ bup import-rdiff-backup /.snapshots legacy-rdiff-backup - -# BUP - -Part of the `bup`(1) suite. 
diff -Nru bup-0.33.2/Documentation/bup-import-rsnapshot.1.md bup-0.33.7/Documentation/bup-import-rsnapshot.1.md --- bup-0.33.2/Documentation/bup-import-rsnapshot.1.md 1970-01-01 00:00:00.000000000 +0000 +++ bup-0.33.7/Documentation/bup-import-rsnapshot.1.md 2025-01-08 20:04:11.000000000 +0000 @@ -0,0 +1,35 @@ +% bup-import-rsnapshot(1) Bup %BUP_VERSION% +% Zoran Zaric +% %BUP_DATE% + +# NAME + +bup-import-rsnapshot - import a rsnapshot archive + +# SYNOPSIS + +bup import-rsnapshot [-n] \<path to snapshot_root\> [\<backuptarget\>] + +# DESCRIPTION + +`bup import-rsnapshot` imports an rsnapshot archive. The +timestamps for the backups are preserved and the path to +the rsnapshot archive is stripped from the paths. + +`bup import-rsnapshot` either imports the whole archive +or imports all backups only for a given backuptarget. + +# OPTIONS + +-n, \--dry-run +: don't do anything just print out what would be done + +# EXAMPLES + + $ bup import-rsnapshot /.snapshots + + $ bup import-rsnapshot /.snapshots host1 + +# BUP + +Part of the `bup`(1) suite. diff -Nru bup-0.33.2/Documentation/bup-import-rsnapshot.md bup-0.33.7/Documentation/bup-import-rsnapshot.md --- bup-0.33.2/Documentation/bup-import-rsnapshot.md 2023-07-01 20:08:43.000000000 +0000 +++ bup-0.33.7/Documentation/bup-import-rsnapshot.md 1970-01-01 00:00:00.000000000 +0000 @@ -1,35 +0,0 @@ -% bup-import-rsnapshot(1) Bup %BUP_VERSION% -% Zoran Zaric -% %BUP_DATE% - -# NAME - -bup-import-rsnapshot - import a rsnapshot archive - -# SYNOPSIS - -bup import-rsnapshot [-n] \<path to snapshot_root\> [\<backuptarget\>] - -# SYNOPSIS - -`bup import-rsnapshot` imports an rsnapshot archive. The -timestamps for the backups are preserved and the path to -the rsnapshot archive is stripped from the paths. - -`bup import-rsnapshot` either imports the whole archive -or imports all backups only for a given backuptarget.
- -# OPTIONS - --n, \--dry-run -: don't do anything just print out what would be done - -# EXAMPLES - - $ bup import-rsnapshot /.snapshots - - $ bup import-rsnapshot /.snapshots host1 - -# BUP - -Part of the `bup`(1) suite. diff -Nru bup-0.33.2/Documentation/bup-index.1.md bup-0.33.7/Documentation/bup-index.1.md --- bup-0.33.2/Documentation/bup-index.1.md 1970-01-01 00:00:00.000000000 +0000 +++ bup-0.33.7/Documentation/bup-index.1.md 2025-01-08 20:04:11.000000000 +0000 @@ -0,0 +1,208 @@ +% bup-index(1) Bup %BUP_VERSION% +% Avery Pennarun +% %BUP_DATE% + +# NAME + +bup-index - print and/or update the bup filesystem index + +# SYNOPSIS + +bup index \<-p|-m|-s|-u|\--clear|\--check\> [-H] [-l] [-x] [\--fake-valid] +[\--no-check-device] [\--fake-invalid] [-f *indexfile*] [\--exclude *path*] +[\--exclude-from *filename*] [\--exclude-rx *pattern*] +[\--exclude-rx-from *filename*] [-v] \ + +# DESCRIPTION + +`bup index` manipulates the filesystem index, which is a cache of +absolute paths and their metadata (attributes, SHA-1 hashes, etc.). +The bup index is similar in function to the `git`(1) index, and the +default index can be found in `$BUP_DIR/bupindex`. + +Creating a backup in bup consists of two steps: updating +the index with `bup index`, then actually backing up the +files (or a subset of the files) with `bup save`. The +separation exists for these reasons: + +1. There is more than one way to generate a list of files +that need to be backed up. For example, you might want to +use `inotify`(7) or `dnotify`(7). + +2. Even if you back up files to multiple destinations (for +added redundancy), the file names, attributes, and hashes +will be the same each time. Thus, you can save the trouble +of repeatedly re-generating the list of files for each +backup set. + +3. You may want to use the data tracked by bup index for +other purposes (such as speeding up other programs that +need the same information). + +# NOTES + +At the moment, bup will ignore Linux attributes (cf. 
chattr(1) and +lsattr(1)) on some systems (any big-endian systems where sizeof(long) +< sizeof(int)). This is because the Linux kernel and FUSE currently +disagree over the type of the attr system call arguments, and so on +big-endian systems there's no way to get the results without the risk +of stack corruption (http://lwn.net/Articles/575846/). In these +situations, bup will print a warning the first time Linux attrs are +relevant during any index/save/restore operation. + +bup makes accommodations for the expected "worst-case" filesystem +timestamp resolution -- currently one second; examples include VFAT, +ext2, ext3, small ext4, etc. Since bup cannot know the filesystem +timestamp resolution, and could be traversing multiple filesystems +during any given run, it always assumes that the resolution may be no +better than one second. + +As a practical matter, this means that index updates are a bit +imprecise, and so `bup save` may occasionally record filesystem +changes that you didn't expect. That's because, during an index +update, if bup encounters a path whose actual timestamps are more +recent than one second before the update started, bup will set the +index timestamps for that path (mtime and ctime) to exactly one second +before the run, -- effectively capping those values. + +This ensures that no subsequent changes to those paths can result in +timestamps that are identical to those in the index. If that were +possible, bup could overlook the modifications. + +You can see the effect of this behavior in this example (assume that +less than one second elapses between the initial file creation and +first index run): + + $ touch src/1 src/2 + # A "sleep 1" here would avoid the unexpected save. + $ bup index src + $ bup save -n src src # Saves 1 and 2. + $ date > src/1 + $ bup index src + $ date > src/2 # Not indexed. + $ bup save -n src src # But src/2 is saved anyway. 
+ +Strictly speaking, bup should not notice the change to src/2, but it +does, due to the accommodations described above. + +# MODES + +-u, \--update +: recursively update the index for the given paths and their + descendants. One or more paths must be specified, and if a path + ends with a symbolic link, the link itself will be indexed, not + the target. If no mode option is given, `--update` is the + default, and paths may be excluded by the `--exclude`, + `--exclude-rx`, and `--one-file-system` options. + +-p, \--print +: print the contents of the index. If paths are + given, shows the given entries and their descendants. + If no paths are given, shows the entries starting + at the current working directory (.). + +-m, \--modified +: prints only files which are marked as modified (ie. + changed since the most recent backup) in the index. + Implies `-p`. + +-s, \--status +: prepend a status code (A, M, D, or space) before each + path. Implies `-p`. The codes mean, respectively, + that a file is marked in the index as added, modified, + deleted, or unchanged since the last backup. + +\--check +: carefully check index file integrity before and after + updating. Mostly useful for automated tests. + +\--clear +: clear the default index. + + +# OPTIONS + +-H, \--hash +: for each file printed, prepend the most recently + recorded hash code. The hash code is normally + generated by `bup save`. For objects which have not yet + been backed up, the hash code will be + 0000000000000000000000000000000000000000. Note that + the hash code is printed even if the file is known to + be modified or deleted in the index (ie. the file on + the filesystem no longer matches the recorded hash). + If this is a problem for you, use `--status`. + +-l, \--long +: print more information about each file, in a similar + format to the `-l` option to `ls`(1). 
+ +-x, \--xdev, \--one-file-system +: don't cross filesystem boundaries when traversing the + filesystem -- though as with tar and rsync, the mount points + themselves will still be indexed. Only applicable if you're using + `-u`. + +\--fake-valid +: mark specified paths as up-to-date even if they + aren't. This can be useful for testing, or to avoid + unnecessarily backing up files that you know are + boring. + +\--fake-invalid +: mark specified paths as not up-to-date, forcing the + next "bup save" run to re-check their contents. + +-f, \--indexfile=*indexfile* +: use a different index filename instead of + `$BUP_DIR/bupindex`. + +\--exclude=*path* +: exclude *path* from the backup (may be repeated). + +\--exclude-from=*filename* +: read --exclude paths from *filename*, one path per-line (may be + repeated). Ignore completely empty lines. + +\--exclude-rx=*pattern* +: exclude any path matching *pattern*, which must be a Python regular + expression (http://docs.python.org/library/re.html). The pattern + will be compared against the full path, without anchoring, so + "x/y" will match "ox/yard" or "box/yards". To exclude the + contents of /tmp, but not the directory itself, use + "^/tmp/.". (may be repeated) + + Examples: + + * '/foo$' - exclude any file named foo + * '/foo/$' - exclude any directory named foo + * '/foo/.' - exclude the content of any directory named foo + * '^/tmp/.' - exclude root-level /tmp's content, but not /tmp itself + +\--exclude-rx-from=*filename* +: read --exclude-rx patterns from *filename*, one pattern per-line + (may be repeated). Ignore completely empty lines. + +\--no-check-device +: don't mark an entry invalid if the device number (stat(2) st_dev) + changes. This can be useful when indexing remote, automounted, or + snapshot filesystems (LVM, Btrfs, etc.), where the device number + isn't fixed. + +-v, \--verbose +: increase log output during update (can be used more + than once). 
With one `-v`, print each directory as it + is updated; with two `-v`, print each file too. + + +# EXAMPLES + bup index -vux /etc /var /usr + + +# SEE ALSO + +`bup-save`(1), `bup-drecurse`(1), `bup-on`(1) + +# BUP + +Part of the `bup`(1) suite. diff -Nru bup-0.33.2/Documentation/bup-index.md bup-0.33.7/Documentation/bup-index.md --- bup-0.33.2/Documentation/bup-index.md 2023-07-01 20:08:43.000000000 +0000 +++ bup-0.33.7/Documentation/bup-index.md 1970-01-01 00:00:00.000000000 +0000 @@ -1,208 +0,0 @@ -% bup-index(1) Bup %BUP_VERSION% -% Avery Pennarun -% %BUP_DATE% - -# NAME - -bup-index - print and/or update the bup filesystem index - -# SYNOPSIS - -bup index \<-p|-m|-s|-u|\--clear|\--check\> [-H] [-l] [-x] [\--fake-valid] -[\--no-check-device] [\--fake-invalid] [-f *indexfile*] [\--exclude *path*] -[\--exclude-from *filename*] [\--exclude-rx *pattern*] -[\--exclude-rx-from *filename*] [-v] \ - -# DESCRIPTION - -`bup index` manipulates the filesystem index, which is a cache of -absolute paths and their metadata (attributes, SHA-1 hashes, etc.). -The bup index is similar in function to the `git`(1) index, and the -default index can be found in `$BUP_DIR/bupindex`. - -Creating a backup in bup consists of two steps: updating -the index with `bup index`, then actually backing up the -files (or a subset of the files) with `bup save`. The -separation exists for these reasons: - -1. There is more than one way to generate a list of files -that need to be backed up. For example, you might want to -use `inotify`(7) or `dnotify`(7). - -2. Even if you back up files to multiple destinations (for -added redundancy), the file names, attributes, and hashes -will be the same each time. Thus, you can save the trouble -of repeatedly re-generating the list of files for each -backup set. - -3. You may want to use the data tracked by bup index for -other purposes (such as speeding up other programs that -need the same information). 
- -# NOTES - -At the moment, bup will ignore Linux attributes (cf. chattr(1) and -lsattr(1)) on some systems (any big-endian systems where sizeof(long) -< sizeof(int)). This is because the Linux kernel and FUSE currently -disagree over the type of the attr system call arguments, and so on -big-endian systems there's no way to get the results without the risk -of stack corruption (http://lwn.net/Articles/575846/). In these -situations, bup will print a warning the first time Linux attrs are -relevant during any index/save/restore operation. - -bup makes accommodations for the expected "worst-case" filesystem -timestamp resolution -- currently one second; examples include VFAT, -ext2, ext3, small ext4, etc. Since bup cannot know the filesystem -timestamp resolution, and could be traversing multiple filesystems -during any given run, it always assumes that the resolution may be no -better than one second. - -As a practical matter, this means that index updates are a bit -imprecise, and so `bup save` may occasionally record filesystem -changes that you didn't expect. That's because, during an index -update, if bup encounters a path whose actual timestamps are more -recent than one second before the update started, bup will set the -index timestamps for that path (mtime and ctime) to exactly one second -before the run, -- effectively capping those values. - -This ensures that no subsequent changes to those paths can result in -timestamps that are identical to those in the index. If that were -possible, bup could overlook the modifications. - -You can see the effect of this behavior in this example (assume that -less than one second elapses between the initial file creation and -first index run): - - $ touch src/1 src/2 - # A "sleep 1" here would avoid the unexpected save. - $ bup index src - $ bup save -n src src # Saves 1 and 2. - $ date > src/1 - $ bup index src - $ date > src/2 # Not indexed. - $ bup save -n src src # But src/2 is saved anyway. 
- -Strictly speaking, bup should not notice the change to src/2, but it -does, due to the accommodations described above. - -# MODES - --u, \--update -: recursively update the index for the given paths and their - descendants. One or more paths must be specified, and if a path - ends with a symbolic link, the link itself will be indexed, not - the target. If no mode option is given, `--update` is the - default, and paths may be excluded by the `--exclude`, - `--exclude-rx`, and `--one-file-system` options. - --p, \--print -: print the contents of the index. If paths are - given, shows the given entries and their descendants. - If no paths are given, shows the entries starting - at the current working directory (.). - --m, \--modified -: prints only files which are marked as modified (ie. - changed since the most recent backup) in the index. - Implies `-p`. - --s, \--status -: prepend a status code (A, M, D, or space) before each - path. Implies `-p`. The codes mean, respectively, - that a file is marked in the index as added, modified, - deleted, or unchanged since the last backup. - -\--check -: carefully check index file integrity before and after - updating. Mostly useful for automated tests. - -\--clear -: clear the default index. - - -# OPTIONS - --H, \--hash -: for each file printed, prepend the most recently - recorded hash code. The hash code is normally - generated by `bup save`. For objects which have not yet - been backed up, the hash code will be - 0000000000000000000000000000000000000000. Note that - the hash code is printed even if the file is known to - be modified or deleted in the index (ie. the file on - the filesystem no longer matches the recorded hash). - If this is a problem for you, use `--status`. - --l, \--long -: print more information about each file, in a similar - format to the `-l` option to `ls`(1). 
- --x, \--xdev, \--one-file-system -: don't cross filesystem boundaries when traversing the - filesystem -- though as with tar and rsync, the mount points - themselves will still be indexed. Only applicable if you're using - `-u`. - -\--fake-valid -: mark specified paths as up-to-date even if they - aren't. This can be useful for testing, or to avoid - unnecessarily backing up files that you know are - boring. - -\--fake-invalid -: mark specified paths as not up-to-date, forcing the - next "bup save" run to re-check their contents. - --f, \--indexfile=*indexfile* -: use a different index filename instead of - `$BUP_DIR/bupindex`. - -\--exclude=*path* -: exclude *path* from the backup (may be repeated). - -\--exclude-from=*filename* -: read --exclude paths from *filename*, one path per-line (may be - repeated). Ignore completely empty lines. - -\--exclude-rx=*pattern* -: exclude any path matching *pattern*, which must be a Python regular - expression (http://docs.python.org/library/re.html). The pattern - will be compared against the full path, without anchoring, so - "x/y" will match "ox/yard" or "box/yards". To exclude the - contents of /tmp, but not the directory itself, use - "^/tmp/.". (may be repeated) - - Examples: - - * '/foo$' - exclude any file named foo - * '/foo/$' - exclude any directory named foo - * '/foo/.' - exclude the content of any directory named foo - * '^/tmp/.' - exclude root-level /tmp's content, but not /tmp itself - -\--exclude-rx-from=*filename* -: read --exclude-rx patterns from *filename*, one pattern per-line - (may be repeated). Ignore completely empty lines. - -\--no-check-device -: don't mark an entry invalid if the device number (stat(2) st_dev) - changes. This can be useful when indexing remote, automounted, or - snapshot filesystems (LVM, Btrfs, etc.), where the device number - isn't fixed. - --v, \--verbose -: increase log output during update (can be used more - than once). 
With one `-v`, print each directory as it - is updated; with two `-v`, print each file too. - - -# EXAMPLES - bup index -vux /etc /var /usr - - -# SEE ALSO - -`bup-save`(1), `bup-drecurse`(1), `bup-on`(1) - -# BUP - -Part of the `bup`(1) suite. diff -Nru bup-0.33.2/Documentation/bup-init.1.md bup-0.33.7/Documentation/bup-init.1.md --- bup-0.33.2/Documentation/bup-init.1.md 1970-01-01 00:00:00.000000000 +0000 +++ bup-0.33.7/Documentation/bup-init.1.md 2025-01-08 20:04:11.000000000 +0000 @@ -0,0 +1,40 @@ +% bup-init(1) Bup %BUP_VERSION% +% Avery Pennarun +% %BUP_DATE% + +# NAME + +bup-init - initialize a bup repository + +# SYNOPSIS + +[BUP_DIR=*localpath*] bup init [-r *host*:*path*] + +# DESCRIPTION + +`bup init` initializes your local bup repository. By default, BUP_DIR +is `~/.bup`. + +# OPTIONS + +-r, \--remote=*host*:*path* +: Initialize not only the local repository, but also the + remote repository given by the *host* and *path*. This is + not necessary if you intend to back up to the default + location on the server (ie. a blank *path*). The connection to the + remote server is made with SSH. If you'd like to specify which port, user + or private key to use for the SSH connection, we recommend you use the + `~/.ssh/config` file. + + +# EXAMPLES + bup init + + +# SEE ALSO + +`bup-fsck`(1), `ssh_config`(5) + +# BUP + +Part of the `bup`(1) suite. diff -Nru bup-0.33.2/Documentation/bup-init.md bup-0.33.7/Documentation/bup-init.md --- bup-0.33.2/Documentation/bup-init.md 2023-07-01 20:08:43.000000000 +0000 +++ bup-0.33.7/Documentation/bup-init.md 1970-01-01 00:00:00.000000000 +0000 @@ -1,40 +0,0 @@ -% bup-init(1) Bup %BUP_VERSION% -% Avery Pennarun -% %BUP_DATE% - -# NAME - -bup-init - initialize a bup repository - -# SYNOPSIS - -[BUP_DIR=*localpath*] bup init [-r *host*:*path*] - -# DESCRIPTION - -`bup init` initializes your local bup repository. By default, BUP_DIR -is `~/.bup`. 
- -# OPTIONS - --r, \--remote=*host*:*path* -: Initialize not only the local repository, but also the - remote repository given by the *host* and *path*. This is - not necessary if you intend to back up to the default - location on the server (ie. a blank *path*). The connection to the - remote server is made with SSH. If you'd like to specify which port, user - or private key to use for the SSH connection, we recommend you use the - `~/.ssh/config` file. - - -# EXAMPLES - bup init - - -# SEE ALSO - -`bup-fsck`(1), `ssh_config`(5) - -# BUP - -Part of the `bup`(1) suite. diff -Nru bup-0.33.2/Documentation/bup-join.1.md bup-0.33.7/Documentation/bup-join.1.md --- bup-0.33.2/Documentation/bup-join.1.md 1970-01-01 00:00:00.000000000 +0000 +++ bup-0.33.7/Documentation/bup-join.1.md 2025-01-08 20:04:11.000000000 +0000 @@ -0,0 +1,55 @@ +% bup-join(1) Bup %BUP_VERSION% +% Avery Pennarun +% %BUP_DATE% + +# NAME + +bup-join - concatenate files from a bup repository + +# SYNOPSIS + +bup join [-r *host*:*path*] [refs or hashes...] + +# DESCRIPTION + +`bup join` is roughly the opposite operation to +`bup-split`(1). You can use it to retrieve the contents of +a file from a local or remote bup repository. + +The supplied list of refs or hashes can be in any format +accepted by `git`(1), including branch names, commit ids, +tree ids, or blob ids. + +If no refs or hashes are given on the command line, `bup +join` reads them from stdin instead. + +# OPTIONS + +-r, \--remote=*host*:*path* +: Retrieves objects from the given remote repository instead of the + local one. *path* may be blank, in which case the default remote + repository is used. The connection to the remote server is made + with SSH. If you'd like to specify which port, user or private + key to use for the SSH connection, we recommend you use the + `~/.ssh/config` file. Even though the data source is remote, a + local bup repository is still required. 
+ +# EXAMPLES + # split and then rejoin a file using its tree id + TREE=$(tar -cvf - /etc | bup split -t) + bup join $TREE | tar -tf - + + # make two backups, then get the second-most-recent. + # mybackup~1 is git(1) notation for the second most + # recent commit on the branch named mybackup. + tar -cvf - /etc | bup split -n mybackup + tar -cvf - /etc | bup split -n mybackup + bup join mybackup~1 | tar -tf - + +# SEE ALSO + +`bup-split`(1), `bup-save`(1), `bup-cat-file`, `ssh_config`(5) + +# BUP + +Part of the `bup`(1) suite. diff -Nru bup-0.33.2/Documentation/bup-join.md bup-0.33.7/Documentation/bup-join.md --- bup-0.33.2/Documentation/bup-join.md 2023-07-01 20:08:43.000000000 +0000 +++ bup-0.33.7/Documentation/bup-join.md 1970-01-01 00:00:00.000000000 +0000 @@ -1,55 +0,0 @@ -% bup-join(1) Bup %BUP_VERSION% -% Avery Pennarun -% %BUP_DATE% - -# NAME - -bup-join - concatenate files from a bup repository - -# SYNOPSIS - -bup join [-r *host*:*path*] [refs or hashes...] - -# DESCRIPTION - -`bup join` is roughly the opposite operation to -`bup-split`(1). You can use it to retrieve the contents of -a file from a local or remote bup repository. - -The supplied list of refs or hashes can be in any format -accepted by `git`(1), including branch names, commit ids, -tree ids, or blob ids. - -If no refs or hashes are given on the command line, `bup -join` reads them from stdin instead. - -# OPTIONS - --r, \--remote=*host*:*path* -: Retrieves objects from the given remote repository instead of the - local one. *path* may be blank, in which case the default remote - repository is used. The connection to the remote server is made - with SSH. If you'd like to specify which port, user or private - key to use for the SSH connection, we recommend you use the - `~/.ssh/config` file. Even though the data source is remote, a - local bup repository is still required. 
- -# EXAMPLES - # split and then rejoin a file using its tree id - TREE=$(tar -cvf - /etc | bup split -t) - bup join $TREE | tar -tf - - - # make two backups, then get the second-most-recent. - # mybackup~1 is git(1) notation for the second most - # recent commit on the branch named mybackup. - tar -cvf - /etc | bup split -n mybackup - tar -cvf - /etc | bup split -n mybackup - bup join mybackup~1 | tar -tf - - -# SEE ALSO - -`bup-split`(1), `bup-save`(1), `bup-cat-file`, `ssh_config`(5) - -# BUP - -Part of the `bup`(1) suite. diff -Nru bup-0.33.2/Documentation/bup-ls.1.md bup-0.33.7/Documentation/bup-ls.1.md --- bup-0.33.2/Documentation/bup-ls.1.md 1970-01-01 00:00:00.000000000 +0000 +++ bup-0.33.7/Documentation/bup-ls.1.md 2025-01-08 20:04:11.000000000 +0000 @@ -0,0 +1,90 @@ +% bup-ls(1) Bup %BUP_VERSION% +% Avery Pennarun +% %BUP_DATE% + +# NAME + +bup-ls - list the contents of a bup repository + +# SYNOPSIS + +bup ls [-r *host*:[*path*]] [OPTION...] \ + +# DESCRIPTION + +`bup ls` lists files and directories in your bup repository +using the same directory hierarchy as they would have with +`bup-fuse`(1). + +The top level directory contains the branch (corresponding to +the `-n` option in `bup save`), the next level is the date +of the backup, and subsequent levels correspond to files in +the backup. + +When `bup ls` is asked to output on a tty, and `-l` is not specified, +it formats the output in columns so it can list as much as possible in +as few lines as possible. However, when `-l` is specified or bup is +asked to output to something other than a tty (say you pipe the output +to another command, or you redirect it to a file), it will print one +file name per line. This makes the listing easier to parse with +external tools. + +Note that `bup ls` doesn't show hidden files by default and one needs to use +the `-a` option to show them. Files are hidden when their name begins with a +dot. 
For example, on the topmost level, the special directories named `.commit` +and `.tag` are hidden directories. + +Once you have identified the file you want using `bup ls`, +you can view its contents using `bup join` or `git show`. + +# OPTIONS + +-r, \--remote=*host*:[*path*] +: list information for the repository at *path* on the indicated + *host*. If *path* is omitted, uses the default path on the remote + server (you still need to include the ':'). The connection to the + remote server will be made by SSH. If you'd like to specify the + port, user, or private key, we recommend you use the + `~/.ssh/config` file (`ssh_config(5)`). + +-s, \--hash +: show hash for each file/directory. + +-a, \--all +: show hidden files. + +-A, \--almost-all +: show hidden files, except "." and "..". + +-d, \--directory +: show information about directories themselves, rather than their + contents, and don't follow symlinks. + +-l +: provide a detailed, long listing for each item. + +-F, \--classify +: append type indicator: dir/, symlink@, fifo|, socket=, and executable*. + +\--file-type +: append type indicator: dir/, symlink@, fifo|, socket=. + +\--human-readable +: print human readable file sizes (e.g. 3.9K, 4.7M). + +\--numeric-ids +: display numeric IDs (user, group, etc.) rather than names. + +# EXAMPLES + bup ls /myserver/latest/etc/profile + + bup ls -a / + +# SEE ALSO + +`bup-join`(1), `bup-fuse`(1), `bup-ftp`(1), `bup-save`(1), +`git-show`(1), `ssh_config`(5) + +# BUP + +Part of the `bup`(1) suite. diff -Nru bup-0.33.2/Documentation/bup-ls.md bup-0.33.7/Documentation/bup-ls.md --- bup-0.33.2/Documentation/bup-ls.md 2023-07-01 20:08:43.000000000 +0000 +++ bup-0.33.7/Documentation/bup-ls.md 1970-01-01 00:00:00.000000000 +0000 @@ -1,90 +0,0 @@ -% bup-ls(1) Bup %BUP_VERSION% -% Avery Pennarun -% %BUP_DATE% - -# NAME - -bup-ls - list the contents of a bup repository - -# SYNOPSIS - -bup ls [-r *host*:[*path*]] [OPTION...]
\ - -# DESCRIPTION - -`bup ls` lists files and directories in your bup repository -using the same directory hierarchy as they would have with -`bup-fuse`(1). - -The top level directory contains the branch (corresponding to -the `-n` option in `bup save`), the next level is the date -of the backup, and subsequent levels correspond to files in -the backup. - -When `bup ls` is asked to output on a tty, and `-l` is not specified, -it formats the output in columns so it can list as much as possible in -as few lines as possible. However, when `-l` is specified or bup is -asked to output to something other than a tty (say you pipe the output -to another command, or you redirect it to a file), it will print one -file name per line. This makes the listing easier to parse with -external tools. - -Note that `bup ls` doesn't show hidden files by default and one needs to use -the `-a` option to show them. Files are hidden when their name begins with a -dot. For example, on the topmost level, the special directories named `.commit` -and `.tag` are hidden directories. - -Once you have identified the file you want using `bup ls`, -you can view its contents using `bup join` or `git show`. - -# OPTIONS - --r, \--remote=*host*:[*path*] -: list information for the repository at *path* on the indicated - *host*. If *path* is omitted, uses the default path on the remote - server (you still need to include the ':'). The connection to the - remote server will be made by SSH. If you'd like to specify the - port, user, or private key, we recommend you use the - `~/.ssh/config` file (`ssh_config(5)`). - --s, \--hash -: show hash for each file/directory. - --a, \--all -: show hidden files. - --A, \--almost-all -: show hidden files, except "." and "..". - --d, \--directory -: show information about directories themselves, rather than their - contents, and don't follow symlinks. - --l -: provide a detailed, long listing for each item. 
- --F, \--classify -: append type indicator: dir/, symlink@, fifo|, socket=, and executable*. - -\--file-type -: append type indicator: dir/, symlink@, fifo|, socket=. - -\--human-readable -: print human readable file sizes (i.e. 3.9K, 4.7M). - -\--numeric-ids -: display numeric IDs (user, group, etc.) rather than names. - -# EXAMPLES - bup ls /myserver/latest/etc/profile - - bup ls -a / - -# SEE ALSO - -`bup-join`(1), `bup-fuse`(1), `bup-ftp`(1), `bup-save`(1), -`git-show`(1), `ssh_config`(5) - -# BUP - -Part of the `bup`(1) suite. diff -Nru bup-0.33.2/Documentation/bup-margin.1.md bup-0.33.7/Documentation/bup-margin.1.md --- bup-0.33.2/Documentation/bup-margin.1.md 1970-01-01 00:00:00.000000000 +0000 +++ bup-0.33.7/Documentation/bup-margin.1.md 2025-01-08 20:04:11.000000000 +0000 @@ -0,0 +1,79 @@ +% bup-margin(1) Bup %BUP_VERSION% +% Avery Pennarun +% %BUP_DATE% + +# NAME + +bup-margin - figure out your deduplication safety margin + +# SYNOPSIS + +bup margin [options...] + +# DESCRIPTION + +`bup margin` iterates through all objects in your bup +repository, calculating the largest number of prefix bits +shared between any two entries. This number, `n`, +identifies the longest subset of SHA-1 you could use and still +encounter a collision between your object ids. + +For example, one system that was tested had a collection of +11 million objects (70 GB), and `bup margin` returned 45. +That means a 46-bit hash would be sufficient to avoid all +collisions among that set of objects; each object in that +repository could be uniquely identified by its first 46 +bits. + +The number of bits needed seems to increase by about 1 or 2 +for every doubling of the number of objects. Since SHA-1 +hashes have 160 bits, that leaves 115 bits of margin. Of +course, because SHA-1 hashes are essentially random, it's +theoretically possible to use many more bits with far fewer +objects. 
+ +If you're paranoid about the possibility of SHA-1 +collisions, you can monitor your repository by running `bup +margin` occasionally to see if you're getting dangerously +close to 160 bits. + +# OPTIONS + +\--predict +: Guess the offset into each index file where a + particular object will appear, and report the maximum + deviation of the correct answer from the guess. This + is potentially useful for tuning an interpolation + search algorithm. + +\--ignore-midx +: don't use `.midx` files, use only `.idx` files. This is + only really useful when used with `--predict`. + + +# EXAMPLES + $ bup margin + Reading indexes: 100.00% (1612581/1612581), done. + 40 + 40 matching prefix bits + 1.94 bits per doubling + 120 bits (61.86 doublings) remaining + 4.19338e+18 times larger is possible + + Everyone on earth could have 625878182 data sets + like yours, all in one repository, and we would + expect 1 object collision. + + $ bup margin --predict + PackIdxList: using 1 index. + Reading indexes: 100.00% (1612581/1612581), done. + 915 of 1612581 (0.057%) + + +# SEE ALSO + +`bup-midx`(1), `bup-save`(1) + +# BUP + +Part of the `bup`(1) suite. diff -Nru bup-0.33.2/Documentation/bup-margin.md bup-0.33.7/Documentation/bup-margin.md --- bup-0.33.2/Documentation/bup-margin.md 2023-07-01 20:08:43.000000000 +0000 +++ bup-0.33.7/Documentation/bup-margin.md 1970-01-01 00:00:00.000000000 +0000 @@ -1,79 +0,0 @@ -% bup-margin(1) Bup %BUP_VERSION% -% Avery Pennarun -% %BUP_DATE% - -# NAME - -bup-margin - figure out your deduplication safety margin - -# SYNOPSIS - -bup margin [options...] - -# DESCRIPTION - -`bup margin` iterates through all objects in your bup -repository, calculating the largest number of prefix bits -shared between any two entries. This number, `n`, -identifies the longest subset of SHA-1 you could use and still -encounter a collision between your object ids. 
- -For example, one system that was tested had a collection of -11 million objects (70 GB), and `bup margin` returned 45. -That means a 46-bit hash would be sufficient to avoid all -collisions among that set of objects; each object in that -repository could be uniquely identified by its first 46 -bits. - -The number of bits needed seems to increase by about 1 or 2 -for every doubling of the number of objects. Since SHA-1 -hashes have 160 bits, that leaves 115 bits of margin. Of -course, because SHA-1 hashes are essentially random, it's -theoretically possible to use many more bits with far fewer -objects. - -If you're paranoid about the possibility of SHA-1 -collisions, you can monitor your repository by running `bup -margin` occasionally to see if you're getting dangerously -close to 160 bits. - -# OPTIONS - -\--predict -: Guess the offset into each index file where a - particular object will appear, and report the maximum - deviation of the correct answer from the guess. This - is potentially useful for tuning an interpolation - search algorithm. - -\--ignore-midx -: don't use `.midx` files, use only `.idx` files. This is - only really useful when used with `--predict`. - - -# EXAMPLES - $ bup margin - Reading indexes: 100.00% (1612581/1612581), done. - 40 - 40 matching prefix bits - 1.94 bits per doubling - 120 bits (61.86 doublings) remaining - 4.19338e+18 times larger is possible - - Everyone on earth could have 625878182 data sets - like yours, all in one repository, and we would - expect 1 object collision. - - $ bup margin --predict - PackIdxList: using 1 index. - Reading indexes: 100.00% (1612581/1612581), done. - 915 of 1612581 (0.057%) - - -# SEE ALSO - -`bup-midx`(1), `bup-save`(1) - -# BUP - -Part of the `bup`(1) suite. 
diff -Nru bup-0.33.2/Documentation/bup-memtest.1.md bup-0.33.7/Documentation/bup-memtest.1.md --- bup-0.33.2/Documentation/bup-memtest.1.md 1970-01-01 00:00:00.000000000 +0000 +++ bup-0.33.7/Documentation/bup-memtest.1.md 2025-01-08 20:04:11.000000000 +0000 @@ -0,0 +1,130 @@ +% bup-memtest(1) Bup %BUP_VERSION% +% Avery Pennarun +% %BUP_DATE% + +# NAME + +bup-memtest - test bup memory usage statistics + +# SYNOPSIS + +bup memtest [options...] + +# DESCRIPTION + +`bup memtest` opens the list of pack indexes in your bup +repository, then searches the list for a series of +nonexistent objects, printing memory usage statistics after +each cycle. + +Because of the way Unix systems work, the output will +usually show a large (and unchanging) value in the VmSize +column, because mapping the index files in the first place +takes a certain amount of virtual address space. However, this +virtual memory usage is entirely virtual; it doesn't take +any of your RAM. Over time, bup uses *parts* of the +indexes, which need to be loaded from disk, and this is +what causes an increase in the VmRSS column. + +# OPTIONS + +-n, \--number=*number* +: set the number of objects to search for during each + cycle (ie. before printing a line of output) + +-c, \--cycles=*cycles* +: set the number of cycles (ie. the number of lines of + output after the first). The first line of output is + always 0 (ie. the baseline before searching for any + objects). + +\--ignore-midx +: ignore any `.midx` files created by `bup midx`. This + allows you to compare memory performance with and + without using midx. + +\--existing +: search for existing objects instead of searching for + random nonexistent ones. This can greatly affect + memory usage and performance. Note that most of the + time, `bup save` spends most of its time searching for + nonexistent objects, since existing ones are probably + in unmodified files that we won't be trying to back up + anyway. 
So the default behaviour reflects real bup + performance more accurately. But you might want this + option anyway just to make sure you haven't made + searching for existing objects much worse than before. + + +# EXAMPLES + $ bup memtest -n300 -c5 + PackIdxList: using 1 index. + VmSize VmRSS VmData VmStk + 0 20824 kB 4528 kB 1980 kB 84 kB + 300 20828 kB 5828 kB 1984 kB 84 kB + 600 20828 kB 6844 kB 1984 kB 84 kB + 900 20828 kB 7836 kB 1984 kB 84 kB + 1200 20828 kB 8736 kB 1984 kB 84 kB + 1500 20828 kB 9452 kB 1984 kB 84 kB + + $ bup memtest -n300 -c5 --ignore-midx + PackIdxList: using 361 indexes. + VmSize VmRSS VmData VmStk + 0 27444 kB 6552 kB 2516 kB 84 kB + 300 27448 kB 15832 kB 2520 kB 84 kB + 600 27448 kB 17220 kB 2520 kB 84 kB + 900 27448 kB 18012 kB 2520 kB 84 kB + 1200 27448 kB 18388 kB 2520 kB 84 kB + 1500 27448 kB 18556 kB 2520 kB 84 kB + + +# DISCUSSION + +When optimizing bup indexing, the first goal is to keep the +VmRSS reasonably low. However, it might eventually be +necessary to swap in all the indexes, simply because +you're searching for a lot of objects, and this will cause +your RSS to grow as large as VmSize eventually. + +The key word here is *eventually*. As long as VmRSS grows +reasonably slowly, the amount of disk activity caused by +accessing pack indexes is reasonably small. If it grows +quickly, bup will probably spend most of its time swapping +index data from disk instead of actually running your +backup, so backups will run very slowly. + +The purpose of `bup memtest` is to give you an idea of how +fast your memory usage is growing, and to help in +optimizing bup for better memory use. If you have memory +problems you might be asked to send the output of `bup +memtest` to help diagnose the problems. + +Tip: try using `bup midx -a` or `bup midx -f` to see if it +helps reduce your memory usage. + +Trivia: index memory usage in bup (or git) is only really a +problem when adding a large number of previously unseen +objects. 
This is because for each object, we need to +absolutely confirm that it isn't already in the database, +which requires us to search through *all* the existing pack +indexes to ensure that none of them contain the object in +question. In the more obvious case of searching for +objects that *do* exist, the objects being searched for are +typically related in some way, which means they probably +all exist in a small number of packfiles, so memory usage +will be constrained to just those packfile indexes. + +Since git users typically don't add a lot of files in a +single run, git doesn't really need a program like `bup +midx`. bup, on the other hand, spends most of its time +backing up files it hasn't seen before, so its memory usage +patterns are different. + + +# SEE ALSO + +`bup-midx`(1) + +# BUP + +Part of the `bup`(1) suite. diff -Nru bup-0.33.2/Documentation/bup-memtest.md bup-0.33.7/Documentation/bup-memtest.md --- bup-0.33.2/Documentation/bup-memtest.md 2023-07-01 20:08:43.000000000 +0000 +++ bup-0.33.7/Documentation/bup-memtest.md 1970-01-01 00:00:00.000000000 +0000 @@ -1,130 +0,0 @@ -% bup-memtest(1) Bup %BUP_VERSION% -% Avery Pennarun -% %BUP_DATE% - -# NAME - -bup-memtest - test bup memory usage statistics - -# SYNOPSIS - -bup memtest [options...] - -# DESCRIPTION - -`bup memtest` opens the list of pack indexes in your bup -repository, then searches the list for a series of -nonexistent objects, printing memory usage statistics after -each cycle. - -Because of the way Unix systems work, the output will -usually show a large (and unchanging) value in the VmSize -column, because mapping the index files in the first place -takes a certain amount of virtual address space. However, this -virtual memory usage is entirely virtual; it doesn't take -any of your RAM. Over time, bup uses *parts* of the -indexes, which need to be loaded from disk, and this is -what causes an increase in the VmRSS column. 
- -# OPTIONS - --n, \--number=*number* -: set the number of objects to search for during each - cycle (ie. before printing a line of output) - --c, \--cycles=*cycles* -: set the number of cycles (ie. the number of lines of - output after the first). The first line of output is - always 0 (ie. the baseline before searching for any - objects). - -\--ignore-midx -: ignore any `.midx` files created by `bup midx`. This - allows you to compare memory performance with and - without using midx. - -\--existing -: search for existing objects instead of searching for - random nonexistent ones. This can greatly affect - memory usage and performance. Note that most of the - time, `bup save` spends most of its time searching for - nonexistent objects, since existing ones are probably - in unmodified files that we won't be trying to back up - anyway. So the default behaviour reflects real bup - performance more accurately. But you might want this - option anyway just to make sure you haven't made - searching for existing objects much worse than before. - - -# EXAMPLES - $ bup memtest -n300 -c5 - PackIdxList: using 1 index. - VmSize VmRSS VmData VmStk - 0 20824 kB 4528 kB 1980 kB 84 kB - 300 20828 kB 5828 kB 1984 kB 84 kB - 600 20828 kB 6844 kB 1984 kB 84 kB - 900 20828 kB 7836 kB 1984 kB 84 kB - 1200 20828 kB 8736 kB 1984 kB 84 kB - 1500 20828 kB 9452 kB 1984 kB 84 kB - - $ bup memtest -n300 -c5 --ignore-midx - PackIdxList: using 361 indexes. - VmSize VmRSS VmData VmStk - 0 27444 kB 6552 kB 2516 kB 84 kB - 300 27448 kB 15832 kB 2520 kB 84 kB - 600 27448 kB 17220 kB 2520 kB 84 kB - 900 27448 kB 18012 kB 2520 kB 84 kB - 1200 27448 kB 18388 kB 2520 kB 84 kB - 1500 27448 kB 18556 kB 2520 kB 84 kB - - -# DISCUSSION - -When optimizing bup indexing, the first goal is to keep the -VmRSS reasonably low. 
However, it might eventually be -necessary to swap in all the indexes, simply because -you're searching for a lot of objects, and this will cause -your RSS to grow as large as VmSize eventually. - -The key word here is *eventually*. As long as VmRSS grows -reasonably slowly, the amount of disk activity caused by -accessing pack indexes is reasonably small. If it grows -quickly, bup will probably spend most of its time swapping -index data from disk instead of actually running your -backup, so backups will run very slowly. - -The purpose of `bup memtest` is to give you an idea of how -fast your memory usage is growing, and to help in -optimizing bup for better memory use. If you have memory -problems you might be asked to send the output of `bup -memtest` to help diagnose the problems. - -Tip: try using `bup midx -a` or `bup midx -f` to see if it -helps reduce your memory usage. - -Trivia: index memory usage in bup (or git) is only really a -problem when adding a large number of previously unseen -objects. This is because for each object, we need to -absolutely confirm that it isn't already in the database, -which requires us to search through *all* the existing pack -indexes to ensure that none of them contain the object in -question. In the more obvious case of searching for -objects that *do* exist, the objects being searched for are -typically related in some way, which means they probably -all exist in a small number of packfiles, so memory usage -will be constrained to just those packfile indexes. - -Since git users typically don't add a lot of files in a -single run, git doesn't really need a program like `bup -midx`. bup, on the other hand, spends most of its time -backing up files it hasn't seen before, so its memory usage -patterns are different. - - -# SEE ALSO - -`bup-midx`(1) - -# BUP - -Part of the `bup`(1) suite. 
diff -Nru bup-0.33.2/Documentation/bup-meta.1.md bup-0.33.7/Documentation/bup-meta.1.md --- bup-0.33.2/Documentation/bup-meta.1.md 1970-01-01 00:00:00.000000000 +0000 +++ bup-0.33.7/Documentation/bup-meta.1.md 2025-01-08 20:04:11.000000000 +0000 @@ -0,0 +1,153 @@ +% bup-meta(1) Bup %BUP_VERSION% +% Rob Browning +% %BUP_DATE% + +# NAME + +bup-meta - create or extract a metadata archive + +# SYNOPSIS + +bup meta \--create + ~ [-R] [-v] [-q] [\--no-symlinks] [\--no-paths] [-f *file*] \<*paths*...\> + +bup meta \--list + ~ [-v] [-q] [-f *file*] + +bup meta \--extract + ~ [-v] [-q] [\--numeric-ids] [\--no-symlinks] [-f *file*] + +bup meta \--start-extract + ~ [-v] [-q] [\--numeric-ids] [\--no-symlinks] [-f *file*] + +bup meta \--finish-extract + ~ [-v] [-q] [\--numeric-ids] [-f *file*] + +bup meta \--edit + ~ [\--set-uid *uid* | \--set-gid *gid* | \--set-user *user* | \--set-group *group* | ...] \<*paths*...\> + +# DESCRIPTION + +`bup meta` creates, extracts, or otherwise manipulates metadata +archives. A metadata archive contains the metadata information +(timestamps, ownership, access permissions, etc.) for a set of +filesystem paths. + +See `bup-restore`(1) for a description of the way ownership metadata +is restored. + +# OPTIONS + +-c, \--create +: Create a metadata archive for the specified *path*s. Write the + archive to standard output unless `--file` is specified. + +-t, \--list +: Display information about the metadata in an archive. Read the + archive from standard input unless `--file` is specified. + +-x, \--extract +: Extract a metadata archive. Conceptually, perform `--start-extract` + followed by `--finish-extract`. Read the archive from standard input + unless `--file` is specified. + +\--start-extract +: Build a filesystem tree matching the paths stored in a metadata + archive. By itself, this command does not produce a full + restoration of the metadata. For a full restoration, this command + must be followed by a call to `--finish-extract`. 
Once this + command has finished, all of the normal files described by the + metadata will exist and be empty. Restoring the data in those + files, and then calling `--finish-extract` should restore the + original tree. The archive will be read from standard input + unless `--file` is specified. + +\--finish-extract +: Finish applying the metadata stored in an archive to the + filesystem. Normally, this command should follow a call to + `--start-extract`. The archive will be read from standard input + unless `--file` is specified. + +\--edit +: Edit metadata archives. The result will be written to standard + output unless `--file` is specified. + +-f, \--file=*filename* +: Read the metadata archive from *filename* or write it to + *filename* as appropriate. If *filename* is "-", then read from + standard input or write to standard output. + +-R, \--recurse +: Recursively descend into subdirectories during `--create`. + +\--xdev, \--one-file-system +: don't cross filesystem boundaries -- though as with tar and rsync, + the mount points themselves will still be handled. + +\--numeric-ids +: Apply numeric IDs (user, group, etc.) rather than names during + `--extract` or `--finish-extract`. + +\--symlinks +: Record symbolic link targets when creating an archive, or restore + symbolic links when extracting an archive (during `--extract` + or `--start-extract`). This option is enabled by default. + Specify `--no-symlinks` to disable it. + +\--paths +: Record pathnames when creating an archive. This option is enabled + by default. Specify `--no-paths` to disable it. + +\--set-uid=*uid* +: Set the metadata uid to the integer *uid* during `--edit`. + +\--set-gid=*gid* +: Set the metadata gid to the integer *gid* during `--edit`. + +\--set-user=*user* +: Set the metadata user to *user* during `--edit`. + +\--unset-user +: Remove the metadata user during `--edit`. + +\--set-group=*group* +: Set the metadata group to *group* during `--edit`.
+ +\--unset-group +: Remove the metadata group during `--edit`. + +-v, \--verbose +: Be more verbose (can be used more than once). + +-q, \--quiet +: Be quiet. + +# EXAMPLES + + # Create a metadata archive for /etc. + $ bup meta -cRf etc.meta /etc + bup: removing leading "/" from "/etc" + + # Extract the etc.meta archive (files will be empty). + $ mkdir tmp && cd tmp + $ bup meta -xf ../etc.meta + $ ls + etc + + # Restore /etc completely. + $ mkdir tmp && cd tmp + $ bup meta --start-extract -f ../etc.meta + ...fill in all regular file contents using some other tool... + $ bup meta --finish-extract -f ../etc.meta + + # Change user/uid to root. + $ bup meta --edit --set-uid 0 --set-user root \ + src.meta > dest.meta + +# BUGS + +Hard links are not handled yet. + +# BUP + +Part of the `bup`(1) suite. diff -Nru bup-0.33.2/Documentation/bup-meta.md bup-0.33.7/Documentation/bup-meta.md --- bup-0.33.2/Documentation/bup-meta.md 2023-07-01 20:08:43.000000000 +0000 +++ bup-0.33.7/Documentation/bup-meta.md 1970-01-01 00:00:00.000000000 +0000 @@ -1,153 +0,0 @@ -% bup-meta(1) Bup %BUP_VERSION% -% Rob Browning -% %BUP_DATE% - -# NAME - -bup-meta - create or extract a metadata archive - -# SYNOPSIS - -bup meta \--create - ~ [-R] [-v] [-q] [\--no-symlinks] [\--no-paths] [-f *file*] \<*paths*...\> - -bup meta \--list - ~ [-v] [-q] [-f *file*] - -bup meta \--extract - ~ [-v] [-q] [\--numeric-ids] [\--no-symlinks] [-f *file*] - -bup meta \--start-extract - ~ [-v] [-q] [\--numeric-ids] [\--no-symlinks] [-f *file*] - -bup meta \--finish-extract - ~ [-v] [-q] [\--numeric-ids] [-f *file*] - -bup meta \--edit - ~ [\--set-uid *uid* | \--set-gid *gid* | \--set-user *user* | \--set-group *group* | ...] \<*paths*...\> - -# DESCRIPTION - -`bup meta` creates, extracts, or otherwise manipulates metadata -archives. A metadata archive contains the metadata information -(timestamps, ownership, access permissions, etc.) for a set of -filesystem paths. 
- -See `bup-restore`(1) for a description of the way ownership metadata -is restored. - -# OPTIONS - --c, \--create -: Create a metadata archive for the specified *path*s. Write the - archive to standard output unless `--file` is specified. - --t, \--list -: Display information about the metadata in an archive. Read the - archive from standard input unless `--file` is specified. - --x, \--extract -: Extract a metadata archive. Conceptually, perform `--start-extract` - followed by `--finish-extract`. Read the archive from standard input - unless `--file` is specified. - -\--start-extract -: Build a filesystem tree matching the paths stored in a metadata - archive. By itself, this command does not produce a full - restoration of the metadata. For a full restoration, this command - must be followed by a call to `--finish-extract`. Once this - command has finished, all of the normal files described by the - metadata will exist and be empty. Restoring the data in those - files, and then calling `--finish-extract` should restore the - original tree. The archive will be read from standard input - unless `--file` is specified. - -\--finish-extract -: Finish applying the metadata stored in an archive to the - filesystem. Normally, this command should follow a call to - `--start-extract`. The archive will be read from standard input - unless `--file` is specified. - -\--edit -: Edit metadata archives. The result will be written to standard - output unless `--file` is specified. - --f, \--file=*filename* -: Read the metadata archive from *filename* or write it to - *filename* as appropriate. If *filename* is "-", then read from - standard input or write to standard output. - --R, \--recurse -: Recursively descend into subdirectories during `--create`. - -\--xdev, \--one-file-system -: don't cross filesystem boundaries -- though as with tar and rsync, - the mount points themselves will still be handled. - -\--numeric-ids -: Apply numeric IDs (user, group, etc.) 
rather than names during - `--extract` or `--finish-extract`. - -\--symlinks -: Record symbolic link targets when creating an archive, or restore - symbolic links when extracting an archive (during `--extract` - or `--start-extract`). This option is enabled by default. - Specify `--no-symlinks` to disable it. - -\--paths -: Record pathnames when creating an archive. This option is enabled - by default. Specify `--no-paths` to disable it. - -\--set-uid=*uid* -: Set the metadata uid to the integer *uid* during `--edit`. - -\--set-gid=*gid* -: Set the metadata gid to the integer *gid* during `--edit`. - -\--set-user=*user* -: Set the metadata user to *user* during `--edit`. - -\--unset-user -: Remove the metadata user during `--edit`. - -\--set-group=*group* -: Set the metadata user to *group* during `--edit`. - -\--unset-group -: Remove the metadata group during `--edit`. - --v, \--verbose -: Be more verbose (can be used more than once). - --q, \--quiet -: Be quiet. - -# EXAMPLES - - # Create a metadata archive for /etc. - $ bup meta -cRf etc.meta /etc - bup: removing leading "/" from "/etc" - - # Extract the etc.meta archive (files will be empty). - $ mkdir tmp && cd tmp - $ bup meta -xf ../etc.meta - $ ls - etc - - # Restore /etc completely. - $ mkdir tmp && cd tmp - $ bup meta --start-extract -f ../etc.meta - ...fill in all regular file contents using some other tool... - $ bup meta --finish-extract -f ../etc.meta - - # Change user/uid to root. - $ bup meta --edit --set-uid 0 --set-user root \ - src.meta > dest.meta - -# BUGS - -Hard links are not handled yet. - -# BUP - -Part of the `bup`(1) suite. 
diff -Nru bup-0.33.2/Documentation/bup-midx.1.md bup-0.33.7/Documentation/bup-midx.1.md --- bup-0.33.2/Documentation/bup-midx.1.md 1970-01-01 00:00:00.000000000 +0000 +++ bup-0.33.7/Documentation/bup-midx.1.md 2025-01-08 20:04:11.000000000 +0000 @@ -0,0 +1,104 @@ +% bup-midx(1) Bup %BUP_VERSION% +% Avery Pennarun +% %BUP_DATE% + +# NAME + +bup-midx - create a multi-index (`.midx`) file from several `.idx` files + +# SYNOPSIS + +bup midx [-o *outfile*] \<-a|-f|*idxnames*...\> + +# DESCRIPTION + +`bup midx` creates a multi-index (`.midx`) file from one or more +git pack index (`.idx`) files. + +Note: you should no longer need to run this command by hand. +It gets run automatically by `bup-save`(1) and similar +commands. + +# OPTIONS + +-o, \--output=*filename.midx* +: use the given output filename for the `.midx` file. + Default is auto-generated. + +-a, \--auto +: automatically generate new `.midx` files for any `.idx` + files where it would be appropriate. + +-f, \--force +: force generation of a single new `.midx` file containing + *all* your `.idx` files, even if other `.midx` files + already exist. This will result in the fastest backup + performance, but may take a long time to run. + +\--dir=*packdir* +: specify the directory containing the `.idx`/`.midx` files + to work with. The default is `$BUP_DIR/objects/pack`. + +\--max-files +: maximum number of `.idx` files to open at a time. You + can use this if you have an especially small number of file + descriptors available, so that midx can complete + (though possibly non-optimally) even if it can't open + all your `.idx` files at once. The default value of this + option should be fine for most people. + +\--check +: validate a `.midx` file by ensuring that all objects in + its contained `.idx` files exist inside the `.midx`. May + be useful for debugging. + + +# EXAMPLES + $ bup midx -a + Merging 21 indexes (2278559 objects). + Table size: 524288 (17 bits) + Reading indexes: 100.00% (2278559/2278559), done. 
+ midx-b66d7c9afc4396187218f2936a87b865cf342672.midx + +# DISCUSSION + +By default, bup uses git-formatted pack files, which +consist of a pack file (containing objects) and an idx +file (containing a sorted list of object names and their +offsets in the .pack file). + +Normal idx files are convenient because it means you can use +`git`(1) to access your backup datasets. However, idx +files can get slow when you have a lot of very large packs +(which git typically doesn't have, but bup often does). + +bup `.midx` files consist of a single sorted list of all the objects +contained in all the .pack files it references. This list +can be binary searched in about log2(m) steps, where m is +the total number of objects. + +To further speed up the search, midx files also have a +variable-sized fanout table that reduces the first n +steps of the binary search. With the help of this fanout +table, bup can narrow down which page of the midx file a +given object id would be in (if it exists) with a single +lookup. Thus, typical searches will only need to swap in +two pages: one for the fanout table, and one for the object +id. + +midx files are most useful when creating new backups, since +searching for a nonexistent object in the repository +necessarily requires searching through *all* the index +files to ensure that it does not exist. (Searching for +objects that *do* exist can be optimized; for example, +consecutive objects are often stored in the same pack, so +we can search that one first using an MRU algorithm.) + + +# SEE ALSO + +`bup-save`(1), `bup-margin`(1), `bup-memtest`(1) + +# BUP + +Part of the `bup`(1) suite. 
diff -Nru bup-0.33.2/Documentation/bup-midx.md bup-0.33.7/Documentation/bup-midx.md --- bup-0.33.2/Documentation/bup-midx.md 2023-07-01 20:08:43.000000000 +0000 +++ bup-0.33.7/Documentation/bup-midx.md 1970-01-01 00:00:00.000000000 +0000 @@ -1,104 +0,0 @@ -% bup-midx(1) Bup %BUP_VERSION% -% Avery Pennarun -% %BUP_DATE% - -# NAME - -bup-midx - create a multi-index (`.midx`) file from several `.idx` files - -# SYNOPSIS - -bup midx [-o *outfile*] \<-a|-f|*idxnames*...\> - -# DESCRIPTION - -`bup midx` creates a multi-index (`.midx`) file from one or more -git pack index (`.idx`) files. - -Note: you should no longer need to run this command by hand. -It gets run automatically by `bup-save`(1) and similar -commands. - -# OPTIONS - --o, \--output=*filename.midx* -: use the given output filename for the `.midx` file. - Default is auto-generated. - --a, \--auto -: automatically generate new `.midx` files for any `.idx` - files where it would be appropriate. - --f, \--force -: force generation of a single new `.midx` file containing - *all* your `.idx` files, even if other `.midx` files - already exist. This will result in the fastest backup - performance, but may take a long time to run. - -\--dir=*packdir* -: specify the directory containing the `.idx`/`.midx` files - to work with. The default is `$BUP_DIR/objects/pack`. - -\--max-files -: maximum number of `.idx` files to open at a time. You - can use this if you have an especially small number of file - descriptors available, so that midx can complete - (though possibly non-optimally) even if it can't open - all your `.idx` files at once. The default value of this - option should be fine for most people. - -\--check -: validate a `.midx` file by ensuring that all objects in - its contained `.idx` files exist inside the `.midx`. May - be useful for debugging. - - -# EXAMPLES - $ bup midx -a - Merging 21 indexes (2278559 objects). - Table size: 524288 (17 bits) - Reading indexes: 100.00% (2278559/2278559), done. 
- midx-b66d7c9afc4396187218f2936a87b865cf342672.midx - -# DISCUSSION - -By default, bup uses git-formatted pack files, which -consist of a pack file (containing objects) and an idx -file (containing a sorted list of object names and their -offsets in the .pack file). - -Normal idx files are convenient because it means you can use -`git`(1) to access your backup datasets. However, idx -files can get slow when you have a lot of very large packs -(which git typically doesn't have, but bup often does). - -bup `.midx` files consist of a single sorted list of all the objects -contained in all the .pack files it references. This list -can be binary searched in about log2(m) steps, where m is -the total number of objects. - -To further speed up the search, midx files also have a -variable-sized fanout table that reduces the first n -steps of the binary search. With the help of this fanout -table, bup can narrow down which page of the midx file a -given object id would be in (if it exists) with a single -lookup. Thus, typical searches will only need to swap in -two pages: one for the fanout table, and one for the object -id. - -midx files are most useful when creating new backups, since -searching for a nonexistent object in the repository -necessarily requires searching through *all* the index -files to ensure that it does not exist. (Searching for -objects that *do* exist can be optimized; for example, -consecutive objects are often stored in the same pack, so -we can search that one first using an MRU algorithm.) - - -# SEE ALSO - -`bup-save`(1), `bup-margin`(1), `bup-memtest`(1) - -# BUP - -Part of the `bup`(1) suite. 
diff -Nru bup-0.33.2/Documentation/bup-mux.1.md bup-0.33.7/Documentation/bup-mux.1.md --- bup-0.33.2/Documentation/bup-mux.1.md 1970-01-01 00:00:00.000000000 +0000 +++ bup-0.33.7/Documentation/bup-mux.1.md 2025-01-08 20:04:11.000000000 +0000 @@ -0,0 +1,31 @@ +% bup-mux(1) Bup %BUP_VERSION% +% Brandon Low +% %BUP_DATE% + +# NAME + +bup-mux - multiplexes data and error streams over a connection + +# SYNOPSIS + +bup mux \<command\> [options...] + +# DESCRIPTION + +`bup mux` is used in the bup client-server protocol to +send both data and debugging/error output over the single +connection stream. + +`bup mux bup server` might be used in an inetd server setup. + +# OPTIONS + +command +: the command to run + +options +: options for the command + +# BUP + +Part of the `bup`(1) suite. diff -Nru bup-0.33.2/Documentation/bup-mux.md bup-0.33.7/Documentation/bup-mux.md --- bup-0.33.2/Documentation/bup-mux.md 2023-07-01 20:08:43.000000000 +0000 +++ bup-0.33.7/Documentation/bup-mux.md 1970-01-01 00:00:00.000000000 +0000 @@ -1,31 +0,0 @@ -% bup-mux(1) Bup %BUP_VERSION% -% Brandon Low -% %BUP_DATE% - -# NAME - -bup-mux - multiplexes data and error streams over a connection - -# SYNOPSIS - -bup mux \<command\> [options...] - -# DESCRIPTION - -`bup mux` is used in the bup client-server protocol to -send both data and debugging/error output over the single -connection stream. - -`bup mux bup server` might be used in an inetd server setup. - -# OPTIONS - -command -: the command to run - -options -: options for the command - -# BUP - -Part of the `bup`(1) suite. diff -Nru bup-0.33.2/Documentation/bup-on.1.md bup-0.33.7/Documentation/bup-on.1.md --- bup-0.33.2/Documentation/bup-on.1.md 1970-01-01 00:00:00.000000000 +0000 +++ bup-0.33.7/Documentation/bup-on.1.md 2025-01-08 20:04:11.000000000 +0000 @@ -0,0 +1,85 @@ +% bup-on(1) Bup %BUP_VERSION% +% Avery Pennarun +% %BUP_DATE% + +# NAME + +bup-on - run a bup server locally and client remotely + +# SYNOPSIS + +bup on \<hostname\> index ... + +bup on \<hostname\> save ...
+ +bup on \<hostname\> split ... + +bup on \<hostname\> get ... + +# DESCRIPTION + +`bup on` runs the given bup command on the given host using +ssh. It runs a bup server on the local machine, so that +commands like `bup save` on the remote machine can back up +to the local machine. (You don't need to provide a +`--remote` option to `bup save` in order for this to work.) + +See `bup-index`(1), `bup-save`(1), and so on for details of +how each subcommand works. + +This 'reverse mode' operation is useful when the machine +being backed up isn't supposed to be able to ssh into the +backup server. For example, your backup server can be +hidden behind a one-way firewall on a private or dynamic IP +address; using an ssh key, it can be authorized to ssh into +each of your important machines. After connecting to each +destination machine, it initiates a backup, receiving the +resulting data and storing it in its local repository. + +For example, if you run several virtual private Linux +machines on a remote hosting provider, you could back them +up to a local (much less expensive) computer in your +basement. + + +# EXAMPLES + + # First index the files on the remote server + + $ bup on myserver index -vux /etc + bup server: reading from stdin. + Indexing: 2465, done. + bup: merging indexes (186668/186668), done. + bup server: done + + # Now save the files from the remote server to the + # local $BUP_DIR + + $ bup on myserver save -n myserver-backup /etc + bup server: reading from stdin. + bup server: command: 'list-indexes' + PackIdxList: using 7 indexes. + Saving: 100.00% (241/241k, 648/648 files), done. + bup server: received 55 objects. + Indexing objects: 100% (55/55), done.
+ bup server: command: 'quit' + bup server: done + + # Now we can look at the resulting repo on the local + # machine + + $ bup ftp 'cat /myserver-backup/latest/etc/passwd' + root:x:0:0:root:/root:/bin/bash + daemon:x:1:1:daemon:/usr/sbin:/bin/sh + bin:x:2:2:bin:/bin:/bin/sh + sys:x:3:3:sys:/dev:/bin/sh + sync:x:4:65534:sync:/bin:/bin/sync + ... + +# SEE ALSO + +`bup-index`(1), `bup-save`(1), `bup-split`(1), `bup-get`(1) + +# BUP + +Part of the `bup`(1) suite. diff -Nru bup-0.33.2/Documentation/bup-on.md bup-0.33.7/Documentation/bup-on.md --- bup-0.33.2/Documentation/bup-on.md 2023-07-01 20:08:43.000000000 +0000 +++ bup-0.33.7/Documentation/bup-on.md 1970-01-01 00:00:00.000000000 +0000 @@ -1,85 +0,0 @@ -% bup-on(1) Bup %BUP_VERSION% -% Avery Pennarun -% %BUP_DATE% - -# NAME - -bup-on - run a bup server locally and client remotely - -# SYNOPSIS - -bup on \ index ... - -bup on \ save ... - -bup on \ split ... - -bup on \ get ... - -# DESCRIPTION - -`bup on` runs the given bup command on the given host using -ssh. It runs a bup server on the local machine, so that -commands like `bup save` on the remote machine can back up -to the local machine. (You don't need to provide a -`--remote` option to `bup save` in order for this to work.) - -See `bup-index`(1), `bup-save`(1), and so on for details of -how each subcommand works. - -This 'reverse mode' operation is useful when the machine -being backed up isn't supposed to be able to ssh into the -backup server. For example, your backup server can be -hidden behind a one-way firewall on a private or dynamic IP -address; using an ssh key, it can be authorized to ssh into -each of your important machines. After connecting to each -destination machine, it initiates a backup, receiving the -resulting data and storing in its local repository. - -For example, if you run several virtual private Linux -machines on a remote hosting provider, you could back them -up to a local (much less expensive) computer in your -basement. 
- - -# EXAMPLES - - # First index the files on the remote server - - $ bup on myserver index -vux /etc - bup server: reading from stdin. - Indexing: 2465, done. - bup: merging indexes (186668/186668), done. - bup server: done - - # Now save the files from the remote server to the - # local $BUP_DIR - - $ bup on myserver save -n myserver-backup /etc - bup server: reading from stdin. - bup server: command: 'list-indexes' - PackIdxList: using 7 indexes. - Saving: 100.00% (241/241k, 648/648 files), done. - bup server: received 55 objects. - Indexing objects: 100% (55/55), done. - bup server: command: 'quit' - bup server: done - - # Now we can look at the resulting repo on the local - # machine - - $ bup ftp 'cat /myserver-backup/latest/etc/passwd' - root:x:0:0:root:/root:/bin/bash - daemon:x:1:1:daemon:/usr/sbin:/bin/sh - bin:x:2:2:bin:/bin:/bin/sh - sys:x:3:3:sys:/dev:/bin/sh - sync:x:4:65534:sync:/bin:/bin/sync - ... - -# SEE ALSO - -`bup-index`(1), `bup-save`(1), `bup-split`(1), `bup-get`(1) - -# BUP - -Part of the `bup`(1) suite. diff -Nru bup-0.33.2/Documentation/bup-prune-older.1.md bup-0.33.7/Documentation/bup-prune-older.1.md --- bup-0.33.2/Documentation/bup-prune-older.1.md 1970-01-01 00:00:00.000000000 +0000 +++ bup-0.33.7/Documentation/bup-prune-older.1.md 2025-01-08 20:04:11.000000000 +0000 @@ -0,0 +1,125 @@ +% bup-prune-older(1) bup %BUP_VERSION% | bup %BUP_VERSION% +% Rob Browning +% %BUP_DATE% + +# NAME + +bup-prune-older - remove older saves + +# SYNOPSIS + +bup prune-older [options...] <*branch*...> + +# DESCRIPTION + +`bup prune-older` removes (permanently deletes) all saves except those +preserved by the various keep arguments detailed below. At least one +keep argument must be specified. This command is equivalent to a +suitable `bup rm` invocation followed by `bup gc`. + +WARNING: This is one of the few bup commands that modifies your +archive in intentionally destructive ways. 
Though if an attempt to +`join` or `restore` the data you still care about after a +`prune-older` succeeds, that's a fairly encouraging sign that the +commands worked correctly. (The `dev/compare-trees` command in the +source tree can be used to help test before/after results.) + +# KEEP PERIODS + +A `--keep` PERIOD (as required below) must be an integer followed by a +scale, or "forever". For example, 12y specifies a PERIOD of twelve +years. Here are the valid scales: + + - s indicates seconds + - min indicates minutes (60s) + - h indicates hours (60m) + - d indicates days (24h) + - w indicates weeks (7d) + - m indicates months (31d) + - y indicates years (366d) + - forever is infinitely far in the past + +As indicated, the PERIODS are computed with respect to the current +time, or the `--wrt` value if specified, and do not respect any +calendar, so `--keep-dailies-for 5d` means a period starting exactly +5 * 24 * 60 * 60 seconds before the starting point. + +# OPTIONS + +\--keep-all-for PERIOD +: when no smaller time scale `--keep` option applies, retain all saves + within the given period. + +\--keep-dailies-for PERIOD +: when no smaller time scale `--keep` option applies, retain the + newest save for any day within the given period. + +\--keep-monthlies-for PERIOD +: when no smaller time scale `--keep` option applies, retain the + newest save for any month within the given period. + +\--keep-yearlies-for PERIOD +: when no smaller time scale `--keep` option applies, retain the + newest save for any year within the given period. + +\--wrt UTC_SECONDS +: when computing a keep period, place the most recent end of the + range at UTC\_SECONDS, and any saves newer than this will be kept. + +\--pretend +: don't do anything, just list the actions that would be taken to + standard output, one action per line like this: + + - SAVE + + SAVE + ... + +\--gc +: garbage collect the repository after removing the relevant saves. 
+ This is the default behavior, but it can be avoided with `--no-gc`. + +\--gc-threshold N +: only rewrite a packfile if it's over N percent garbage; otherwise + leave it alone. The default threshold is 10%. + +-*#*, \--compress *#* +: set the compression level when rewriting archive data to # (a + value from 0-9, where 9 is the highest and 0 is no compression). + The default is 1 (fast, loose compression). + +-v, \--verbose +: increase verbosity (can be specified more than once). + +# NOTES + +When `--verbose` is specified, the save periods will be summarized to +standard error with lines like this: + + keeping monthlies since 1969-07-20-201800 + keeping all yearlies + ... + +It's possible that the current implementation might not be able to +format the date if, for example, it is far enough back in time. In +that case, you will see something like this: + + keeping yearlies since -30109891477 seconds before 1969-12-31-180000 + ... + +# EXAMPLES + + # Keep all saves for the past month, and any newer monthlies for + # the past year. Delete everything else. + $ bup prune-older --keep-all-for 1m --keep-monthlies-for 1y + + # Keep all saves for the past 6 months and delete everything else, + # but only on the semester branch. + $ bup prune-older --keep-all-for 6m semester + +# SEE ALSO + +`bup-rm`(1), `bup-gc`(1), and `bup-fsck`(1) + +# BUP + +Part of the `bup`(1) suite. diff -Nru bup-0.33.2/Documentation/bup-prune-older.md bup-0.33.7/Documentation/bup-prune-older.md --- bup-0.33.2/Documentation/bup-prune-older.md 2023-07-01 20:08:43.000000000 +0000 +++ bup-0.33.7/Documentation/bup-prune-older.md 1970-01-01 00:00:00.000000000 +0000 @@ -1,125 +0,0 @@ -% bup-prune-older(1) bup %BUP_VERSION% | bup %BUP_VERSION% -% Rob Browning -% %BUP_DATE% - -# NAME - -bup-prune-older - remove older saves - -# SYNOPSIS - -bup prune-older [options...] 
<*branch*...> - -# DESCRIPTION - -`bup prune-older` removes (permanently deletes) all saves except those -preserved by the various keep arguments detailed below. At least one -keep argument must be specified. This command is equivalent to a -suitable `bup rm` invocation followed by `bup gc`. - -WARNING: This is one of the few bup commands that modifies your -archive in intentionally destructive ways. Though if an attempt to -`join` or `restore` the data you still care about after a -`prune-older` succeeds, that's a fairly encouraging sign that the -commands worked correctly. (The `dev/compare-trees` command in the -source tree can be used to help test before/after results.) - -# KEEP PERIODS - -A `--keep` PERIOD (as required below) must be an integer followed by a -scale, or "forever". For example, 12y specifies a PERIOD of twelve -years. Here are the valid scales: - - - s indicates seconds - - min indicates minutes (60s) - - h indicates hours (60m) - - d indicates days (24h) - - w indicates weeks (7d) - - m indicates months (31d) - - y indicates years (366d) - - forever is infinitely far in the past - -As indicated, the PERIODS are computed with respect to the current -time, or the `--wrt` value if specified, and do not respect any -calendar, so `--keep-dailies-for 5d` means a period starting exactly -5 * 24 * 60 * 60 seconds before the starting point. - -# OPTIONS - -\--keep-all-for PERIOD -: when no smaller time scale `--keep` option applies, retain all saves - within the given period. - -\--keep-dailies-for PERIOD -: when no smaller time scale `--keep` option applies, retain the - newest save for any day within the given period. - -\--keep-monthlies-for PERIOD -: when no smaller time scale `--keep` option applies, retain the - newest save for any month within the given period. - -\--keep-yearlies-for PERIOD -: when no smaller time scale `--keep` option applies, retain the - newest save for any year within the given period. 
- -\--wrt UTC_SECONDS -: when computing a keep period, place the most recent end of the - range at UTC\_SECONDS, and any saves newer than this will be kept. - -\--pretend -: don't do anything, just list the actions that would be taken to - standard output, one action per line like this: - - - SAVE - + SAVE - ... - -\--gc -: garbage collect the repository after removing the relevant saves. - This is the default behavior, but it can be avoided with `--no-gc`. - -\--gc-threshold N -: only rewrite a packfile if it's over N percent garbage; otherwise - leave it alone. The default threshold is 10%. - --*#*, \--compress *#* -: set the compression level when rewriting archive data to # (a - value from 0-9, where 9 is the highest and 0 is no compression). - The default is 1 (fast, loose compression). - --v, \--verbose -: increase verbosity (can be specified more than once). - -# NOTES - -When `--verbose` is specified, the save periods will be summarized to -standard error with lines like this: - - keeping monthlies since 1969-07-20-201800 - keeping all yearlies - ... - -It's possible that the current implementation might not be able to -format the date if, for example, it is far enough back in time. In -that case, you will see something like this: - - keeping yearlies since -30109891477 seconds before 1969-12-31-180000 - ... - -# EXAMPLES - - # Keep all saves for the past month, and any newer monthlies for - # the past year. Delete everything else. - $ bup prune-older --keep-all-for 1m --keep-monthlies-for 1y - - # Keep all saves for the past 6 months and delete everything else, - # but only on the semester branch. - $ bup prune-older --keep-all-for 6m semester - -# SEE ALSO - -`bup-rm`(1), `bup-gc`(1), and `bup-fsck`(1) - -# BUP - -Part of the `bup`(1) suite. 
diff -Nru bup-0.33.2/Documentation/bup-random.1.md bup-0.33.7/Documentation/bup-random.1.md --- bup-0.33.2/Documentation/bup-random.1.md 1970-01-01 00:00:00.000000000 +0000 +++ bup-0.33.7/Documentation/bup-random.1.md 2025-01-08 20:04:11.000000000 +0000 @@ -0,0 +1,80 @@ +% bup-random(1) Bup %BUP_VERSION% +% Avery Pennarun +% %BUP_DATE% + +# NAME + +bup-random - generate a stream of random output + +# SYNOPSIS + +bup random [-S seed] [-fv] \<numbytes\> + +# DESCRIPTION + +`bup random` produces a stream of pseudorandom output bytes to +stdout. Note: the bytes are *not* generated using a +cryptographic algorithm and should never be used for +security. + +Note that the stream of random bytes will be identical +every time `bup random` is run, unless you provide a +different `seed` value. This is intentional: the purpose +of this program is to be able to run repeatable tests on +large amounts of data, so we want identical data every +time. + +`bup random` generates about 240 megabytes per second on a +modern test system (Intel Core2), which is faster than you +could achieve by reading data from most disks. Thus, it +can be helpful when running microbenchmarks. + +# OPTIONS + +\<numbytes\> +: the number of bytes of data to generate. Can be used + with the suffixes `k`, `M`, or `G` to indicate + kilobytes, megabytes, or gigabytes, respectively. + +-S, \--seed=*seed* +: use the given value to seed the pseudorandom number + generator. The generated output stream will be + identical for every stream seeded with the same value. + The default seed is 1. A seed value of 0 is equivalent + to 1. + +-f, \--force +: generate output even if stdout is a tty. (Generating + random data to a tty is generally considered + ill-advised, but you can do so if you really want.) + +-v, \--verbose +: print a progress message showing the number of bytes that + have been output so far.
+ +# EXAMPLES + + $ bup random 1k | sha1sum + 2108c55d0a2687c8dacf9192677c58437a55db71 - + + $ bup random -S1 1k | sha1sum + 2108c55d0a2687c8dacf9192677c58437a55db71 - + + $ bup random -S2 1k | sha1sum + f71acb90e135d98dad7efc136e8d2cc30573e71a - + + $ time bup random 1G >/dev/null + Random: 1024 Mbytes, done. + + real 0m4.261s + user 0m4.048s + sys 0m0.172s + + $ bup random 1G | bup split -t --bench + Random: 1024 Mbytes, done. + bup: 1048576.00kbytes in 18.59 secs = 56417.78 kbytes/sec + 1092599b9c7b2909652ef1e6edac0796bfbfc573 + +# BUP + +Part of the `bup`(1) suite. diff -Nru bup-0.33.2/Documentation/bup-random.md bup-0.33.7/Documentation/bup-random.md --- bup-0.33.2/Documentation/bup-random.md 2023-07-01 20:08:43.000000000 +0000 +++ bup-0.33.7/Documentation/bup-random.md 1970-01-01 00:00:00.000000000 +0000 @@ -1,80 +0,0 @@ -% bup-random(1) Bup %BUP_VERSION% -% Avery Pennarun -% %BUP_DATE% - -# NAME - -bup-random - generate a stream of random output - -# SYNOPSIS - -bup random [-S seed] [-fv] \ - -# DESCRIPTION - -`bup random` produces a stream of pseudorandom output bytes to -stdout. Note: the bytes are *not* generated using a -cryptographic algorithm and should never be used for -security. - -Note that the stream of random bytes will be identical -every time `bup random` is run, unless you provide a -different `seed` value. This is intentional: the purpose -of this program is to be able to run repeatable tests on -large amounts of data, so we want identical data every -time. - -`bup random` generates about 240 megabytes per second on a -modern test system (Intel Core2), which is faster than you -could achieve by reading data from most disks. Thus, it -can be helpful when running microbenchmarks. - -# OPTIONS - -\ -: the number of bytes of data to generate. Can be used - with the suffices `k`, `M`, or `G` to indicate - kilobytes, megabytes, or gigabytes, respectively. - --S, \--seed=*seed* -: use the given value to seed the pseudorandom number - generator. 
The generated output stream will be - identical for every stream seeded with the same value. - The default seed is 1. A seed value of 0 is equivalent - to 1. - --f, \--force -: generate output even if stdout is a tty. (Generating - random data to a tty is generally considered - ill-advised, but you can do if you really want.) - --v, \--verbose -: print a progress message showing the number of bytes that - has been output so far. - -# EXAMPLES - - $ bup random 1k | sha1sum - 2108c55d0a2687c8dacf9192677c58437a55db71 - - - $ bup random -S1 1k | sha1sum - 2108c55d0a2687c8dacf9192677c58437a55db71 - - - $ bup random -S2 1k | sha1sum - f71acb90e135d98dad7efc136e8d2cc30573e71a - - - $ time bup random 1G >/dev/null - Random: 1024 Mbytes, done. - - real 0m4.261s - user 0m4.048s - sys 0m0.172s - - $ bup random 1G | bup split -t --bench - Random: 1024 Mbytes, done. - bup: 1048576.00kbytes in 18.59 secs = 56417.78 kbytes/sec - 1092599b9c7b2909652ef1e6edac0796bfbfc573 - -# BUP - -Part of the `bup`(1) suite. diff -Nru bup-0.33.2/Documentation/bup-restore.1.md bup-0.33.7/Documentation/bup-restore.1.md --- bup-0.33.2/Documentation/bup-restore.1.md 1970-01-01 00:00:00.000000000 +0000 +++ bup-0.33.7/Documentation/bup-restore.1.md 2025-01-08 20:04:11.000000000 +0000 @@ -0,0 +1,272 @@ +% bup-restore(1) Bup %BUP_VERSION% +% Avery Pennarun +% %BUP_DATE% + +# NAME + +bup-restore - extract files from a backup set + +# SYNOPSIS + +bup restore [-r *host*:[*path*]] [\--outdir=*outdir*] [\--exclude-rx *pattern*] +[\--exclude-rx-from *filename*] [-v] [-q] \ + +# DESCRIPTION + +`bup restore` extracts files from a backup set (created +with `bup-save`(1)) to the local filesystem. + +The specified *paths* are of the form +/_branch_/_revision_/_some/where_. The components of the +path are as follows: + +branch +: the name of the backup set to restore from; this + corresponds to the `--name` (`-n`) option to `bup save`. + +revision +: the revision of the backup set to restore. 
The + revision *latest* is always the most recent + backup on the given branch. You can discover other + revisions using `bup ls /branch`. + +some/where +: the previously saved path (after any stripping/grafting) that you + want to restore. For example, `etc/passwd`. + +If _some/where_ names a directory, `bup restore` will restore that +directory and then recursively restore its contents. + +If _some/where_ names a directory and ends with a slash (ie. +path/to/dir/), `bup restore` will restore the children of that +directory directly to the current directory (or the `--outdir`). If +_some/where_ does not end in a slash, the children will be restored to +a subdirectory of the current directory. + +If _some/where_ names a directory and ends in '/.' (ie. +path/to/dir/.), `bup restore` will do exactly what it would have done +for path/to/dir, and then restore _dir_'s metadata to the current +directory (or the `--outdir`). See the EXAMPLES section. + +As a special case, if _some/where_ names the "latest" symlink, +e.g. `bup restore /foo/latest`, then bup will act exactly as if the +save that "latest" points to had been specified, and restore that, +rather than the "latest" symlink itself. + +Whenever path metadata is available, `bup restore` will attempt to +restore it. When restoring ownership, bup implements tar/rsync-like +semantics. It will normally prefer user and group names to uids and +gids when they're available, but it will not try to restore the user +unless running as root, and it will fall back to the numeric uid or +gid whenever the metadata contains a user or group name that doesn't +exist on the current system. The use of user and group names can be +disabled via `--numeric-ids` (which can be important when restoring a +chroot, for example), and as a special case, a uid or gid of 0 will +never be remapped by name. Additionally, some systems don't allow +setting a uid/gid that doesn't correspond with a known user/group. 
On +those systems, bup will log an error for each relevant path. + +The `--map-user`, `--map-group`, `--map-uid`, `--map-gid` options may +be used to adjust the available ownership information before any of +the rules above are applied, but note that due to those rules, +`--map-uid` and `--map-gid` will have no effect whenever a path has a +valid user or group. In those cases, either `--numeric-ids` must be +specified, or the user or group must be cleared by a suitable +`--map-user foo=` or `--map-group foo=`. + +Hardlinks will also be restored when possible, but at least currently, +no links will be made to targets outside the restore tree, and if the +restore tree spans a different arrangement of filesystems from the +save tree, some hardlink sets may not be completely restored. + +Also note that changing hardlink sets on disk between index and save +may produce unexpected results. With the current implementation, bup +will attempt to recreate any given hardlink set as it existed at index +time, even if all of the files in the set weren't still hardlinked +(but were otherwise identical) at save time. + +Note that during the restoration process, access to data within the +restore tree may be more permissive than it was in the original +source. Unless security is irrelevant, you must restore to a private +subdirectory, and then move the resulting tree to its final position. +See the EXAMPLES section for a demonstration. + +# OPTIONS + +-r, \--remote=*host*:*path* +: restore the backup set from the given remote server. If + *path* is omitted, uses the default path on the remote + server (you still need to include the ':'). The connection to the + remote server is made with SSH. If you'd like to specify which port, user + or private key to use for the SSH connection, we recommend you use the + `~/.ssh/config` file. + +-C, \--outdir=*outdir* +: create and change to directory *outdir* before + extracting the files. 
+ +\--numeric-ids +: restore numeric IDs (user, group, etc.) rather than names. + +\--exclude-rx=*pattern* +: exclude any path matching *pattern*, which must be a Python + regular expression (http://docs.python.org/library/re.html). The + pattern will be compared against the full path rooted at the top + of the restore tree, without anchoring, so "x/y" will match + "ox/yard" or "box/yards". To exclude the contents of /tmp, but + not the directory itself, use "^/tmp/.". (can be specified more + than once) + + Note that the root of the restore tree (which matches '^/') is the + top of the archive tree being restored, and has nothing to do with + the filesystem destination. Given "restore ... /foo/latest/etc/", + the pattern '^/passwd$' would match if a file named passwd had + been saved as '/foo/latest/etc/passwd'. + + Examples: + + * '/foo$' - exclude any file named foo + * '/foo/$' - exclude any directory named foo + * '/foo/.' - exclude the content of any directory named foo + * '^/tmp/.' - exclude root-level /tmp's content, but not /tmp itself + +\--exclude-rx-from=*filename* +: read --exclude-rx patterns from *filename*, one pattern per-line + (may be repeated). Ignore completely empty lines. + +\--sparse +: write output data sparsely when reasonable. Currently, reasonable + just means "at least whenever there are 512 or more consecutive + zeroes". + +\--map-user *old*=*new* +: for every path, restore the *old* (saved) user name as *new*. + Specifying "" for *new* will clear the user. For example + "--map-user foo=" will allow the uid to take effect for any path + that originally had a user of "foo", unless countermanded by a + subsequent "--map-user foo=..." specification. See DESCRIPTION + above for further information. + +\--map-group *old*=*new* +: for every path, restore the *old* (saved) group name as *new*. + Specifying "" for *new* will clear the group. 
For example + "--map-group foo=" will allow the gid to take effect for any path + that originally had a group of "foo", unless countermanded by a + subsequent "--map-group foo=..." specification. See DESCRIPTION + above for further information. + +\--map-uid *old*=*new* +: for every path, restore the *old* (saved) uid as *new*, unless + countermanded by a subsequent "--map-uid *old*=..." option. Note + that the uid will only be relevant for paths with no user. See + DESCRIPTION above for further information. + +\--map-gid *old*=*new* +: for every path, restore the *old* (saved) gid as *new*, unless + countermanded by a subsequent "--map-gid *old*=..." option. Note + that the gid will only be relevant for paths with no group. See + DESCRIPTION above for further information. + +-v, \--verbose +: increase log output. Given once, prints every + directory as it is restored; given twice, prints every + file and directory. + +-q, \--quiet +: suppress output, including the progress meter. Normally, if + stderr is a tty, a progress meter displays the total number of + files restored. + +# EXAMPLES + +Create a simple test backup set: + + $ bup index -u /etc + $ bup save -n mybackup /etc/passwd /etc/profile + +Restore just one file: + + $ bup restore /mybackup/latest/etc/passwd + Restoring: 1, done. + + $ ls -l passwd + -rw-r--r-- 1 apenwarr apenwarr 1478 2010-09-08 03:06 passwd + +Restore etc to test (no trailing slash): + + $ bup restore -C test /mybackup/latest/etc + Restoring: 3, done. + + $ find test + test + test/etc + test/etc/passwd + test/etc/profile + +Restore the contents of etc to test (trailing slash): + + $ bup restore -C test /mybackup/latest/etc/ + Restoring: 2, done. + + $ find test + test + test/passwd + test/profile + +Restore the contents of etc and etc's metadata to test (trailing +"/."): + + $ bup restore -C test /mybackup/latest/etc/. + Restoring: 2, done. + + # At this point test and etc's metadata will match.
+ $ find test + test + test/passwd + test/profile + +Restore a tree without risk of unauthorized access: + + # mkdir --mode 0700 restore-tmp + + # bup restore -C restore-tmp /somebackup/latest/foo + Restoring: 42, done. + + # mv restore-tmp/foo somewhere + + # rmdir restore-tmp + +Restore a tree, remapping an old user and group to a new user and group: + + # ls -l /original/y + -rw-r----- 1 foo baz 3610 Nov 4 11:31 y + # bup restore -C dest --map-user foo=bar --map-group baz=bax /x/latest/y + Restoring: 42, done. + # ls -l dest/y + -rw-r----- 1 bar bax 3610 Nov 4 11:31 y + +Restore a tree, remapping an old uid to a new uid. Note that the old +user must be erased so that bup won't prefer it over the uid: + + # ls -l /original/y + -rw-r----- 1 foo baz 3610 Nov 4 11:31 y + # ls -ln /original/y + -rw-r----- 1 1000 1007 3610 Nov 4 11:31 y + # bup restore -C dest --map-user foo= --map-uid 1000=1042 /x/latest/y + Restoring: 97, done. + # ls -ln dest/y + -rw-r----- 1 1042 1007 3610 Nov 4 11:31 y + +An alternate way to do the same by quashing users/groups universally +with `--numeric-ids`: + + # bup restore -C dest --numeric-ids --map-uid 1000=1042 /x/latest/y + Restoring: 97, done. + +# SEE ALSO + +`bup-save`(1), `bup-ftp`(1), `bup-fuse`(1), `bup-web`(1) + +# BUP + +Part of the `bup`(1) suite. diff -Nru bup-0.33.2/Documentation/bup-restore.md bup-0.33.7/Documentation/bup-restore.md --- bup-0.33.2/Documentation/bup-restore.md 2023-07-01 20:08:43.000000000 +0000 +++ bup-0.33.7/Documentation/bup-restore.md 1970-01-01 00:00:00.000000000 +0000 @@ -1,272 +0,0 @@ -% bup-restore(1) Bup %BUP_VERSION% -% Avery Pennarun -% %BUP_DATE% - -# NAME - -bup-restore - extract files from a backup set - -# SYNOPSIS - -bup restore [-r *host*:[*path*]] [\--outdir=*outdir*] [\--exclude-rx *pattern*] -[\--exclude-rx-from *filename*] [-v] [-q] \ - -# DESCRIPTION - -`bup restore` extracts files from a backup set (created -with `bup-save`(1)) to the local filesystem. 
- -The specified *paths* are of the form -/_branch_/_revision_/_some/where_. The components of the -path are as follows: - -branch -: the name of the backup set to restore from; this - corresponds to the `--name` (`-n`) option to `bup save`. - -revision -: the revision of the backup set to restore. The - revision *latest* is always the most recent - backup on the given branch. You can discover other - revisions using `bup ls /branch`. - -some/where -: the previously saved path (after any stripping/grafting) that you - want to restore. For example, `etc/passwd`. - -If _some/where_ names a directory, `bup restore` will restore that -directory and then recursively restore its contents. - -If _some/where_ names a directory and ends with a slash (ie. -path/to/dir/), `bup restore` will restore the children of that -directory directly to the current directory (or the `--outdir`). If -_some/where_ does not end in a slash, the children will be restored to -a subdirectory of the current directory. - -If _some/where_ names a directory and ends in '/.' (ie. -path/to/dir/.), `bup restore` will do exactly what it would have done -for path/to/dir, and then restore _dir_'s metadata to the current -directory (or the `--outdir`). See the EXAMPLES section. - -As a special case, if _some/where_ names the "latest" symlink, -e.g. `bup restore /foo/latest`, then bup will act exactly as if the -save that "latest" points to had been specified, and restore that, -rather than the "latest" symlink itself. - -Whenever path metadata is available, `bup restore` will attempt to -restore it. When restoring ownership, bup implements tar/rsync-like -semantics. It will normally prefer user and group names to uids and -gids when they're available, but it will not try to restore the user -unless running as root, and it will fall back to the numeric uid or -gid whenever the metadata contains a user or group name that doesn't -exist on the current system. 
The use of user and group names can be -disabled via `--numeric-ids` (which can be important when restoring a -chroot, for example), and as a special case, a uid or gid of 0 will -never be remapped by name. Additionally, some systems don't allow -setting a uid/gid that doesn't correspond with a known user/group. On -those systems, bup will log an error for each relevant path. - -The `--map-user`, `--map-group`, `--map-uid`, `--map-gid` options may -be used to adjust the available ownership information before any of -the rules above are applied, but note that due to those rules, -`--map-uid` and `--map-gid` will have no effect whenever a path has a -valid user or group. In those cases, either `--numeric-ids` must be -specified, or the user or group must be cleared by a suitable -`--map-user foo=` or `--map-group foo=`. - -Hardlinks will also be restored when possible, but at least currently, -no links will be made to targets outside the restore tree, and if the -restore tree spans a different arrangement of filesystems from the -save tree, some hardlink sets may not be completely restored. - -Also note that changing hardlink sets on disk between index and save -may produce unexpected results. With the current implementation, bup -will attempt to recreate any given hardlink set as it existed at index -time, even if all of the files in the set weren't still hardlinked -(but were otherwise identical) at save time. - -Note that during the restoration process, access to data within the -restore tree may be more permissive than it was in the original -source. Unless security is irrelevant, you must restore to a private -subdirectory, and then move the resulting tree to its final position. -See the EXAMPLES section for a demonstration. - -# OPTIONS - --r, \--remote=*host*:*path* -: restore the backup set from the given remote server. If - *path* is omitted, uses the default path on the remote - server (you still need to include the ':'). 
The connection to the - remote server is made with SSH. If you'd like to specify which port, user - or private key to use for the SSH connection, we recommend you use the - `~/.ssh/config` file. - --C, \--outdir=*outdir* -: create and change to directory *outdir* before - extracting the files. - -\--numeric-ids -: restore numeric IDs (user, group, etc.) rather than names. - -\--exclude-rx=*pattern* -: exclude any path matching *pattern*, which must be a Python - regular expression (http://docs.python.org/library/re.html). The - pattern will be compared against the full path rooted at the top - of the restore tree, without anchoring, so "x/y" will match - "ox/yard" or "box/yards". To exclude the contents of /tmp, but - not the directory itself, use "^/tmp/.". (can be specified more - than once) - - Note that the root of the restore tree (which matches '^/') is the - top of the archive tree being restored, and has nothing to do with - the filesystem destination. Given "restore ... /foo/latest/etc/", - the pattern '^/passwd$' would match if a file named passwd had - been saved as '/foo/latest/etc/passwd'. - - Examples: - - * '/foo$' - exclude any file named foo - * '/foo/$' - exclude any directory named foo - * '/foo/.' - exclude the content of any directory named foo - * '^/tmp/.' - exclude root-level /tmp's content, but not /tmp itself - -\--exclude-rx-from=*filename* -: read --exclude-rx patterns from *filename*, one pattern per-line - (may be repeated). Ignore completely empty lines. - -\--sparse -: write output data sparsely when reasonable. Currently, reasonable - just means "at least whenever there are 512 or more consecutive - zeroes". - -\--map-user *old*=*new* -: for every path, restore the *old* (saved) user name as *new*. - Specifying "" for *new* will clear the user. For example - "--map-user foo=" will allow the uid to take effect for any path - that originally had a user of "foo", unless countermanded by a - subsequent "--map-user foo=..." 
specification. See DESCRIPTION - above for further information. - -\--map-group *old*=*new* -: for every path, restore the *old* (saved) group name as *new*. - Specifying "" for *new* will clear the group. For example - "--map-group foo=" will allow the gid to take effect for any path - that originally had a group of "foo", unless countermanded by a - subsequent "--map-group foo=..." specification. See DESCRIPTION - above for further information. - -\--map-uid *old*=*new* -: for every path, restore the *old* (saved) uid as *new*, unless - countermanded by a subsequent "--map-uid *old*=..." option. Note - that the uid will only be relevant for paths with no user. See - DESCRIPTION above for further information. - -\--map-gid *old*=*new* -: for every path, restore the *old* (saved) gid as *new*, unless - countermanded by a subsequent "--map-gid *old*=..." option. Note - that the gid will only be relevant for paths with no user. See - DESCRIPTION above for further information. - --v, \--verbose -: increase log output. Given once, prints every - directory as it is restored; given twice, prints every - file and directory. - --q, \--quiet -: suppress output, including the progress meter. Normally, if - stderr is a tty, a progress meter displays the total number of - files restored. - -# EXAMPLES - -Create a simple test backup set: - - $ bup index -u /etc - $ bup save -n mybackup /etc/passwd /etc/profile - -Restore just one file: - - $ bup restore /mybackup/latest/etc/passwd - Restoring: 1, done. - - $ ls -l passwd - -rw-r--r-- 1 apenwarr apenwarr 1478 2010-09-08 03:06 passwd - -Restore etc to test (no trailing slash): - - $ bup restore -C test /mybackup/latest/etc - Restoring: 3, done. - - $ find test - test - test/etc - test/etc/passwd - test/etc/profile - -Restore the contents of etc to test (trailing slash): - - $ bup restore -C test /mybackup/latest/etc/ - Restoring: 2, done. 
- - $ find test - test - test/passwd - test/profile - -Restore the contents of etc and etc's metadata to test (trailing -"/."): - - $ bup restore -C test /mybackup/latest/etc/. - Restoring: 2, done. - - # At this point test and etc's metadata will match. - $ find test - test - test/passwd - test/profile - -Restore a tree without risk of unauthorized access: - - # mkdir --mode 0700 restore-tmp - - # bup restore -C restore-tmp /somebackup/latest/foo - Restoring: 42, done. - - # mv restore-tmp/foo somewhere - - # rmdir restore-tmp - -Restore a tree, remapping an old user and group to a new user and group: - - # ls -l /original/y - -rw-r----- 1 foo baz 3610 Nov 4 11:31 y - # bup restore -C dest --map-user foo=bar --map-group baz=bax /x/latest/y - Restoring: 42, done. - # ls -l dest/y - -rw-r----- 1 bar bax 3610 Nov 4 11:31 y - -Restore a tree, remapping an old uid to a new uid. Note that the old -user must be erased so that bup won't prefer it over the uid: - - # ls -l /original/y - -rw-r----- 1 foo baz 3610 Nov 4 11:31 y - # ls -ln /original/y - -rw-r----- 1 1000 1007 3610 Nov 4 11:31 y - # bup restore -C dest --map-user foo= --map-uid 1000=1042 /x/latest/y - Restoring: 97, done. - # ls -ln dest/y - -rw-r----- 1 1042 1007 3610 Nov 4 11:31 y - -An alternate way to do the same by quashing users/groups universally -with `--numeric-ids`: - - # bup restore -C dest --numeric-ids --map-uid 1000=1042 /x/latest/y - Restoring: 97, done. - -# SEE ALSO - -`bup-save`(1), `bup-ftp`(1), `bup-fuse`(1), `bup-web`(1) - -# BUP - -Part of the `bup`(1) suite. 
diff -Nru bup-0.33.2/Documentation/bup-rm.1.md bup-0.33.7/Documentation/bup-rm.1.md --- bup-0.33.2/Documentation/bup-rm.1.md 1970-01-01 00:00:00.000000000 +0000 +++ bup-0.33.7/Documentation/bup-rm.1.md 2025-01-08 20:04:11.000000000 +0000 @@ -0,0 +1,49 @@ +% bup-rm(1) Bup %BUP_VERSION% +% Rob Browning +% %BUP_DATE% + +# NAME + +bup-rm - remove references to archive content + +# SYNOPSIS + +bup rm [-#|\--verbose] <*branch*|*save*...> + +# DESCRIPTION + +`bup rm` removes the indicated *branch*es (backup sets) and *save*s. +By itself, this command does not delete any actual data (nor recover +any storage space), but it may make it very difficult or impossible to +refer to the deleted items, unless there are other references to them +(e.g. tags). + +A subsequent garbage collection, either by a `bup gc`, or by a normal +`git gc`, may permanently delete data that is no longer reachable from +the remaining branches or tags, and reclaim the related storage space. + +WARNING: This is one of the few bup commands that modifies your +archive in intentionally destructive ways. + +# OPTIONS + +-v, \--verbose +: increase verbosity (can be used more than once). + +-*#*, \--compress=*#* +: set the compression level to # (a value from 0-9, where + 9 is the highest and 0 is no compression). The default + is 6. Note that `bup rm` may only write new commits. + +# EXAMPLES + + # Delete the backup set (branch) foo and a save in bar. + $ bup rm /foo /bar/2014-10-21-214720 + +# SEE ALSO + +`bup-gc`(1), `bup-save`(1), `bup-fsck`(1), and `bup-tag`(1) + +# BUP + +Part of the `bup`(1) suite. 
diff -Nru bup-0.33.2/Documentation/bup-rm.md bup-0.33.7/Documentation/bup-rm.md --- bup-0.33.2/Documentation/bup-rm.md 2023-07-01 20:08:43.000000000 +0000 +++ bup-0.33.7/Documentation/bup-rm.md 1970-01-01 00:00:00.000000000 +0000 @@ -1,49 +0,0 @@ -% bup-rm(1) Bup %BUP_VERSION% -% Rob Browning -% %BUP_DATE% - -# NAME - -bup-rm - remove references to archive content - -# SYNOPSIS - -bup rm [-#|\--verbose] <*branch*|*save*...> - -# DESCRIPTION - -`bup rm` removes the indicated *branch*es (backup sets) and *save*s. -By itself, this command does not delete any actual data (nor recover -any storage space), but it may make it very difficult or impossible to -refer to the deleted items, unless there are other references to them -(e.g. tags). - -A subsequent garbage collection, either by a `bup gc`, or by a normal -`git gc`, may permanently delete data that is no longer reachable from -the remaining branches or tags, and reclaim the related storage space. - -WARNING: This is one of the few bup commands that modifies your -archive in intentionally destructive ways. - -# OPTIONS - --v, \--verbose -: increase verbosity (can be used more than once). - --*#*, \--compress=*#* -: set the compression level to # (a value from 0-9, where - 9 is the highest and 0 is no compression). The default - is 6. Note that `bup rm` may only write new commits. - -# EXAMPLES - - # Delete the backup set (branch) foo and a save in bar. - $ bup rm /foo /bar/2014-10-21-214720 - -# SEE ALSO - -`bup-gc`(1), `bup-save`(1), `bup-fsck`(1), and `bup-tag`(1) - -# BUP - -Part of the `bup`(1) suite. 
diff -Nru bup-0.33.2/Documentation/bup-save.1.md bup-0.33.7/Documentation/bup-save.1.md --- bup-0.33.2/Documentation/bup-save.1.md 1970-01-01 00:00:00.000000000 +0000 +++ bup-0.33.7/Documentation/bup-save.1.md 2025-01-08 20:04:11.000000000 +0000 @@ -0,0 +1,179 @@ +% bup-save(1) Bup %BUP_VERSION% +% Avery Pennarun +% %BUP_DATE% + +# NAME + +bup-save - create a new bup backup set + +# SYNOPSIS + +bup save [-r *host*:*path*] \<-t|-c|-n *name*\> [-#] [-f *indexfile*] +[-v] [-q] [\--smaller=*maxsize*] \<paths...\>; + +# DESCRIPTION + +`bup save` saves the contents of the given files or paths +into a new backup set and optionally names that backup set. + +Note that in order to refer to your backup set later (i.e. for +restoration), you must either specify `--name` (the normal case), or +record the tree or commit id printed by `--tree` or `--commit`. + +Before trying to save files using `bup save`, you should +first update the index using `bup index`. The reasons +for separating the two steps are described in the man page +for `bup-index`(1). + +By default, metadata will be saved for every path, and the metadata +for any unindexed parent directories of indexed paths will be taken +directly from the filesystem. However, if `--strip`, `--strip-path`, +or `--graft` is specified, metadata will not be saved for the root +directory (*/*). See `bup-restore`(1) for more information about the +handling of metadata. + +# OPTIONS + +-r, \--remote=*host*:*path* +: save the backup set to the given remote server. If + *path* is omitted, uses the default path on the remote + server (you still need to include the ':'). The connection to the + remote server is made with SSH. If you'd like to specify which port, user + or private key to use for the SSH connection, we recommend you use the + `~/.ssh/config` file. + +-t, \--tree +: after creating the backup set, print out the git tree + id of the resulting backup.
+ +-c, \--commit +: after creating the backup set, print out the git commit + id of the resulting backup. + +-n, \--name=*name* +: after creating the backup set, create a git branch + named *name* so that the backup can be accessed using + that name. If *name* already exists, the new backup + will be considered a descendant of the old *name*. + (Thus, you can continually create new backup sets with + the same name, and later view the history of that + backup set to see how files have changed over time.) + +-d, \--date=*date* +: specify the date of the backup, in seconds since the epoch, instead + of the current time. + +-f, \--indexfile=*indexfile* +: use a different index filename instead of + `$BUP_DIR/bupindex`. + +-v, \--verbose +: increase verbosity (can be used more than once). With + one -v, prints every directory name as it gets backed up. With + two -v, also prints every filename. + +-q, \--quiet +: disable progress messages. + +\--smaller=*maxsize* +: don't back up files >= *maxsize* bytes. You can use + this to run frequent incremental backups of your small + files, which can usually be backed up quickly, and skip + over large ones (like virtual machine images) which + take longer. Then you can back up the large files + less frequently. Use a suffix like k, M, or G to + specify multiples of 1024, 1024\*1024, 1024\*1024\*1024 + respectively. + +\--bwlimit=*bytes/sec* +: don't transmit more than *bytes/sec* bytes per second + to the server. This is good for making your backups + not suck up all your network bandwidth. Use a suffix + like k, M, or G to specify multiples of 1024, + 1024\*1024, 1024\*1024\*1024 respectively. + +\--strip +: strips the path that is given from all files and directories. + + A directory */root/chroot/etc* saved with "bup save -n chroot + \--strip /root/chroot" would be saved as */etc*. Note that + currently, metadata will not be saved for the root directory (*/*) + when this option is specified. 
+ +\--strip-path=*path-prefix* +: strips the given path prefix *path-prefix* from all + files and directories. + + A directory */root/chroot/webserver/etc* saved with "bup save -n + webserver \--strip-path=/root/chroot /root/chroot/webserver/etc" + would be saved as */webserver/etc*. Note that currently, metadata + will not be saved for the root directory (*/*) when this option is + specified. + +\--graft=*old_path*=*new_path* +: a graft point *old_path*=*new_path* (can be used more than + once). + + A directory */root/chroot/a/etc* saved with "bup save -n chroot + \--graft /root/chroot/a=/chroot/a" would be saved as + */chroot/a/etc*. Note that currently, metadata will not be saved + for the root directory (*/*) when this option is specified. + +-*#*, \--compress=*#* +: set the compression level to # (a value from 0-9, where + 9 is the highest and 0 is no compression). The default + is 1 (fast, loose compression) + + +# EXAMPLES + $ bup index -ux /etc + Indexing: 1981, done. + + $ bup save -r myserver: -n my-pc-backup --bwlimit=50k /etc + Reading index: 1981, done. + Saving: 100.00% (998/998k, 1981/1981 files), done. + + + + $ ls /home/joe/chroot/httpd + bin var + + $ bup index -ux /home/joe/chroot/httpd + Indexing: 1337, done. + + $ bup save --strip -n joes-httpd-chroot /home/joe/chroot/httpd + Reading index: 1337, done. + Saving: 100.00% (998/998k, 1337/1337 files), done. + + $ bup ls joes-httpd-chroot/latest/ + bin/ + var/ + + + $ bup save --strip-path=/home/joe/chroot -n joes-chroot \ + /home/joe/chroot/httpd + Reading index: 1337, done. + Saving: 100.00% (998/998k, 1337/1337 files), done. + + $ bup ls joes-chroot/latest/ + httpd/ + + + $ bup save --graft /home/joe/chroot/httpd=/http-chroot \ + -n joe + /home/joe/chroot/httpd + Reading index: 1337, done. + Saving: 100.00% (998/998k, 1337/1337 files), done. 
+ + $ bup ls joe/latest/ + http-chroot/ + + +# SEE ALSO + +`bup-index`(1), `bup-split`(1), `bup-on`(1), +`bup-restore`(1), `ssh_config`(5) + +# BUP + +Part of the `bup`(1) suite. diff -Nru bup-0.33.2/Documentation/bup-save.md bup-0.33.7/Documentation/bup-save.md --- bup-0.33.2/Documentation/bup-save.md 2023-07-01 20:08:43.000000000 +0000 +++ bup-0.33.7/Documentation/bup-save.md 1970-01-01 00:00:00.000000000 +0000 @@ -1,179 +0,0 @@ -% bup-save(1) Bup %BUP_VERSION% -% Avery Pennarun -% %BUP_DATE% - -# NAME - -bup-save - create a new bup backup set - -# SYNOPSIS - -bup save [-r *host*:*path*] \<-t|-c|-n *name*\> [-#] [-f *indexfile*] -[-v] [-q] [\--smaller=*maxsize*] \; - -# DESCRIPTION - -`bup save` saves the contents of the given files or paths -into a new backup set and optionally names that backup set. - -Note that in order to refer to your backup set later (i.e. for -restoration), you must either specify `--name` (the normal case), or -record the tree or commit id printed by `--tree` or `--commit`. - -Before trying to save files using `bup save`, you should -first update the index using `bup index`. The reasons -for separating the two steps are described in the man page -for `bup-index`(1). - -By default, metadata will be saved for every path, and the metadata -for any unindexed parent directories of indexed paths will be taken -directly from the filesystem. However, if `--strip`, `--strip-path`, -or `--graft` is specified, metadata will not be saved for the root -directory (*/*). See `bup-restore`(1) for more information about the -handling of metadata. - -# OPTIONS - --r, \--remote=*host*:*path* -: save the backup set to the given remote server. If - *path* is omitted, uses the default path on the remote - server (you still need to include the ':'). The connection to the - remote server is made with SSH. If you'd like to specify which port, user - or private key to use for the SSH connection, we recommend you use the - `~/.ssh/config` file. 
- --t, \--tree -: after creating the backup set, print out the git tree - id of the resulting backup. - --c, \--commit -: after creating the backup set, print out the git commit - id of the resulting backup. - --n, \--name=*name* -: after creating the backup set, create a git branch - named *name* so that the backup can be accessed using - that name. If *name* already exists, the new backup - will be considered a descendant of the old *name*. - (Thus, you can continually create new backup sets with - the same name, and later view the history of that - backup set to see how files have changed over time.) - --d, \--date=*date* -: specify the date of the backup, in seconds since the epoch, instead - of the current time. - --f, \--indexfile=*indexfile* -: use a different index filename instead of - `$BUP_DIR/bupindex`. - --v, \--verbose -: increase verbosity (can be used more than once). With - one -v, prints every directory name as it gets backed up. With - two -v, also prints every filename. - --q, \--quiet -: disable progress messages. - -\--smaller=*maxsize* -: don't back up files >= *maxsize* bytes. You can use - this to run frequent incremental backups of your small - files, which can usually be backed up quickly, and skip - over large ones (like virtual machine images) which - take longer. Then you can back up the large files - less frequently. Use a suffix like k, M, or G to - specify multiples of 1024, 1024\*1024, 1024\*1024\*1024 - respectively. - -\--bwlimit=*bytes/sec* -: don't transmit more than *bytes/sec* bytes per second - to the server. This is good for making your backups - not suck up all your network bandwidth. Use a suffix - like k, M, or G to specify multiples of 1024, - 1024\*1024, 1024\*1024\*1024 respectively. - -\--strip -: strips the path that is given from all files and directories. - - A directory */root/chroot/etc* saved with "bup save -n chroot - \--strip /root/chroot" would be saved as */etc*. 
Note that - currently, metadata will not be saved for the root directory (*/*) - when this option is specified. - -\--strip-path=*path-prefix* -: strips the given path prefix *path-prefix* from all - files and directories. - - A directory */root/chroot/webserver/etc* saved with "bup save -n - webserver \--strip-path=/root/chroot /root/chroot/webserver/etc" - would be saved as */webserver/etc*. Note that currently, metadata - will not be saved for the root directory (*/*) when this option is - specified. - -\--graft=*old_path*=*new_path* -: a graft point *old_path*=*new_path* (can be used more than - once). - - A directory */root/chroot/a/etc* saved with "bup save -n chroot - \--graft /root/chroot/a=/chroot/a" would be saved as - */chroot/a/etc*. Note that currently, metadata will not be saved - for the root directory (*/*) when this option is specified. - --*#*, \--compress=*#* -: set the compression level to # (a value from 0-9, where - 9 is the highest and 0 is no compression). The default - is 1 (fast, loose compression) - - -# EXAMPLES - $ bup index -ux /etc - Indexing: 1981, done. - - $ bup save -r myserver: -n my-pc-backup --bwlimit=50k /etc - Reading index: 1981, done. - Saving: 100.00% (998/998k, 1981/1981 files), done. - - - - $ ls /home/joe/chroot/httpd - bin var - - $ bup index -ux /home/joe/chroot/httpd - Indexing: 1337, done. - - $ bup save --strip -n joes-httpd-chroot /home/joe/chroot/httpd - Reading index: 1337, done. - Saving: 100.00% (998/998k, 1337/1337 files), done. - - $ bup ls joes-httpd-chroot/latest/ - bin/ - var/ - - - $ bup save --strip-path=/home/joe/chroot -n joes-chroot \ - /home/joe/chroot/httpd - Reading index: 1337, done. - Saving: 100.00% (998/998k, 1337/1337 files), done. - - $ bup ls joes-chroot/latest/ - httpd/ - - - $ bup save --graft /home/joe/chroot/httpd=/http-chroot \ - -n joe - /home/joe/chroot/httpd - Reading index: 1337, done. - Saving: 100.00% (998/998k, 1337/1337 files), done. 
- - $ bup ls joe/latest/ - http-chroot/ - - -# SEE ALSO - -`bup-index`(1), `bup-split`(1), `bup-on`(1), -`bup-restore`(1), `ssh_config`(5) - -# BUP - -Part of the `bup`(1) suite. diff -Nru bup-0.33.2/Documentation/bup-server.1.md bup-0.33.7/Documentation/bup-server.1.md --- bup-0.33.2/Documentation/bup-server.1.md 1970-01-01 00:00:00.000000000 +0000 +++ bup-0.33.7/Documentation/bup-server.1.md 2025-01-08 20:04:11.000000000 +0000 @@ -0,0 +1,52 @@ +% bup-server(1) Bup %BUP_VERSION% +% Avery Pennarun +% %BUP_DATE% + +# NAME + +bup-server - the server side of the bup client-server relationship + +# SYNOPSIS + +bup server + +# DESCRIPTION + +`bup server` is the server side of a remote bup session. +If you use `bup-split`(1) or `bup-save`(1) with the `-r` +option, they will ssh to the remote server and run `bup +server` to receive the transmitted objects. + +There is normally no reason to run `bup server` yourself. + +# MODES + +smart +: In this mode, the server checks each incoming object + against the idx files in its repository. If any object + already exists, it tells the client about the idx file + it was found in, allowing the client to download that + idx and avoid sending duplicate data. This is + `bup-server`'s default mode. + +dumb +: In this mode, the server will not check its local index + before writing an object. To avoid writing duplicate + objects, the server will tell the client to download all + of its `.idx` files at the start of the session. This + mode is useful on low powered server hardware (ie + router/slow NAS). + +# FILES + +$BUP_DIR/bup-dumb-server +: Activate dumb server mode, as discussed above. This file is not created by + default in new repositories. + +# SEE ALSO + +`bup-save`(1), `bup-split`(1) + +# BUP + +Part of the `bup`(1) suite. 
diff -Nru bup-0.33.2/Documentation/bup-server.md bup-0.33.7/Documentation/bup-server.md --- bup-0.33.2/Documentation/bup-server.md 2023-07-01 20:08:43.000000000 +0000 +++ bup-0.33.7/Documentation/bup-server.md 1970-01-01 00:00:00.000000000 +0000 @@ -1,52 +0,0 @@ -% bup-server(1) Bup %BUP_VERSION% -% Avery Pennarun -% %BUP_DATE% - -# NAME - -bup-server - the server side of the bup client-server relationship - -# SYNOPSIS - -bup server - -# DESCRIPTION - -`bup server` is the server side of a remote bup session. -If you use `bup-split`(1) or `bup-save`(1) with the `-r` -option, they will ssh to the remote server and run `bup -server` to receive the transmitted objects. - -There is normally no reason to run `bup server` yourself. - -# MODES - -smart -: In this mode, the server checks each incoming object - against the idx files in its repository. If any object - already exists, it tells the client about the idx file - it was found in, allowing the client to download that - idx and avoid sending duplicate data. This is - `bup-server`'s default mode. - -dumb -: In this mode, the server will not check its local index - before writing an object. To avoid writing duplicate - objects, the server will tell the client to download all - of its `.idx` files at the start of the session. This - mode is useful on low powered server hardware (ie - router/slow NAS). - -# FILES - -$BUP_DIR/bup-dumb-server -: Activate dumb server mode, as discussed above. This file is not created by - default in new repositories. - -# SEE ALSO - -`bup-save`(1), `bup-split`(1) - -# BUP - -Part of the `bup`(1) suite. 
diff -Nru bup-0.33.2/Documentation/bup-split.1.md bup-0.33.7/Documentation/bup-split.1.md --- bup-0.33.2/Documentation/bup-split.1.md 1970-01-01 00:00:00.000000000 +0000 +++ bup-0.33.7/Documentation/bup-split.1.md 2025-01-08 20:04:11.000000000 +0000 @@ -0,0 +1,179 @@ +% bup-split(1) Bup %BUP_VERSION% +% Avery Pennarun +% %BUP_DATE% + +# NAME + +bup-split - save individual files to bup backup sets + +# SYNOPSIS + +bup split \[-t\] \[-c\] \[-n *name*\] COMMON\_OPTIONS + +bup split -b COMMON\_OPTIONS + +bup split --copy COMMON\_OPTIONS + +bup split --noop \[-t|-b\] COMMON\_OPTIONS + +COMMON\_OPTIONS + ~ \[-r *host*:*path*\] \[-v\] \[-q\] \[-d *seconds-since-epoch*\] \[\--bench\] + \[\--max-pack-size=*bytes*\] \[-#\] \[\--bwlimit=*bytes*\] + \[\--max-pack-objects=*n*\] \[\--fanout=*count*\] + \[\--keep-boundaries\] \[\--git-ids | filenames...\] + +# DESCRIPTION + +`bup split` concatenates the contents of the given files +(or if no filenames are given, reads from stdin), splits +the content into chunks of around 8k using a rolling +checksum algorithm, and saves the chunks into a bup +repository. Chunks which have previously been stored are +not stored again (ie. they are 'deduplicated'). + +Because of the way the rolling checksum works, chunks +tend to be very stable across changes to a given file, +including adding, deleting, and changing bytes. + +For example, if you use `bup split` to back up an XML dump +of a database, and the XML file changes slightly from one +run to the next, nearly all the data will still be +deduplicated and the size of each backup after the first +will typically be quite small. + +Another technique is to pipe the output of the `tar`(1) or +`cpio`(1) programs to `bup split`. When individual files +in the tarball change slightly or are added or removed, bup +still processes the remainder of the tarball efficiently. +(Note that `bup save` is usually a more efficient way to +accomplish this, however.) + +To get the data back, use `bup-join`(1). 
+ +# MODES + +These options select the primary behavior of the command, with -n +being the most likely choice. + +-n, \--name=*name* +: after creating the dataset, create a git branch + named *name* so that it can be accessed using + that name. If *name* already exists, the new dataset + will be considered a descendant of the old *name*. + (Thus, you can continually create new datasets with + the same name, and later view the history of that + dataset to see how it has changed over time.) The original data + will also be available as a top-level file named "data" in the VFS, + accessible via `bup fuse`, `bup ftp`, etc. + +-t, \--tree +: output the git tree id of the resulting dataset. + +-c, \--commit +: output the git commit id of the resulting dataset. + +-b, \--blobs +: output a series of git blob ids that correspond to the chunks in + the dataset. Incompatible with -n, -t, and -c. + +\--noop +: read the data and split it into blocks based on the "bupsplit" + rolling checksum algorithm, but don't store anything in the repo. + Can be combined with -b or -t to compute (but not store) the git + blobs or tree ids for the dataset. This is mostly useful for + benchmarking and validating the bupsplit algorithm. Incompatible + with -n and -c. + +\--copy +: like `--noop`, but also write the data to stdout. This can be + useful for benchmarking the speed of read+bupsplit+write for large + amounts of data. Incompatible with -n, -t, -c, and -b. + +# OPTIONS + +-r, \--remote=*host*:*path* +: save the backup set to the given remote server. If *path* is + omitted, uses the default path on the remote server (you still + need to include the ':'). The connection to the remote server is + made with SSH. If you'd like to specify which port, user or + private key to use for the SSH connection, we recommend you use + the `~/.ssh/config` file. Even though the destination is remote, + a local bup repository is still required. 
+ +-d, \--date=*seconds-since-epoch* +: specify the date inscribed in the commit (seconds since 1970-01-01). + +-q, \--quiet +: disable progress messages. + +-v, \--verbose +: increase verbosity (can be used more than once). + +\--git-ids +: stdin is a list of git object ids instead of raw data. + `bup split` will read the contents of each named git + object (if it exists in the bup repository) and split + it. This might be useful for converting a git + repository with large binary files to use bup-style + hashsplitting instead. This option is probably most + useful when combined with `--keep-boundaries`. + +\--keep-boundaries +: if multiple filenames are given on the command line, + they are normally concatenated together as if the + content all came from a single file. That is, the + set of blobs/trees produced is identical to what it + would have been if there had been a single input file. + However, if you use `--keep-boundaries`, each file is + split separately. You still only get a single tree or + commit or series of blobs, but each blob comes from + only one of the files; the end of one of the input + files always ends a blob. + +\--bench +: print benchmark timings to stderr. + +\--max-pack-size=*bytes* +: never create git packfiles larger than the given number + of bytes. Default is 1 billion bytes. Usually there + is no reason to change this. + +\--max-pack-objects=*numobjs* +: never create git packfiles with more than the given + number of objects. Default is 200 thousand objects. + Usually there is no reason to change this. + +\--fanout=*numobjs* +: when splitting very large files, try and keep the number + of elements in trees to an average of *numobjs*. + +\--bwlimit=*bytes/sec* +: don't transmit more than *bytes/sec* bytes per second + to the server. This is good for making your backups + not suck up all your network bandwidth. Use a suffix + like k, M, or G to specify multiples of 1024, + 1024\*1024, 1024\*1024\*1024 respectively. 
+ +-*#*, \--compress=*#* +: set the compression level to # (a value from 0-9, where + 9 is the highest and 0 is no compression). The default + is 1 (fast, loose compression) + + +# EXAMPLES + + $ tar -cf - /etc | bup split -r myserver: -n mybackup-tar + tar: Removing leading /' from member names + Indexing objects: 100% (196/196), done. + + $ bup join -r myserver: mybackup-tar | tar -tf - | wc -l + 1961 + + +# SEE ALSO + +`bup-join`(1), `bup-index`(1), `bup-save`(1), `bup-on`(1), `ssh_config`(5) + +# BUP + +Part of the `bup`(1) suite. diff -Nru bup-0.33.2/Documentation/bup-split.md bup-0.33.7/Documentation/bup-split.md --- bup-0.33.2/Documentation/bup-split.md 2023-07-01 20:08:43.000000000 +0000 +++ bup-0.33.7/Documentation/bup-split.md 1970-01-01 00:00:00.000000000 +0000 @@ -1,179 +0,0 @@ -% bup-split(1) Bup %BUP_VERSION% -% Avery Pennarun -% %BUP_DATE% - -# NAME - -bup-split - save individual files to bup backup sets - -# SYNOPSIS - -bup split \[-t\] \[-c\] \[-n *name*\] COMMON\_OPTIONS - -bup split -b COMMON\_OPTIONS - -bup split --copy COMMON\_OPTIONS - -bup split --noop \[-t|-b\] COMMON\_OPTIONS - -COMMON\_OPTIONS - ~ \[-r *host*:*path*\] \[-v\] \[-q\] \[-d *seconds-since-epoch*\] \[\--bench\] - \[\--max-pack-size=*bytes*\] \[-#\] \[\--bwlimit=*bytes*\] - \[\--max-pack-objects=*n*\] \[\--fanout=*count*\] - \[\--keep-boundaries\] \[\--git-ids | filenames...\] - -# DESCRIPTION - -`bup split` concatenates the contents of the given files -(or if no filenames are given, reads from stdin), splits -the content into chunks of around 8k using a rolling -checksum algorithm, and saves the chunks into a bup -repository. Chunks which have previously been stored are -not stored again (ie. they are 'deduplicated'). - -Because of the way the rolling checksum works, chunks -tend to be very stable across changes to a given file, -including adding, deleting, and changing bytes. 
- -For example, if you use `bup split` to back up an XML dump -of a database, and the XML file changes slightly from one -run to the next, nearly all the data will still be -deduplicated and the size of each backup after the first -will typically be quite small. - -Another technique is to pipe the output of the `tar`(1) or -`cpio`(1) programs to `bup split`. When individual files -in the tarball change slightly or are added or removed, bup -still processes the remainder of the tarball efficiently. -(Note that `bup save` is usually a more efficient way to -accomplish this, however.) - -To get the data back, use `bup-join`(1). - -# MODES - -These options select the primary behavior of the command, with -n -being the most likely choice. - --n, \--name=*name* -: after creating the dataset, create a git branch - named *name* so that it can be accessed using - that name. If *name* already exists, the new dataset - will be considered a descendant of the old *name*. - (Thus, you can continually create new datasets with - the same name, and later view the history of that - dataset to see how it has changed over time.) The original data - will also be available as a top-level file named "data" in the VFS, - accessible via `bup fuse`, `bup ftp`, etc. - --t, \--tree -: output the git tree id of the resulting dataset. - --c, \--commit -: output the git commit id of the resulting dataset. - --b, \--blobs -: output a series of git blob ids that correspond to the chunks in - the dataset. Incompatible with -n, -t, and -c. - -\--noop -: read the data and split it into blocks based on the "bupsplit" - rolling checksum algorithm, but don't store anything in the repo. - Can be combined with -b or -t to compute (but not store) the git - blobs or tree ids for the dataset. This is mostly useful for - benchmarking and validating the bupsplit algorithm. Incompatible - with -n and -c. - -\--copy -: like `--noop`, but also write the data to stdout. 
This can be - useful for benchmarking the speed of read+bupsplit+write for large - amounts of data. Incompatible with -n, -t, -c, and -b. - -# OPTIONS - --r, \--remote=*host*:*path* -: save the backup set to the given remote server. If *path* is - omitted, uses the default path on the remote server (you still - need to include the ':'). The connection to the remote server is - made with SSH. If you'd like to specify which port, user or - private key to use for the SSH connection, we recommend you use - the `~/.ssh/config` file. Even though the destination is remote, - a local bup repository is still required. - --d, \--date=*seconds-since-epoch* -: specify the date inscribed in the commit (seconds since 1970-01-01). - --q, \--quiet -: disable progress messages. - --v, \--verbose -: increase verbosity (can be used more than once). - -\--git-ids -: stdin is a list of git object ids instead of raw data. - `bup split` will read the contents of each named git - object (if it exists in the bup repository) and split - it. This might be useful for converting a git - repository with large binary files to use bup-style - hashsplitting instead. This option is probably most - useful when combined with `--keep-boundaries`. - -\--keep-boundaries -: if multiple filenames are given on the command line, - they are normally concatenated together as if the - content all came from a single file. That is, the - set of blobs/trees produced is identical to what it - would have been if there had been a single input file. - However, if you use `--keep-boundaries`, each file is - split separately. You still only get a single tree or - commit or series of blobs, but each blob comes from - only one of the files; the end of one of the input - files always ends a blob. - -\--bench -: print benchmark timings to stderr. - -\--max-pack-size=*bytes* -: never create git packfiles larger than the given number - of bytes. Default is 1 billion bytes. Usually there - is no reason to change this. 
- -\--max-pack-objects=*numobjs* -: never create git packfiles with more than the given - number of objects. Default is 200 thousand objects. - Usually there is no reason to change this. - -\--fanout=*numobjs* -: when splitting very large files, try and keep the number - of elements in trees to an average of *numobjs*. - -\--bwlimit=*bytes/sec* -: don't transmit more than *bytes/sec* bytes per second - to the server. This is good for making your backups - not suck up all your network bandwidth. Use a suffix - like k, M, or G to specify multiples of 1024, - 1024\*1024, 1024\*1024\*1024 respectively. - --*#*, \--compress=*#* -: set the compression level to # (a value from 0-9, where - 9 is the highest and 0 is no compression). The default - is 1 (fast, loose compression) - - -# EXAMPLES - - $ tar -cf - /etc | bup split -r myserver: -n mybackup-tar - tar: Removing leading /' from member names - Indexing objects: 100% (196/196), done. - - $ bup join -r myserver: mybackup-tar | tar -tf - | wc -l - 1961 - - -# SEE ALSO - -`bup-join`(1), `bup-index`(1), `bup-save`(1), `bup-on`(1), `ssh_config`(5) - -# BUP - -Part of the `bup`(1) suite. diff -Nru bup-0.33.2/Documentation/bup-tag.1.md bup-0.33.7/Documentation/bup-tag.1.md --- bup-0.33.2/Documentation/bup-tag.1.md 1970-01-01 00:00:00.000000000 +0000 +++ bup-0.33.7/Documentation/bup-tag.1.md 2025-01-08 20:04:11.000000000 +0000 @@ -0,0 +1,61 @@ +% bup-tag(1) Bup %BUP_VERSION% +% Gabriel Filion +% %BUP_DATE% + +# NAME + +bup-tag - tag a commit in the bup repository + +# SYNOPSIS + +bup tag + +bup tag [-f] \<tag name\> \<commit\> + +bup tag -d [-f] \<tag name\> + +# DESCRIPTION + +`bup tag` lists, creates or deletes a tag in the bup repository. + +A tag is an easy way to retrieve a specific commit. It can be used to mark a +specific backup for easier retrieval later. + +When called without any arguments, the command lists all tags that can +be found in the repository.
When called with a tag name and a commit ID +or ref name, it creates a new tag with the given name, if it doesn't +already exist, that points to the commit given in the second argument. When +called with '-d' and a tag name, it removes the given tag, if it exists. + +bup exposes the contents of backups with current tags, via any command that +lists or shows backups. They can be found under the /.tag directory. For +example, the 'ftp' command will show the tag named 'tag1' under /.tag/tag1. + +# OPTIONS + +-d, \--delete +: delete a tag + +-f, \--force +: Overwrite the named tag even if it already exists. With -f, don't + report a missing tag as an error. + +# EXAMPLES + + $ bup tag new-puppet-version hostx-backup + + $ bup tag + new-puppet-version + + $ bup ftp "ls /.tag/new-puppet-version" + files.. + + $ bup tag -d new-puppet-version + +# SEE ALSO + +`bup-save`(1), `bup-split`(1), `bup-ftp`(1), `bup-fuse`(1), `bup-web`(1) + +# BUP + +Part of the `bup`(1) suite. diff -Nru bup-0.33.2/Documentation/bup-tag.md bup-0.33.7/Documentation/bup-tag.md --- bup-0.33.2/Documentation/bup-tag.md 2023-07-01 20:08:43.000000000 +0000 +++ bup-0.33.7/Documentation/bup-tag.md 1970-01-01 00:00:00.000000000 +0000 @@ -1,61 +0,0 @@ -% bup-tag(1) Bup %BUP_VERSION% -% Gabriel Filion -% %BUP_DATE% - -# NAME - -bup-tag - tag a commit in the bup repository - -# SYNOPSIS - -bup tag - -bup tag [-f] \<tag name\> \<commit\> - -bup tag -d [-f] \<tag name\> - -# DESCRIPTION - -`bup tag` lists, creates or deletes a tag in the bup repository. - -A tag is an easy way to retrieve a specific commit. It can be used to mark a -specific backup for easier retrieval later. - -When called without any arguments, the command lists all tags that can -be found in the repository. When called with a tag name and a commit ID -or ref name, it creates a new tag with the given name, if it doesn't -already exist, that points to the commit given in the second argument. When -called with '-d' and a tag name, it removes the given tag, if it exists.
- -bup exposes the contents of backups with current tags, via any command that -lists or shows backups. They can be found under the /.tag directory. For -example, the 'ftp' command will show the tag named 'tag1' under /.tag/tag1. - -# OPTIONS - --d, \--delete -: delete a tag - --f, \--force -: Overwrite the named tag even if it already exists. With -f, don't - report a missing tag as an error. - -# EXAMPLES - - $ bup tag new-puppet-version hostx-backup - - $ bup tag - new-puppet-version - - $ bup ftp "ls /.tag/new-puppet-version" - files.. - - $ bup tag -d new-puppet-version - -# SEE ALSO - -`bup-save`(1), `bup-split`(1), `bup-ftp`(1), `bup-fuse`(1), `bup-web`(1) - -# BUP - -Part of the `bup`(1) suite. diff -Nru bup-0.33.2/Documentation/bup-tick.1.md bup-0.33.7/Documentation/bup-tick.1.md --- bup-0.33.2/Documentation/bup-tick.1.md 1970-01-01 00:00:00.000000000 +0000 +++ bup-0.33.7/Documentation/bup-tick.1.md 2025-01-08 20:04:11.000000000 +0000 @@ -0,0 +1,32 @@ +% bup-tick(1) Bup %BUP_VERSION% +% Avery Pennarun +% %BUP_DATE% + +# NAME + +bup-tick - wait for up to one second + +# SYNOPSIS + +bup tick + +# DESCRIPTION + +`bup tick` waits until `time`(2) returns a different value +than it originally did. Since time() has a granularity of +one second, this can cause a delay of up to one second. + +This program is useful for writing tests that need to +ensure a file date will be seen as modified. It is +slightly better than `sleep`(1) since it sometimes waits +for less than one second. + +# EXAMPLES + + $ date; bup tick; date + Sat Feb 6 16:59:58 EST 2010 + Sat Feb 6 16:59:59 EST 2010 + +# BUP + +Part of the `bup`(1) suite. 
diff -Nru bup-0.33.2/Documentation/bup-tick.md bup-0.33.7/Documentation/bup-tick.md --- bup-0.33.2/Documentation/bup-tick.md 2023-07-01 20:08:43.000000000 +0000 +++ bup-0.33.7/Documentation/bup-tick.md 1970-01-01 00:00:00.000000000 +0000 @@ -1,32 +0,0 @@ -% bup-tick(1) Bup %BUP_VERSION% -% Avery Pennarun -% %BUP_DATE% - -# NAME - -bup-tick - wait for up to one second - -# SYNOPSIS - -bup tick - -# DESCRIPTION - -`bup tick` waits until `time`(2) returns a different value -than it originally did. Since time() has a granularity of -one second, this can cause a delay of up to one second. - -This program is useful for writing tests that need to -ensure a file date will be seen as modified. It is -slightly better than `sleep`(1) since it sometimes waits -for less than one second. - -# EXAMPLES - - $ date; bup tick; date - Sat Feb 6 16:59:58 EST 2010 - Sat Feb 6 16:59:59 EST 2010 - -# BUP - -Part of the `bup`(1) suite. diff -Nru bup-0.33.2/Documentation/bup-validate-object-links.1.md bup-0.33.7/Documentation/bup-validate-object-links.1.md --- bup-0.33.2/Documentation/bup-validate-object-links.1.md 1970-01-01 00:00:00.000000000 +0000 +++ bup-0.33.7/Documentation/bup-validate-object-links.1.md 2025-01-08 20:04:11.000000000 +0000 @@ -0,0 +1,38 @@ +% bup-validate-object-links(1) Bup %BUP_VERSION% +% Rob Browning +% %BUP_DATE% + +# NAME + +bup-validate-object-links - scan the repository for broken object links + +# SYNOPSIS + +bup validate-object-links + +# DESCRIPTION + +`bup validate-object-links` scans the objects in the repository for +and reports any "broken links" it finds, i.e. any links from a tree or +commit in the repository to an object that doesn't exist. Currently, +it doesn't include "loose objects" (those not in packfiles -- which +git may create, but bup doesn't), and it can't handle tag objects +(which bup also doesn't create). 
+ +Whenever a broken link (missing reference) is found, an ASCII encoded +line formatted like this will be printed to standard output: + + no MISSING_HASH for PARENT_HASH + +# EXIT STATUS + +The exit status will be 1 if any broken links are found, 0 if none are +found, and some other positive integer for other failures. + +# SEE ALSO + +`bup-fsck`(1) + +# BUP + +Part of the `bup`(1) suite. diff -Nru bup-0.33.2/Documentation/bup-validate-ref-links.1.md bup-0.33.7/Documentation/bup-validate-ref-links.1.md --- bup-0.33.2/Documentation/bup-validate-ref-links.1.md 1970-01-01 00:00:00.000000000 +0000 +++ bup-0.33.7/Documentation/bup-validate-ref-links.1.md 2025-01-08 20:04:11.000000000 +0000 @@ -0,0 +1,49 @@ +% bup-validate-ref-links(1) Bup %BUP_VERSION% +% Rob Browning +% %BUP_DATE% + +# NAME + +bup-validate-ref-links - check repository refs for links to missing objects + +# SYNOPSIS + +bup validate-ref-links [*ref*...] + +# DESCRIPTION + +`bup validate-ref-links` checks repository references (e.g. saves) for +commits or subtrees that refer to missing objects and reports the +paths to any found. If no *ref*s are provided, checks all refs, +otherwise only checks those specified. + +This command can also be used to validate a save more quickly than +attempting a `restore` or `join`ing the save to /dev/null, and much +more quickly for multiple related saves, though it only checks for the +existence of the leaf (blob) data, it does not attempt to read that +data. + +At the moment, the broken path information is only logged to standard +error, and is not well specified (i.e. suitable for inspection, but +not parsing). + +Also note that the current implementation may not report all paths to +a given missing object because it only examines each unique tree or +commit object once, no matter how often it appears within the refs +being examined. 
This means that in order to find every broken save, +you would need to run the command separately for each ref, which is +likely to be much more expensive than a combined run because it can't +skip subtrees that it has encountered before. + +# EXIT STATUS + +The exit status will be 1 if any broken links are found, 0 if none are +found, and some other positive integer for other failures. + +# SEE ALSO + +`bup-fsck`(1), `bup-join`(1), `bup-restore`(1) + +# BUP + +Part of the `bup`(1) suite. diff -Nru bup-0.33.2/Documentation/bup-web.1.md bup-0.33.7/Documentation/bup-web.1.md --- bup-0.33.2/Documentation/bup-web.1.md 1970-01-01 00:00:00.000000000 +0000 +++ bup-0.33.7/Documentation/bup-web.1.md 2025-01-08 20:04:11.000000000 +0000 @@ -0,0 +1,76 @@ +% bup-ftp(1) Bup %BUP_VERSION% +% Joe Beda +% %BUP_DATE% + +# NAME + +bup-web - Start web server to browse bup repositiory + +# SYNOPSIS + +bup web [[hostname]:port] + +bup web unix://path + +# DESCRIPTION + +`bup web` starts a web server that can browse bup repositories. The file +hierarchy is the same as that shown by `bup-fuse`(1), `bup-ls`(1) and +`bup-ftp`(1). + +`hostname` and `port` default to 127.0.0.1 and 8080, respectively, and hence +`bup web` will only offer up the web server to locally running clients. If +you'd like to expose the web server to anyone on your network (dangerous!) you +can omit the bind address to bind to all available interfaces: `:8080`. + +When `unix://path` is specified, the server will listen on the +filesystem socket at `path` rather than a network socket. + +A `SIGTERM` signal may be sent to the server to request an orderly +shutdown. + +# OPTIONS + +\--human-readable +: display human readable file sizes (i.e. 3.9K, 4.7M) + +\--browser +: open the site in the default browser + +# EXAMPLES + + $ bup web + Serving HTTP on 127.0.0.1:8080... + ^C + Interrupted. + + $ bup web :8080 + Serving HTTP on 0.0.0.0:8080... + ^C + Interrupted. 
+ + $ bup web unix://socket & + Serving HTTP on filesystem socket 'socket' + $ curl --unix-socket ./socket http://localhost/ + $ fg + bup web unix://socket + ^C + Interrupted. + + $ bup web & + [1] 30980 + Serving HTTP on 127.0.0.1:8080... + $ kill -s TERM 30980 + Shutdown requested + $ wait 30980 + $ echo $? + 0 + +# SEE ALSO + +`bup-fuse`(1), `bup-ls`(1), `bup-ftp`(1), `bup-restore`(1), `kill`(1) + + +# BUP + +Part of the `bup`(1) suite. diff -Nru bup-0.33.2/Documentation/bup-web.md bup-0.33.7/Documentation/bup-web.md --- bup-0.33.2/Documentation/bup-web.md 2023-07-01 20:08:43.000000000 +0000 +++ bup-0.33.7/Documentation/bup-web.md 1970-01-01 00:00:00.000000000 +0000 @@ -1,76 +0,0 @@ -% bup-ftp(1) Bup %BUP_VERSION% -% Joe Beda -% %BUP_DATE% - -# NAME - -bup-web - Start web server to browse bup repositiory - -# SYNOPSIS - -bup web [[hostname]:port] - -bup web unix://path - -# DESCRIPTION - -`bup web` starts a web server that can browse bup repositories. The file -hierarchy is the same as that shown by `bup-fuse`(1), `bup-ls`(1) and -`bup-ftp`(1). - -`hostname` and `port` default to 127.0.0.1 and 8080, respectively, and hence -`bup web` will only offer up the web server to locally running clients. If -you'd like to expose the web server to anyone on your network (dangerous!) you -can omit the bind address to bind to all available interfaces: `:8080`. - -When `unix://path` is specified, the server will listen on the -filesystem socket at `path` rather than a network socket. - -A `SIGTERM` signal may be sent to the server to request an orderly -shutdown. - -# OPTIONS - -\--human-readable -: display human readable file sizes (i.e. 3.9K, 4.7M) - -\--browser -: open the site in the default browser - -# EXAMPLES - - $ bup web - Serving HTTP on 127.0.0.1:8080... - ^C - Interrupted. - - $ bup web :8080 - Serving HTTP on 0.0.0.0:8080... - ^C - Interrupted. 
- - $ bup web unix://socket & - Serving HTTP on filesystem socket 'socket' - $ curl --unix-socket ./socket http://localhost/ - $ fg - bup web unix://socket - ^C - Interrupted. - - $ bup web & - [1] 30980 - Serving HTTP on 127.0.0.1:8080... - $ kill -s TERM 30980 - Shutdown requested - $ wait 30980 - $ echo $? - 0 - -# SEE ALSO - -`bup-fuse`(1), `bup-ls`(1), `bup-ftp`(1), `bup-restore`(1), `kill`(1) - - -# BUP - -Part of the `bup`(1) suite. diff -Nru bup-0.33.2/Documentation/bup.1.md bup-0.33.7/Documentation/bup.1.md --- bup-0.33.2/Documentation/bup.1.md 1970-01-01 00:00:00.000000000 +0000 +++ bup-0.33.7/Documentation/bup.1.md 2025-01-08 20:04:11.000000000 +0000 @@ -0,0 +1,125 @@ +% bup(1) Bup %BUP_VERSION% +% Avery Pennarun +% %BUP_DATE% + +# NAME + +bup - Backup program using rolling checksums and git file formats + +# SYNOPSIS + +bup [global options...] \<command\> [options...] + +# DESCRIPTION + +`bup` is a program for making backups of your files using +the git file format. + +Unlike `git`(1) itself, bup is +optimized for handling huge data sets including individual +very large files (such a virtual machine images). However, +once a backup set is created, it can still be accessed +using git tools. + +Subcommands are described in separate man pages. For example +`bup-init`(1) covers `bup init`. + +# GLOBAL OPTIONS + +\--version +: print bup's version number. Equivalent to `bup version`. + +-d, \--bup-dir=*BUP_DIR* +: use the given BUP_DIR parameter as the bup repository + location, instead of reading it from the $BUP_DIR + environment variable or using the default `~/.bup` + location.
+ + +# COMMONLY USED SUBCOMMANDS + +`bup-fsck`(1) +: Check backup sets for damage and add redundancy information + +`bup-ftp`(1) +: Browse backup sets using an ftp-like client + +`bup-fuse`(1) +: Mount your backup sets as a filesystem + +`bup-help`(1) +: Print detailed help for the given command + +`bup-index`(1) +: Create or display the index of files to back up + +`bup-on`(1) +: Backup a remote machine to the local one + +`bup-restore`(1) +: Extract files from a backup set + +`bup-save`(1) +: Save files into a backup set (note: run "bup index" first) + +`bup-web`(1) +: Launch a web server to examine backup sets + + +# RARELY USED SUBCOMMANDS + +`bup-damage`(1) +: Deliberately destroy data + +`bup-drecurse`(1) +: Recursively list files in your filesystem + +`bup-init`(1) +: Initialize a bup repository + +`bup-join`(1) +: Retrieve a file backed up using `bup-split`(1) + +`bup-ls`(1) +: Browse the files in your backup sets + +`bup-margin`(1) +: Determine how close your bup repository is to armageddon + +`bup-memtest`(1) +: Test bup memory usage statistics + +`bup-midx`(1) +: Index objects to speed up future backups + +`bup-newliner`(1) +: Make sure progress messages don't overlap with output + +`bup-random`(1) +: Generate a stream of random output + +`bup-server`(1) +: The server side of the bup client-server relationship + +`bup-split`(1) +: Split a single file into its own backup set + +`bup-tick`(1) +: Wait for up to one second. + +`bup-version`(1) +: Report the version number of your copy of bup. + + +# ENVIRONMENT + +`BUP_ASSUME_GIT_VERSION_IS_FINE` +: If set to `true`, `yes`, or `1`, assume the version of `git` + in the path is acceptable. + + +# SEE ALSO + +`git`(1) and the *README* file from the bup distribution. + +The home of bup is at <http://github.com/bup/bup/>.
diff -Nru bup-0.33.2/Documentation/bup.md bup-0.33.7/Documentation/bup.md --- bup-0.33.2/Documentation/bup.md 2023-07-01 20:08:43.000000000 +0000 +++ bup-0.33.7/Documentation/bup.md 1970-01-01 00:00:00.000000000 +0000 @@ -1,126 +0,0 @@ -% bup(1) Bup %BUP_VERSION% -% Avery Pennarun -% %BUP_DATE% - -# NAME - -bup - Backup program using rolling checksums and git file formats - -# SYNOPSIS - -bup [global options...] \<command\> [options...] - -# DESCRIPTION - -`bup` is a program for making backups of your files using -the git file format. - -Unlike `git`(1) itself, bup is -optimized for handling huge data sets including individual -very large files (such a virtual machine images). However, -once a backup set is created, it can still be accessed -using git tools. - -The individual bup subcommands appear in their own man -pages. - -# GLOBAL OPTIONS - -\--version -: print bup's version number. Equivalent to - `bup-version`(1) - --d, \--bup-dir=*BUP_DIR* -: use the given BUP_DIR parameter as the bup repository - location, instead of reading it from the $BUP_DIR - environment variable or using the default `~/.bup` - location.
- - -# COMMONLY USED SUBCOMMANDS - -`bup-fsck`(1) -: Check backup sets for damage and add redundancy information - -`bup-ftp`(1) -: Browse backup sets using an ftp-like client - -`bup-fuse`(1) -: Mount your backup sets as a filesystem - -`bup-help`(1) -: Print detailed help for the given command - -`bup-index`(1) -: Create or display the index of files to back up - -`bup-on`(1) -: Backup a remote machine to the local one - -`bup-restore`(1) -: Extract files from a backup set - -`bup-save`(1) -: Save files into a backup set (note: run "bup index" first) - -`bup-web`(1) -: Launch a web server to examine backup sets - - -# RARELY USED SUBCOMMANDS - -`bup-damage`(1) -: Deliberately destroy data - -`bup-drecurse`(1) -: Recursively list files in your filesystem - -`bup-init`(1) -: Initialize a bup repository - -`bup-join`(1) -: Retrieve a file backed up using `bup-split`(1) - -`bup-ls`(1) -: Browse the files in your backup sets - -`bup-margin`(1) -: Determine how close your bup repository is to armageddon - -`bup-memtest`(1) -: Test bup memory usage statistics - -`bup-midx`(1) -: Index objects to speed up future backups - -`bup-newliner`(1) -: Make sure progress messages don't overlap with output - -`bup-random`(1) -: Generate a stream of random output - -`bup-server`(1) -: The server side of the bup client-server relationship - -`bup-split`(1) -: Split a single file into its own backup set - -`bup-tick`(1) -: Wait for up to one second. - -`bup-version`(1) -: Report the version number of your copy of bup. - - -# ENVIRONMENT - -`BUP_ASSUME_GIT_VERSION_IS_FINE` -: If set to `true`, `yes`, or `1`, assume the version of `git` - in the path is acceptable. - - -# SEE ALSO - -`git`(1) and the *README* file from the bup distribution. - -The home of bup is at <http://github.com/bup/bup/>.
diff -Nru bup-0.33.2/GNUmakefile bup-0.33.7/GNUmakefile --- bup-0.33.2/GNUmakefile 2023-07-01 20:08:43.000000000 +0000 +++ bup-0.33.7/GNUmakefile 2025-01-08 20:04:11.000000000 +0000 @@ -96,23 +96,58 @@ bup_deps := lib/bup/_helpers$(soext) lib/cmd/bup -all: dev/bup-exec dev/bup-python dev/python $(bup_deps) Documentation/all \ - $(current_sampledata) +incomplete_saves_svg := \ + issue/missing-objects-fig-bloom-get.svg \ + issue/missing-objects-fig-bloom-set.svg \ + issue/missing-objects-fig-bup-model-2.svg \ + issue/missing-objects-fig-bup-model.svg \ + issue/missing-objects-fig-gc-dangling.svg \ + issue/missing-objects-fig-get-bug-save.svg \ + issue/missing-objects-fig-git-model.svg \ + issue/missing-objects-fig-rm-after-gc.svg \ + issue/missing-objects-fig-rm-after.svg \ + issue/missing-objects-fig-rm-before.svg +clean_paths += $(incomplete_saves_svg) + +issue/missing-objects.html: $(incomplete_saves_svg) + +issue/%.svg: issue/%.dot + $(DOT) -Tsvg $< > $@ + +issue/%.html: issue/%.md + $(PANDOC) -s --embed-resources --resource-path issue \ + -r markdown -w html -o $@ $< -$(current_sampledata): - dev/configure-sampledata --setup +issues := +man_md := +DOT ?= $(shell type -p dot) PANDOC ?= $(shell type -p pandoc) ifeq (,$(PANDOC)) - $(shell echo "Warning: pandoc not found; skipping manpage generation" 1>&2) - man_md := + $(info Warning: pandoc not found; skipping generation of related documents) else man_md := $(wildcard Documentation/*.md) + ifeq (,$(findstring --embed-resources,$(shell $(PANDOC) --help))) + $(info Warning: no pandoc --embed-resources; skipping generation of related documents) + else + ifeq (,$(DOT)) + $(info Warning: graphviz dot not found; skipping generation of related documents) + else + issues += issue/missing-objects.html + endif + endif endif -man_roff := $(patsubst %.md,%.1,$(man_md)) -man_html := $(patsubst %.md,%.html,$(man_md)) + +all: dev/bup-exec dev/bup-python dev/python $(bup_deps) Documentation/all \ + $(issues) $(current_sampledata) 
+ +$(current_sampledata): + dev/configure-sampledata --setup + +man_roff := $(man_md:.md=) +man_html := $(man_md:.md=.html) INSTALL=install PREFIX=/usr/local @@ -129,8 +164,11 @@ install: all $(INSTALL) -d $(dest_bindir) $(dest_libdir)/bup/cmd $(dest_libdir)/cmd \ $(dest_libdir)/web/static - test -z "$(man_roff)" || install -d $(dest_mandir)/man1 - test -z "$(man_roff)" || $(INSTALL) -m 0644 $(man_roff) $(dest_mandir)/man1 + for f in $(man_roff); do \ + sec="$${f##*.}"; \ + $(INSTALL) -d $(dest_mandir)/man"$$sec"; \ + $(INSTALL) -m 0644 "$$f" $(dest_mandir)/man"$$sec"; \ + done test -z "$(man_html)" || install -d $(dest_docdir) test -z "$(man_html)" || $(INSTALL) -m 0644 $(man_html) $(dest_docdir) $(INSTALL) -pm 0755 lib/cmd/bup "$(dest_libdir)/cmd/bup" @@ -250,17 +288,20 @@ Documentation/substvars: $(bup_deps) # FIXME: real temp file set -e; bup_ver=$$(./bup version); \ - echo "s,%BUP_VERSION%,$$bup_ver,g" > $@.tmp; \ - echo "s,%BUP_DATE%,$$bup_ver,g" >> $@.tmp + echo "s,%BUP_VERSION%,$$bup_ver,g" > $@.tmp; + set -e; bup_date=$$(./bup version --date); \ + echo "s,%BUP_DATE%,$$bup_date,g" >> $@.tmp mv $@.tmp $@ -Documentation/%.1: Documentation/%.md Documentation/substvars - $(pf); sed -f Documentation/substvars $< \ - | "$(PANDOC)" -s -r markdown -w man -o $@ +define render_page + $(pf); sed -f Documentation/substvars $< \ + | "$(PANDOC)" -s -r markdown -w $(1) -o $(2) +endef +Documentation/%: Documentation/%.md Documentation/substvars + $(call render_page,man,$@) Documentation/%.html: Documentation/%.md Documentation/substvars - $(pf); sed -f Documentation/substvars $< \ - | "$(PANDOC)" -s -r markdown -w html -o $@ + $(call render_page,html,$@) .PHONY: Documentation/clean Documentation/clean: diff -Nru bup-0.33.2/README bup-0.33.7/README --- bup-0.33.2/README 2023-07-01 20:08:43.000000000 +0000 +++ bup-0.33.7/README 2025-01-08 20:04:11.000000000 +0000 @@ -86,9 +86,16 @@ Notable changes introduced by a release ======================================= + - Changes in 
0.33.7 as compared to 0.33.6 + - Changes in 0.33.6 as compared to 0.33.5 + - Changes in 0.33.5 as compared to 0.33.4 + - Changes in 0.33.4 as compared to 0.33.3 + - Changes in 0.33.3 as compared to 0.33.2 - Changes in 0.33.2 as compared to 0.33.1 - Changes in 0.33.1 as compared to 0.33 - Changes in 0.33 as compared to 0.32 + - Changes in 0.32.2 as compared to 0.32.1 + - Changes in 0.32.1 as compared to 0.32 - Changes in 0.32 as compared to 0.31 - Changes in 0.31 as compared to 0.30.1 - Changes in 0.30.1 as compared to 0.30 @@ -126,7 +133,7 @@ bup, please check out the latest stable release like this: ```sh - git checkout 0.33.2 + git checkout 0.33.7 ``` You can see the latest stable release here: @@ -176,7 +183,15 @@ ```sh make ``` - + + At the moment the build treats compiler warnings as errors. If the + build fails as a result, try this: + + ```sh + CFLAGS=-Wno-error ./configure + make + ``` + - Run the tests: ```sh @@ -197,11 +212,12 @@ make -j check ``` - The tests should pass. If they don't pass for you, stop here and - send an email to bup-list@googlegroups.com. Though if there are - symbolic links along the current working directory path, the tests - may fail. Running something like this before "make test" should - sidestep the problem: + The tests should pass (with some skipped tests that weren't + applicable in your environment). If they don't pass for you, stop + here and send an email to bup-list@googlegroups.com. Though if + there are symbolic links along the current working directory path, + the tests may fail. 
Running something like this before "make + test" should sidestep the problem: ```sh cd "$(pwd -P)" @@ -238,19 +254,12 @@ Binary packages of bup are known to be built for the following OSes: - - Debian: - http://packages.debian.org/search?searchon=names&keywords=bup - - Ubuntu: - http://packages.ubuntu.com/search?searchon=names&keywords=bup - - pkgsrc (NetBSD, Dragonfly, and others) - http://pkgsrc.se/sysutils/bup - http://cvsweb.netbsd.org/bsdweb.cgi/pkgsrc/sysutils/bup/ - - Arch Linux: - https://www.archlinux.org/packages/?sort=&q=bup - - Fedora: - https://apps.fedoraproject.org/packages/bup - - macOS (Homebrew): - https://formulae.brew.sh/formula/bup + - [Debian](https://packages.debian.org/bup) + - [Ubuntu](https://packages.ubuntu.com/bup) + - [pkgsrc.se (NetBSD, Dragonfly, and others)](https://pkgsrc.se/sysutils/bup) + - [NetBSD](https://cvsweb.netbsd.org/bsdweb.cgi/pkgsrc/sysutils/bup/) + - [Arch Linux](https://www.archlinux.org/packages/?sort=&q=bup) + - [macOS (Homebrew)](https://formulae.brew.sh/formula/bup) Using bup diff -Nru bup-0.33.2/README.md bup-0.33.7/README.md --- bup-0.33.2/README.md 2023-07-01 20:08:43.000000000 +0000 +++ bup-0.33.7/README.md 2025-01-08 20:04:11.000000000 +0000 @@ -86,9 +86,16 @@ Notable changes introduced by a release ======================================= + - Changes in 0.33.7 as compared to 0.33.6 + - Changes in 0.33.6 as compared to 0.33.5 + - Changes in 0.33.5 as compared to 0.33.4 + - Changes in 0.33.4 as compared to 0.33.3 + - Changes in 0.33.3 as compared to 0.33.2 - Changes in 0.33.2 as compared to 0.33.1 - Changes in 0.33.1 as compared to 0.33 - Changes in 0.33 as compared to 0.32 + - Changes in 0.32.2 as compared to 0.32.1 + - Changes in 0.32.1 as compared to 0.32 - Changes in 0.32 as compared to 0.31 - Changes in 0.31 as compared to 0.30.1 - Changes in 0.30.1 as compared to 0.30 @@ -126,7 +133,7 @@ bup, please check out the latest stable release like this: ```sh - git checkout 0.33.2 + git checkout 0.33.7 ``` You can 
see the latest stable release here: @@ -176,7 +183,15 @@ ```sh make ``` - + + At the moment the build treats compiler warnings as errors. If the + build fails as a result, try this: + + ```sh + CFLAGS=-Wno-error ./configure + make + ``` + - Run the tests: ```sh @@ -197,11 +212,12 @@ make -j check ``` - The tests should pass. If they don't pass for you, stop here and - send an email to bup-list@googlegroups.com. Though if there are - symbolic links along the current working directory path, the tests - may fail. Running something like this before "make test" should - sidestep the problem: + The tests should pass (with some skipped tests that weren't + applicable in your environment). If they don't pass for you, stop + here and send an email to bup-list@googlegroups.com. Though if + there are symbolic links along the current working directory path, + the tests may fail. Running something like this before "make + test" should sidestep the problem: ```sh cd "$(pwd -P)" @@ -238,19 +254,12 @@ Binary packages of bup are known to be built for the following OSes: - - Debian: - http://packages.debian.org/search?searchon=names&keywords=bup - - Ubuntu: - http://packages.ubuntu.com/search?searchon=names&keywords=bup - - pkgsrc (NetBSD, Dragonfly, and others) - http://pkgsrc.se/sysutils/bup - http://cvsweb.netbsd.org/bsdweb.cgi/pkgsrc/sysutils/bup/ - - Arch Linux: - https://www.archlinux.org/packages/?sort=&q=bup - - Fedora: - https://apps.fedoraproject.org/packages/bup - - macOS (Homebrew): - https://formulae.brew.sh/formula/bup + - [Debian](https://packages.debian.org/bup) + - [Ubuntu](https://packages.ubuntu.com/bup) + - [pkgsrc.se (NetBSD, Dragonfly, and others)](https://pkgsrc.se/sysutils/bup) + - [NetBSD](https://cvsweb.netbsd.org/bsdweb.cgi/pkgsrc/sysutils/bup/) + - [Arch Linux](https://www.archlinux.org/packages/?sort=&q=bup) + - [macOS (Homebrew)](https://formulae.brew.sh/formula/bup) Using bup diff -Nru bup-0.33.2/config/configure.inc bup-0.33.7/config/configure.inc --- 
bup-0.33.2/config/configure.inc 2023-07-01 20:08:43.000000000 +0000 +++ bup-0.33.7/config/configure.inc 2025-01-08 20:04:11.000000000 +0000 @@ -304,7 +304,7 @@ for hdr in $*; do echo "#include <$hdr>" >> "$__ac_tmpdir/ngc$$.c" done - echo "main() { }" >> "$__ac_tmpdir/ngc$$.c" + echo "int main(int argc, char **argv) { return 0; }" >> "$__ac_tmpdir/ngc$$.c" LOGN "checking for header $hdr" @@ -327,20 +327,14 @@ AC_CHECK_FUNCS () { AC_PROG_CC -F=$1 -shift -rm -f "$__ac_tmpdir/ngc$$.c" - -while [ "$1" ]; do - echo "#include <$1>" >> "$__ac_tmpdir/ngc$$.c" + F="$1" shift -done - + rm -f "$__ac_tmpdir/ngc$$.c" cat >> "$__ac_tmpdir/ngc$$.c" << EOF -main() +char $F(void); +int main(int argc, char **argv) { - - $F(); + return $F(); } EOF @@ -379,9 +373,12 @@ done cat >> "$__ac_tmpdir/ngc$$.c" << EOF -main() +#include +int main(int argc, char **argv) { struct $struct foo; + printf("%p", (void *) &foo); + return 0; } EOF @@ -419,11 +416,13 @@ done cat >> "$__ac_tmpdir/ngc$$.c" << EOF -main() +#include +int main(int argc, char **argv) { struct $struct foo; - foo.$field; + printf("%p", (void *) &(foo.$field)); + return 0; } EOF @@ -450,9 +449,10 @@ cat > "$__ac_tmpdir/ngc$$.c" << \EOF #include -main() +int main(int argc, char **argv) { puts("hello, sailor"); + return 0; } EOF @@ -874,7 +874,11 @@ AC_TYPE_PID_T () { cat > "$__ac_tmpdir/pd$$.c" << EOF #include -main() { pid_t me; } +int main(int argc, char **argv) +{ + pid_t me; + return 0; +} EOF LOGN "checking for pid_t" @@ -920,7 +924,7 @@ AC_SCALAR_TYPES () { cat > "$__ac_tmpdir/pd$$.c" << EOF #include -main() +int main(int argc, char **argv) { unsigned long v_long; unsigned int v_int; @@ -1029,7 +1033,7 @@ #include #include -main() +int main(int argc, char **argv) { int x = open("$$.c", O_RDWR, 0666); int y = open("$$.c", O_RDWR, 0666); @@ -1080,7 +1084,7 @@ #include #include -main() +int main(int argc, char **argv) { char bfr[256]; @@ -1111,7 +1115,7 @@ #include #include -main() +int main(int argc, char **argv) { HEADER 
hhh; res_init(); diff -Nru bup-0.33.2/debian/changelog bup-0.33.7/debian/changelog --- bup-0.33.2/debian/changelog 2023-07-08 20:11:59.000000000 +0000 +++ bup-0.33.7/debian/changelog 2025-02-02 02:59:10.000000000 +0000 @@ -1,3 +1,62 @@ +bup (0.33.7-1~deb12u1) bookworm; urgency=medium + + * Upstream versions 0.33.3, 0.33.4, 0.33.5, 0.33.6, and 0.33.7 from bup's + stable update branch with various fixes for bugs that affect bookworm. + + -- Robert Edmonds Sat, 01 Feb 2025 21:59:10 -0500 + +bup (0.33.7-1) unstable; urgency=medium + + * New upstream versions 0.33.6, 0.33.7 + * debian/docs: Include upstream release note '0.33.6-from-0.33.5.md' + * debian/docs: Include upstream release note '0.33.7-from-0.33.6.md' + + -- Robert Edmonds Sat, 11 Jan 2025 16:41:56 -0500 + +bup (0.33.6~git20241212-1) unstable; urgency=medium + + [ Rob Browning ] + * debian/rules: Generate checkout info prior to dh_auto_configure + + [ Robert Edmonds ] + * New upstream git snapshot of commit 3cd11d3840b0714820ca5fc540115a339d1ee7fa + due to call for testing + * debian/docs: Include upstream release note '0.33.5-from-0.33.4.md' + * Revert "debian/rules: Relocate bup-config.5 to the correct manpage + directory" (fixed upstream) + + -- Robert Edmonds Sat, 14 Dec 2024 22:24:03 -0500 + +bup (0.33.5-1) unstable; urgency=medium + + * New upstream version 0.33.5 + - Bug fixes for "three issues that could cause a repository to end up + with trees that had dangling references, i.e. missing files, parts of + files, subtrees, etc." if the "bup gc" command is utilized. 
+ (Closes: #1086123) + * debian/rules: Relocate bup-config.5 to the correct manpage directory + + -- Robert Edmonds Wed, 11 Dec 2024 23:38:00 -0500 + +bup (0.33.4-1) unstable; urgency=medium + + * New upstream version 0.33.4 + - "configure.inc: fix main() prototype and other (now) invalid test code" + (Closes: #1074858) + * debian/docs: Include upstream release note '0.33.4-from-0.33.3.md' + + -- Robert Edmonds Sun, 29 Sep 2024 15:41:36 -0400 + +bup (0.33.3-1) unstable; urgency=medium + + * New upstream version 0.33.3 + - "fuse: don't filter output when backgrounding" (Closes: #1050040) + - "test-help: fix dependency on C locale with current pandoc" (Closes: + #1042250) + * debian/docs: Include upstream release note '0.33.3-from-0.33.2.md' + + -- Robert Edmonds Sat, 27 Jan 2024 00:12:36 -0500 + bup (0.33.2-1~deb12u1) bookworm; urgency=medium * Upstream version 0.33.2, with a fix for a problem that can cause POSIX.1e diff -Nru bup-0.33.2/debian/docs bup-0.33.7/debian/docs --- bup-0.33.2/debian/docs 2023-07-08 20:11:59.000000000 +0000 +++ bup-0.33.7/debian/docs 2025-02-02 02:59:10.000000000 +0000 @@ -2,3 +2,8 @@ README.md note/0.33.1-from-0.33.md note/0.33.2-from-0.33.1.md +note/0.33.3-from-0.33.2.md +note/0.33.4-from-0.33.3.md +note/0.33.5-from-0.33.4.md +note/0.33.6-from-0.33.5.md +note/0.33.7-from-0.33.6.md diff -Nru bup-0.33.2/debian/patches/debian-changes bup-0.33.7/debian/patches/debian-changes --- bup-0.33.2/debian/patches/debian-changes 2023-07-08 20:11:59.000000000 +0000 +++ bup-0.33.7/debian/patches/debian-changes 2025-02-02 02:59:10.000000000 +0000 @@ -1,10 +1,12 @@ -This is an autogenerated patch header for a single-debian-patch file. The -delta against upstream is either kept as a single patch, or maintained -in some VCS, and exported as a single patch instead of more manageable -atomic patches. +Description: Autogenerated patch header for a single-debian-patch file. 
+ The delta against upstream is either kept as a single patch, or maintained + in some VCS, and exported as a single patch instead of more manageable + atomic patches. +Forwarded: not-needed ---- bup-0.33.2.orig/GNUmakefile -+++ bup-0.33.2/GNUmakefile +--- +--- bup-0.33.7.orig/GNUmakefile ++++ bup-0.33.7/GNUmakefile @@ -61,7 +61,7 @@ else test_tmp := $(CURDIR)/test/tmp endif @@ -14,20 +16,20 @@ initial_setup := $(call shout,$(initial_setup),update-checkout-info failed)) clean_paths += lib/bup/checkout_info.py -@@ -96,7 +96,7 @@ bup_ext_cmds := lib/cmd/bup-import-rdiff +@@ -140,7 +140,7 @@ else + endif - bup_deps := lib/bup/_helpers$(soext) lib/cmd/bup -all: dev/bup-exec dev/bup-python dev/python $(bup_deps) Documentation/all \ +all: dev/bup-exec dev/bup-python dev/python $(bup_deps) \ - $(current_sampledata) + $(issues) $(current_sampledata) $(current_sampledata): ---- bup-0.33.2.orig/lib/bup/source_info.py -+++ bup-0.33.2/lib/bup/source_info.py +--- bup-0.33.7.orig/lib/bup/source_info.py ++++ bup-0.33.7/lib/bup/source_info.py @@ -1,3 +1,3 @@ --commit='b1c19fb0142ac4bdc60b07640734e0c2d256f423' --date='2023-07-01 15:08:43 -0500' -+commit='db4734ba24249fee8060a186e03e6173ce2e5d55' -+date='2023-07-08 16:12:37 -0400' +-commit='8ba4ff35130be0114e689bb6b745499bdd5299c9' +-date='2025-01-08 14:04:11 -0600' ++commit='240588bd6b0bd86ec2c6f02e6a2dabe7f63eb0e5' ++date='2025-02-01 21:59:21 -0500' modified=False diff -Nru bup-0.33.2/debian/rules bup-0.33.7/debian/rules --- bup-0.33.2/debian/rules 2023-07-08 20:11:59.000000000 +0000 +++ bup-0.33.7/debian/rules 2025-02-02 02:59:10.000000000 +0000 @@ -7,12 +7,14 @@ %: dh $@ --with python3 -override_dh_auto_configure: +lib/bup/checkout_info.py: debian/gen_checkout_info.sh + debian/gen_checkout_info.sh > $@ + +override_dh_auto_configure: lib/bup/checkout_info.py ./configure override_dh_auto_build-indep: $(MAKE) Documentation/all override_dh_auto_install: - $(CURDIR)/debian/gen_checkout_info.sh > $(CURDIR)/lib/bup/checkout_info.py 
$(MAKE) PREFIX="/usr" DESTDIR="$(CURDIR)/debian/tmp" install diff -Nru bup-0.33.2/dev/cleanup-mounts-under bup-0.33.7/dev/cleanup-mounts-under --- bup-0.33.2/dev/cleanup-mounts-under 2023-07-01 20:08:43.000000000 +0000 +++ bup-0.33.7/dev/cleanup-mounts-under 2025-01-08 20:04:11.000000000 +0000 @@ -4,6 +4,8 @@ set -e for python in \ python3 \ + python3.12 \ + python3.11 \ python3.10 \ python3.9 \ python3.8 \ diff -Nru bup-0.33.2/dev/groups bup-0.33.7/dev/groups --- bup-0.33.2/dev/groups 1970-01-01 00:00:00.000000000 +0000 +++ bup-0.33.7/dev/groups 2025-01-08 20:04:11.000000000 +0000 @@ -0,0 +1,15 @@ +#!/bin/sh +"""": # -*-python-*- +python="$(dirname "$0")/python" || exit $? +exec "$python" "$0" ${1+"$@"} +""" + +import grp, os + +# cf. helpers.getgroups() +egid = os.getegid() +gids = os.getgroups() +if egid not in gids: + gids.append(egid) + +print(*[grp.getgrgid(x).gr_name for x in gids]) diff -Nru bup-0.33.2/dev/lib.sh bup-0.33.7/dev/lib.sh --- bup-0.33.2/dev/lib.sh 2023-07-01 20:08:43.000000000 +0000 +++ bup-0.33.7/dev/lib.sh 2025-01-08 20:04:11.000000000 +0000 @@ -20,33 +20,18 @@ || return $? } -current-filesystem() -{ - local kernel="$(uname -s)" || return $? - case "$kernel" in - NetBSD) - df -G . | sed -En 's/.* ([^ ]*) fstype.*/\1/p' - ;; - SunOS) - df -g . | sed -En 's/.* ([^ ]*) fstype.*/\1/p' - ;; - *) - df -T . | awk 'END{print $2}' - esac -} - path-filesystems() ( # Return filesystem for each dir from $1 to /. # Perhaps for /foo/bar, "ext4\next4\nbtrfs\n". test "$#" -eq 1 || exit $? cd "$1" || exit $? - current-filesystem || exit $? + "$bup_dev_lib_top/dev/path-fs" . || exit $? dir="$(pwd)" || exit $? while test "$dir" != /; do cd .. || exit $? dir="$(pwd)" || exit $? - current-filesystem || exit $? + "$bup_dev_lib_top/dev/path-fs" . || exit $? 
done exit 0 ) diff -Nru bup-0.33.2/dev/path-fs bup-0.33.7/dev/path-fs --- bup-0.33.2/dev/path-fs 1970-01-01 00:00:00.000000000 +0000 +++ bup-0.33.7/dev/path-fs 2025-01-08 20:04:11.000000000 +0000 @@ -0,0 +1,20 @@ +#!/usr/bin/env bash + +set -ueo pipefail + +kernel="$(uname -s)" +case "$kernel" in + NetBSD) + fs() { df -G "$1" | sed -En 's/.* ([^ ]*) fstype.*/\1/p'; } + ;; + SunOS) + fs() { df -g "$1" | sed -En 's/.* ([^ ]*) fstype.*/\1/p'; } + ;; + *) + fs() { df -T "$1" | awk 'END{print $2}'; } +esac + +while test $# -ne 0; do + fs "$1" + shift +done diff -Nru bup-0.33.2/dev/perforate-repo bup-0.33.7/dev/perforate-repo --- bup-0.33.2/dev/perforate-repo 1970-01-01 00:00:00.000000000 +0000 +++ bup-0.33.7/dev/perforate-repo 2025-01-08 20:04:11.000000000 +0000 @@ -0,0 +1,92 @@ +#!/bin/sh +"""": # -*-python-*- +bup_python="$(dirname "$0")/bup-python" || exit $? +exec "$bup_python" "$0" ${1+"$@"} +""" + +from argparse import ArgumentParser +from glob import glob +from os import fsencode, unlink +from os.path import splitext +from subprocess import PIPE, run +from sys import stderr, stdin +from tempfile import TemporaryDirectory +import os, re, sys + +# Currently does nothing bup-specific, i.e. it's up to the caller to +# reset midx, bloom, etc. if that's relevant. 
+ +script_dir = os.path.dirname(os.path.realpath(__file__)) +bup = os.path.realpath(f'{script_dir}/../bup') + +def runc(*args, **kwargs): + kwargs['check'] = True + return run(*args, **kwargs) + +def log(*args, **kwargs): + if 'file' not in kwargs: + kwargs['file'] = stderr + if 'end' not in kwargs: + kwargs['end'] = '' + print(*args, **kwargs) + +def logn(*args, **kwargs): + kwargs['end'] = '\n' + log(*args, **kwargs) + +def all_objects(repo): + git = runc(('git', '--git-dir', repo, 'cat-file', '--batch-all-objects', + f'--batch-check=%(objectname) %(objecttype)'), + stdout=PIPE) + for line in git.stdout.splitlines(): + yield line.split() + +parser = ArgumentParser() +add_arg = parser.add_argument +add_arg('--drop-oids', action='store_true', + help='drop (hex ASCII) oids given on standard input') +add_arg('repo', metavar='', nargs=1, help='victim repository') +opt = parser.parse_args() + +repo = opt.repo[0] + +if not opt.drop_oids: + logn(f'error: --drop-oids is currently required') + sys.exit(2) + +packdir = f'{repo}/objects/pack' +if not os.path.isdir(packdir): + logn(f'error: {repo!r} does not appear to be a git repository') + sys.exit(2) + +objs = list(all_objects(repo)) +packs = glob(f'{packdir}/*.pack') + +with TemporaryDirectory(prefix='perforate-', dir=repo) as tmpdir: + tmprepo = f'{tmpdir}/repo' + runc((bup, '-d', tmprepo, 'init')) + for pack in packs: + with open(pack, 'rb') as pf: + runc(('git', '--git-dir', tmprepo, 'unpack-objects'), stdin=pf) + + newobjs = list(all_objects(tmprepo)) + if not set(x[0] for x in newobjs) == set(x[0] for x in objs): + logn(f'error: does the source repo have loose objects?') + sys.exit(2) + + # For now assumes only one subdir level, and that locale decoding is fine + oid_rx = re.compile(b'[0-9A-Za-z]{40}') + for oidx in stdin.buffer: + oidx = oidx.rstrip() + if not oid_rx.fullmatch(oidx): + logn(f'error: oid {oidx} is not a 40-char hash') + unlink(b'%s/objects/%s/%s' % (fsencode(tmprepo), oidx[:2], oidx[2:])) + + for 
pack in packs: + stem, _ = splitext(pack) + for pack_related in glob(f'{stem}.*'): + unlink(pack_related) + + new_oids = (x[0] for x in all_objects(tmprepo)) + runc(('git', '--git-dir', tmprepo, 'pack-objects', f'{packdir}/pack'), + input=b'\n'.join(new_oids)) diff -Nru bup-0.33.2/dev/prep-for-freebsd-build bup-0.33.7/dev/prep-for-freebsd-build --- bup-0.33.2/dev/prep-for-freebsd-build 2023-07-01 20:08:43.000000000 +0000 +++ bup-0.33.7/dev/prep-for-freebsd-build 2025-01-08 20:04:11.000000000 +0000 @@ -21,7 +21,7 @@ case "$pyver" in python3) - pkgs="$pkgs python39 py39-tornado py39-pytest py39-pytest-xdist" + pkgs="$pkgs python311 py311-tornado py311-pytest py311-pytest-xdist" pkg install $pkgs ;; *) diff -Nru bup-0.33.2/dev/prep-for-macos-build bup-0.33.7/dev/prep-for-macos-build --- bup-0.33.2/dev/prep-for-macos-build 2023-07-01 20:08:43.000000000 +0000 +++ bup-0.33.7/dev/prep-for-macos-build 2025-01-08 20:04:11.000000000 +0000 @@ -10,7 +10,8 @@ pyver="${1:-python3}" if ! command -v brew; then - /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)" + /usr/bin/env \ + bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)" fi brew install par2 readline rsync pkg-config md5sha1sum @@ -23,7 +24,7 @@ case "$pyver" in python3) brew install python - pip3 install --user pytest pytest-xdist + python3 -m pip install --break-system-packages --user pytest pytest-xdist ;; *) usage 1>&2 diff -Nru bup-0.33.2/dev/root-status bup-0.33.7/dev/root-status --- bup-0.33.2/dev/root-status 2023-07-01 20:08:43.000000000 +0000 +++ bup-0.33.7/dev/root-status 2025-01-08 20:04:11.000000000 +0000 @@ -4,11 +4,14 @@ exec "$python" "$0" ${1+"$@"} """ -from __future__ import absolute_import, print_function import os, sys if sys.platform.startswith('cygwin'): + # see helpers.getgroups() + egid = os.getegid() groups = os.getgroups() + if egid not in groups: + groups.append(egid) if 544 in groups or 0 in groups: 
print('root') else: diff -Nru bup-0.33.2/dev/sparse-size bup-0.33.7/dev/sparse-size --- bup-0.33.2/dev/sparse-size 1970-01-01 00:00:00.000000000 +0000 +++ bup-0.33.7/dev/sparse-size 2025-01-08 20:04:11.000000000 +0000 @@ -0,0 +1,85 @@ +#!/bin/sh +"""": # -*-python-*- +bup_python="$(dirname "$0")/bup-python" || exit $? +exec "$bup_python" "$0" ${1+"$@"} +""" + +from argparse import ArgumentParser +from os.path import abspath, dirname, getsize +from sys import argv, stderr + +def log(*args, **kwargs): + if 'file' not in kwargs: + kwargs['file'] = stderr + if 'end' not in kwargs: + kwargs['end'] = '' + print(*args, **kwargs) + +try: + from os import SEEK_DATA, SEEK_HOLE + have_seek_data_hole = True + log('sparse-size: relying on SEEK_DATA SEEK_HOLE\n') +except ImportError: + log('sparse-size: no SEEK_DATA SEEK_HOLE, using du\n') + have_seek_data_hole = False +if have_seek_data_hole: + from errno import ENXIO + from os import SEEK_CUR, SEEK_DATA, SEEK_HOLE, lseek +else: + from os import environb + from subprocess import PIPE, run + from time import sleep + + +parser = ArgumentParser() +parser.add_argument('-v', dest='verbose', action='count', default=0, + help='increase diagnostics (may be repeated)') +parser.add_argument('path', metavar='') +opt = parser.parse_args() + +if not have_seek_data_hole: + # The allocation data (e.g. du) for some filesystems like btrfs + # and zfs may not settle until the next internal sync which runs + # every 5s. For now, just wait unconditionally. + # cf. https://github.com/openzfs/zfs/issues/2134 + path_fs = abspath(dirname(argv[0])) + '/path-fs' + fs = run((path_fs, opt.path), stdout=PIPE, check=True).stdout.rstrip() + if fs in (b'btrfs', b'zfs'): + sleep(5.1) + cmd = 'du', '-s', opt.path + env = environb.copy() + # POSIX says 512 should be the default, but for at least gnu and + # netbsd du, it isn't. This should cover both. 
+ env[b'BLOCKSIZE'] = b'512' + p = run(cmd, stdout=PIPE, check=True, env=env) + data_size = int(p.stdout.split(maxsplit=1)[0]) * 512 + total_size = getsize(opt.path) + if data_size > total_size: + print(0) + else: + print(total_size - data_size) + exit(0) + +end = getsize(opt.path) +sparse = 0 +with open(opt.path, 'rb') as f: + fd = f.fileno() + off = 0 + while True: + new_off = lseek(fd, off, SEEK_HOLE) + if new_off == end: + break + off = new_off + try: + new_off = lseek(fd, off, SEEK_DATA) + except OSError as ex: + if ex.errno == ENXIO: + if opt.verbose: + log(f'hole: {end - off} @ {off}\n') + sparse += end - off + break + if opt.verbose: + log(f'hole: {new_off - off} @ {off}\n') + sparse += new_off - off + off = new_off +print(sparse) diff -Nru bup-0.33.2/dev/sparse-test-data bup-0.33.7/dev/sparse-test-data --- bup-0.33.2/dev/sparse-test-data 2023-07-01 20:08:43.000000000 +0000 +++ bup-0.33.7/dev/sparse-test-data 2025-01-08 20:04:11.000000000 +0000 @@ -4,7 +4,6 @@ exec "$bup_exec" "$0" ${1+"$@"} """ -from __future__ import absolute_import, print_function from random import randint from sys import stderr, stdout import os, sys @@ -92,8 +91,8 @@ print('offsets:', sparse_offsets, file=stderr) for offset in sparse_offsets: count = offset - pos - print('write:', 'x' if data == 'x' else '0', count, file=stderr) - out.write(data * (offset - pos)) + print('write:', 'x' if data == b'x' else '0', count, file=stderr) + out.write(data * count) pos += count data = b'\0' if data == b'x' else b'x' diff -Nru bup-0.33.2/dev/update-checkout-info bup-0.33.7/dev/update-checkout-info --- bup-0.33.2/dev/update-checkout-info 2023-07-01 20:08:43.000000000 +0000 +++ bup-0.33.7/dev/update-checkout-info 2025-01-08 20:04:11.000000000 +0000 @@ -17,10 +17,8 @@ exit 1 fi -git_top=$(git rev-parse --show-toplevel) || true -if test "$git_top" != "$top"; then - # Not a checkout, or perhaps we're building from an archive dir - # unpacked somewhere in the source tree. +if ! 
test -e .git; then + # Not building from a git tree rm -f "$dest" exit 0 fi diff -Nru bup-0.33.2/dev/update-doc-branches bup-0.33.7/dev/update-doc-branches --- bup-0.33.2/dev/update-doc-branches 2023-07-01 20:08:43.000000000 +0000 +++ bup-0.33.7/dev/update-doc-branches 2025-01-08 20:04:11.000000000 +0000 @@ -12,7 +12,11 @@ man_ref="$1" html_ref="$2" -git diff-index --quiet HEAD -- # no uncommitted changes +if ! git diff-index --quiet HEAD --; then + echo 'error: uncommitted changes' + exit 2 +fi + git rev-parse --verify "$man_ref" git rev-parse --verify "$html_ref" echo "$man_ref" | grep -qE '^refs/heads' @@ -31,7 +35,7 @@ base="$(basename "$f" .md)" if test "$fmt" = man; then ref="$man_ref" - GIT_INDEX_FILE="$tmpidx" git add -f "Documentation/$base.1" + GIT_INDEX_FILE="$tmpidx" git add -f "Documentation/$base" else ref="$html_ref" GIT_INDEX_FILE="$tmpidx" git add -f "Documentation/$base.html" diff -Nru bup-0.33.2/issue/missing-objects-fig-bloom-get.dot bup-0.33.7/issue/missing-objects-fig-bloom-get.dot --- bup-0.33.2/issue/missing-objects-fig-bloom-get.dot 1970-01-01 00:00:00.000000000 +0000 +++ bup-0.33.7/issue/missing-objects-fig-bloom-get.dot 2025-01-08 20:04:11.000000000 +0000 @@ -0,0 +1,23 @@ + +digraph "bloom get" { + rankdir=RL + bgcolor="transparent"; + ranksep=1; // This affects h* -> bloom distance + node [shape=box]; + + bloom2 [shape=plain; label=< + + + + + + + + +
bit
0
1
2
3
4
...
>]; + + bloom2:b1 -> etc:e + bloom2:b4 -> etc:e + + etc [label="etc/ (tree ee...)"]; +} diff -Nru bup-0.33.2/issue/missing-objects-fig-bloom-set.dot bup-0.33.7/issue/missing-objects-fig-bloom-set.dot --- bup-0.33.2/issue/missing-objects-fig-bloom-set.dot 1970-01-01 00:00:00.000000000 +0000 +++ bup-0.33.7/issue/missing-objects-fig-bloom-set.dot 2025-01-08 20:04:11.000000000 +0000 @@ -0,0 +1,27 @@ + +digraph "bloom set" { + rankdir=LR; + // splines=ortho; + bgcolor="transparent"; + ranksep=2; // This affects h* -> bloom distance + node [shape=box]; + + h1:e -> bloom2:b1:w + h1:e -> bloom2:b3:w + h2:e -> bloom2:b3:w + h2:e -> bloom2:b4:w + + bloom2 [shape=plain; label=< + + + + + + + + +
bit
0
1
2
3
4
...
>]; + + h1 [label="something (blob ae0...)"]; + h2 [label="another (tree 273...)"]; +} diff -Nru bup-0.33.2/issue/missing-objects-fig-bup-model-2.dot bup-0.33.7/issue/missing-objects-fig-bup-model-2.dot --- bup-0.33.2/issue/missing-objects-fig-bup-model-2.dot 1970-01-01 00:00:00.000000000 +0000 +++ bup-0.33.7/issue/missing-objects-fig-bup-model-2.dot 2025-01-08 20:04:11.000000000 +0000 @@ -0,0 +1,18 @@ + +digraph "bup model 2" { + bgcolor="transparent"; + node [shape=box]; + + above -> large + large -> part1 + large -> sub + sub -> part2 + sub -> part3 + + above [label="..."]; + large [label="bup-0.35.tar (tree ff...)"]; + sub [label="046 (tree a0...)"]; + part1 [label="000 (blob d9...)"]; + part2 [label="000 (blob 12...)"]; + part3 [label="1cf (blob ee...)"]; +} diff -Nru bup-0.33.2/issue/missing-objects-fig-bup-model.dot bup-0.33.7/issue/missing-objects-fig-bup-model.dot --- bup-0.33.2/issue/missing-objects-fig-bup-model.dot 1970-01-01 00:00:00.000000000 +0000 +++ bup-0.33.7/issue/missing-objects-fig-bup-model.dot 2025-01-08 20:04:11.000000000 +0000 @@ -0,0 +1,22 @@ + +digraph "bup model" { + bgcolor="transparent"; + node [shape=box]; + + above -> bup + bup -> bashrc + bup -> gitconfig + bup -> large + large -> part1 + large -> part2 + large -> part3 + + above [label="..."]; + bup [label="bup/ (tree 88...)"]; + bashrc [label="..."]; + gitconfig [label="..."]; + large [label="bup-0.35.tar (tree 11...)"]; + part1 [label="0000 (blob 0f...)"]; + part2 [label="0879 (blob 91...)"]; + part3 [label="1c10 (blob 63...)"]; +} diff -Nru bup-0.33.2/issue/missing-objects-fig-gc-dangling.dot bup-0.33.7/issue/missing-objects-fig-gc-dangling.dot --- bup-0.33.2/issue/missing-objects-fig-gc-dangling.dot 1970-01-01 00:00:00.000000000 +0000 +++ bup-0.33.7/issue/missing-objects-fig-gc-dangling.dot 2025-01-08 20:04:11.000000000 +0000 @@ -0,0 +1,28 @@ + +digraph "dangling after gc" { + bgcolor="transparent"; + node [shape=box]; + + saves -> save3 [style=dotted] + save3 -> save1 + save3 
-> root3 -> dot3 + save1 -> root1 -> dot1 + + etc -> fstab + etc -> hosts + + // Just pushes etc below the saves + dot1 -> etc [dir=none; style=invisible] + + save1 [label="2024-09... (a0...)"]; + save3 [label="2024-11... (c1...)"]; + root1 [label="/"]; + root3 [label="/"]; + dot1 [label="..."]; + dot3 [label="..."]; + etc [label="etc/ (tree ee...)"]; + fstab [label="fstab (blob f9...)"]; + hosts [label="hosts (blob 76...)"; style=invisible]; + + { rank=same; save1, save3 } +} diff -Nru bup-0.33.2/issue/missing-objects-fig-get-bug-save.dot bup-0.33.7/issue/missing-objects-fig-get-bug-save.dot --- bup-0.33.2/issue/missing-objects-fig-get-bug-save.dot 1970-01-01 00:00:00.000000000 +0000 +++ bup-0.33.7/issue/missing-objects-fig-get-bug-save.dot 2025-01-08 20:04:11.000000000 +0000 @@ -0,0 +1,16 @@ + +digraph "bup get bug save" { + bgcolor="transparent"; + node [shape=box]; + + root -> dir1; + dir1 -> dir2; + dir2 -> file2; + dir2 -> file1; + + root [label="c-1 (commit)"]; + dir1 [label="/ (tree)"]; + dir2 [label="etc/ (tree)"]; + file2 [label="fstab (blob)"]; + file1 [label="hosts (blob)"]; +} \ No newline at end of file diff -Nru bup-0.33.2/issue/missing-objects-fig-git-model.dot bup-0.33.7/issue/missing-objects-fig-git-model.dot --- bup-0.33.2/issue/missing-objects-fig-git-model.dot 1970-01-01 00:00:00.000000000 +0000 +++ bup-0.33.7/issue/missing-objects-fig-git-model.dot 2025-01-08 20:04:11.000000000 +0000 @@ -0,0 +1,18 @@ + +digraph "git model" { + bgcolor="transparent"; + node [shape=box]; + + commit -> root + root -> home + home -> bup + bup -> bashrc + bup -> gitconfig + + commit [label="c-1 (commit ae...)"]; + root [label="/ (tree 90...)"]; + home [label="home/ (tree 1f...)"]; + bup [label="bup/ (tree 88...)"]; + bashrc [label=".bashrc (blob b9...)"]; + gitconfig [label=".gitconfig (blob a1...)"]; +} diff -Nru bup-0.33.2/issue/missing-objects-fig-rm-after-gc.dot bup-0.33.7/issue/missing-objects-fig-rm-after-gc.dot --- 
bup-0.33.2/issue/missing-objects-fig-rm-after-gc.dot 1970-01-01 00:00:00.000000000 +0000 +++ bup-0.33.7/issue/missing-objects-fig-rm-after-gc.dot 2025-01-08 20:04:11.000000000 +0000 @@ -0,0 +1,19 @@ + +digraph "after rm and gc" { + bgcolor="transparent"; + node [shape=box]; + + saves -> save3 [style=dotted] + save3 -> save1 + save3 -> root3 -> dot3 + save1 -> root1 -> dot1 + + save1 [label="2024-09... (a0...)"]; + save3 [label="2024-11... (c1...)"]; + root1 [label="/"]; + root3 [label="/"]; + dot1 [label="..."]; + dot3 [label="..."]; + + { rank=same; save1, save3 } +} diff -Nru bup-0.33.2/issue/missing-objects-fig-rm-after.dot bup-0.33.7/issue/missing-objects-fig-rm-after.dot --- bup-0.33.2/issue/missing-objects-fig-rm-after.dot 1970-01-01 00:00:00.000000000 +0000 +++ bup-0.33.7/issue/missing-objects-fig-rm-after.dot 2025-01-08 20:04:11.000000000 +0000 @@ -0,0 +1,32 @@ + +digraph "after rm" { + bgcolor="transparent"; + node [shape=box]; + + saves -> save3 [style=dotted] + save3 -> save1 + save3 -> root3 -> dot3 + save1 -> root1 -> dot1 + + save3orig -> save2 -> save1 + save3orig -> root3 + save2 -> root2 + root2 -> etc + etc -> fstab + etc -> hosts + + save1 [label="2024-09... (a0...)"]; + save2 [label="2024-10... (b0...)"; style=dotted]; + save3 [label="2024-11... (c1...)"]; + save3orig [label="2024-11... 
(c0...)"; style=dotted]; + root1 [label="/"]; + root2 [label="/"; style=dotted]; + root3 [label="/"]; + dot1 [label="..."]; + dot3 [label="..."]; + etc [label="etc/ (tree ee...)"; style=dotted]; + fstab [label="fstab (blob f9...)"; style=dotted]; + hosts [label="hosts (blob 76...)"; style=dotted]; + + { rank=same; save1, save2, save3, save3orig } +} diff -Nru bup-0.33.2/issue/missing-objects-fig-rm-before.dot bup-0.33.7/issue/missing-objects-fig-rm-before.dot --- bup-0.33.2/issue/missing-objects-fig-rm-before.dot 1970-01-01 00:00:00.000000000 +0000 +++ bup-0.33.7/issue/missing-objects-fig-rm-before.dot 2025-01-08 20:04:11.000000000 +0000 @@ -0,0 +1,28 @@ + +digraph "before rm" { + bgcolor="transparent"; + node [shape=box]; + + saves -> save3 [style=dotted] + save3 -> save2 -> save1 + save3 -> root3 -> dot3 + save2 -> root2 + root2 -> etc + etc -> fstab + etc -> hosts + save1 -> root1 -> dot1 + + save1 [label="2024-09... (a0...)"]; + save2 [label="2024-10... (b0...)"]; + save3 [label="2024-11... (c0...)"]; + root1 [label="/"]; + root2 [label="/"]; + root3 [label="/"]; + dot1 [label="..."]; + dot3 [label="..."]; + etc [label="etc/ (tree ee...)"]; + fstab [label="fstab (blob f9...)"]; + hosts [label="hosts (blob 76..."]; + + { rank=same; save1, save2, save3 } +} diff -Nru bup-0.33.2/issue/missing-objects.md bup-0.33.7/issue/missing-objects.md --- bup-0.33.2/issue/missing-objects.md 1970-01-01 00:00:00.000000000 +0000 +++ bup-0.33.7/issue/missing-objects.md 2025-01-08 20:04:11.000000000 +0000 @@ -0,0 +1,287 @@ +--- +title: How bup (before 0.33.5) might create incomplete trees +--- + +Versions of bup before 0.33.5 had three issues that could cause a +repository to end up with trees that had dangling references, +i.e. missing files, parts of files, subtrees, etc. This document +describes those issues in greater detail. 
+ +## Background: git storage model + +In git, we have the following structure for an individual commit with +directories and files, labeled as "name (git-type hash)": + +
![](missing-objects-fig-git-model.svg)
+ +bup extends this model such that large files may be split into their +own subtrees during deduplication: + +
![](missing-objects-fig-bup-model.svg)
+ +Files can also be split into multiple levels: + +
![](missing-objects-fig-bup-model-2.svg)
+ +There are some more details, but the git model holds overall. Commits +refer to their parent commits and a single tree, and trees refer to +their children (blobs and trees) -- and of course each object is +identified by its content hash. + +> Note: For the sake of simplicity I'm drawing everything as trees in +> this document. In reality, the bup deduplication works exactly +> because it is _not_ a tree, but rather a directed acyclic graph +> (DAG). Multiple backup commits that record unchanged or otherwise +> identical directories or files obviously point to the object(s) +> representing those, shared across them. + +## How `bup save` operates + +When reading files and directories from the filesystem, `bup save` +will create a number of blob and tree objects, store them into the +repository if not already present, and (usually) finally create a new +commit object that points to the previous commit object and the new +root tree object. + +Each "plain" file saved into the repository is uniquely identified by +the hash (SHA-1) of its object. As mentioned above, and unlike git, a +file might be stored as a tree. + +After reading a file or directory, `bup save` also updates the bup +`index` (not to be confused with git packfile indexes) entry for it +with the related hash. This helps speed up the next `bup save` run -- +if the file is unchanged during the next `bup index` run, the next +`bup save` can simply check whether the object with the hash recorded +in the index is present in the repository, and doesn't have to +open/read the file or directory again if so. + +## How `bup get` operates + +Conceptually, `bup get` simply walks over the graph of a set of objects +in the source repository, checks if the object is present in the +destination repository, and if not then it copies the object over. +If it encounters a tree object that is already present in the destination +repository, it does _not_ walk into that object, for performance +reasons. 
+ +## How `bup prune-older`/`bup rm` operate + +Again conceptually, this works by cutting pieces out of the chain of +commits, for example a `bup rm saves/2024-10...` will change +this branch: + +
![](missing-objects-fig-rm-before.svg)
+ +into this: + +
![](missing-objects-fig-rm-after.svg)
+ +As you can see, the save (commit) `2024-10...` object and the +trees/blobs it points to still exist in the repository, though they're +detached from `saves`. + +## How `bup gc` operates + +GC is intended to clean up those dangling objects. So after the prune example +above, ideally we want to have in the repository only this left after GC: + +
![](missing-objects-fig-rm-after-gc.svg)
+ +This is not exactly what happens, unfortunately. We're still doing some +background, so more on this later. + +## Object existence checking + +In order to check if objects already exist in the repository, bup uses +three different data structures: + +### `*.idx` files + +For each pack file, git and bup use an idx file that contains a list of all +the objects in the pack file, and also points to the object inside the +pack, so you can retrieve a desired object. Checking for existence just +requires seeing if the object name is in the list. + +These files can be recreated from the pack files, but this is expensive. + +### `*.midx` files + +The midx files have a similar structure, except they cover multiple pack +files and tell you which pack file an object is located in (but not +where in that pack file). + +These files are created from the `*.idx` files and are ephemeral, they can +be destroyed and recreated at will. + +### Bloom filter + +To see if it's even worth checking, bup uses a [Bloom +filter](https://en.wikipedia.org/wiki/Bloom_filter) (`bup.bloom`), +which is a probabilistic data structure that can say "I've never heard +about this object before" and "I might have seen this object +before". If it says the object doesn't exist, there's no need to check +the midx/idx files. If it says the object _might_ exist, then those +files need to be consulted. The Bloom filter is therefore not relevant +to the issues at hand. + +Just like the `*.midx` files, this file is created from the `*.idx` +files (or perhaps from the `*.midx` that in turn come from `*.idx`) +and is also ephemeral, so it can be destroyed and recreated at will. + +## Remote save - `bup save -r` + +In order to avoid transferring a lot of data that might not be needed, +bup clients synchronize the idx files with the idx files on the server +when connecting. 
They then rebuild midx/bloom files, and then the save +can do a local "does this object exist already" check, rather than either +shipping the object to the server for it to check, or asking the server +to check, both of which would take a lot of time (due to bandwidth and +latency respectively.) + +## Bug #1 (remotely cached midx files) + +When GC is done on a repository, of course some pack files will be +removed along with their idx files. + +When a client synchronizes the idx files, it deletes the idx files +from the cache that were removed on the server repository, so that +testing for objects that were previously contained in them should no +longer indicate that they already exist. + +However, the midx files are incorrectly updated. Remember that +midx files are created from the idx files. When updating the midx +files after the idx synchronization, bup doesn't check whether or +not any of the midx file(s) still contain(s) content from a now-deleted +idx file. This can lead to checking for object existence and being +given the answer that an object exists, even though it was GC'ed in +the repository, and in fact the idx files no longer show that it +exists, only the incorrect midx does. + +This in turn can lead to `save -r` or `get -r` omitting an object that had +previously existed, but has been removed by GC on the remote (omitted +because the midx still thinks the remote has it). + +This doesn't happen with local use of the repository (without `-r` or +`bup on`) since gc removes all midx/bloom files. + +Since version 0.33.5, `bup` regenerates the midx files correctly. + +## Bug #2 + +I previously showed that after prune, you have this set of objects +in the repository: + +
![](missing-objects-fig-rm-after.svg)
+ +Remember that after GC, we want this set of objects: + +
![](missing-objects-fig-rm-after-gc.svg)
+ +Unfortunately, the current GC fundamentally doesn't work that way (and +that's the issue), and it might only remove the `2024-11... (c0...)` +and `2024-10...` commits and `hosts (blob 76...)`, leaving us with: + +
![](missing-objects-fig-gc-dangling.svg)
+ +See ["How gc (before 0.33.5) can create dangling references"](#how-gc-before-0.33.5-can-create-dangling-references) +below for further details regarding the cause. + +### Effect on `bup get` + +If you run `bup get` now to write to this repository, and it +encounters the `etc/` tree, originally from `save 2`, in the set of +objects to transfer, it will see that it already exists (because it +*is* still in the repository's packfiles), and as explained earlier, +will assume it's complete and re-use it, without delving further. This +will leave the repository broken, because now, whatever `get` is +building will have a reference to the `etc/` tree that itself refers +to the missing `hosts` blob. + +### Effect on `bup save` + +Similarly, if `bup save` encounters the `etc/` tree, originally from +`save 2`, in the `index`, and sees that it already exists in the +repository, it will prune its index traversal at that point, and +re-use the existing, broken `etc/ (tree ee)` object without noticing +that the tree is broken. + +This can (also) happen if a save is aborted in the middle, `gc` is run +to clean up the repo and remove unreferenced objects, and some objects +that are referenced by the index (say the `etc/` tree) are not removed +by the `gc`, while some other objects (say `hosts`) that are referred +to by the preserved objects are themselves removed. + +However, if the index doesn't exist (say due to a `bup index +--clear`), then it shouldn't be possible for `bup save` to create the +problem, because when saving a path it creates all the objects the +path is comprised of, from the bottom (leaves) up, and then checks to +see if the object exists in the repository. This process would +encounter `hosts` first, and store it, fixing the broken `etc/` tree +before it's reached. + +### How gc (before 0.33.5) can create dangling references + +There are actually two reasons it can do this. 
+ +#### Probabilistic liveness detection + +The first reason is that the garbage collection before 0.33.5 tracks +tree and commit objects probabilistically, not precisely. It +determines whether they're live via a Bloom filter populated by a +reachability walk through all refs. (As of 0.33.5 trees and commits +are tracked precisely.) + +Because [Bloom filters](https://en.wikipedia.org/wiki/Bloom_filter) +can only say "definitely not present" and "maybe present", it means +that some other random object can cause `etc/ (tree ee...)` to be +considered "maybe present" (live) when it isn't actually reachable +(wasn't traversed during the walk). + +First, the Bloom filter is populated with live objects. Each live +object sets N bits in the Bloom filter (just 2 here): + +
![](missing-objects-fig-bloom-set.svg)
+ +Then the liveness check can erroneously return true if say `etc/ (tree +ee...)` happens to map to N bits that have been set by other objects: + +
![](missing-objects-fig-bloom-get.svg)
+ +#### Pack file rewrite threshold + +It's also possible that `etc/ (tree ee...)` and `hosts (blob 76...)` +end up in separate pack files (depending on how/when they were +written), and the pack file containing `hosts` ends up being +rewritten, dropping `hosts` (because it had more dead objects than the +threshold), but the pack file containing `etc/ (tree ee...)` does not +(because it had enough live objects to survive intact). + +## Bug #3 (bup get) + +While working on all of this, we noticed that `bup get` can also leave +the repository with incomplete trees if it is aborted at the wrong +time during a transfer. Imagine we have a save like this: + +
![](missing-objects-fig-get-bug-save.svg)
+ +Say that `bup get` is called to transfer `c-1` from another +repository. For simplicity we'll ignore its parent commit. It should +transfer `c-1`, `/`, `etc`, `fstab`, and `hosts`. Unfortunately, +versions of `bup get` before 0.33.5 will transfer the objects in +precisely that order, which means that if `bup get` is interrupted at +the wrong time, say just after fetching `fstab`, it will leave an +incomplete `etc/` tree in the destination repo (because the `hosts` +blob is missing). Any future `bup get` attempts won't fix the problem +because (as described previously) they will see `etc` in the +destination repository and assume it's complete. + +And of course there are many ways `bup get` might be interrupted: full +filesystem, system shutdown, network issues, or perhaps even more +likely, `^C` (SIGINT). + +> Note: If you were to run `bup gc` after the aborted transfer even +> the broken version of it would clean up the freshly written pack +> file since the objects aren't connected yet, but chances are that +> one would just attempt to resume the transfer, resulting in it being +> connected, but potentially incomplete. Also, due to the Bloom +> collision bug, gc might incorrectly keep some objects. 
diff -Nru bup-0.33.2/lib/bup/client.py bup-0.33.7/lib/bup/client.py --- bup-0.33.2/lib/bup/client.py 2023-07-01 20:08:43.000000000 +0000 +++ bup-0.33.7/lib/bup/client.py 2025-01-08 20:04:11.000000000 +0000 @@ -3,11 +3,11 @@ from __future__ import absolute_import from binascii import hexlify, unhexlify -import os, re, struct, time, zlib +import os, re, struct, sys, time, zlib import socket from bup import git, ssh, vfs -from bup.compat import environ, pending_raise, reraise +from bup.compat import environ, pending_raise from bup.helpers import (Conn, atomically_replaced_file, chunkyreader, debug1, debug2, linereader, lines_until_sentinel, mkdirp, nullcontext_if_not, progress, qprogress, DemuxConn) @@ -91,6 +91,7 @@ self.pout = os.fdopen(3, 'rb') self.pin = os.fdopen(4, 'wb') self.conn = Conn(self.pout, self.pin) + sys.stdin.close() else: if self.protocol in (b'ssh', b'file'): try: @@ -100,7 +101,7 @@ self.pin = self.p.stdin self.conn = Conn(self.pout, self.pin) except OSError as e: - reraise(ClientError('connect: %s' % e)) + raise ClientError('connect: %s' % e) from e elif self.protocol == b'bup': self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) self.sock.connect((self.host, @@ -111,7 +112,7 @@ self._require_command(b'init-dir') self._require_command(b'set-dir') if self.dir: - self.dir = re.sub(br'[\r\n]', ' ', self.dir) + self.dir = re.sub(br'[\r\n]', b' ', self.dir) if create: self.conn.write(b'init-dir %s\n' % self.dir) else: @@ -181,9 +182,7 @@ try: return self.conn.check_ok() except Exception as e: - reraise(ClientError(e)) - # reraise doesn't return - return None + raise ClientError(e) from e def check_busy(self): if self._busy: @@ -563,12 +562,12 @@ self.remote_closed = True id = self._end() self.file = None - super(PackWriter_Remote, self).close() + super().close() return id def __del__(self): assert self.remote_closed - super(PackWriter_Remote, self).__del__() + super().__del__() def abort(self): raise ClientError("don't know how to abort 
remote pack writing") @@ -590,7 +589,7 @@ (self._bwcount, self._bwtime) = _raw_write_bwlimit( self.file, outbuf, self._bwcount, self._bwtime) except IOError as e: - reraise(ClientError(e)) + raise ClientError(e) from e self.outbytes += len(data) self.count += 1 diff -Nru bup-0.33.2/lib/bup/cmd/bloom.py bup-0.33.7/lib/bup/cmd/bloom.py --- bup-0.33.2/lib/bup/cmd/bloom.py 2023-07-01 20:08:43.000000000 +0000 +++ bup-0.33.7/lib/bup/cmd/bloom.py 2025-01-08 20:04:11.000000000 +0000 @@ -1,12 +1,16 @@ -from __future__ import absolute_import - import os, glob, sys from bup import options, git, bloom from bup.compat import argv_bytes, hexstr -from bup.helpers import (add_error, debug1, log, progress, qprogress, - saved_errors) +from bup.helpers \ + import (add_error, + debug1, + log, + note_error, + progress, + qprogress, + saved_errors) from bup.io import path_msg @@ -18,7 +22,7 @@ o,output= output bloom filename (default: auto) d,dir= input directory to look for idx files (default: auto) k,hashes= number of hash functions to use (4 or 5) (default: auto) -c,check= check the given .idx file against the bloom filter +c,check= check given *.idx or *.midx file against the bloom filter """ @@ -50,7 +54,11 @@ idx = os.path.join(path, idx) log('bloom: bloom file: %s\n' % path_msg(rbloomfilename)) log('bloom: checking %s\n' % path_msg(ridx)) - with git.open_idx(idx) as oids: + oids = git.open_object_idx(idx) + if not oids: + note_error(f'bloom: ERROR: invalid index {path_msg(idx)}\n') + return + with oids: for oid in oids: if not b.exists(oid): add_error('bloom: ERROR: object %s missing' % hexstr(oid)) diff -Nru bup-0.33.2/lib/bup/cmd/damage.py bup-0.33.7/lib/bup/cmd/damage.py --- bup-0.33.2/lib/bup/cmd/damage.py 2023-07-01 20:08:43.000000000 +0000 +++ bup-0.33.7/lib/bup/cmd/damage.py 2025-01-08 20:04:11.000000000 +0000 @@ -46,13 +46,13 @@ else: maxsize = 1 chunks = opt.num or 10 - chunksize = size // chunks + chunksize = (size // chunks) or 1 for r in range(chunks): sz = 
random.randrange(1, maxsize+1) if sz > size: sz = size if opt.equal: - ofs = r*chunksize + ofs = (r * chunksize) % size else: ofs = random.randrange(0, size - sz + 1) log(' %6d bytes at %d\n' % (sz, ofs)) diff -Nru bup-0.33.2/lib/bup/cmd/fsck.py bup-0.33.7/lib/bup/cmd/fsck.py --- bup-0.33.2/lib/bup/cmd/fsck.py 2023-07-01 20:08:43.000000000 +0000 +++ bup-0.33.7/lib/bup/cmd/fsck.py 2025-01-08 20:04:11.000000000 +0000 @@ -1,15 +1,17 @@ -from __future__ import absolute_import, print_function from shutil import rmtree from subprocess import PIPE from tempfile import mkdtemp from binascii import hexlify +from os.path import join import glob, os, subprocess, sys from bup import options, git from bup.compat import argv_bytes -from bup.helpers import Sha1, chunkyreader, istty2, log, progress -from bup.io import byte_stream +from bup.helpers \ + import (EXIT_FAILURE, EXIT_FALSE, EXIT_TRUE, EXIT_SUCCESS, + Sha1, chunkyreader, istty2, log, progress, temp_dir) +from bup.io import byte_stream, path_msg par2_ok = 0 @@ -20,14 +22,14 @@ if opt.verbose > 1: log(s) -def run(argv): +def run(argv, *, cwd=None): # at least in python 2.5, using "stdout=2" or "stdout=sys.stderr" below # doesn't actually work, because subprocess closes fd #2 right before # execing for some reason. So we work around it by duplicating the fd # first. 
fd = os.dup(2) # copy stderr try: - p = subprocess.Popen(argv, stdout=fd, close_fds=False) + p = subprocess.Popen(argv, stdout=fd, close_fds=False, cwd=cwd) return p.wait() finally: os.close(fd) @@ -70,7 +72,7 @@ _par2_parallel = None -def par2(action, args, verb_floor=0): +def par2(action, args, verb_floor=0, cwd=None): global _par2_parallel if _par2_parallel is None: _par2_parallel = is_par2_parallel() @@ -82,12 +84,77 @@ if _par2_parallel: cmd.append(b'-t1') cmd.extend(args) - return run(cmd) + return run(cmd, cwd=cwd) -def par2_generate(base): - return par2(b'create', - [b'-n1', b'-c200', b'--', base, base + b'.pack', base + b'.idx'], - verb_floor=2) +def par2_generate(stem): + parent, base = os.path.split(stem) + # Work in a temp_dir because par2 was observed creating empty + # files when interrupted by C-c. + # cf. https://github.com/Parchive/par2cmdline/issues/84 + with temp_dir(dir=parent, prefix=(base + b'-bup-tmp-')) as tmpdir: + idx = base + b'.idx' + pack = base + b'.pack' + os.symlink(join(b'..', idx), join(tmpdir, idx)) + os.symlink(join(b'..', pack), join(tmpdir, pack)) + rc = par2(b'create', [b'-n1', b'-c200', b'--', base, pack, idx], + verb_floor=2, cwd=tmpdir) + if rc == 0: + # Currently, there should only be two files, the par2 + # index and a single vol000+200 file, but let's be + # defensive for the generation (keep whatever's produced). 
+ p2_idx = base + b'.par2' + p2_vol = base + b'.vol000+200.par2' + expected = frozenset((idx, pack, p2_idx, p2_vol)) + for tmp in os.listdir(tmpdir): + if tmp not in expected: + log(f'Unexpected par2 file (please report) {path_msg(tmp)}\n') + if tmp in (p2_idx, idx, pack): + continue + os.rename(join(tmpdir, tmp), join(parent, tmp)) + # Let this indicate success + os.rename(join(tmpdir, p2_idx), join(parent, p2_idx)) + expected = frozenset((idx, pack)) + remaining = frozenset(os.listdir(tmpdir)) + assert expected == remaining + return rc + +def par2_recovery_file_status(stem): + """Return True if recovery files exist for the stem and we should + assume they're acceptable. Return None if none of them exist, and + return False (after logging appropriate errors) if something + appears to be wrong with them, for example, if any of the files + are empty, or if the set of files is incomplete. + + """ + # Look for empty *.par2 files because C-c during "par2 create" may + # leave them when interrupted, and previous versions of bup didn't + # run par2 create in a tempdir to compensate. For now, we decide + # the existing data is OK if the pack-HASH.par2 and + # pack-HASH.vol000+200.par2 files exist, and neither is empty. + # cf. 
https://github.com/Parchive/par2cmdline/issues/84 + paths = [stem + suffix for suffix in (b'.par2', b'.vol000+200.par2')] + empty = [] + missing = set(paths) + for path in paths: + try: + st = os.stat(path) + if st.st_size == 0: + empty.append(path) + else: + missing.remove(path) + except FileNotFoundError: + pass + for path in empty: + log(f'error: empty par2 file - {path_msg(path)}\n') + if empty: + return False + if len(missing) == 2: + return None + for path in missing: + log(f'error: missing par2 file - {path_msg(path)}\n') + if not missing: + return True + return False def par2_verify(base): return par2(b'verify', [b'--', base], verb_floor=3) @@ -135,6 +202,9 @@ else: action_result = b'repaired' log('%s par2 repair: succeeded (0)\n' % last) + # FIXME: for this to be useful, we need to define + # the semantics, e.g. what's promised when we have + # this and a competing error from another pack? code = 100 else: action_result = b'failed' @@ -188,49 +258,52 @@ par2_setup() if opt.par2_ok: - if par2_ok: - sys.exit(0) # 'true' in sh - else: - sys.exit(1) + sys.exit(EXIT_TRUE if par2_ok else EXIT_FALSE) if opt.disable_par2: par2_ok = 0 - git.check_repo_or_die() - if extra: extra = [argv_bytes(x) for x in extra] else: debug('fsck: No filenames given: checking all packs.\n') + git.check_repo_or_die() extra = glob.glob(git.repo(b'objects/pack/*.pack')) - sys.stdout.flush() - out = byte_stream(sys.stdout) - code = 0 - count = 0 - outstanding = {} + pack_stems = [] for name in extra: if name.endswith(b'.pack'): - base = name[:-5] + pack_stems.append(name[:-5]) elif name.endswith(b'.idx'): - base = name[:-4] + pack_stems.append(name[:-4]) elif name.endswith(b'.par2'): - base = name[:-5] + pack_stems.append(name[:-5]) elif os.path.exists(name + b'.pack'): - base = name + pack_stems.append(name) else: raise Exception('%r is not a pack file!' 
% name) - (dir,last) = os.path.split(base) - par2_exists = os.path.exists(base + b'.par2') - if par2_exists and os.stat(base + b'.par2').st_size == 0: - par2_exists = 0 + + sys.stdout.flush() + out = byte_stream(sys.stdout) + code = EXIT_SUCCESS + count = 0 + outstanding = {} + for stem in pack_stems: + base = os.path.basename(stem) + par2_status = par2_recovery_file_status(stem) + if par2_status == False: + if code == EXIT_SUCCESS: + code = EXIT_FAILURE + continue sys.stdout.flush() # Not sure we still need this, but it'll flush out too debug('fsck: checking %r (%s)\n' - % (last, par2_ok and par2_exists and 'par2' or 'git')) + % (base, par2_ok and par2_status and 'par2' or 'git')) if not opt.verbose: progress('fsck (%d/%d)\r' % (count, len(extra))) if not opt.jobs: - nc = do_pack(base, last, par2_exists, out) + assert par2_status != False + nc = do_pack(stem, base, par2_status, out) + # FIXME: is first wins what we really want (cf. repair's 100) code = code or nc count += 1 else: @@ -246,7 +319,8 @@ outstanding[pid] = 1 else: # child try: - sys.exit(do_pack(base, last, par2_exists, out)) + assert par2_status != False + sys.exit(do_pack(stem, base, par2_status, out)) except Exception as e: log('exception: %r\n' % e) sys.exit(99) @@ -260,7 +334,13 @@ count += 1 if not opt.verbose: progress('fsck (%d/%d)\r' % (count, len(extra))) - if istty2: debug('fsck done. \n') + + # double-check (e.g. for (unlikely) problems with generate tmpdir renames) + for stem in pack_stems: + if par2_recovery_file_status(stem) == False: + if code == EXIT_SUCCESS: + code = EXIT_FAILURE + sys.exit(code) diff -Nru bup-0.33.2/lib/bup/cmd/gc.py bup-0.33.7/lib/bup/cmd/gc.py --- bup-0.33.2/lib/bup/cmd/gc.py 2023-07-01 20:08:43.000000000 +0000 +++ bup-0.33.7/lib/bup/cmd/gc.py 2025-01-08 20:04:11.000000000 +0000 @@ -9,10 +9,11 @@ optspec = """ bup gc [options...] 
-- -v,verbose increase log output (can be used more than once) -threshold= only rewrite a packfile if it's over this percent garbage [10] -#,compress= set compression level to # (0-9, 9 is highest) [1] -unsafe use the command even though it may be DANGEROUS +v,verbose increase log output (can be used more than once) +threshold= only rewrite a packfile if it's over this percent garbage [10] +#,compress= set compression level to # (0-9, 9 is highest) [1] +ignore-missing don't halt halt for missing objects +unsafe use the command even though it may be DANGEROUS """ # FIXME: server mode? @@ -40,6 +41,7 @@ bup_gc(threshold=opt.threshold, compression=opt.compress, - verbosity=opt.verbose) + verbosity=opt.verbose, + ignore_missing=opt.ignore_missing) die_if_errors() diff -Nru bup-0.33.2/lib/bup/cmd/get.py bup-0.33.7/lib/bup/cmd/get.py --- bup-0.33.2/lib/bup/cmd/get.py 2023-07-01 20:08:43.000000000 +0000 +++ bup-0.33.7/lib/bup/cmd/get.py 2025-01-08 20:04:11.000000000 +0000 @@ -12,8 +12,8 @@ environ, hexstr ) -from bup.git import get_cat_data, parse_commit, walk_object -from bup.helpers import add_error, debug1, log, saved_errors +from bup.git import MissingObject, get_cat_data, parse_commit, walk_object +from bup.helpers import debug1, log, note_error, saved_errors from bup.helpers import hostname, tty_width, parse_num from bup.io import path_msg from bup.pwdgrp import userfullname, username @@ -99,8 +99,8 @@ def require_n_args_or_die(n, args): if len(args) < n + 1: - misuse('%s argument requires %d %s' - % (n, 'values' if n == 1 else 'value')) + arg = args[0].decode('ascii') + misuse(f'{arg} argument requires {n} value{"" if n == 1 else "s"}') result = args[1:1+n], args[1+n:] assert len(result[0]) == n return result @@ -122,6 +122,7 @@ opt.print_commits = opt.print_trees = opt.print_tags = False opt.bwlimit = None opt.compress = 1 + opt.ignore_missing = False opt.source = opt.remote = None opt.target_specs = [] @@ -134,6 +135,12 @@ elif arg in (b'-v', b'--verbose'): 
opt.verbose += 1 remaining = remaining[1:] + elif arg == b'--ignore-missing': + opt.ignore_missing = True + remaining = remaining[1:] + elif arg == b'--no-ignore-missing': + opt.ignore_missing = False + remaining = remaining[1:] elif arg in (b'--ff', b'--append', b'--pick', b'--force-pick', b'--new-tag', b'--replace', b'--unnamed'): (ref,), remaining = require_n_args_or_die(1, remaining) @@ -173,6 +180,9 @@ continue else: misuse() + for target in opt.target_specs: + if opt.ignore_missing and target.method != 'unnamed': + misuse('currently only --unnamed allows --ignore-missing') return opt # FIXME: client error handling (remote exceptions, etc.) @@ -184,6 +194,11 @@ return writer.exists(unhexlify(oid)) for item in walk_object(repo.cat, hash, stop_at=already_seen, include_data=True): + if item.data is False: + if not opt.ignore_missing: + raise MissingObject(item.oid) + note_error(f'skipping missing source object {item.oid.hex()}\n') + continue # already_seen ensures that writer.exists(id) is false. # Otherwise, just_write() would fail. 
writer.just_write(item.oid, item.type, item.data) @@ -212,6 +227,19 @@ assert(tree is not None) return last_c, tree + +GitLoc = namedtuple('GitLoc', ('ref', 'hash', 'type')) + +def find_git_item(ref, repo): + it = repo.cat(ref) + oidx, typ, _ = next(it) + # FIXME: don't include_data once repo supports it + for _ in it: pass + if not oidx: + return None + return GitLoc(ref, unhexlify(oidx), typ) + + Loc = namedtuple('Loc', ['type', 'hash', 'path']) default_loc = Loc(None, None, None) @@ -288,15 +316,22 @@ return p -def resolve_src(spec, src_repo): - src = find_vfs_item(spec.src, src_repo) +def resolve_src(spec, src_repo, *, allow=None, ignore_missing=False): + assert allow in (None, 'git') spec_args = spec_msg(spec) - if not src: + if spec.src.startswith(b'git:'): + if not allow == 'git': + misuse(f'git references not (yet) allowed here {spec_args}') + src = find_git_item(spec.src[4:], src_repo) + else: + src = find_vfs_item(spec.src, src_repo) + if src: + if src.type == 'root': + misuse('cannot fetch entire repository for %s' % spec_args) + if src.type == 'tags': + misuse('cannot fetch entire /.tag directory for %s' % spec_args) + if not (src or ignore_missing): misuse('cannot find source for %s' % spec_args) - if src.type == 'root': - misuse('cannot fetch entire repository for %s' % spec_args) - if src.type == 'tags': - misuse('cannot fetch entire /.tag directory for %s' % spec_args) debug1('src: %s\n' % loc_desc(src)) return src @@ -324,7 +359,7 @@ spec_args = spec_msg(spec) if not spec.dest: - misuse('no destination (implicit or explicit) for %s', spec_args) + misuse('no destination (implicit or explicit) for %s' % spec_args) dest = find_vfs_item(spec.dest, dest_repo) if dest: @@ -417,7 +452,7 @@ elif src.type == 'save': spec = spec._replace(dest=get_save_branch(src_repo, spec.src)) if not spec.dest: - misuse('no destination provided for %s', spec_args) + misuse('no destination provided for %s' % spec_args) dest = find_vfs_item(spec.dest, dest_repo) if not dest: 
cp = validate_vfs_path(cleanup_vfs_path(spec.dest), spec) @@ -448,7 +483,7 @@ if not spec.dest and src.path.startswith(b'/.tag/'): spec = spec._replace(dest=src.path) if not spec.dest: - misuse('no destination (implicit or explicit) for %s', spec_args) + misuse('no destination (implicit or explicit) for %s' % spec_args) dest = find_vfs_item(spec.dest, dest_repo) if not dest: dest = default_loc._replace(path=cleanup_vfs_path(spec.dest)) @@ -475,7 +510,7 @@ if src.path.startswith(b'/.tag/') or src.type == 'branch': spec = spec._replace(dest=spec.src) if not spec.dest: - misuse('no destination provided for %s', spec_args) + misuse('no destination provided for %s' % spec_args) dest = find_vfs_item(spec.dest, dest_repo) if dest: if not dest.type == 'branch' and not dest.path.startswith(b'/.tag/'): @@ -503,11 +538,13 @@ return item.src.hash, unhexlify(commit_items.tree) -def resolve_unnamed(spec, src_repo, dest_repo): +def resolve_unnamed(spec, src_repo, dest_repo, *, ignore_missing): if spec.dest: misuse('destination name given for %s' % spec_msg(spec)) - src = resolve_src(spec, src_repo) - return Target(spec=spec, src=src, dest=None) + src = resolve_src(spec, src_repo, allow='git', ignore_missing=ignore_missing) + if src: + return Target(spec=spec, src=src, dest=None) + return None def handle_unnamed(item, src_repo, writer, opt): @@ -516,7 +553,7 @@ return (None,) -def resolve_targets(specs, src_repo, dest_repo): +def resolve_targets(specs, src_repo, dest_repo, *, ignore_missing): resolved_items = [] common_args = src_repo, dest_repo for spec in specs: @@ -532,7 +569,10 @@ elif spec.method == 'replace': resolved_items.append(resolve_replace(spec, *common_args)) elif spec.method == 'unnamed': - resolved_items.append(resolve_unnamed(spec, *common_args)) + tgt = resolve_unnamed(spec, *common_args, + ignore_missing=ignore_missing) + if tgt: + resolved_items.append(tgt) else: # Should be impossible -- prevented by the option parser. 
assert(False) @@ -593,7 +633,8 @@ # fail before we start writing (for any obviously # broken cases). target_items = resolve_targets(opt.target_specs, - src_repo, dest_repo) + src_repo, dest_repo, + ignore_missing=opt.ignore_missing) updated_refs = {} # ref_name -> (original_ref, tip_commit(bin)) no_ref_info = (None, None) @@ -656,7 +697,7 @@ else: log('updated %r (%s)\n' % (ref_name, new_hex)) except (git.GitError, client.ClientError) as ex: - add_error('unable to update ref %r: %s' % (ref_name, ex)) + note_error('unable to update ref %r: %s\n' % (ref_name, ex)) if saved_errors: log('WARNING: %d errors encountered while saving.\n' % len(saved_errors)) diff -Nru bup-0.33.2/lib/bup/cmd/list_idx.py bup-0.33.7/lib/bup/cmd/list_idx.py --- bup-0.33.2/lib/bup/cmd/list_idx.py 2023-07-01 20:08:43.000000000 +0000 +++ bup-0.33.7/lib/bup/cmd/list_idx.py 2025-01-08 20:04:11.000000000 +0000 @@ -43,11 +43,12 @@ count = 0 idxfiles = [argv_bytes(x) for x in extra] for name in idxfiles: + ix = None try: - ix = git.open_idx(name) + ix = git.open_object_idx(name) except git.GitError as e: add_error('%r: %s' % (name, e)) - ix.close() + if not ix: continue with ix: if len(opt.find) == 40: diff -Nru bup-0.33.2/lib/bup/cmd/midx.py bup-0.33.7/lib/bup/cmd/midx.py --- bup-0.33.2/lib/bup/cmd/midx.py 2023-07-01 20:08:43.000000000 +0000 +++ bup-0.33.7/lib/bup/cmd/midx.py 2025-01-08 20:04:11.000000000 +0000 @@ -9,6 +9,7 @@ log, mmap_readwrite, qprogress, saved_errors, unlink) from bup.io import byte_stream, path_msg +from bup.midx import MissingIdxs, open_midx PAGE_SIZE=4096 @@ -43,18 +44,42 @@ return mf +def _maybe_open_midx(path, *, rm_broken=False): + """Return a PackMidx for path as open_midx() does unless some of + its idx files are missing. In that case, warn, delete the path + if rm_broken is true, and return None. 
+ """ + missing = None + try: + return open_midx(path, ignore_missing=False) + except MissingIdxs as ex: + missing = ex.paths + pathm = path_msg(path) + # FIXME: eventually note_error instead when we're not deleting? + for idx in missing: + idxm = path_msg(idx) + log(f'warning: midx {pathm} refers to mssing idx {idxm}\n') + if rm_broken: + log(f'Removing incomplete midx {pathm}\n') + unlink(path) + return None + + def check_midx(name): nicename = git.repo_rel(name) log('Checking %s.\n' % path_msg(nicename)) try: - ix = git.open_idx(name) + ix = git.open_object_idx(name) except git.GitError as e: add_error('%s: %s' % (path_msg(name), e)) + if not ix: return with ix: for count,subname in enumerate(ix.idxnames): - with git.open_idx(os.path.join(os.path.dirname(name), subname)) \ - as sub: + sub = git.open_object_idx(os.path.join(os.path.dirname(name), subname)) + if not sub: + continue + with sub: for ecount,e in enumerate(sub): if not (ecount % 1234): qprogress(' %d/%d: %s %d/%d\r' @@ -95,7 +120,12 @@ allfilenames = [] with ExitStack() as contexts: for name in infilenames: - ix = git.open_idx(name) + if name.endswith(b'.idx'): + ix = git.open_idx(name) + else: + ix = _maybe_open_midx(name, rm_broken=auto or force) + if not ix: + continue contexts.enter_context(ix) inp.append(( ix.map, @@ -105,6 +135,7 @@ len(allfilenames), )) for n in ix.idxnames: + # FIXME: double-check wrt outfilename above allfilenames.append(os.path.basename(n)) total += len(ix) inp.sort(reverse=True, key=lambda x: x[0][x[2] : x[2] + 20]) @@ -141,7 +172,7 @@ # This is just for testing (if you enable this, don't clear inp above) # if 0: - # p = midx.PackMidx(outfilename) + # p = midx.open_midx(outfilename) # assert(len(p.idxnames) == len(infilenames)) # log(repr(p.idxnames) + '\n') # assert(len(p) == total) @@ -168,10 +199,14 @@ if force and not auto: midxs = [] # don't use existing midx files else: - midxs = glob.glob(b'%s/*.midx' % path) + midxs = [] contents = {} - for mname in midxs: - with 
git.open_idx(mname) as m: + for mname in glob.glob(b'%s/*.midx' % path): + m = _maybe_open_midx(mname, rm_broken=auto or force) + if not m: + continue + with m: + midxs.append(mname) contents[mname] = [(b'%s/%s' % (path,i)) for i in m.idxnames] sizes[mname] = len(m) diff -Nru bup-0.33.2/lib/bup/cmd/on__server.py bup-0.33.7/lib/bup/cmd/on__server.py --- bup-0.33.2/lib/bup/cmd/on__server.py 2023-07-01 20:08:43.000000000 +0000 +++ bup-0.33.7/lib/bup/cmd/on__server.py 2025-01-08 20:04:11.000000000 +0000 @@ -35,10 +35,11 @@ argv = [argv[0], b'mux', b'--'] + argv - # stdin/stdout are supposedly connected to 'bup server' that the caller - # started for us (often on the other end of an ssh tunnel), so we don't want - # to misuse them. Move them out of the way, then replace stdout with - # a pointer to stderr in case our subcommand wants to do something with it. + # stdin/stdout should be connected to 'bup server' that the caller + # started for us (often on the other end of an ssh tunnel), so we + # don't want to misuse them. Move them out of the way (to fds 3 + # and 4 -- see Client()), then replace stdout with a pointer to + # stderr in case our subcommand wants to do something with it. # # It might be nice to do the same with stdin, but my experiments showed that # ssh seems to make its child's stderr a readable-but-never-reads-anything diff -Nru bup-0.33.2/lib/bup/cmd/split.py bup-0.33.7/lib/bup/cmd/split.py --- bup-0.33.2/lib/bup/cmd/split.py 2023-07-01 20:08:43.000000000 +0000 +++ bup-0.33.7/lib/bup/cmd/split.py 2025-01-08 20:04:11.000000000 +0000 @@ -95,8 +95,11 @@ opt.date = time.time() opt.is_reverse = environ.get(b'BUP_SERVER_REVERSE') - if opt.is_reverse and opt.remote: - o.fatal("don't use -r in reverse mode; it's automatic") + if opt.is_reverse: + if opt.remote: + o.fatal("don't use -r in reverse mode; it's automatic") + if not opt.sources or opt.git_ids: + o.fatal('"bup on ... 
split" does not support reading from standard input') if opt.name and not valid_save_name(opt.name): o.fatal("'%r' is not a valid branch name." % opt.name) @@ -183,6 +186,12 @@ out = byte_stream(sys.stdout) stdin = byte_stream(sys.stdin) + writing = not (opt.noop or opt.copy) + remote_dest = opt.remote or opt.is_reverse + + if writing or opt.git_ids: + git.check_repo_or_die() + if opt.git_ids: # the input is actually a series of git object ids that we should retrieve # and split. @@ -221,12 +230,6 @@ else: files = [stdin] - writing = not (opt.noop or opt.copy) - remote_dest = opt.remote or opt.is_reverse - - if writing: - git.check_repo_or_die() - if remote_dest and writing: cli = repo = client.Client(opt.remote) else: diff -Nru bup-0.33.2/lib/bup/cmd/validate_object_links.py bup-0.33.7/lib/bup/cmd/validate_object_links.py --- bup-0.33.2/lib/bup/cmd/validate_object_links.py 1970-01-01 00:00:00.000000000 +0000 +++ bup-0.33.7/lib/bup/cmd/validate_object_links.py 2025-01-08 20:04:11.000000000 +0000 @@ -0,0 +1,125 @@ + +from binascii import hexlify, unhexlify +from os import path +import glob, sys, zlib + +from bup import options, git +from bup.compat import pairwise +from bup.helpers import \ + EXIT_FALSE, EXIT_TRUE, log, qprogress, reprogress, wrap_boolean_main +from bup.io import byte_stream, path_msg + + +optspec = """ +bup validate-object-links +-- +""" + +def obj_type_and_data_ofs(buf): + # cf. gitformat-pack(5) + c = buf[0] + kind = (c & 0x70) >> 4 + i = 0 + while c & 0x80: + i += 1 + c = buf[i] + return kind, i + 1 + +class Pack: + def __init__(self, idx, cp): + self._idx = idx + self._cp = cp + self._f = None + + def __enter__(self): + assert self._f is None + self._f = open(self._idx.name[:-3] + b'pack', 'rb', buffering=0) + return self + + def __exit__(self, *args, **kw): + self._f.close() + self._f = None + + def __iter__(self): + # cf. 
gitformat-pack(5) + assert self._f + assert self._f.read(8) == b'PACK\x00\x00\x00\x02' + ofs_and_idxs = list(self._idx.oid_offsets_and_idxs()) + ofs_and_idxs.sort() + ofs_and_idxs.append((-1, None)) # produces sz < 0 (i.e. read remaining) + for obj, nextobj in pairwise(ofs_and_idxs): + ofs, idx = obj + nextofs, _ = nextobj + self._f.seek(ofs) + sz = nextofs - ofs + hdr = self._f.read(5) # enough for 4GiB objects + kind, data_ofs = obj_type_and_data_ofs(hdr) + assert data_ofs > 0 + if kind == 3: # blob + continue + oid = self._idx._idx_to_hash(idx) + if kind in (1, 2, 4): # commit tree tag + data = hdr[data_ofs:] + self._f.read(sz - 5) + data = zlib.decompress(data) + yield oid, git._typermap[kind], data + elif kind in (5, 6, 7): # reserved obj_ofs_delta obj_ref_delta + it = self._cp.get(hexlify(oid)) + _, tp, _ = next(it) + data = b''.join(it) + if tp == b'blob': + continue + yield oid, tp, data + else: # *should* be impossible to reach (3-bits) for anything but 0 + pm = path_msg(self._idx.name) + raise Exception(f'Invalid object type {kind} in {pm} at {idx}\n') + +def validate(argv): + o = options.Options(optspec) + opt, flags, extra = o.parse_bytes(argv[1:]) + + if extra: + o.fatal("no arguments expected") + + git.check_repo_or_die() + + sys.stdout.flush() + out = byte_stream(sys.stdout) + cp = git.cp() + ret = EXIT_TRUE + with git.PackIdxList(git.repo(b'objects/pack')) as mi: + idxlist = glob.glob(path.join(git.repo(b'objects/pack'), b'*.idx')) + obj_n = 0 + for idxname in idxlist: + with git.open_idx(idxname) as idx: + obj_n += idx.nsha + obj_i = 0 + for idxname in idxlist: + with git.open_idx(idxname) as idx, Pack(idx, cp) as pack: + for oid, tp, data in pack: + # bup doesn't generate tag objects + if tp == b'tag': + out.flush() + sys.stderr.flush() + log(f'warning: skipping tag object {oid.hex()}\n') + reprogress() + continue + if tp == b'tree': + shalist = (x[2] for x in git.tree_decode(data)) + elif tp == b'commit': + commit = git.parse_commit(data) + 
shalist = map(unhexlify, commit.parents + [commit.tree]) + else: + raise Exception(f'unexpected object type {tp}') + for suboid in shalist: + if not mi.exists(suboid): + out.write(b'no %s for %s\n' + % (hexlify(suboid), hexlify(oid))) + ret = EXIT_FALSE + reprogress() + obj_i += idx.nsha + obj_frac = obj_i / obj_n + qprogress(f'scanned {obj_i}/{obj_n} {obj_frac:.2%}\r') + return ret + +def main(argv): + wrap_boolean_main(lambda: validate(argv)) diff -Nru bup-0.33.2/lib/bup/cmd/validate_ref_links.py bup-0.33.7/lib/bup/cmd/validate_ref_links.py --- bup-0.33.2/lib/bup/cmd/validate_ref_links.py 1970-01-01 00:00:00.000000000 +0000 +++ bup-0.33.7/lib/bup/cmd/validate_ref_links.py 2025-01-08 20:04:11.000000000 +0000 @@ -0,0 +1,63 @@ + +from bup import git, options, vfs +from bup.compat import argv_bytes +from bup.gc import count_objects, find_live_objects +from bup.helpers import EXIT_FALSE, EXIT_TRUE, log, wrap_boolean_main +from bup.io import path_msg +from bup.repo import LocalRepo + + +optspec = """ +bup validate-ref-links [ref...] +-- +v,verbose increase log output (can be used more than once) +""" + +def validate(argv): + o = options.Options(optspec) + opt, flags, extra = o.parse_bytes(argv[1:]) + verbosity = opt.verbose + + git.check_repo_or_die() + cat_pipe = git.cp() + + ref_missing = 0 + ref_info = [] + with LocalRepo() as repo: + for ref in [argv_bytes(x) for x in extra]: + # FIXME: unify with other commands and git: vfs:, etc. 
+ res = vfs.try_resolve(repo, ref, want_meta=False) + # FIXME: if symlink, error(dangling) + # FIXME: IOError ENOTDIR ELOOP + _, leaf = res[-1] + if not leaf: + log(f'missing {path_msg(ref)}') + ref_missing += 1 + continue + kind = type(leaf) + # FIXME: Root Tags FakeLink + if kind in (vfs.Item, vfs.Chunky, vfs.RevList): + ref_info.append((ref, leaf.oid)) + elif kind == vfs.Commit: + ref_info.append((ref, leaf.coid)) + else: + o.fatal(f"can't currently handle VFS {kind} for {path_msg(ref)}") + + found_missing = 0 + # Wanted all refs, or at least some specified weren't missing + if not extra or (extra and ref_info): + existing_count = count_objects(git.repo(b'objects/pack'), verbosity) + if verbosity: + log(f'found {existing_count} objects\n') + + if existing_count: + with git.PackIdxList(git.repo(b'objects/pack')) as idxl: + live_objects, live_trees, found_missing = \ + find_live_objects(existing_count, cat_pipe, idxl, refs=ref_info, + verbosity=verbosity, count_missing=True) + live_objects.close() + + return EXIT_FALSE if (ref_missing + found_missing) else EXIT_TRUE + +def main(argv): + wrap_boolean_main(lambda: validate(argv)) diff -Nru bup-0.33.2/lib/bup/cmd/web.py bup-0.33.7/lib/bup/cmd/web.py --- bup-0.33.2/lib/bup/cmd/web.py 2023-07-01 20:08:43.000000000 +0000 +++ bup-0.33.7/lib/bup/cmd/web.py 2025-01-08 20:04:11.000000000 +0000 @@ -116,7 +116,7 @@ def decode_argument(self, value, name=None): if name == 'path': return value - return super(BupRequestHandler, self).decode_argument(value, name) + return super().decode_argument(value, name) def get(self, path): return self._process_request(path) diff -Nru bup-0.33.2/lib/bup/compat.py bup-0.33.7/lib/bup/compat.py --- bup-0.33.2/lib/bup/compat.py 2023-07-01 20:08:43.000000000 +0000 +++ bup-0.33.7/lib/bup/compat.py 2025-01-08 20:04:11.000000000 +0000 @@ -6,25 +6,21 @@ from shlex import quote import os, sys +ver = sys.version_info + def hexstr(b): """Return hex string (not bytes as with hexlify) representation of 
b.""" return b.hex() -def reraise(ex): - raise ex.with_traceback(sys.exc_info()[2]) - -# These three functions (add_ex_tb, add_ex_ctx, and pending_raise) are -# vestigial, and code that uses them can probably be rewritten more -# simply now that we require Python versions that automatically -# populate the tracebacks and automatically chain pending exceptions. - -def add_ex_tb(ex): - """Do nothing (already handled by Python 3 infrastructure).""" - return ex - -def add_ex_ctx(ex, context_ex): - """Do nothing (already handled by Python 3 infrastructure).""" - return ex +if (ver.major, ver.minor) >= (3, 10): + from itertools import pairwise +else: + def pairwise(coll): + it = iter(coll) + x = next(it, None) + for y in it: + yield x, y + x = y class pending_raise: """If rethrow is true, rethrow ex (if any), unless the body throws. diff -Nru bup-0.33.2/lib/bup/gc.py bup-0.33.7/lib/bup/gc.py --- bup-0.33.2/lib/bup/gc.py 2023-07-01 20:08:43.000000000 +0000 +++ bup-0.33.7/lib/bup/gc.py 2025-01-08 20:04:11.000000000 +0000 @@ -1,16 +1,18 @@ -from __future__ import absolute_import from binascii import hexlify, unhexlify +from contextlib import ExitStack +from itertools import chain from os.path import basename import glob, os, subprocess, sys, tempfile from bup import bloom, git, midx from bup.compat import hexstr, pending_raise from bup.git import MissingObject, walk_object -from bup.helpers import Nonlocal, log, progress, qprogress +from bup.helpers import \ + Nonlocal, log, note_error, progress, qprogress, reprogress from bup.io import path_msg -# This garbage collector uses a Bloom filter to track the live objects +# This garbage collector uses a Bloom filter to track the live blobs # during the mark phase. 
This means that the collection is # probabilistic; it may retain some (known) percentage of garbage, but # it can also work within a reasonable, fixed RAM budget for any @@ -19,21 +21,24 @@ # The collection proceeds as follows: # # - Scan all live objects by walking all of the refs, and insert -# every hash encountered into a new Bloom "liveness" filter. -# Compute the size of the liveness filter based on the total -# number of objects in the repository. This is the "mark phase". +# every blob encountered into a new Bloom filter. Compute the +# size of the filter based on the total number of objects in the +# repository. Insert all other object hashes into a set. This +# set and the Bloom filter, taken together, are the "liveness +# filter". This is the "mark phase". # -# - Clear the data that's dependent on the repository's object set, -# i.e. the reflog, the normal Bloom filter, and the midxes. +# - Clear the data that's dependent on the repository's object +# collection, i.e. the reflog, the normal Bloom filter, and the +# midxes. # # - Traverse all of the pack files, consulting the liveness filter # to decide which objects to keep. # -# For each pack file, rewrite it iff it probably contains more -# than (currently) 10% garbage (computed by an initial traversal -# of the packfile in consultation with the liveness filter). To -# rewrite, traverse the packfile (again) and write each hash that -# tests positive against the liveness filter to a packwriter. +# For each pack file, rewrite it if it contains a tree or commit +# that is now garbage, or if it probably contains more than +# (currently) 10% garbage. To rewrite, traverse the packfile +# (again) and write each hash that tests positive against the +# liveness filter to a packwriter. 
# # During the traversal of all of the packfiles, delete redundant, # old packfiles only after the packwriter has finished the pack @@ -54,17 +59,35 @@ indexes = glob.glob(os.path.join(dir, b'*.idx')) for i, idx_name in enumerate(indexes): if verbosity: - log('found %d objects (%d/%d %s)\r' - % (object_count, i + 1, len(indexes), - path_msg(basename(idx_name)))) + qprogress('found %d objects (%d/%d %s)\r' + % (object_count, i + 1, len(indexes), + path_msg(basename(idx_name)))) with git.open_idx(idx_name) as idx: object_count += len(idx) return object_count +def report_missing(ref_name, item, verbosity): + chunks = item.chunk_path + if chunks: + path = chain(item.path, chunks) + else: + # Top commit, for example has none. + if item.path: + demangled = git.demangle_name(item.path[-1], item.mode)[0] + path = chain(item.path[:-1], [demangled]) + else: + path = item.path + ref = path_msg(ref_name) + path = path_msg(b'/'.join(path)) + if item.type == b'tree': + note_error(f'missing {ref}:{path}/\n') + else: + note_error(f'missing {ref}:{path}\n') + + def report_live_item(n, total, ref_name, ref_id, item, verbosity): status = 'scanned %02.2f%%' % (n * 100.0 / total) - hex_id = hexstr(ref_id) dirslash = b'/' if item.type == b'tree' else b'' chunk_path = item.chunk_path @@ -73,7 +96,7 @@ return ps = b'/'.join(item.path) chunk_ps = b'/'.join(chunk_path) - log('%s %s:%s/%s%s\n' % (status, hex_id, path_msg(ps), + log('%s %s:%s/%s%s\n' % (status, path_msg(ref_name), path_msg(ps), path_msg(chunk_ps), path_msg(dirslash))) return @@ -85,56 +108,66 @@ if demangled: ps = b'/'.join(item.path[:-1] + [demangled]) if verbosity == 1: - qprogress('%s %s:%s%s\r' % (status, hex_id, path_msg(ps), + qprogress('%s %s:%s%s\r' % (status, path_msg(ref_name), path_msg(ps), path_msg(dirslash))) elif (verbosity > 1 and item.type == b'tree') \ or (verbosity > 2 and item.type == b'blob'): - log('%s %s:%s%s\n' % (status, hex_id, path_msg(ps), + log('%s %s:%s%s\n' % (status, path_msg(ref_name), 
path_msg(ps), path_msg(dirslash))) + reprogress() elif verbosity > 3: ps = b'/'.join(item.path) - log('%s %s:%s%s\n' % (status, hex_id, path_msg(ps), path_msg(dirslash))) + log('%s %s:%s%s\n' % (status, path_msg(ref_name), path_msg(ps), path_msg(dirslash))) -def find_live_objects(existing_count, cat_pipe, verbosity=0): - prune_visited_trees = True # In case we want a command line option later +def find_live_objects(existing_count, cat_pipe, idx_list, refs=None, + verbosity=0, count_missing=False): pack_dir = git.repo(b'objects/pack') ffd, bloom_filename = tempfile.mkstemp(b'.bloom', b'tmp-gc-', pack_dir) os.close(ffd) # FIXME: allow selection of k? # FIXME: support ephemeral bloom filters (i.e. *never* written to disk) - live_objs = bloom.create(bloom_filename, expected=existing_count, k=None) - # live_objs will hold on to the fd until close or exit - os.unlink(bloom_filename) - stop_at, trees_visited = None, None - if prune_visited_trees: - trees_visited = set() - stop_at = lambda x: unhexlify(x) in trees_visited - approx_live_count = 0 - for ref_name, ref_id in git.list_refs(): - for item in walk_object(cat_pipe.get, hexlify(ref_id), stop_at=stop_at, - include_data=None): - # FIXME: batch ids - if verbosity: - report_live_item(approx_live_count, existing_count, - ref_name, ref_id, item, verbosity) - if trees_visited is not None and item.type == b'tree': - trees_visited.add(item.oid) - if verbosity: - if not live_objs.exists(item.oid): - live_objs.add(item.oid) - approx_live_count += 1 - else: - live_objs.add(item.oid) - trees_visited = None - if verbosity: - log('expecting to retain about %.2f%% unnecessary objects\n' - % live_objs.pfalse_positive()) - return live_objs + live_blobs = bloom.create(bloom_filename, expected=existing_count, k=None) + with ExitStack() as maybe_close_bloom: + maybe_close_bloom.enter_context(live_blobs) + # live_blobs will hold on to the fd until close or exit + os.unlink(bloom_filename) + live_trees = set() + stop_at = lambda x: 
unhexlify(x) in live_trees + oid_exists = (lambda oid: idx_list.exists(oid)) if idx_list else None + approx_live_count = 0 + missing = 0 + for ref_name, ref_id in refs if refs else git.list_refs(): + for item in walk_object(cat_pipe.get, hexlify(ref_id), + stop_at=stop_at, include_data=None, + oid_exists=oid_exists): + if item.data is False: + if count_missing: + report_missing(ref_name, item, verbosity) + missing += 1 + else: + raise MissingObject(item.oid) + # FIXME: batch ids + elif verbosity: + report_live_item(approx_live_count, existing_count, + ref_name, ref_id, item, verbosity) + if item.type != b'blob': + if verbosity and not item.oid in live_trees: + approx_live_count += 1 + live_trees.add(item.oid) + else: + if verbosity and not live_blobs.exists(item.oid): + approx_live_count += 1 + live_blobs.add(item.oid) + maybe_close_bloom.pop_all() + if count_missing: + return live_blobs, live_trees, missing + else: + return live_blobs, live_trees -def sweep(live_objects, existing_count, cat_pipe, threshold, compression, - verbosity): +def sweep(live_objects, live_trees, existing_count, cat_pipe, threshold, + compression, verbosity): # Traverse all the packs, saving the (probably) live data. 
ns = Nonlocal() @@ -142,11 +175,13 @@ def remove_stale_files(new_pack_prefix): if verbosity and new_pack_prefix: log('created ' + path_msg(basename(new_pack_prefix)) + '\n') + reprogress() for p in ns.stale_files: if new_pack_prefix and p.startswith(new_pack_prefix): continue # Don't remove the new pack file if verbosity: log('removing ' + path_msg(basename(p)) + '\n') + reprogress() os.unlink(p) if ns.stale_files: # So git cat-pipe will close them cat_pipe.restart() @@ -165,31 +200,45 @@ % ((float(collect_count) / existing_count) * 100)) with git.open_idx(idx_name) as idx: idx_live_count = 0 + must_rewrite = False + live_in_this_pack = set() for sha in idx: - if live_objects.exists(sha): + tmp_it = cat_pipe.get(hexlify(sha), include_data=False) + _, typ, _ = next(tmp_it) + if typ != b'blob': + is_live = sha in live_trees + if not is_live: + must_rewrite = True + else: + is_live = live_objects.exists(sha) + if is_live: idx_live_count += 1 + live_in_this_pack.add(sha) collect_count += idx_live_count if idx_live_count == 0: if verbosity: log('deleting %s\n' % path_msg(git.repo_rel(basename(idx_name)))) + reprogress() ns.stale_files.append(idx_name) ns.stale_files.append(idx_name[:-3] + b'pack') continue live_frac = idx_live_count / float(len(idx)) - if live_frac > ((100 - threshold) / 100.0): + if not must_rewrite and live_frac > ((100 - threshold) / 100.0): if verbosity: log('keeping %s (%d%% live)\n' % (git.repo_rel(basename(idx_name)), live_frac * 100)) + reprogress() continue if verbosity: log('rewriting %s (%.2f%% live)\n' % (basename(idx_name), live_frac * 100)) + reprogress() for sha in idx: - if live_objects.exists(sha): + if sha in live_in_this_pack: item_it = cat_pipe.get(hexlify(sha)) _, typ, _ = next(item_it) writer.just_write(sha, typ, b''.join(item_it)) @@ -221,34 +270,51 @@ / float(existing_count) * 100)) -def bup_gc(threshold=10, compression=1, verbosity=0): +def bup_gc(threshold=10, compression=1, verbosity=0, ignore_missing=False): cat_pipe = 
git.cp() existing_count = count_objects(git.repo(b'objects/pack'), verbosity) if verbosity: log('found %d objects\n' % existing_count) + reprogress() if not existing_count: if verbosity: log('nothing to collect\n') else: try: - live_objects = find_live_objects(existing_count, cat_pipe, - verbosity=verbosity) + with ExitStack() as maybe_close_idxl: + idxl = None + if ignore_missing: + idxl = git.PackIdxList(git.repo(b'objects/pack')) + maybe_close_idxl.enter_context(idxl) + found = find_live_objects(existing_count, cat_pipe, idxl, + verbosity=verbosity, + count_missing=ignore_missing) + live_objects, live_trees = found[:2] + if verbosity: + log('expecting to retain about %.2f%% unnecessary objects\n' + % live_objects.pfalse_positive()) + reprogress() except MissingObject as ex: log('bup: missing object %r \n' % hexstr(ex.oid)) sys.exit(1) with live_objects: - # FIXME: just rename midxes and bloom, and restore them at the end if - # we didn't change any packs? - packdir = git.repo(b'objects/pack') - if verbosity: log('clearing midx files\n') - midx.clear_midxes(packdir) - if verbosity: log('clearing bloom filter\n') - bloom.clear_bloom(packdir) - if verbosity: log('clearing reflog\n') - expirelog_cmd = [b'git', b'reflog', b'expire', b'--all', b'--expire=all'] - expirelog = subprocess.Popen(expirelog_cmd, env=git._gitenv()) - git._git_wait(b' '.join(expirelog_cmd), expirelog) - if verbosity: log('removing unreachable data\n') - sweep(live_objects, existing_count, cat_pipe, - threshold, compression, - verbosity) + try: + # FIXME: just rename midxes and bloom, and restore them at the end if + # we didn't change any packs? 
+ packdir = git.repo(b'objects/pack') + if verbosity: log('clearing midx files\n') + midx.clear_midxes(packdir) + if verbosity: log('clearing bloom filter\n') + bloom.clear_bloom(packdir) + if verbosity: log('clearing reflog\n') + expirelog_cmd = [b'git', b'reflog', b'expire', b'--all', b'--expire=all'] + expirelog = subprocess.Popen(expirelog_cmd, env=git._gitenv()) + git._git_wait(b' '.join(expirelog_cmd), expirelog) + if verbosity: log('removing unreachable data\n') + sweep(live_objects, live_trees, existing_count, cat_pipe, + threshold, compression, + verbosity) + except BaseException as ex: + log('WARNING: Collection interrupted. Run gc (again) to completion before\n' + 'WARNING: adding any new data to the repository (e.g. via save or get).\n') + raise ex diff -Nru bup-0.33.2/lib/bup/git.py bup-0.33.7/lib/bup/git.py --- bup-0.33.2/lib/bup/git.py 2023-07-01 20:08:43.000000000 +0000 +++ bup-0.33.7/lib/bup/git.py 2025-01-08 20:04:11.000000000 +0000 @@ -16,8 +16,7 @@ from bup.compat import (buffer, byte_int, bytes_from_byte, bytes_from_uint, environ, - pending_raise, - reraise) + pending_raise) from bup.io import path_msg from bup.helpers import (Sha1, add_error, chunkyreader, debug1, debug2, exo, @@ -32,6 +31,7 @@ temp_dir, unlink, utc_offset_str) +from bup.midx import open_midx verbose = 0 @@ -50,6 +50,7 @@ def _gitenv(repo_dir=None): + # This is not always used, i.e. 
sometimes we just use --git-dir if not repo_dir: repo_dir = repo() return merge_dict(environ, {b'GIT_DIR': os.path.abspath(repo_dir)}) @@ -69,9 +70,10 @@ def git_config_get(option, repo_dir=None, opttype=None, cfg_file=None): assert not (repo_dir and cfg_file), "repo_dir and cfg_file cannot both be used" - cmd = [b'git', b'config', b'--null'] if cfg_file: - cmd.extend([b'--file', cfg_file]) + cmd = [b'git', b'config', b'--file', cfg_file, b'--null'] + else: + cmd = [b'git', b'--git-dir', repo_dir or repo(), b'config', b'--null'] if opttype == 'int': cmd.extend([b'--int']) elif opttype == 'bool': @@ -80,10 +82,7 @@ assert opttype is None cmd.extend([b'--get', option]) env=None - if repo_dir: - env = _gitenv(repo_dir=repo_dir) - p = subprocess.Popen(cmd, stdout=subprocess.PIPE, env=env, - close_fds=True) + p = subprocess.Popen(cmd, stdout=subprocess.PIPE, close_fds=True) # with --null, git writes out a trailing \0 after the value r = p.stdout.read()[:-1] rc = p.wait() @@ -411,7 +410,7 @@ class PackIdxV1(PackIdx): """Object representation of a Git pack index (version 1) file.""" def __init__(self, filename, f): - super(PackIdxV1, self).__init__() + super().__init__() self.closed = False self.name = filename self.idxnames = [self.name] @@ -451,6 +450,11 @@ for ofs in range(start, start + 24 * self.nsha, 24): yield self.map[ofs : ofs + 20] + def oid_offsets_and_idxs(self): + end = self.sha_ofs + self.nsha * 24 + for i, ofs in enumerate(range(self.sha_ofs, end, 24)): + yield struct.unpack_from('!I', self.map, offset=ofs)[0], i + def close(self): self.closed = True if self.map is not None: @@ -465,7 +469,7 @@ class PackIdxV2(PackIdx): """Object representation of a Git pack index (version 2) file.""" def __init__(self, filename, f): - super(PackIdxV2, self).__init__() + super().__init__() self.closed = False self.name = filename self.idxnames = [self.name] @@ -491,16 +495,19 @@ def __len__(self): return int(self.nsha) # int() from long for python 2 + def 
_oid_ofs_from_ofs32_ofs(self, ofs32_ofs): + ofs32 = struct.unpack_from('!I', self.map, offset=ofs32_ofs)[0] + if ofs32 & 0x80000000: + ofs64_i = ofs32 & 0x7fffffff + ofs64_ofs = self.ofs64table_ofs + ofs64_i * 8 + return struct.unpack_from('!Q', self.map, offset=ofs64_ofs)[0] + return ofs32 + def _ofs_from_idx(self, idx): if idx >= self.nsha or idx < 0: raise IndexError('invalid pack index index %d' % idx) - ofs_ofs = self.ofstable_ofs + idx * 4 - ofs = struct.unpack_from('!I', self.map, offset=ofs_ofs)[0] - if ofs & 0x80000000: - idx64 = ofs & 0x7fffffff - ofs64_ofs = self.ofs64table_ofs + idx64 * 8 - ofs = struct.unpack_from('!Q', self.map, offset=ofs64_ofs)[0] - return ofs + ofs32_ofs = self.ofstable_ofs + idx * 4 + return self._oid_ofs_from_ofs32_ofs(ofs32_ofs) def _idx_to_hash(self, idx): if idx >= self.nsha or idx < 0: @@ -520,6 +527,11 @@ self.map.close() self.map = None + def oid_offsets_and_idxs(self): + end = self.ofstable_ofs + self.nsha * 4 + for i, ofs32_ofs in enumerate(range(self.ofstable_ofs, end, 4)): + yield self._oid_ofs_from_ofs32_ofs(ofs32_ofs), i + def __del__(self): assert self.closed @@ -639,20 +651,20 @@ d[os.path.join(self.dir, name)] = ix for full in midxes: if not d.get(full): - mx = midx.PackMidx(full) - (mxd, mxf) = os.path.split(mx.name) - broken = False - for n in mx.idxnames: - if not os.path.exists(os.path.join(mxd, n)): + mx, missing = None, None + try: + mx = open_midx(full, ignore_missing=False) + except midx.MissingIdxs as ex: + missing = ex.paths + if not missing: + if mx: midxl.append(mx) + else: + mxd, mxf = os.path.split(full) + for n in missing: log(('warning: index %s missing\n' ' used by %s\n') % (path_msg(n), path_msg(mxf))) - broken = True - if broken: - mx.close() unlink(full) - else: - midxl.append(mx) midxl.sort(key=lambda ix: (-len(ix), -xstat.stat(ix.name).st_mtime)) for ix in midxl: @@ -667,7 +679,7 @@ d[ix.name] = ix for name in ix.idxnames: d[os.path.join(self.dir, name)] = ix - elif not ix.force_keep: + else: 
debug1('midx: removing redundant: %s\n' % path_msg(os.path.basename(ix.name))) ix.close() @@ -711,25 +723,35 @@ def open_idx(filename): - if filename.endswith(b'.idx'): - f = open(filename, 'rb') + if not filename.endswith(b'.idx'): # why is this enforced *here*? + raise GitError('pack idx filenames must end with .idx') + f = open(filename, 'rb') + with ExitStack() as contexts: + contexts.enter_context(f) header = f.read(8) if header[0:4] == b'\377tOc': version = struct.unpack('!I', header[4:8])[0] if version == 2: + contexts.pop_all() return PackIdxV2(filename, f) else: raise GitError('%s: expected idx file version 2, got %d' % (path_msg(filename), version)) elif len(header) == 8 and header[0:4] < b'\377tOc': + contexts.pop_all() return PackIdxV1(filename, f) else: raise GitError('%s: unrecognized idx file header' % path_msg(filename)) + + +def open_object_idx(filename): + if filename.endswith(b'.idx'): + return open_idx(filename) elif filename.endswith(b'.midx'): - return midx.PackMidx(filename) + return open_midx(filename) else: - raise GitError('idx filenames must end with .idx or .midx') + raise GitError('pack index filenames must end with .idx or .midx') def idxmerge(idxlist, final_progress=True): @@ -832,7 +854,7 @@ try: f.write(oneblob) except IOError as e: - reraise(GitError(e)) + raise GitError(e) from e nw = len(oneblob) crc = zlib.crc32(oneblob) & 0xffffffff self._update_idx(sha, crc, nw) @@ -941,10 +963,10 @@ os.rename(tmpdir + b'/pack', nameprefix + b'.pack') os.rename(tmpdir + b'/idx', nameprefix + b'.idx') os.fsync(pfd) - if run_midx: - auto_midx(os.path.join(self.repo_dir, b'objects/pack')) if self.on_pack_finish: self.on_pack_finish(nameprefix) + if run_midx: + auto_midx(os.path.join(self.repo_dir, b'objects/pack')) return nameprefix finally: if tmpdir: @@ -1288,33 +1310,65 @@ def __init__(self, repo_dir = None): require_suitable_git() self.repo_dir = repo_dir - self.p = self.inprogress = None + self.p = self.pcheck = self.inprogress = None + + # 
probe for cat-file --batch-command + tmp = subprocess.Popen([b'git', b'cat-file', b'--batch-command'], + stdin=subprocess.DEVNULL, + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + close_fds=True, + env=_gitenv(self.repo_dir)) + tmp.wait(); + self.have_batch_command = tmp.returncode == 0 def close(self, wait=False): self.p, p = None, self.p + self.pcheck, pcheck = None, self.pcheck self.inprogress = None if p: try: p.stdout.close() finally: - # This will handle pending exceptions correctly once - # we drop py2 p.stdin.close() + if pcheck and pcheck != p: + try: + pcheck.stdout.close() + finally: + pcheck.stdin.close() if wait: - p.wait() - return p.returncode + if p: p.wait() + if pcheck: pcheck.wait() + if p and p.returncode: + return p.returncode + if pcheck and pcheck.returncode: + return pcheck.returncode + return 0 return None def restart(self): self.close() - self.p = subprocess.Popen([b'git', b'cat-file', b'--batch'], + self.p = subprocess.Popen([b'git', b'cat-file', + b'--batch-command' if self.have_batch_command else b'--batch'], stdin=subprocess.PIPE, stdout=subprocess.PIPE, close_fds = True, bufsize = 4096, env=_gitenv(self.repo_dir)) - def get(self, ref): + def _open_check(self): + if self.pcheck is not None: return + if self.have_batch_command: + self.pcheck = self.p + return + self.pcheck = subprocess.Popen([b'git', b'cat-file', b'--batch-check'], + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + close_fds = True, + bufsize = 4096, + env=_gitenv(self.repo_dir)) + + def get(self, ref, include_data=True): """Yield (oidx, type, size), followed by the data referred to by ref. If ref does not exist, only yield (None, None, None). 
@@ -1331,12 +1385,21 @@ assert ref.find(b'\r') < 0 assert not ref.startswith(b'-') self.inprogress = ref - self.p.stdin.write(ref + b'\n') - self.p.stdin.flush() - hdr = self.p.stdout.readline() + if include_data: + p = self.p + if self.have_batch_command: + p.stdin.write(b'contents ') + else: + self._open_check() + p = self.pcheck + if self.have_batch_command: + p.stdin.write(b'info ') + p.stdin.write(ref + b'\n') + p.stdin.flush() + hdr = p.stdout.readline() if not hdr: raise GitError('unexpected cat-file EOF (last request: %r, exit: %s)' - % (ref, self.p.poll() or 'none')) + % (ref, p.poll() or 'none')) if hdr.endswith(b' missing\n'): self.inprogress = None yield None, None, None @@ -1345,37 +1408,43 @@ if len(info) != 3 or len(info[0]) != 40: raise GitError('expected object (id, type, size), got %r' % info) oidx, typ, size = info + + if not include_data: + self.inprogress = None + yield oidx, typ, size + return + size = int(size) try: - it = chunkyreader(self.p.stdout, size) + it = chunkyreader(p.stdout, size) yield oidx, typ, size - for blob in chunkyreader(self.p.stdout, size): + for blob in chunkyreader(p.stdout, size): yield blob - readline_result = self.p.stdout.readline() + readline_result = p.stdout.readline() assert readline_result == b'\n' self.inprogress = None except Exception as ex: with pending_raise(ex): self.close() - def _join(self, it): + def _join(self, oid, path): + it = self.get(oid) _, typ, _ = next(it) if typ == b'blob': - for blob in it: - yield blob + yield from it elif typ == b'tree': treefile = b''.join(it) - for (mode, name, sha) in tree_decode(treefile): - for blob in self.join(hexlify(sha)): - yield blob + for mode, name, sha in tree_decode(treefile): + yield from self._join(hexlify(sha), path + [name]) elif typ == b'commit': treeline = b''.join(it).split(b'\n')[0] assert treeline.startswith(b'tree ') - for blob in self.join(treeline[5:]): - yield blob + yield from self._join(treeline[5:], path + [f'commit:{oid!r}']) + elif typ is 
None: + path += [repr(oid)] + raise GitError(f'missing ref at {path!r}') else: - raise GitError('invalid object type %r: expected blob/tree/commit' - % typ) + raise GitError(f'ref {oid!r} type {typ!r} is not blob/tree/commit') def join(self, id): """Generate a list of the content of all blobs that can be reached @@ -1383,8 +1452,7 @@ or a commit. The content of all blobs that can be seen from trees or commits will be added to the list. """ - for d in self._join(self.get(id)): - yield d + yield from self._join(id, []) _cp = {} @@ -1427,52 +1495,73 @@ KeyError.__init__(self, 'object %r is missing' % hexlify(oid)) -WalkItem = namedtuple('WalkItem', ['oid', 'type', 'mode', - 'path', 'chunk_path', 'data']) -# The path is the mangled path, and if an item represents a fragment -# of a chunked file, the chunk_path will be the chunked subtree path -# for the chunk, i.e. ['', '2d3115e', ...]. The top-level path for a -# chunked file will have a chunk_path of ['']. So some chunk subtree -# of the file '/foo/bar/baz' might look like this: -# -# item.path = ['foo', 'bar', 'baz.bup'] -# item.chunk_path = ['', '2d3115e', '016b097'] -# item.type = 'tree' -# ... - - -def walk_object(get_ref, oidx, stop_at=None, include_data=None): - """Yield everything reachable from oidx via get_ref (which must behave - like CatPipe get) as a WalkItem, stopping whenever stop_at(oidx) - returns true. Throw MissingObject if a hash encountered is - missing from the repository, and don't read or return blob content - in the data field unless include_data is set. +class WalkItem: + # The path is the mangled path, and if an item represents a fragment + # of a chunked file, the chunk_path will be the chunked subtree path + # for the chunk, i.e. ['', '2d3115e', ...]. The top-level path for a + # chunked file will have a chunk_path of ['']. 
So some chunk subtree + # of the file '/foo/bar/baz' might look like this: + # + # item.path = ['foo', 'bar', 'baz.bup'] + # item.chunk_path = ['', '2d3115e', '016b097'] + # item.type = 'tree' + # ... + __slots__ = 'oid', 'type', 'mode', 'path', 'chunk_path', 'data' + def __init__(self, *, oid, type, mode, path, chunk_path, data): + self.oid = oid + self.type = type + self.mode = mode + self.path = path + self.chunk_path = chunk_path + self.data = data + +def walk_object(get_ref, oidx, *, stop_at=None, include_data=None, + oid_exists=None): + """Yield everything reachable from oidx via get_ref (which must + behave like CatPipe get) as a WalkItem, stopping whenever + stop_at(oidx) returns logically true. Set the data field to False + when the object is missing, or None if the object exists but + include_data is logically false. Missing blobs may not be noticed + unless include_data is logically true or oid_exists(oid) is + provided. Yield items depth first, post-order, i.e. parents after + children. """ + # Maintain the pending stack on the heap to avoid stack overflow - pending = [(oidx, [], [], None)] + pending = [(oidx, [], [], None, None)] while len(pending): - oidx, parent_path, chunk_path, mode = pending.pop() - oid = unhexlify(oidx) + if isinstance(pending[-1], WalkItem): + yield pending.pop() + continue + + oidx, parent_path, chunk_path, mode, exp_typ = pending.pop() if stop_at and stop_at(oidx): continue - if (not include_data) and mode and stat.S_ISREG(mode): + oid = unhexlify(oidx) + + if (not include_data) and mode and exp_typ == b'blob': # If the object is a "regular file", then it's a leaf in # the graph, so we can skip reading the data if the caller # hasn't requested it. 
yield WalkItem(oid=oid, type=b'blob', chunk_path=chunk_path, path=parent_path, mode=mode, - data=None) + data=bool(oid_exists(oid)) if oid_exists else None) continue item_it = get_ref(oidx) get_oidx, typ, _ = next(item_it) if not get_oidx: - raise MissingObject(unhexlify(oidx)) + yield WalkItem(oid=unhexlify(oidx), type=exp_typ, + chunk_path=chunk_path, path=parent_path, + mode=mode, data=False) + continue if typ not in (b'blob', b'commit', b'tree'): raise Exception('unexpected repository object type %r' % typ) + if exp_typ and typ != exp_typ: + raise Exception(f'{oidx.decode("ascii")} object type {typ} != {exp_typ}') # FIXME: set the mode based on the type when the mode is None if typ == b'blob' and not include_data: @@ -1483,18 +1572,23 @@ else: data = b''.join(item_it) - yield WalkItem(oid=oid, type=typ, - chunk_path=chunk_path, path=parent_path, - mode=mode, - data=(data if include_data else None)) + item = WalkItem(oid=oid, type=typ, + chunk_path=chunk_path, path=parent_path, + mode=mode, + data=(data if include_data else None)) - if typ == b'commit': + if typ == b'blob': + yield item + elif typ == b'commit': + pending.append(item) commit_items = parse_commit(data) + # For now, all paths are rooted at the "nearest" commit for pid in commit_items.parents: - pending.append((pid, parent_path, chunk_path, mode)) - pending.append((commit_items.tree, parent_path, chunk_path, - hashsplit.GIT_MODE_TREE)) + pending.append((pid, [], [], mode, b'commit')) + pending.append((commit_items.tree, [oidx], [], + hashsplit.GIT_MODE_TREE, b'tree')) elif typ == b'tree': + pending.append(item) for mode, name, ent_id in tree_decode(data): demangled, bup_type = demangle_name(name, mode) if chunk_path: @@ -1507,4 +1601,5 @@ else: sub_chunk_path = chunk_path pending.append((hexlify(ent_id), sub_path, sub_chunk_path, - mode)) + mode, + b'tree' if stat.S_ISDIR(mode) else b'blob')) diff -Nru bup-0.33.2/lib/bup/helpers.py bup-0.33.7/lib/bup/helpers.py --- bup-0.33.2/lib/bup/helpers.py 
2023-07-01 20:08:43.000000000 +0000 +++ bup-0.33.7/lib/bup/helpers.py 2025-01-08 20:04:11.000000000 +0000 @@ -1,6 +1,5 @@ """Helper functions and classes for bup.""" -from __future__ import absolute_import, division from collections import namedtuple from contextlib import ExitStack from ctypes import sizeof, c_void_p @@ -8,6 +7,7 @@ from os import environ from subprocess import PIPE, Popen from tempfile import mkdtemp +from traceback import print_exception from shutil import rmtree import sys, os, subprocess, errno, select, mmap, stat, re, struct import hashlib, heapq, math, operator, time @@ -21,6 +21,17 @@ from bup.options import _tty_width as tty_width +# EXIT_TRUE (just an alias) and EXIT_FALSE are intended for cases like +# POSIX grep or test, or bup's own "fsck --par2-ok", where the command +# is asking a question with a yes or no answer. Eventually all +# commands should avoid exiting with 1 for errors. + +EXIT_SUCCESS = 0 +EXIT_TRUE = 0 +EXIT_FALSE = 1 +EXIT_FAILURE = 2 + + buglvl = int(os.environ.get('BUP_DEBUG', 0)) @@ -33,6 +44,17 @@ return manager if manager is not None else nullcontext() +def getgroups(): + # cf. getgroups(2) - effective group id may or may not be in the + # list, and while on linux, for example, it normally is, in an + # unshare, it wasn't. + egid = os.getegid() + gids = os.getgroups() + if egid not in gids: + gids.append(egid) + return gids + + class finalized: def __init__(self, enter_result=None, finalize=None): assert finalize @@ -413,7 +435,7 @@ if sys.platform.startswith('cygwin'): def is_superuser(): # https://cygwin.com/ml/cygwin/2015-02/msg00057.html - groups = os.getgroups() + groups = getgroups() return 544 in groups or 0 in groups else: def is_superuser(): @@ -784,18 +806,18 @@ def _mmap_do(f, sz, flags, prot, close): - if not sz: - st = os.fstat(f.fileno()) - sz = st.st_size - if not sz: - # trying to open a zero-length map gives an error, but an empty - # string has all the same behaviour of a zero-length map, ie. 
it has - # no elements :) - return '' - map = io.mmap(f.fileno(), sz, flags, prot) - if close: - f.close() # map will persist beyond file close - return map + with ExitStack() as contexts: + if close: + contexts.enter_context(f) + if not sz: + st = os.fstat(f.fileno()) + sz = st.st_size + if not sz: + # trying to open a zero-length map gives an error, but an empty + # string has all the same behaviour of a zero-length map, ie. it has + # no elements :) + return b'' + return io.mmap(f.fileno(), sz, flags, prot) def mmap_read(f, sz = 0, close=True): @@ -933,6 +955,10 @@ saved_errors.append(e) log('%-70s\n' % e) +def note_error(m): + # FIXME: rework console output, logging, and api... + saved_errors.append(m) + log(m) def clear_errors(): global saved_errors @@ -986,11 +1012,11 @@ rows = len(l) // ncols for s in range(0, len(l), rows): cols.append(l[s:s+rows]) - out = nothing + out = [] fmt = b'%-*s' if binary else '%-*s' for row in zip(*cols): - out += prefix + nothing.join((fmt % (clen+2, s)) for s in row) + nl - return out + out.append(prefix + nothing.join((fmt % (clen+2, s)) for s in row) + nl) + return nothing.join(out) def parse_date_or_fatal(str, fatal): @@ -1192,7 +1218,7 @@ else: localtime = time.localtime def utc_offset_str(t): - return time.strftime(b'%z', localtime(t)) + return time.strftime('%z', localtime(t)).encode('ascii') def to_py_time(x): return x @@ -1233,3 +1259,23 @@ b'w': 60 * 60 * 24 * 7, b'm': 60 * 60 * 24 * 31, b'y': 60 * 60 * 24 * 366}[scale] + + +def wrap_boolean_main(main): + # FIXME: this will eventually migrate to main.py when we clean up + # exit statuses. It's here for now because we need to make sure + # we only exit EXIT_TRUE for main() EXIT_TRUE, since other code + # may not respect the distinction, and because python's default + # error exit status is 1. See also compat.wrap_main(). 
+ try: + rc = main() + except KeyboardInterrupt: # preserve existing behavior for now + sys.exit(130) + except SystemExit as ex: + sys.exit(EXIT_FAILURE if ex.code == EXIT_TRUE else ex.code) + except BaseException as ex: + print_exception(ex) + sys.exit(EXIT_FAILURE) + if rc: + sys.exit(rc) + die_if_errors(status=EXIT_FAILURE) diff -Nru bup-0.33.2/lib/bup/io.py bup-0.33.7/lib/bup/io.py --- bup-0.33.2/lib/bup/io.py 2023-07-01 20:08:43.000000000 +0000 +++ bup-0.33.7/lib/bup/io.py 2025-01-08 20:04:11.000000000 +0000 @@ -37,17 +37,17 @@ def close(self): self._bup_closed = True - super(mmap, self).close() + super().close() if hasattr(py_mmap.mmap, '__enter__'): def __enter__(self): - super(mmap, self).__enter__() + super().__enter__() return self def __exit__(self, type, value, traceback): # Don't call self.close() when the parent has its own __exit__; # defer to it. self._bup_closed = True - result = super(mmap, self).__exit__(type, value, traceback) + result = super().__exit__(type, value, traceback) return result else: def __enter__(self): diff -Nru bup-0.33.2/lib/bup/main.py bup-0.33.7/lib/bup/main.py --- bup-0.33.2/lib/bup/main.py 2023-07-01 20:08:43.000000000 +0000 +++ bup-0.33.7/lib/bup/main.py 2025-01-08 20:04:11.000000000 +0000 @@ -18,13 +18,10 @@ from bup import compat, path, helpers from bup.compat import ( - add_ex_ctx, - add_ex_tb, environ, fsdecode, wrap_main ) -from bup.compat import add_ex_tb, add_ex_ctx, wrap_main from bup.helpers import ( columnate, handle_ctrl_c, @@ -129,6 +126,7 @@ do_profile = False bup_dir = None args = args[1:] +subcmd = None while args: arg = args[0] if arg in (b'-?', b'--help'): @@ -155,7 +153,7 @@ else: break -subcmd = args +subcmd = subcmd or args # Make BUP_DIR absolute, so we aren't affected by chdir (i.e. save -C, etc.). if bup_dir: @@ -178,9 +176,11 @@ usage() try: - cmd_module = import_module('bup.cmd.' - + subcmd_name.decode('ascii').replace('-', '_')) + cmd_module_name = 'bup.cmd.' 
+ subcmd_name.decode('ascii').replace('-', '_') + cmd_module = import_module(cmd_module_name) except ModuleNotFoundError as ex: + if ex.name != cmd_module_name: + raise ex cmd_module = None if not cmd_module: @@ -189,7 +189,7 @@ usage('error: unknown command "%s"' % path_msg(subcmd_name)) already_fixed = int(environ.get(b'BUP_FORCE_TTY', 0)) -if subcmd_name in [b'mux', b'ftp', b'help']: +if subcmd_name in (b'mux', b'ftp', b'help', b'fuse'): fix_stdout = False fix_stderr = False else: @@ -238,7 +238,6 @@ srcs = tuple(srcs) dest_for = dict(zip(srcs, dests)) pending = {} - pending_ex = None try: while srcs: ready_fds, _, _ = select.select(srcs, [], []) @@ -262,21 +261,19 @@ if split[0]: pending.setdefault(fd, []).extend(split) except BaseException as ex: - pending_ex = add_ex_ctx(add_ex_tb(ex), pending_ex) - try: + pending_ex = ex # Try to finish each of the streams - for fd, pending_items in pending.items(): - dest = dest_for[fd] - width = tty_width() - try: - print_clean_line(dest, pending_items, width) - except (EnvironmentError, EOFError) as ex: - pending_ex = add_ex_ctx(add_ex_tb(ex), pending_ex) - except BaseException as ex: - pending_ex = add_ex_ctx(add_ex_tb(ex), pending_ex) - if pending_ex: - raise pending_ex - + try: + for fd, pending_items in pending.items(): + dest = dest_for[fd] + width = tty_width() + try: + print_clean_line(dest, pending_items, width) + except (EnvironmentError, EOFError) as ex: + ex.__cause__ = pending_ex + pending_ex = ex + finally: + raise pending_ex def import_and_run_main(module, args): if do_profile: @@ -299,7 +296,6 @@ dests = [] real_out_fd = real_err_fd = stdout_pipe = stderr_pipe = None filter_thread = filter_thread_started = None - pending_ex = None try: if fix_stdout: sys.stdout.flush() @@ -321,42 +317,29 @@ filter_thread.start() filter_thread_started = True import_and_run_main(module, args) - except Exception as ex: - add_ex_tb(ex) - pending_ex = ex - raise finally: # Try to make sure that whatever else happens, we restore 
# stdout and stderr here, if that's possible, so that we don't - # risk just losing some output. - try: - real_out_fd is not None and os.dup2(real_out_fd, sys.stdout.fileno()) - except Exception as ex: - add_ex_tb(ex) - add_ex_ctx(ex, pending_ex) - try: - real_err_fd is not None and os.dup2(real_err_fd, sys.stderr.fileno()) - except Exception as ex: - add_ex_tb(ex) - add_ex_ctx(ex, pending_ex) - # Kick filter loose - try: - stdout_pipe is not None and os.close(stdout_pipe[1]) - except Exception as ex: - add_ex_tb(ex) - add_ex_ctx(ex, pending_ex) - try: - stderr_pipe is not None and os.close(stderr_pipe[1]) - except Exception as ex: - add_ex_tb(ex) - add_ex_ctx(ex, pending_ex) + # risk just losing some output. Nest the finally blocks so we + # try each one no matter what happens, and accumulate all + # exceptions in the pending exception __context__. try: + try: + try: + try: + real_out_fd is not None and \ + os.dup2(real_out_fd, sys.stdout.fileno()) + finally: + real_err_fd is not None and \ + os.dup2(real_err_fd, sys.stderr.fileno()) + finally: + # Kick filter loose + stdout_pipe is not None and os.close(stdout_pipe[1]) + finally: + stderr_pipe is not None and os.close(stderr_pipe[1]) + finally: close_catpipes() - except Exception as ex: - add_ex_tb(ex) - add_ex_ctx(ex, pending_ex) - if pending_ex: - raise pending_ex + # There's no point in trying to join unless we finished the finally block. 
if filter_thread_started: filter_thread.join() @@ -394,14 +377,10 @@ filter_output(srcs, dests) return p.wait() except BaseException as ex: - add_ex_tb(ex) - try: - if p and p.poll() == None: - os.kill(p.pid, signal.SIGTERM) - p.wait() - except BaseException as kill_ex: - raise add_ex_ctx(add_ex_tb(kill_ex), ex) - raise ex + if p and p.poll() == None: + os.kill(p.pid, signal.SIGTERM) + p.wait() + raise def run_subcmd(module, args): diff -Nru bup-0.33.2/lib/bup/metadata.py bup-0.33.7/lib/bup/metadata.py --- bup-0.33.2/lib/bup/metadata.py 2023-07-01 20:08:43.000000000 +0000 +++ bup-0.33.7/lib/bup/metadata.py 2025-01-08 20:04:11.000000000 +0000 @@ -14,7 +14,8 @@ from bup import vint, xstat from bup.drecurse import recursive_dirlist -from bup.helpers import add_error, mkdirp, log, is_superuser, format_filesize +from bup.helpers import \ + add_error, mkdirp, log, is_superuser, format_filesize, getgroups from bup.io import path_msg from bup.pwdgrp import pwd_from_uid, pwd_from_name, grp_from_gid, grp_from_name from bup.xstat import utime, lutime @@ -427,7 +428,7 @@ if entry: gid = entry.gr_gid else: # not superuser - only consider changing the group/gid - user_gids = os.getgroups() + user_gids = getgroups() if self.gid in user_gids: gid = self.gid if not restore_numeric_ids and self.gid != 0: @@ -741,6 +742,7 @@ 'path', 'size', 'symlink_target', 'hardlink_target', 'linux_attr', 'linux_xattr', 'posix1e_acl') self.mode = self.uid = self.gid = self.user = self.group = None + self.rdev = None self.atime = self.mtime = self.ctime = None # optional members self.path = None @@ -757,6 +759,7 @@ if self.mtime != other.mtime: return False if self.ctime != other.ctime: return False if self.atime != other.atime: return False + if self.rdev != other.rdev: return False if self.path != other.path: return False if self.uid != other.uid: return False if self.gid != other.gid: return False @@ -777,6 +780,7 @@ self.mtime, self.ctime, self.atime, + self.rdev, self.path, self.uid, self.gid, 
@@ -802,6 +806,8 @@ result += ' user:' + repr(self.user) if self.group is not None: result += ' group:' + repr(self.group) + if self.rdev is not None: + result += ' rdev:' + repr(self.group) if self.size is not None: result += ' size:' + repr(self.size) for name, val in (('atime', self.atime), diff -Nru bup-0.33.2/lib/bup/midx.py bup-0.33.7/lib/bup/midx.py --- bup-0.33.2/lib/bup/midx.py 2023-07-01 20:08:43.000000000 +0000 +++ bup-0.33.7/lib/bup/midx.py 2025-01-08 20:04:11.000000000 +0000 @@ -1,5 +1,5 @@ -from __future__ import absolute_import, print_function +from contextlib import ExitStack import glob, os, struct from bup import _helpers @@ -8,6 +8,7 @@ from bup.io import path_msg +MIDX_HEADER = b'MIDX' MIDX_VERSION = 4 extract_bits = _helpers.extract_bits @@ -15,49 +16,64 @@ _total_steps = 0 +def _midx_header(mmap): return mmap[0:4] +def _midx_version(mmap): return struct.unpack('!I', mmap[4:8])[0] + + +class MissingIdxs(Exception): + __slots__ = 'paths', + def __init__(self, *, paths): + super().__init__() + self.paths = paths + class PackMidx: - """Wrapper which contains data from multiple index files. - Multiple index (.midx) files constitute a wrapper around index (.idx) files - and make it possible for bup to expand Git's indexing capabilities to vast - amounts of files. + """Wrapper which contains data from multiple index files. Create + via open_midx(), not PackMidx(). Multiple index (.midx) files + constitute a wrapper around index (.idx) files and make it + possible for bup to expand Git's indexing capabilities to vast + amounts of files. This class only supports the current + MIDX_VERSION. 
+ """ - def __init__(self, filename): - self.closed = False - self.name = filename - self.force_keep = False - self.map = None - assert(filename.endswith(b'.midx')) - self.map = mmap_read(open(filename)) - if self.map[0:4] != b'MIDX': - log('Warning: skipping: invalid MIDX header in %r\n' - % path_msg(filename)) - self.force_keep = True - self._init_failed() - return - ver = struct.unpack('!I', self.map[4:8])[0] - if ver < MIDX_VERSION: - log('Warning: ignoring old-style (v%d) midx %r\n' - % (ver, path_msg(filename))) - self.force_keep = False # old stuff is boring - self._init_failed() - return - if ver > MIDX_VERSION: - log('Warning: ignoring too-new (v%d) midx %r\n' - % (ver, path_msg(filename))) - self.force_keep = True # new stuff is exciting - self._init_failed() - return - - self.bits = _helpers.firstword(self.map[8:12]) - self.entries = 2**self.bits - self.fanout_ofs = 12 - # fanout len is self.entries * 4 - self.sha_ofs = self.fanout_ofs + self.entries * 4 - self.nsha = self._fanget(self.entries - 1) - # sha table len is self.nsha * 20 - self.which_ofs = self.sha_ofs + 20 * self.nsha - # which len is self.nsha * 4 - self.idxnames = self.map[self.which_ofs + 4 * self.nsha:].split(b'\0') + def __init__(self, filename, mmap, *, _internal=False): + """Takes ownership of mmap.""" + self.closed = finished = False + try: + self.map = mmap + assert _internal, 'call open_midx()' + assert _midx_header(mmap) == MIDX_HEADER + assert _midx_version(mmap) == MIDX_VERSION + self.name = filename + self.bits = _helpers.firstword(self.map[8:12]) + self.entries = 2**self.bits + self.fanout_ofs = 12 + # fanout len is self.entries * 4 + self.sha_ofs = self.fanout_ofs + self.entries * 4 + self.nsha = self._fanget(self.entries - 1) + # sha table len is self.nsha * 20 + self.which_ofs = self.sha_ofs + 20 * self.nsha + # which len is self.nsha * 4 + self.idxnames = self.map[self.which_ofs + 4 * self.nsha:].split(b'\0') + idxdir = os.path.dirname(filename) + missing = [] + for name 
in self.idxnames: + if not os.path.exists(os.path.join(idxdir, name)): + missing.append(name) + if missing: + raise MissingIdxs(paths=missing) + finished = True + finally: + if not finished: self.close() + + def close(self): + # This must be able to handle __init__ partial initializations too. + if not self.closed: + self.closed = True + self.fanout = self.shatable = self.whichlist = self.idxnames = None + tmp = getattr(self, 'map', None) + if tmp is not None: + self.map = None + tmp.close() def __enter__(self): return self @@ -66,11 +82,6 @@ with pending_raise(value, rethrow=False): self.close() - def _init_failed(self): - self.bits = 0 - self.entries = 1 - self.idxnames = [] - def _fanget(self, i): if i >= self.entries * 4 or i < 0: raise IndexError('invalid midx index %d' % i) @@ -92,13 +103,6 @@ def _get_idxname(self, i): return self.idxnames[self._get_idx_i(i)] - def close(self): - self.closed = True - if self.map is not None: - self.fanout = self.shatable = self.whichlist = self.idxnames = None - self.map.close() - self.map = None - def __del__(self): assert self.closed @@ -145,6 +149,48 @@ return int(self.nsha) +def open_midx(path, *, ignore_missing=True): + """Return a PackMidx for path. Return None if path exists but is + either too old or too new. If any of the constituent indexes are + missing, raise MissingIdxs if ignore_missing is false otherwise + return None. + + """ + # FIXME: eventually note_error when not raising? + assert path.endswith(b'.midx') # FIXME: wanted/needed? 
+ mmap = mmap_read(open(path)) + with ExitStack() as contexts: + contexts.enter_context(mmap) + if _midx_header(mmap) != MIDX_HEADER: + pathm = path_msg(path) + log(f'Warning: skipping: invalid MIDX header in {pathm}\n') + return None + ver = _midx_version(mmap) + if ver == MIDX_VERSION: + if not ignore_missing: + contexts.pop_all() + return PackMidx(path, mmap, _internal=True) + missing = None + contexts.pop_all() + try: + midx = PackMidx(path, mmap, _internal=True) + except MissingIdxs as ex: + missing = ex.paths + if not missing: + return midx + pathm = path_msg(path) + for idx in missing: + idxm = path_msg(idx) + log(f'Warning: ignoring midx {pathm} (missing idx {idxm})\n') + return None + pathm = path_msg(path) + if ver < MIDX_VERSION: + log(f'Warning: ignoring old-style (v{ver}) midx {pathm}\n') + elif ver > MIDX_VERSION: + log(f'Warning: ignoring too-new (v{ver}) midx {pathm}\n') + return None + + def clear_midxes(dir=None): for midx in glob.glob(os.path.join(dir, b'*.midx')): os.unlink(midx) diff -Nru bup-0.33.2/lib/bup/source_info.py bup-0.33.7/lib/bup/source_info.py --- bup-0.33.2/lib/bup/source_info.py 2023-07-01 20:08:43.000000000 +0000 +++ bup-0.33.7/lib/bup/source_info.py 2025-01-08 20:04:11.000000000 +0000 @@ -1,3 +1,3 @@ -commit='b1c19fb0142ac4bdc60b07640734e0c2d256f423' -date='2023-07-01 15:08:43 -0500' +commit='8ba4ff35130be0114e689bb6b745499bdd5299c9' +date='2025-01-08 14:04:11 -0600' modified=False diff -Nru bup-0.33.2/lib/bup/version.py bup-0.33.7/lib/bup/version.py --- bup-0.33.2/lib/bup/version.py 2023-07-01 20:08:43.000000000 +0000 +++ bup-0.33.7/lib/bup/version.py 2025-01-08 20:04:11.000000000 +0000 @@ -1,6 +1,4 @@ -from __future__ import absolute_import, print_function - from bup import source_info try: import bup.checkout_info as checkout_info @@ -22,7 +20,7 @@ # The ~ in a version is a Debian-style "always less than" marker: # https://www.debian.org/doc/debian-policy/ch-controlfields.html#version -base_version = b'0.33.2' +base_version = 
b'0.33.7' version = base_version if version.endswith(b'~'): diff -Nru bup-0.33.2/lib/bup/vfs.py bup-0.33.7/lib/bup/vfs.py --- bup-0.33.2/lib/bup/vfs.py 2023-07-01 20:08:43.000000000 +0000 +++ bup-0.33.7/lib/bup/vfs.py 2025-01-08 20:04:11.000000000 +0000 @@ -677,7 +677,7 @@ # Assumes the tree is properly formed, i.e. there are no # duplicates, and entries will be in git tree order. - if isinstance(names, (frozenset, set)): + if not isinstance(names, (frozenset, set)): names = frozenset(names) remaining = len(names) @@ -855,7 +855,7 @@ return # Assumes no duplicate refs - if isinstance(names, (frozenset, set)): + if not isinstance(names, (frozenset, set)): names = frozenset(names) remaining = len(names) last_name = max(names) diff -Nru bup-0.33.2/lib/cmd/bup.c bup-0.33.7/lib/cmd/bup.c --- bup-0.33.2/lib/cmd/bup.c 2023-07-01 20:08:43.000000000 +0000 +++ bup-0.33.7/lib/cmd/bup.c 2025-01-08 20:04:11.000000000 +0000 @@ -201,8 +201,6 @@ # define PROC_SELF_EXE "/proc/self/exe" #elif defined(__sun) || defined (sun) # define PROC_SELF_EXE "/proc/self/path/a.out" -#else -# define PROC_SELF_EXE NULL #endif static char *find_in_path(const char * const name, const char * const path) @@ -286,40 +284,40 @@ static char *exe_parent_dir(const char * const argv_0) { - if (PROC_SELF_EXE != NULL) { - char sbuf[2048]; - char *path = sbuf; - size_t path_n = sizeof(sbuf); - ssize_t len; - while (1) { - len = readlink(PROC_SELF_EXE, path, path_n); - if (len == -1 || (size_t) len != path_n) - break; - if (!INT_MULTIPLY_OK(path_n, 2, &path_n)) - die(2, "memory buffer for executable path would be too big\n"); - if (path != sbuf) free(path); - path = malloc(path_n); - if (!path) - die(2, "unable to allocate memory for executable path\n"); - } - if (len != -1) { - path[len] = '\0'; - char *result = strdup(dirname(path)); - if (path != sbuf) - free(path); - return result; - } - switch (errno) { - case ENOENT: case EACCES: case EINVAL: case ELOOP: case ENOTDIR: - case ENAMETOOLONG: - break; - 
default: - die(2, "cannot resolve %s: %s\n", path, strerror(errno)); +#ifdef PROC_SELF_EXE + char sbuf[2048]; + char *path = sbuf; + size_t path_n = sizeof(sbuf); + ssize_t len; + while (1) { + len = readlink(PROC_SELF_EXE, path, path_n); + if (len == -1 || (size_t) len != path_n) break; - } + if (!INT_MULTIPLY_OK(path_n, 2, &path_n)) + die(2, "memory buffer for executable path would be too big\n"); + if (path != sbuf) free(path); + path = malloc(path_n); + if (!path) + die(2, "unable to allocate memory for executable path\n"); + } + if (len != -1) { + path[len] = '\0'; + char *result = strdup(dirname(path)); if (path != sbuf) free(path); + return result; } + switch (errno) { + case ENOENT: case EACCES: case EINVAL: case ELOOP: case ENOTDIR: + case ENAMETOOLONG: + break; + default: + die(2, "cannot resolve %s: %s\n", path, strerror(errno)); + break; + } + if (path != sbuf) + free(path); +#endif return find_exe_parent(argv_0); } diff -Nru bup-0.33.2/note/0.32.1-from-0.32.md bup-0.33.7/note/0.32.1-from-0.32.md --- bup-0.33.2/note/0.32.1-from-0.32.md 1970-01-01 00:00:00.000000000 +0000 +++ bup-0.33.7/note/0.32.1-from-0.32.md 2025-01-08 20:04:11.000000000 +0000 @@ -0,0 +1,38 @@ + +Notable changes in 0.32.1 since 0.32 +==================================== + +Bugs +---- + +* POSIX1e ACLs should be restored more correctly now. + + Previously bup incorrectly restored default (`ACL_TYPE_DEFAULT`) + ACLs as access acls (`ACL_TYPE_ACCESS`). When both existed, it + restored the access ACL first and then the default ACL as an access + ACL. Now, bup should restore each with the proper type. This issue + only affects saves created on platforms where bup currently supports + ACLs, so presumably mostly just saves created on Linux since the + current ACL support depends on non-standard functions like + `acl_extended(3)`. 
+ + There is one remaining issue, which isn't fixed in this release, but + is fixed in 0.33.1 (because fixing it here could create saves that + are backward incompatible with 0.33). + + The problem is that in this version and older versions, bup stores + ACLs in the `acl_to_any_text(3)` format with a newline delimiter, + when the standard (and `acl_from_text(3)` which restore depends on) + requires commas. This may cause restores that include ACLs (likely + only those from Linux right now) to fail on some platforms + (e.g. Cygwin). + +Build and install +----------------- + +* pytest 7 is now supported. + +Thanks to (at least) +==================== + +Johannes Berg and Rob Browning diff -Nru bup-0.33.2/note/0.32.2-from-0.32.1.md bup-0.33.7/note/0.32.2-from-0.32.1.md --- bup-0.33.2/note/0.32.2-from-0.32.1.md 1970-01-01 00:00:00.000000000 +0000 +++ bup-0.33.7/note/0.32.2-from-0.32.1.md 2025-01-08 20:04:11.000000000 +0000 @@ -0,0 +1,26 @@ + +Notable changes in 0.32.2 since 0.32.1 +====================================== + +Bugs +---- + +* `bup gc` should no longer remove stale packfiles before it creates + the new midx at the end of a run. This could cause midx to complain + about missing files, e.g.: + + warning: index pack-....idx missing + +Build system +------------ + +* The automated FreeBSD tests have moved from 12.1 to 12.4 and to + Python 3.9, and have dropped testing of Python 2. + +* The automated macOS tests have moved from Catalina to Monterey and + from x86 to ARM. 
+ +Thanks to (at least) +==================== + +Johannes Berg and Rob Browning diff -Nru bup-0.33.2/note/0.33.3-from-0.33.2.md bup-0.33.7/note/0.33.3-from-0.33.2.md --- bup-0.33.2/note/0.33.3-from-0.33.2.md 1970-01-01 00:00:00.000000000 +0000 +++ bup-0.33.7/note/0.33.3-from-0.33.2.md 2025-01-08 20:04:11.000000000 +0000 @@ -0,0 +1,57 @@ + +Notable changes in 0.33.3 since 0.33.2 +====================================== + +General +------- + +* The performance of `bup ls` when columnating a large number of + results should be *much* better. + +Bugs +---- + +* `bup fuse` should no longer linger after its mount point has been + unmounted: https://bugs.debian.org/1050040 + +* `bup --version` should work again. + +* `bup gc` should no longer remove stale packfiles before it creates + the new midx at the end of a run. This could cause midx to complain + about missing files, e.g.: + + warning: index pack-....idx missing + +* `bup` will no longer (since 0.33) ignore `BUP_DIR` looking for a + `pack.packSizeLimit`. + +* A VFS optimization of the handling of requests for specific names + within a given tree has been restored. + +* Incorrect uses of some exception handling code designed for + compatibility with Python 2 have been replaced with the built-in + Python 3 equivalents. + +* Some client calls to `super(CLASS, self)` have been replaced by + `super()` to avoid errors during `__del__`. + +Build system +------------ + +* `test-help` should no longer hang when run interactively, and should + no longer be affected by the current locale. Previously it might + fail in a non-C locale. + +* The `bup damage` invocation in `test-fsck` has been made much more + aggressive in order to avoid transient failures caused both by + unpredictable `par2` behavior and the (currently) probabilistic + nature of the damage. + +* OpenBSD builds should no longer fail on `-Werror` related issues + involving `PROC_SELF_EXE`. 
+ +Thanks to (at least) +==================== + +Alain Cochard, Austin Schuh, Johannes Berg, Julian Smith, Lucas +Nussbaum, Nix, Rob Browning, Rob Leslie, and Robert Edmonds diff -Nru bup-0.33.2/note/0.33.4-from-0.33.3.md bup-0.33.7/note/0.33.4-from-0.33.3.md --- bup-0.33.2/note/0.33.4-from-0.33.3.md 1970-01-01 00:00:00.000000000 +0000 +++ bup-0.33.7/note/0.33.4-from-0.33.3.md 2025-01-08 20:04:11.000000000 +0000 @@ -0,0 +1,44 @@ +Notable changes in 0.33.4 since 0.33.3 +====================================== + +May require attention +--------------------- + +* The `par2` command (invoked by `bup fsck -g`) may generate empty + recovery files if interrupted (say via C-c). To mitigate this, bup + now runs `par2` in a temporary directory, and only moves the + recovery files into place if the generation succeeds. It will also + look for any empty par2 files, or incomplete sets, associated with + packfiles that it has been asked to examine. If found, they will + provoke an error. See also + https://github.com/Parchive/par2cmdline/issues/84 + +* Previously, any `bup on REMOTE ...` commands that attempted to read + from standard input (for example `bup on HOST split < something` or + `bup on HOST split --git-ids ...`) would read nothing instead of the + original content of standard input. Now those commands will either + reject the invocation, or just crash when they attempt to read a + closed stream. + +* When building bup, if your version of GNU make is not named `make`, + prefer invoking it directly (e.g. `gmake -j4 check`) instead of + relying on the ./Makefile to redirect to GNU make. Some non-GNU + makes now break this redirection. For example, if you include a + `-j` option in your build command one of them adds a `-J` to + `MAKEFLAGS` that GNU make rejects. In future releases, we're likely + to drop the redirection entirely. 
+ +Bugs +---- + +* `bup-config(5)` has been added, and mentions that at the moment `bup + on REMOTE ...` incorrectly reads the `pack.packSizeLimit` from the + `REMOTE` repository. + +* `bup fsck` no longer requires a repository via `BUP_DIR`, `-d`, + etc. when paths are provided on the command line. + +Thanks to (at least) +==================== + +Ed Maste, Greg Troxel, Johannes Berg, and Rob Browning diff -Nru bup-0.33.2/note/0.33.5-from-0.33.4.md bup-0.33.7/note/0.33.5-from-0.33.4.md --- bup-0.33.2/note/0.33.5-from-0.33.4.md 1970-01-01 00:00:00.000000000 +0000 +++ bup-0.33.7/note/0.33.5-from-0.33.4.md 2025-01-08 20:04:11.000000000 +0000 @@ -0,0 +1,156 @@ +Notable changes in 0.33.5 since 0.33.4 +====================================== + +May require attention +--------------------- + +* Problems have been discovered that could have allowed the creation + of incomplete trees or commits, for example `bup save` or `bup get` + could have created saves with missing data. This should no longer + be possible, but any existing incomplete trees might also be re-used + by `bup get` (for example), and so represent a continuing hazard. + Note that if you've never used `bup gc` or `bup get`, then we don't + currently believe your repositories could have been affected. + + You can detect whether you've been affected by running + `bup-validate-object-links(1)`. If it doesn't report any broken + links (as `no HASH for PARENT_HASH`), then you can stop here, the + repository should be fine. You can also run `bup midx -af` first, + which may speed up the validation. + + If it does report broken links, then you should run `bup gc + --ignore-missing` to completion before making any further additions to the + repository. But first, if you have other repositories that might + still contain the missing objects, then you may want to try to + retrieve them. See "repopulate missing objects" below for details.
+ `bup gc --ignore-missing` will eliminate some of the hazards and + report at least one of the paths to each missing object. + + If `gc` doesn't report any broken paths (missing objects), then you + can stop here, the repository should be fine. + + If `gc` does report broken paths, then you should clear the related + indexes, e.g. `bup index --clear` or `bup on HOST index --clear`, + etc. + + If you don't rely on `bup get` (e.g. if you only `save`) then + clearing the index(es) should ensure that new saves will be complete + (though existing broken saves will remain structurally broken for + now). + + If you do rely on `bup get`, and if `gc` reports broken paths, then + there's not yet an easy way to ensure `get` won't continue to re-use + incomplete trees when building new saves. For now, you could start + a new repository (and save the old one for future repairs). + + You can try to repopulate missing objects from the source (or some + other) repository. To do so, you can collect a list of missing + objects via `bup validate-object-links`: + + bup validate-object-links | tee validate-out + grep -E '^no ' validate-out | cut -d' ' -f 2 | sort -u > missing-objects + sed -e 's/^/--unnamed git:/' missing-objects > unnamed-objects + + and then try to retrieve the missing objects from another repository + via `bup get`. For example, perhaps: + + xargs bup get --source repo --unnamed --ignore-missing < unnamed-objects + + or + + xargs bup on HOST get --unnamed --ignore-missing < unnamed-objects + + After that, you can run `bup validate-object-links` to see whether + you were able to fix all of the broken references (i.e. whether it + still reports missing objects). + + If you have enough missing objects, it's possible xargs might split + the argument list between `--unnamed` and its argument, causing + `get` to fail. If so, you can just specify an even numbered value + for `xargs -n`, for example `xargs -n 64 bup get ...`. 
+ On most systems, you can choose a much larger `n`. + + If you would just like to validate some saves, you can now run `bup + validate-ref-links SAVE...` which should be much more efficient than + attempting a restore or joining the saves to /dev/null. + + We're also working on a command that will repair the structure of + any existing broken trees so that commands like restore will still + be able to work with them. + + See issue/missing-objects.md for a detailed explanation of the + problem. If you have pandoc and graphviz dot installed, this will + be rendered to issue/missing-objects.html which you can open in a + browser, or you can find it + [here](https://bup.github.io/issue/missing-objects.html). + +General +------- + +* `bup validate-object-links` has been added. This command scans the + objects in the repository and reports any "broken links" it finds, + i.e. any links from a tree or commit in the repository to an object + that doesn't exist. + +* `bup validate-ref-links` has been added. This command traverses + repository references (e.g. saves) and logs paths to missing + objects, i.e. references from a tree or commit to an object that + doesn't exist in the repository. At the moment, it will report at + least one path to each missing object; it does not attempt to find + all of the paths. + +* `bup gc` now provides `--ignore-missing` which allows a `gc` + operation to continue after encountering objects that are missing + from the repository. + +* `bup join` now reports the path to any missing object it encounters. + +Bugs +---- + +* `bup gc` should no longer risk leaving the repository with + incomplete tree or commit objects -- trees or commits with + references to objects that are no longer in the repository. + + Previously this could happen because the collection was + probabilistic with respect to all object types, and so it could + leave (completely orphaned) vestigial commits or trees that referred + to objects that had been removed. 
It could also do this if the + `--threshold` caused it to keep a parent in one "live enough" pack, + while discarding a descendant in a pack that doesn't cross the + threshold. + + These objects can cause serious trouble because they can be re-used + as-is (without noticing that they are incomplete) by other commands + like `bup get`. + +* `bup get` should no longer be able to leave the repository with + incomplete trees or commits if it's interrupted at the wrong time. + Previously it fetched objects "top down", and so if it was + interrupted after the parent tree/commit was written to the + repository, but before all the children were written, then the + repository would be left with an incomplete tree. + +* `bup` should always ignore midx files that refer to missing indexes. + Previously it might not notice when objects had disappeared (via + `gc`) which could, in particular, cause remote/client operations + like a remote save to decide that the repository already contained + data that it did not. + +* `bup midx` `--auto` and `--force` now delete midx files that refer + to missing indexes. + +* `bup gc` should no longer throw bloom close-related exceptions when + interrupted. + +Build system +------------ + +* [Graphviz](https://graphviz.org) `dot` is optional, but must be + available in order to render the figures referred to by + issue/missing-objects.md. + +Thanks to (at least) +==================== + +Greg Troxel, Johannes Berg, and Rob Browning diff -Nru bup-0.33.2/note/0.33.6-from-0.33.5.md bup-0.33.7/note/0.33.6-from-0.33.5.md --- bup-0.33.2/note/0.33.6-from-0.33.5.md 1970-01-01 00:00:00.000000000 +0000 +++ bup-0.33.7/note/0.33.6-from-0.33.5.md 2025-01-08 20:04:11.000000000 +0000 @@ -0,0 +1,22 @@ +Notable changes in 0.33.6 since 0.33.5 +====================================== + +Bugs +---- + +* `bup` should now always include the effective group ID when + retrieving groups via getgroups(2) (often via python + `os.getgroups()`). 
That call is not required to include the + effective group ID, and in the Debian buildds' current sbuild + unshares, it doesn't. + +Build system +------------ + +* `make install` should now place bup-config(5) in the correct + sectional directory. + +Thanks to (at least) +==================== + +Johannes Berg, Michael Tokarev, Rob Browning, and Robert Edmonds diff -Nru bup-0.33.2/note/0.33.7-from-0.33.6.md bup-0.33.7/note/0.33.7-from-0.33.6.md --- bup-0.33.2/note/0.33.7-from-0.33.6.md 1970-01-01 00:00:00.000000000 +0000 +++ bup-0.33.7/note/0.33.7-from-0.33.6.md 2025-01-08 20:04:11.000000000 +0000 @@ -0,0 +1,18 @@ +Notable changes in 0.33.7 since 0.33.6 +====================================== + +Bugs +---- + +* When `FILE` has no directory component, i.e. no `/`, `bup midx + --check FILE` should now look in the current directory for the + constituent indexes, rather than `/`. + +* `bup` should no longer crash when opening a midx file with missing + indexes. + + +Thanks to (at least) +==================== + +Greg Troxel, Johannes Berg, Michał Karol, and Rob Browning diff -Nru bup-0.33.2/pytest bup-0.33.7/pytest --- bup-0.33.2/pytest 2023-07-01 20:08:43.000000000 +0000 +++ bup-0.33.7/pytest 2025-01-08 20:04:11.000000000 +0000 @@ -7,5 +7,8 @@ script_home="$(cd "$(dirname "$0")" && pwd -P)" testlibdir="$script_home/test/lib" +export BUP_DIR=/dev/null +export GIT_DIR=/dev/null + export PYTHONPATH="$testlibdir${PYTHONPATH:+:$PYTHONPATH}" exec dev/bup-python -m pytest -v -m 'not release' "$@" diff -Nru bup-0.33.2/test/ext/conftest.py bup-0.33.7/test/ext/conftest.py --- bup-0.33.2/test/ext/conftest.py 2023-07-01 20:08:43.000000000 +0000 +++ bup-0.33.7/test/ext/conftest.py 2025-01-08 20:04:11.000000000 +0000 @@ -1,4 +1,5 @@ +import os from pathlib import Path from subprocess import CalledProcessError import pytest, subprocess, sys @@ -9,12 +10,12 @@ # Handle all test-* files as wvtest protocol subprocesses # cf.
https://docs.pytest.org/en/latest/example/nonpython.html -# version_tuple was added in 7 -use_node_path = hasattr(pytest, 'version_tuple') +# version_tuple was added in 7.0 +pytest_ver = getattr(pytest, 'version_tuple', None) class BupSubprocFailure(Exception): def __init__(self, msg, cmd, status, failures): - super(BupSubprocFailure, self).__init__(msg) + super().__init__(msg) self.cmd = cmd self.status = status self.failures = failures @@ -22,7 +23,7 @@ class BupSubprocTestRunner(pytest.Item): def __init__(self, name, parent): - super(BupSubprocTestRunner, self).__init__(name, parent) + super().__init__(name, parent) def runtest(self): cmd = str(self.fspath) @@ -75,16 +76,21 @@ except AttributeError: yield BupSubprocTestRunner('', self) -def pytest_collect_file(parent, path): - base = path.basename - if base.startswith('test-') and not base.endswith('~'): - if use_node_path: - item = BupSubprocTestFile.from_parent(parent, path=Path(path)) - else: - try: - item = BupSubprocTestFile.from_parent(parent, fspath=path) - except AttributeError: - item = BupSubprocTestFile(path, parent) - if base == 'test-release-archive': +def _collect_item(item): + name = os.path.basename(item.name) + if name.startswith('test-') and not name.endswith('~'): + if name == 'test-release-archive': item.add_marker(pytest.mark.release) return item + +if pytest_ver: # 7+ + def pytest_collect_file(parent, file_path): + item = BupSubprocTestFile.from_parent(parent, path=file_path) + return _collect_item(item) +else: + def pytest_collect_file(parent, path): + try: + item = BupSubprocTestFile.from_parent(parent, fspath=path) + except AttributeError: + item = BupSubprocTestFile(path, parent) + return _collect_item(item) diff -Nru bup-0.33.2/test/ext/test-fsck bup-0.33.7/test/ext/test-fsck --- bup-0.33.2/test/ext/test-fsck 2023-07-01 20:08:43.000000000 +0000 +++ bup-0.33.7/test/ext/test-fsck 2025-01-08 20:04:11.000000000 +0000 @@ -42,10 +42,52 @@ WVFAIL bup fsck --quick -j4 WVPASS bup damage 
"$BUP_DIR"/objects/pack/*.pack -n10 -s1024 --percent 0.4 -S0 WVFAIL bup fsck --quick -WVFAIL bup fsck --quick -rvv -j99 # fails because repairs were needed + +# Fails because repairs were needed or we don't have a suitable par2 +WVFAIL bup fsck --quick -rvv -j9 + if bup fsck --par2-ok; then WVPASS bup fsck -r # ok because of repairs from last time - WVPASS bup damage "$BUP_DIR"/objects/pack/*.pack -n202 -s1 --equal -S0 + + some_idx="$(WVPASS find "$BUP_DIR" -name "pack-*.par2" \! -name "*.vol*.par2" | head -1)" || exit $? + some_vol="$(WVPASS find "$BUP_DIR" -name "pack-*.vol*.par2" | head -1)" || exit $? + + WVPASS cp -p "$some_idx" some-pack.par2 + WVPASS cp -p "$some_vol" some-pack.vol.par2 + + WVSTART 'fsck rejects empty par2 index files' + WVPASS echo -n > "$some_idx" + WVFAIL bup fsck -v + WVPASS test -e "$some_idx" -a ! -s "$some_idx" + WVFAIL bup fsck -vr + WVPASS test -e "$some_idx" -a ! -s "$some_idx" + WVFAIL bup fsck -vg + WVPASS test -e "$some_idx" -a ! -s "$some_idx" + WVPASS cp -p some-pack.par2 "$some_idx" + + WVSTART 'fsck rejects empty par2 vol files' + WVPASS echo -n > "$some_vol" + WVFAIL bup fsck -v + WVPASS test -e "$some_vol" -a ! -s "$some_vol" + WVFAIL bup fsck -vr + WVPASS test -e "$some_vol" -a ! -s "$some_vol" + WVFAIL bup fsck -vg + WVPASS test -e "$some_vol" -a ! -s "$some_vol" + WVPASS cp -p some-pack.vol.par2 "$some_vol" + + # This must do "too much" damage. Currently par2 is invoked with + # -c200, which should allow up to 200 damaged "blocks", but since + # we don't specify the block size, it's dynamically computed. + # Even if we did specify a size, the actual size appears to be + # affected by the input file sizes, and the specific behavior + # doesn't appear to be documented/promised -- see par2 + # comandline.cpp. Also worth noting that bup damage's output is + # currently probabilistic, so it might not actually damage any + # given byte. 
For now, just try to overdo it -- randomly change + # (or not 1/256th of the time) 600 evenly spaced bytes in each + # pack file. + WVPASS bup damage "$BUP_DIR"/objects/pack/*.pack -n600 -s1 --equal -S0 + WVFAIL bup fsck WVFAIL bup fsck -rvv # too many errors to be repairable WVFAIL bup fsck -r # too many errors to be repairable diff -Nru bup-0.33.2/test/ext/test-gc bup-0.33.7/test/ext/test-gc --- bup-0.33.2/test/ext/test-gc 2023-07-01 20:08:43.000000000 +0000 +++ bup-0.33.7/test/ext/test-gc 2025-01-08 20:04:11.000000000 +0000 @@ -175,7 +175,7 @@ WVPASS compare-trees src-ab/ "$tmpdir/restore/latest/" -WVSTART "gc (threshold)" +WVSTART "gc (threshold 0)" WVPASS rm -rf "$BUP_DIR" WVPASS bup init @@ -185,60 +185,55 @@ WVPASS bup index src WVPASS bup save -n src-1 src -WVPASS rm src/0 -WVPASS bup index src -WVPASS bup save -n src-2 src -WVPASS bup rm --unsafe src-1 -packs_before="$(ls "$BUP_DIR/objects/pack/"*.pack)" || exit $? -WVPASS bup gc -v $GC_OPTS --threshold 99 2>&1 | tee gc.log -packs_after="$(ls "$BUP_DIR/objects/pack/"*.pack)" || exit $? -WVPASSEQ 0 "$(grep -cE '^rewriting ' gc.log)" -WVPASSEQ "$packs_before" "$packs_after" - -WVPASS bup gc -v $GC_OPTS --threshold 1 2>&1 | tee gc.log -packs_after="$(ls "$BUP_DIR/objects/pack/"*.pack)" || exit $? +pack_contents_before="$(git show-index < "$BUP_DIR/objects/pack/"*.idx | cut -d' ' -f2- | sort)" || exit $? +WVPASS bup gc -v $GC_OPTS --threshold 0 2>&1 | tee gc.log +pack_contents_after="$(git show-index < "$BUP_DIR/objects/pack/"*.idx | cut -d' ' -f2- | sort)" || exit $? +# Check that the pack was rewritten or a new pack written, but +# with the same objects. Note that the name of the pack will +# likely change as the *order* of objects is different. The +# "git show-index | cut | sort" ignores the offsets but checks +# the object and their crc. 
WVPASSEQ 1 "$(grep -cE '^rewriting ' gc.log)" +WVPASSEQ "$pack_contents_before" "$pack_contents_after" -# Check that only one pack was rewritten - -# Accommodate some systems that apparently used to change the default -# ls sort order which must match LC_COLLATE for comm to work. -packs_before="$(sort <(echo "$packs_before"))" || die $? -packs_after="$(sort <(echo "$packs_after"))" || die $? - -only_in_before="$(comm -2 -3 <(echo "$packs_before") <(echo "$packs_after"))" \ - || die $? - -only_in_after="$(comm -1 -3 <(echo "$packs_before") <(echo "$packs_after"))" \ - || die $? - -in_both="$(comm -1 -2 <(echo "$packs_before") <(echo "$packs_after"))" || die $? - -WVPASSEQ 1 $(echo "$only_in_before" | wc -l) -WVPASSEQ 1 $(echo "$only_in_after" | wc -l) -WVPASSEQ 1 $(echo "$in_both" | wc -l) -WVSTART "gc (threshold 0)" +WVSTART "gc (--ignore-missing)" WVPASS rm -rf "$BUP_DIR" WVPASS bup init WVPASS rm -rf src && mkdir src WVPASS echo 0 > src/0 WVPASS echo 1 > src/1 +WVPASS mkdir src/victim +WVPASS echo 2 > src/victim/2 +WVPASS bup index src +WVPASS bup save -n src --strip src + +root_bupm="$(WVPASS git cat-file -t "$(git rev-parse src:.bupm)")" || exit $? +WVPASS echo 3 > src/3 WVPASS bup index src -WVPASS bup save -n src-1 src +WVPASS bup save -n src --strip src -pack_contents_before="$(git show-index < "$BUP_DIR/objects/pack/"*.idx | cut -d' ' -f2- | sort)" || exit $? -WVPASS bup gc -v $GC_OPTS --threshold 0 2>&1 | tee gc.log -pack_contents_after="$(git show-index < "$BUP_DIR/objects/pack/"*.idx | cut -d' ' -f2- | sort)" || exit $? -# Check that the pack was rewritten or a new pack written, but -# with the same objects. Note that the name of the pack will -# likely change as the *order* of objects is different. The -# "git show-index | cut | sort" ignores the offsets but checks -# the object and their crc. 
-WVPASSEQ 1 "$(grep -cE '^rewriting ' gc.log)" -WVPASSEQ "$pack_contents_before" "$pack_contents_after" +last_save="$(WVPASS bup ls -s src | tail -n -2 | head -n 1 | cut -d ' ' -f 2)" || exit $? +WVPASS bup rm --unsafe src/"$last_save" + +# Drop the victim tree +WVPASS git ls-tree src | grep victim | WVPASS cut -d' ' -f 3 \ + | WVPASS cut -b -40 > victim-oid +WVPASS test -n "$(&1 | tee gc.log +WVPASSEQ 1 "$(WVPASS grep -cE '^rewriting ' gc.log)" +WVPASSEQ 1 "$(WVPASS grep -cE '^missing ' gc.log)" + +obj_n_after="$(WVPASS git cat-file --batch-all-objects --batch-check='%(objectname)' | wc -l)" || exit $? +if test "$root_bupm" = blob; then + WVPASSEQ 5 $obj_n_after +else + WVPASSEQ 7 $obj_n_after +fi WVPASS rm -rf "$tmpdir" diff -Nru bup-0.33.2/test/ext/test-gc-removes-incomplete-trees bup-0.33.7/test/ext/test-gc-removes-incomplete-trees --- bup-0.33.2/test/ext/test-gc-removes-incomplete-trees 1970-01-01 00:00:00.000000000 +0000 +++ bup-0.33.7/test/ext/test-gc-removes-incomplete-trees 2025-01-08 20:04:11.000000000 +0000 @@ -0,0 +1,75 @@ +#!/usr/bin/env bash +. ./wvtest-bup.sh + +set -o pipefail + +top="$(WVPASS pwd)" || exit $? +tmpdir="$(WVPASS wvmktempdir)" || exit $? + +export BUP_DIR="$tmpdir/bup" +export GIT_DIR="$tmpdir/bup" + +bup() { "$top/bup" "$@"; } + +# In the past, gc treated all objects probabilistically (including +# trees and commits). This meant that it could leave tree fragments +# after a collection if the tree were split across packfiles and gc +# decided (based on the --threshold) to remove any of the packfiles +# containing children while keeping packfiles containing any of the +# parents. Other commands could then decide to re-use these +# incomplete trees without noticing that they still needed to fill in +# the holes. This is particularly easy to reproduce via get, which we +# do here. + +WVPASS cd "$tmpdir" +WVPASS bup init + +# Create a save that has two packfiles. 
The first contains mostly +# data we're going to drop and part of a smaller "straddle" tree we're +# going to keep. The second contains the rest of the straddle tree +# (including the "top"), and a much larger "hold" tree we're going to +# keep. +WVPASS git config pack.packSizeLimit 1000k +WVPASS mkdir -p src/{1-hold,2-straddle,3-transient} +WVPASS bup random -S 1 945k > src/3-transient/data +WVPASS bup random -S 2 100k > src/2-straddle/data +WVPASS bup random -S 3 945k > src/1-hold/data +WVPASS bup index src +WVPASS bup save -vv --strip -n src src + +(cd bup/objects/pack && ls -lrth *.pack) + +packs=($(cd bup/objects/pack && ls -rt *.pack)) +WVPASSEQ "${#packs[@]}" 2 + +transient_oid="$(WVPASS git rev-parse src:3-transient)" || exit $? +straddle_oid="$(WVPASS git rev-parse src:2-straddle)" || exit $? +hold_oid="$(WVPASS git rev-parse src:1-hold)" || exit $? + +WVPASS git show-index < bup/objects/pack/"$(basename ${packs[0]} .pack).idx" \ + | WVPASS grep -F "$transient_oid" + +WVPASS git show-index < bup/objects/pack/"$(basename ${packs[1]} .pack).idx" \ + | WVPASS grep -F "$straddle_oid" + +WVPASS git show-index < bup/objects/pack/"$(basename ${packs[1]} .pack).idx" \ + | WVPASS grep -F "$hold_oid" + +# Keep a safe copy of src in another repo +WVPASS bup -d bup-complete init +WVPASS bup save -r :bup-complete --strip -n src src + +# Promote hold to its own branch so we can drop everything else and gc +WVPASS bup get --append: src/latest/1-hold hold +WVPASS bup rm --unsafe src +WVPASS bup gc --unsafe -v --threshold 10 + +# Fetch src back from the safe copy via get. Because the pack with an +# internal part of straddle was dropped, the pack with hold (that was +# kept) has an incomplete straddle tree; get will happily re-use that +# when creating the local src without noticing it's incomplete, +# creating a broken src. 
+WVPASS bup get -s bup-complete --append src +WVPASS bup join "$(git rev-parse src)" > /dev/null + +WVPASS rm -rf "$tmpdir" diff -Nru bup-0.33.2/test/ext/test-get-missing bup-0.33.7/test/ext/test-get-missing --- bup-0.33.2/test/ext/test-get-missing 1970-01-01 00:00:00.000000000 +0000 +++ bup-0.33.7/test/ext/test-get-missing 2025-01-08 20:04:11.000000000 +0000 @@ -0,0 +1,49 @@ +#!/usr/bin/env bash +. ./wvtest-bup.sh + +set -o pipefail + +top="$(WVPASS pwd)" || exit $? +tmpdir="$(WVPASS wvmktempdir)" || exit $? + +export BUP_DIR="$tmpdir/bup" +export GIT_DIR="$tmpdir/bup" + +bup() { "$top/bup" "$@"; } + +WVPASS cd "$tmpdir" +WVPASS bup init + +WVPASS mkdir -p src/a +WVPASS echo 1 > src/a/1 +WVPASS echo 2 > src/a/2 +WVPASS echo 3 > src/a/3 +WVPASS bup index src +WVPASS bup save --strip -n src src + +src_oid="$(git rev-parse src)" + +WVPASS bup -d dest-repo init +WVPASS bup -d dest-repo get -s bup --unnamed "git:$src_oid" +WVPASS bup -d dest-repo join "$src_oid" > /dev/null +WVPASS rm -rf dest-repo + +WVPASS git ls-tree src:a | WVPASS cut -d' ' -f 3 \ + | WVPASS cut -b -40 | WVPASS head -1 > bupm-oid +WVPASS "$top/dev/perforate-repo" --drop-oids "$BUP_DIR" < bupm-oid + +WVPASS bup -d dest-repo init +WVFAIL bup -d dest-repo get -s bup --unnamed "git:$src_oid" 2>&1 | tee get.log +# For now... 
+WVPASS grep -E 'raise MissingObject' get.log +WVPASS rm -rf dest-repo + +WVPASS bup -d dest-repo init +WVFAIL bup -d dest-repo get --ignore-missing -s bup \ + --unnamed "git:$src_oid" 2>&1 \ + | tee get.log +WVPASSEQ 1 "$(grep -cF "skipping missing source object $( "$tmpdir/data" +WVFAIL bup -d "$bup_dir2" on - split -n foo < "$tmpdir/data" +WVPASS bup -d "$bup_dir2" on - split -n foo "$tmpdir/data" +WVPASS bup -d "$bup_dir2" join foo > "$tmpdir/data-joined" +WVPASS cmp -l "$tmpdir/data" "$tmpdir/data-joined" +WVPASSEQ $(WVPASS find "$BUP_DIR"/objects/pack -name "*.pack" | wc -l) 0 +WVPASS test $(WVPASS find "$bup_dir2"/objects/pack/*.pack | wc -l) -gt 2 + WVPASS rm -r "$tmpdir" diff -Nru bup-0.33.2/test/ext/test-save-data-race bup-0.33.7/test/ext/test-save-data-race --- bup-0.33.2/test/ext/test-save-data-race 2023-07-01 20:08:43.000000000 +0000 +++ bup-0.33.7/test/ext/test-save-data-race 2025-01-08 20:04:11.000000000 +0000 @@ -7,7 +7,9 @@ top="$(WVPASS pwd)" || exit $? tmpdir="$(WVPASS wvmktempdir)" || exit $? + export BUP_DIR="$tmpdir/bup" +export GIT_DIR="$tmpdir/bup" bup() { "$top/bup" "$@"; } diff -Nru bup-0.33.2/test/ext/test-save-symlink-race bup-0.33.7/test/ext/test-save-symlink-race --- bup-0.33.2/test/ext/test-save-symlink-race 2023-07-01 20:08:43.000000000 +0000 +++ bup-0.33.7/test/ext/test-save-symlink-race 2025-01-08 20:04:11.000000000 +0000 @@ -7,7 +7,9 @@ top="$(WVPASS pwd)" || exit $? tmpdir="$(WVPASS wvmktempdir)" || exit $? + export BUP_DIR="$tmpdir/bup" +export GIT_DIR="$tmpdir/bup" bup() { "$top/bup" "$@"; } diff -Nru bup-0.33.2/test/ext/test-sparse-files bup-0.33.7/test/ext/test-sparse-files --- bup-0.33.2/test/ext/test-sparse-files 2023-07-01 20:08:43.000000000 +0000 +++ bup-0.33.7/test/ext/test-sparse-files 2025-01-08 20:04:11.000000000 +0000 @@ -25,13 +25,13 @@ readonly block_size data_size WVPASS dd if=/dev/zero of=test-sparse-probe seek="$data_size" bs=1 count=1 -probe_size=$(WVPASS du -k -s test-sparse-probe | WVPASS cut -f1) || exit $? 
-if [ "$probe_size" -ge "$((data_size / 1024))" ]; then +sparse_size=$(WVPASS "$top/dev/sparse-size" test-sparse-probe) || exit $? +if [ "$sparse_size" -lt "$block_size" ]; then WVSKIP "no sparse support detected -- skipping tests" exit 0 fi -WVSTART "sparse restore on $(current-filesystem), assuming ${block_size}B blocks" +WVSTART "sparse restore on $("$top/dev/path-fs" .), assuming ${block_size}B blocks" WVPASS bup init WVPASS mkdir src @@ -42,22 +42,22 @@ WVSTART "sparse file restore (all sparse)" WVPASS bup restore -C restore "src/latest/$(pwd)/" -restore_size=$(WVPASS du -k -s restore/src/foo | WVPASS cut -f1) || exit $? -WVPASS [ "$restore_size" -ge "$((data_size / 1024))" ] +sparse_size=$(WVPASS "$top/dev/sparse-size" restore/src/foo) || exit $? +WVPASS [ "$sparse_size" = 0 ] WVPASS "$top/dev/compare-trees" -c src/ restore/src/ WVSTART "sparse file restore --no-sparse (all sparse)" WVPASS rm -r restore WVPASS bup restore --no-sparse -C restore "src/latest/$(pwd)/" -restore_size=$(WVPASS du -k -s restore/src/foo | WVPASS cut -f1) || exit $? -WVPASS [ "$restore_size" -ge "$((data_size / 1024))" ] +sparse_size=$(WVPASS "$top/dev/sparse-size" restore/src/foo) || exit $? +WVPASS [ "$sparse_size" = 0 ] WVPASS "$top/dev/compare-trees" -c src/ restore/src/ WVSTART "sparse file restore --sparse (all sparse)" WVPASS rm -r restore WVPASS bup restore --sparse -C restore "src/latest/$(pwd)/" -restore_size=$(WVPASS du -k -s restore/src/foo | WVPASS cut -f1) || exit $? -WVPASS [ "$restore_size" -le "$((3 * (block_size / 1024)))" ] +sparse_size=$(WVPASS "$top/dev/sparse-size" restore/src/foo) || exit $? +WVPASS [ "$sparse_size" -gt "$((15 * block_size))" ] WVPASS "$top/dev/compare-trees" -c src/ restore/src/ WVSTART "sparse file restore --sparse (sparse end)" @@ -67,8 +67,8 @@ WVPASS bup save -n src src WVPASS rm -r restore WVPASS bup restore --sparse -C restore "src/latest/$(pwd)/" -restore_size=$(WVPASS du -k -s restore/src/foo | WVPASS cut -f1) || exit $? 
-WVPASS [ "$restore_size" -le "$((3 * (block_size / 1024)))" ] +sparse_size=$(WVPASS "$top/dev/sparse-size" restore/src/foo) || exit $? +WVPASS [ "$sparse_size" -gt "$((15 * block_size))" ] WVPASS "$top/dev/compare-trees" -c src/ restore/src/ WVSTART "sparse file restore --sparse (sparse middle)" @@ -77,8 +77,8 @@ WVPASS bup save -n src src WVPASS rm -r restore WVPASS bup restore --sparse -C restore "src/latest/$(pwd)/" -restore_size=$(WVPASS du -k -s restore/src/foo | WVPASS cut -f1) || exit $? -WVPASS [ "$restore_size" -le "$((5 * (block_size / 1024)))" ] +sparse_size=$(WVPASS "$top/dev/sparse-size" restore/src/foo) || exit $? +WVPASS [ "$sparse_size" -gt "$((15 * block_size))" ] WVPASS "$top/dev/compare-trees" -c src/ restore/src/ WVSTART "sparse file restore --sparse (bracketed zero run in buf)" @@ -98,8 +98,8 @@ WVPASS bup save -n src src WVPASS rm -r restore WVPASS bup restore --sparse -C restore "src/latest/$(pwd)/" -restore_size=$(WVPASS du -k -s restore/src/foo | WVPASS cut -f1) || exit $? -WVPASS [ "$restore_size" -le "$((5 * (block_size / 1024)))" ] +sparse_size=$(WVPASS "$top/dev/sparse-size" restore/src/foo) || exit $? +WVPASS [ "$sparse_size" -gt "$((15 * block_size))" ] WVPASS "$top/dev/compare-trees" -c src/ restore/src/ WVSTART "sparse file restore --sparse (sparse start and end)" @@ -110,8 +110,8 @@ WVPASS bup save -n src src WVPASS rm -r restore WVPASS bup restore --sparse -C restore "src/latest/$(pwd)/" -restore_size=$(WVPASS du -k -s restore/src/foo | WVPASS cut -f1) || exit $? -WVPASS [ "$restore_size" -le "$((5 * (block_size / 1024)))" ] +sparse_size=$(WVPASS "$top/dev/sparse-size" restore/src/foo) || exit $? 
+WVPASS [ "$sparse_size" -gt "$((15 * block_size))" ] WVPASS "$top/dev/compare-trees" -c src/ restore/src/ if test "$block_size" -gt $mb; then diff -Nru bup-0.33.2/test/ext/test-validate-object-links bup-0.33.7/test/ext/test-validate-object-links --- bup-0.33.2/test/ext/test-validate-object-links 1970-01-01 00:00:00.000000000 +0000 +++ bup-0.33.7/test/ext/test-validate-object-links 2025-01-08 20:04:11.000000000 +0000 @@ -0,0 +1,41 @@ +#!/usr/bin/env bash +. ./wvtest-bup.sh || exit $? + +set -o pipefail + +top="$(WVPASS pwd)" || exit $? +tmpdir="$(WVPASS wvmktempdir)" || exit $? + +export BUP_DIR="$tmpdir/bup" +export GIT_DIR="$tmpdir/bup" + +bup() { "$top/bup" "$@"; } + +WVPASS cd "$tmpdir" +WVPASS bup init + +WVPASS mkdir -p src/a +WVPASS echo 1 > src/a/1 +WVPASS echo 2 > src/a/2 +WVPASS echo 3 > src/a/3 +WVPASS bup index src +WVPASS bup save --strip -n src src + +WVPASS bup validate-object-links | tee validate-out +WVPASS test -z "$( bupm-oid +WVPASS "$top/dev/perforate-repo" --drop-oids "$BUP_DIR" < bupm-oid + +set -x +bup validate-object-links > validate-out +rc=$? +set +x +cat validate-out +WVPASSEQ 1 "$rc" + +src_a_oid="$(git rev-parse src:a)" +WVPASS grep -E "^no $( validate.log 2>&1 + rc=$? 
+ set +x + cat validate.log + WVPASSEQ 1 "$rc" + WVPASSEQ 1 "$(grep -cE '^missing ' validate.log)" +} + +WVPASS cd "$tmpdir" +WVPASS bup init + +WVPASS mkdir -p src/a +WVPASS echo 1 > src/a/1 +WVPASS echo 2 > src/a/2 +WVPASS echo 3 > src/a/3 +WVPASS bup index src +WVPASS bup save --strip -n src src + +WVPASS bup validate-ref-links 2>&1 | tee validate.log +WVPASS grep -vE '^missing ' validate.log + +WVPASS git ls-tree src:a | WVPASS cut -d' ' -f 3 \ + | WVPASS cut -b -40 | WVPASS head -1 > bupm-oid +WVPASS "$top/dev/perforate-repo" --drop-oids "$BUP_DIR" < bupm-oid + +expect-one-src-missing + + +WVSTART 'validate specific refs' + +WVPASS mkdir -p more/a +WVPASS echo 4 > more/a/4 +WVPASS echo 5 > more/a/5 +WVPASS bup index more +WVPASS bup save --strip -n more more + +WVPASS bup validate-ref-links more 2>&1 | tee validate.log +WVPASS grep -vE '^missing ' validate.log + +expect-one-src-missing +expect-one-src-missing src + +WVPASS bup rm --unsafe src +WVPASS bup validate-ref-links 2>&1 | tee validate.log +WVPASS grep -vE '^missing ' validate.log + + +WVPASS rm -rf "$tmpdir" diff -Nru bup-0.33.2/test/ext/test-walk-object-order bup-0.33.7/test/ext/test-walk-object-order --- bup-0.33.2/test/ext/test-walk-object-order 1970-01-01 00:00:00.000000000 +0000 +++ bup-0.33.7/test/ext/test-walk-object-order 2025-01-08 20:04:11.000000000 +0000 @@ -0,0 +1,51 @@ +#!/usr/bin/env bash +. ./wvtest-bup.sh + +set -o pipefail + +top="$(WVPASS pwd)" || exit $? +tmpdir="$(WVPASS wvmktempdir)" || exit $? + +export BUP_DIR="$tmpdir/bup" +export GIT_DIR="$tmpdir/bup" + +bup() { "$top/bup" "$@"; } + +# In the past walk_object performed a pre-order traversal which could +# cause operations like bup get to leave the repository with +# incomplete trees if interrupted at the wrong time. 
+ +WVPASS cd "$tmpdir" +WVPASS bup init + +WVPASS mkdir -p src/a +WVPASS echo 1 > src/a/1 +WVPASS echo 2 > src/a/2 +WVPASS echo 3 > src/a/3 +WVPASS bup index src +WVPASS bup save --strip -n src src +src_oid="$(WVPASS git rev-parse src)" || exit $? + +# Drop a/.bupm since it'll be one of the last things a bup get +# post-order traversal would fetch/store (ordering for a/ is 3 2 1 +# .bupm), leaving the destination with an incomplete a/. + +WVPASS git ls-tree src:a | WVPASS cut -d' ' -f 3 \ + | WVPASS cut -b -40 | WVPASS head -1 > bupm-oid +WVPASS cp -a bup bup-missing +WVPASS "$top/dev/perforate-repo" --drop-oids bup-missing < bupm-oid + +# Now fetch from the broken repo, make sure that creates a broken src, +# then fetch again from the complete repo and make sure that produces +# a joinable src. Before the fix, the second get would produce a src +# ref, but it wouldn't notice the incomplete a/. After the fix, +# walk_objects (and by extension, get) operates bottom up and so never +# leaves incomplete trees in the destination. 
+ +WVPASS bup -d bup-dest init +WVFAIL bup -d bup-dest get -s bup-missing --ff src +WVFAIL bup -d bup-dest join "$src_oid" > /dev/null +WVPASS bup -d bup-dest get -s bup --ff src +WVPASS bup -d bup-dest join "$src_oid" > /dev/null + +WVPASS rm -rf "$tmpdir" diff -Nru bup-0.33.2/test/int/test_git.py bup-0.33.7/test/int/test_git.py --- bup-0.33.2/test/int/test_git.py 2023-07-01 20:08:43.000000000 +0000 +++ bup-0.33.7/test/int/test_git.py 2025-01-08 20:04:11.000000000 +0000 @@ -29,6 +29,8 @@ def test_git_version_detection(): # Test version types from git's tag history + # reset in case a previous test in this process set it + git._git_great = None for expected, ver in \ (('insufficient', b'git version 0.99'), ('insufficient', b'git version 0.99.1'), @@ -541,7 +543,7 @@ # check that we don't have it open anymore WVPASSEQ(False, b'deleted' in fn) -def test_config(): +def test_config(tmpdir): cfg_file = os.path.join(os.path.dirname(__file__), 'sample.conf') no_such_file = os.path.join(os.path.dirname(__file__), 'nosuch.conf') git_config_get = partial(git.git_config_get, cfg_file=cfg_file) @@ -568,3 +570,10 @@ WVPASSEQ(2, git_config_get(b'bup.istrue2', opttype='int')) WVPASSEQ(0, git_config_get(b'bup.isfalse2', opttype='int')) WVPASSEQ(0x777, git_config_get(b'bup.hex', opttype='int')) + + # Make sure get_config respects the repo() + git_dir = tmpdir + b'/repo' + git.init_repo(git_dir) + git.check_repo_or_die(git_dir) + exc(b'git', b'--git-dir', git_dir, b'config', b'bup.foo', b'yep') + assert b'yep' == git.git_config_get(b'bup.foo') diff -Nru bup-0.33.2/test/int/test_midx.py bup-0.33.7/test/int/test_midx.py --- bup-0.33.2/test/int/test_midx.py 1970-01-01 00:00:00.000000000 +0000 +++ bup-0.33.7/test/int/test_midx.py 2025-01-08 20:04:11.000000000 +0000 @@ -0,0 +1,38 @@ + +from glob import glob +from os import environb, unlink +from subprocess import run +from sys import stderr + +from bup import path + +bup_exe = path.exe() + +def runc(*args, **kwargs): + assert 'check' not 
in kwargs + run(*args, check='True', **kwargs) + +def bupc(*args, **kwargs): + cmd = [bup_exe] + list(args[0]) + print(cmd, file=stderr) + run(cmd, *args[1:], check=True, **kwargs) + +def test_missing_midx(tmpdir): + bup_dir = tmpdir + b'/bup' + pack_dir = bup_dir + b'/objects/pack' + environb[b'GIT_DIR'] = bup_dir + environb[b'BUP_DIR'] = bup_dir + bupc(('init',)) + bupc(('index', 'test/sampledata/var/lib')) + bupc(('save', '-n', 'save', 'test')) + bupc(('index', 'test/sampledata/var/doc')) + bupc(('save', '-n', 'save', 'test')) + bupc(('midx', '-f')) + midxs = glob(bup_dir + b'/objects/pack/*.midx') + assert len(midxs) == 1 + midx = midxs[0] + bupc(('midx', '--check', '-a')) + idxs = glob(bup_dir + b'/objects/pack/*.idx') + assert len(idxs) > 1 + unlink(idxs[0]) + bupc(('midx', '--check', '-a')) diff -Nru bup-0.33.2/test/lib/wvpytest.py bup-0.33.7/test/lib/wvpytest.py --- bup-0.33.2/test/lib/wvpytest.py 2023-07-01 20:08:43.000000000 +0000 +++ bup-0.33.7/test/lib/wvpytest.py 2025-01-08 20:04:11.000000000 +0000 @@ -1,5 +1,11 @@ +import os import pytest +# Precaution -- here just because it's already imported "everywhere". + +os.environb[b'BUP_DIR'] = b'/dev/null' +os.environb[b'GIT_DIR'] = b'/dev/null' + def WVPASS(cond = True, fail_value=None): if fail_value: assert cond, fail_value diff -Nru bup-0.33.2/wvtest.sh bup-0.33.7/wvtest.sh --- bup-0.33.2/wvtest.sh 2023-07-01 20:08:43.000000000 +0000 +++ bup-0.33.7/wvtest.sh 2025-01-08 20:04:11.000000000 +0000 @@ -4,6 +4,10 @@ # . ./wvtest.sh # +# Here just because it's already sourced "everywhere". +export BUP_DIR=/dev/null +export GIT_DIR=/dev/null + # we don't quote $TEXT in case it contains newlines; newlines # aren't allowed in test output. However, we set -f so that # at least shell glob characters aren't processed.