Workaround for pg_dump non-idempotent behaviour

Problem
=======

* `pg_dump` may dump the data rows in arbitrary order. This messes with
  the final hash of the dump file, even though the data after restoring
  would be the same. This creates additional uploads with no value.

Solution
========

* Utilize the `pgdump-sort` script by `tigra564`, which attempts to sort
  the data in the dump file specifically for hashing purposes.
This commit is contained in:
2022-12-26 12:35:38 -08:00
parent fddd8e84dd
commit 9702287a92
3 changed files with 287 additions and 7 deletions

View File

@@ -2,15 +2,20 @@
# Exit on the first failing command, on use of an unset variable, and when
# any stage of a pipeline fails.
set -euo pipefail
# Physical (pwd -P) path of the directory that contains this script.
scriptpath="$( cd -- "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )"
# Name of the file that receives the pg_dump output.
dumpfile="dump.sql"
# Fresh scratch directory created inside the current working directory.
tmpdir="$(mktemp -d -p "${PWD}")"
# Check whether a marker for the given content hash is already stored in S3.
#
# Globals:   S3_BUCKET (read) - bucket that holds the "sums/<hash>" keys
# Arguments: $1 - hash to look up
# Outputs:   a one-line prompt, whatever `aws s3 ls` prints, the raw exit
#            status, and a final "Yes."/"No." on stdout
# Returns:   the exit status of `aws s3 ls` (0 means the listing succeeded)
function check_for_hash() {
  local ret=0
  # Print the prompt once, without a trailing newline, so the answer below
  # finishes the same line.
  printf '%s' "Checking if hash ${1} is present: "
  # Capture the status with `|| ret=$?` so a non-zero exit does not abort
  # the script under `set -e`.
  aws s3 ls "s3://${S3_BUCKET}/sums/${1}" || ret=$?
  echo "Returned: ${ret}"
  case "${ret}" in
    0) echo "Yes." ;;
    *) echo "No." ;;
  esac
  return "${ret}"
}
@@ -18,6 +23,7 @@ function create_and_upload() {
# $1 is the content hash; it becomes the key suffix of the marker object
# written at the end of this function.
local sum=$1
local backup_file
# Object key of the form YYYY/MM/backup-DD-HH-MM-SS.tar.gz, built from the
# current date and time.
backup_file="$(date +%Y/%m/backup-%d-%H-%M-%S.tar.gz)"
echo "Uploading ${backup_file}"
# Stream a gzip-compressed tar of the current directory to S3; the `-`
# source argument makes `aws s3 cp` read the archive from stdin.
tar -zc . | aws s3 cp - "s3://${S3_BUCKET}/${backup_file}"
# Write an object at sums/<hash> (no --body is passed). check_for_hash lists
# this same key prefix to decide whether a backup already exists.
aws s3api put-object --bucket "${S3_BUCKET}" --key "sums/${sum}"
}
@@ -29,16 +35,17 @@ pushd "${tmpdir}"
# Start from an empty dump file that every user may write to.
# Presumably needed because pg_dump below runs as the postgres user via
# sudo — TODO confirm.
rm -rf "${dumpfile}"
touch "${dumpfile}"
chmod ugo+w "${dumpfile}"
# NOTE(review): the two pg_dump invocations below differ only in `-F plain`.
# They look like the removed and added sides of a diff rendered without +/-
# markers — confirm which one belongs in the script.
sudo -u postgres -- pg_dump --no-owner --no-privileges --clean --if-exists --quote-all-identifiers "${DATABASE_URL}" -F plain -f "${dumpfile}"
sudo -u postgres -- pg_dump --no-owner --no-privileges --clean --if-exists --quote-all-identifiers "${DATABASE_URL}" -f "${dumpfile}"
# Run the pgdump-sort helper that sits next to this script. Presumably it
# reads ${dumpfile} and writes a sorted copy to sorted.sql — TODO confirm
# against the helper.
"${scriptpath}/pgdump-sort" "${dumpfile}" "sorted.sql"
# Stage the application data and configuration files in the current directory.
cp -r "${ROOT_DIR}/data" "./data"
cp "${ROOT_DIR}/.env" "./.env"
cp "${ROOT_DIR}/bitwarden.exceede.com.conf" "./bitwarden.exceede.com.conf"
# remove icon_cache
rm -rf ./data/icon_cache
# Hash of hashes: md5sum every selected file, sort the lines byte-wise so the
# result does not depend on find's traversal order, then md5sum that list.
# NOTE(review): `sum` is assigned twice and only the second assignment takes
# effect; the pair looks like the old and new sides of a diff. The second
# form also hashes the unsorted ${dumpfile}, so the result may still vary
# with pg_dump row order — verify that this is intended.
sum=$(find . -type f -not -name "${dumpfile}" -and -not -path "./data/icon_cache/*" -exec md5sum {} + | LC_ALL=C sort | md5sum | cut -d ' ' -f 1)
sum=$(find . -type f -exec md5sum {} + | LC_ALL=C sort | md5sum | cut -d ' ' -f 1)
# Delete sorted.sql once the hash has been computed, before the upload step.
rm sorted.sql
# Upload only when check_for_hash returns non-zero for this hash.
check_for_hash "$sum" || create_and_upload "${sum}"