diff --git a/README.md b/README.md
index cd6f30c..26fe7db 100644
--- a/README.md
+++ b/README.md
@@ -4,10 +4,13 @@
 * systemd
 * [AWS client](https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html)
+* python3
+  * docopt
+* [pgdump-sort](https://github.com/tigra564/pgdump-sort) (Bundled)

 ### Install

 * Run `aws configure`
 * Symlink unit files to `/etc/systemd/system`
 * Run `systemctl enable` on units and slices
-* Run `systemctl start` on timers
+* Run `systemctl start` on timers
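For orientation, the install steps listed in the README amount to roughly the shell session below. The unit file names are hypothetical placeholders (the actual unit files are not part of this diff), and the package installation step will vary by distro; `docopt` can just as well come from pip.

```bash
# Rough sketch of the README install steps; unit names are placeholders.
sudo apt install python3 python3-docopt awscli   # or: pip3 install docopt
aws configure                                    # credentials for the S3 bucket
sudo ln -s "${PWD}"/*.service "${PWD}"/*.timer "${PWD}"/*.slice /etc/systemd/system/
sudo systemctl daemon-reload
sudo systemctl enable backup-bitwarden.service backup-bitwarden.slice
sudo systemctl start backup-bitwarden.timer
```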
diff --git a/backup-bitwarden.sh b/backup-bitwarden.sh
index cad8bcc..7a0bc23 100755
--- a/backup-bitwarden.sh
+++ b/backup-bitwarden.sh
@@ -2,15 +2,20 @@

 set -euo pipefail

+scriptpath="$( cd -- "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )"
+
 dumpfile="dump.sql"
 tmpdir="$(mktemp -d -p "${PWD}")"

 function check_for_hash() {
     local ret=0
-    echo "Checking if hash ${1} is present"
+    echo -n "Checking if hash ${1} is present: "
     aws s3 ls "s3://${S3_BUCKET}/sums/${1}" || ret=$?
-    echo "Returned: ${ret}"
+    case "$ret" in
+        0) echo "Yes." ;;
+        *) echo "No." ;;
+    esac
     return $ret
 }
@@ -18,6 +23,7 @@ function create_and_upload() {
     local sum=$1
     local backup_file
     backup_file="$(date +%Y/%m/backup-%d-%H-%M-%S.tar.gz)"
+    echo "Uploading ${backup_file}"
     tar -zc . | aws s3 cp - "s3://${S3_BUCKET}/${backup_file}"
     aws s3api put-object --bucket "${S3_BUCKET}" --key "sums/${sum}"
 }
@@ -29,16 +35,17 @@ pushd "${tmpdir}"
 rm -rf "${dumpfile}"
 touch "${dumpfile}"
 chmod ugo+w "${dumpfile}"
-sudo -u postgres -- pg_dump --no-owner --no-privileges --clean --if-exists --quote-all-identifiers "${DATABASE_URL}" -F plain -f "${dumpfile}"
+sudo -u postgres -- pg_dump --no-owner --no-privileges --clean --if-exists --quote-all-identifiers "${DATABASE_URL}" -f "${dumpfile}"
+
+"${scriptpath}/pgdump-sort" "${dumpfile}" "sorted.sql"

 cp -r "${ROOT_DIR}/data" "./data"
 cp "${ROOT_DIR}/.env" "./.env"
 cp "${ROOT_DIR}/bitwarden.exceede.com.conf" "./bitwarden.exceede.com.conf"

-# remove icon_cache
-rm -rf ./data/icon_cache
+sum=$(find . -type f -not -name "${dumpfile}" -and -not -path "./data/icon_cache/*" -exec md5sum {} + | LC_ALL=C sort | md5sum | cut -d ' ' -f 1)

-sum=$(find . -type f -exec md5sum {} + | LC_ALL=C sort | md5sum | cut -d ' ' -f 1)
+rm sorted.sql

 check_for_hash "$sum" || create_and_upload "${sum}"
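The reasoning behind the new pgdump-sort step: pg_dump makes no guarantee about row order inside COPY blocks, and sequence values drift, so two dumps of logically identical data can hash differently. Hashing the raw dump would therefore make every run look "new" and defeat the check_for_hash dedup. The change instead feeds a sorted copy of the dump into the checksum (excluding the raw dump and the volatile icon cache) while the raw dump is what actually gets archived. A stripped-down sketch of the idea, with hypothetical paths and none of the surrounding setup:

```bash
# Sketch only: hash the *sorted* dump so identical data yields an identical
# key, even when the raw dump bytes differ between runs.
pg_dump "${DATABASE_URL}" -f dump.sql
./pgdump-sort dump.sql sorted.sql
sum=$(md5sum sorted.sql | cut -d ' ' -f 1)
aws s3 ls "s3://${S3_BUCKET}/sums/${sum}" \
    || echo "hash ${sum} not seen before; this run would upload a new backup"
```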
diff --git a/pgdump-sort b/pgdump-sort
new file mode 100755
index 0000000..9694ac8
--- /dev/null
+++ b/pgdump-sort
@@ -0,0 +1,270 @@
+#!/usr/bin/python3
+
+"""
+Usage: pgdump-sort [options] <dump> [<sorted-dump>]
+       pgdump-sort -h | --help | --version
+
+Options:
+  -n          Sort entries in natural order (requires python3 module natsort)
+
+  -h --help   Show this usage and exit
+  --version   Show version and exit
+"""
+
+from docopt import docopt
+import os
+import sys
+import re
+import tempfile
+import shutil
+from enum import Enum
+
+_has_natsort = False
+try:
+    import natsort
+    _has_natsort = True
+except ModuleNotFoundError:
+    pass
+
+version='0.2'
+
+
+RE_OBJDESC = re.compile(
+    '-- (?P<isdata>(Data for )?)Name: (?P<name>.*?); '
+    'Type: (?P<type>.*?); '
+    'Schema: (?P<schema>.*?); '
+    'Owner: (?P<owner>.*)'
+)
+RE_SEQSET = re.compile("SELECT pg_catalog.setval\('(?P<name>.*?)'.*")
+
+
+class state(Enum):
+    EMPTY = 1
+    SETTINGS = 2
+    DEF = 3
+    DATA = 4
+    COPY = 5
+    INSERT = 6
+    SEQSET = 7
+
+
+class buffer(list):
+    destdir = None
+    st = state.EMPTY
+    fname = None
+    title = None
+
+    def __init__(self, destdir, fsorted, fsorted_args):
+        self.destdir = destdir
+        self.fsorted = fsorted
+        self.fsorted_args = fsorted_args
+
+    def flushto(self, st, fname, title):
+        #print("EVICTING", self.st, "to", self.fname, "New state:", st)
+
+        # Trim ellipsing comments and empty lines
+        while self and ('' == self[0] or self[0].startswith('--')):
+            del self[0]
+        while self and ('' == self[-1] or self[-1].startswith('--')):
+            del self[-1]
+
+        if len(self):
+            if self.st in (state.COPY, state.INSERT):
+                self[:] = sort_datalines(self, self.fsorted, self.fsorted_args)
+
+            self[:] = [
+                '--',
+                self.title,
+                '--',
+                '',
+            ] + self
+
+            with open(os.path.join(self.destdir, self.fname), "w") as out:
+                out.writelines([l + '\n' for l in self])
+
+        self.clear()
+        self.st = st
+        self.fname = fname
+        self.title = title
+
+
+    def proc_comment(self, line):
+        # Returns True if the line is a comment, i.e. it has been processed
+        if not line.startswith('--'):
+            return False
+
+        m = re.match(RE_OBJDESC, line)
+        if not m:
+            return True
+
+        if 'SEQUENCE SET' == m.group('type'):
+            st = state.SEQSET
+        elif m.group('isdata'):
+            st = state.DATA
+        else:
+            st = state.DEF
+
+        fname = '%d-%s-%s-%s-%s' % (
+            st.value,
+            m.group('type'),
+            m.group('schema'),
+            m.group('name'),
+            m.group('owner')
+        )
+
+        if 255 < len(fname):
+            fname = fname[:255-3] + "..."
+
+        self.flushto(st, fname, line)
+
+        return True
+
+
+def sort_datalines(lines, fsorted, fsorted_args):
+    pre = []
+    data = []
+    post = []
+
+    state = 0
+    ptr = pre
+    isins = False
+    for line in lines:
+        if 0 == state:
+            if line.startswith('COPY'):
+                ptr.append(line)
+                ptr = data
+                state = 1
+            elif line.startswith('INSERT'):
+                ptr = data
+                ptr.append(line)
+                isins = True
+                state = 1
+            else:
+                ptr.append(line)
+        elif 1 == state:
+            if isins and '\n' == line or not isins and '\\.\n' == line:
+                ptr = post
+                ptr.append(line)
+                status = 2
+            else:
+                ptr.append(line)
+        else:
+            ptr.append(line)
+
+    return pre + fsorted(data, **fsorted_args) + post
+
+
+def dissect(dump, destdir, fsorted, fsorted_args):
+    buf = buffer(destdir, fsorted, fsorted_args)
+
+    for line in open(dump):
+        # trim trailing newline (if any)
+        if '\n' == line[-1]:
+            line = line[:-1]
+
+        #print(buf.st.name.ljust(10), "\t[%s]" % line)
+        if buf.st == state.EMPTY:
+            if buf.proc_comment(line):
+                pass
+            elif '' == line:
+                pass
+            else:
+                buf.flushto(state.SETTINGS, "%d-%s" % (state.SETTINGS.value, "SETTINGS"),
+                            '-- Sorted PostgreSQL database dump')
+                buf.append(line)
+
+        elif buf.st in (state.SETTINGS, state.DEF, state.INSERT):
+            if buf.proc_comment(line):
+                pass
+            else:
+                buf.append(line)
+
+        elif buf.st == state.DATA:
+            if line.startswith('COPY '):
+                buf.st = state.COPY
+            elif line.startswith('INSERT '):
+                buf.st = state.INSERT
+            buf.append(line)
+
+        elif buf.st == state.COPY:
+            buf.append(line)
+            if r'\.' == line:
+                buf.flushto(state.EMPTY, None, None)
+
+        elif buf.st == state.SEQSET:
+            if buf.proc_comment(line):
+                pass
+            elif line.startswith('SELECT pg_catalog.setval'):
+                m = re.match(RE_SEQSET, line)
+                line = "SELECT pg_catalog.setval('%s', 1, false);" % m.group('name')
+                buf.append(line)
+            else:
+                buf.append(line)
+
+        else:
+            print("This should not happen")
+
+    buf.flushto(state.EMPTY, None, None)
+
+
+def recombine(destdir, dump, fsorted, fsorted_args):
+    out = open(dump, 'w')
+
+    first = True
+    sorted_files = fsorted(os.listdir(destdir), **fsorted_args)
+    for fname in sorted_files:
+        if first:
+            first = False
+        else:
+            out.write('\n')
+        with open(os.path.join(destdir, fname)) as f:
+            out.writelines(f.readlines())
+
+    if sorted_files:
+        out.writelines([
+            '\n',
+            '--\n',
+            '-- Sorted dump complete\n',
+            '--\n',
+        ])
+
+    out.close()
+
+
+def pgdump_sort(dump, sdump, fsorted=sorted, **fsorted_args):
+    destdir = tempfile.mkdtemp(suffix=os.path.basename(dump), prefix='pgdump-sort')
+
+    try:
+        dissect(dump, destdir, fsorted, fsorted_args)
+        recombine(destdir, sdump, fsorted, fsorted_args)
+
+    finally:
+        shutil.rmtree(destdir)
+
+
+natsort_error = \
+"""In order to use natural sort you need to install natsort module:
+    pip install natsort
+"""
+
+if __name__ == '__main__':
+    args = docopt(__doc__, version=version)
+
+    dump = args['<dump>']
+    sdump = args['<sorted-dump>']
+    if sdump is None:
+        sdump = re.sub(r'\.sql$', '', dump) + '-sorted.sql'
+
+    if args['-n']:
+        if _has_natsort:
+            fsorted = natsort.natsorted
+            fsorted_args = {'alg': natsort.ns.IGNORECASE}
+        else:
+            print(natsort_error, file=sys.stderr)
+            exit(1)
+    else:
+        fsorted = sorted
+        fsorted_args = {}
+
+
+    pgdump_sort(dump, sdump, fsorted, **fsorted_args)
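For reference, the bundled script's command-line interface, as declared in its docopt string above:

```bash
./pgdump-sort dump.sql sorted.sql     # explicit output name, as the backup script uses it
./pgdump-sort dump.sql                # default output name: dump-sorted.sql
./pgdump-sort -n dump.sql sorted.sql  # natural-order sort; requires `pip install natsort`
```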