#!/usr/bin/python3 """ Usage: pgdump-sort [options] [] pgdump-sort -h | --help | --version Options: -n Sort entries in natural order (requires python3 module natsort) -h --help Show this usage and exit --version Show version and exit """ from docopt import docopt import os import sys import re import tempfile import shutil from enum import Enum _has_natsort = False try: import natsort _has_natsort = True except ModuleNotFoundError: pass version='0.2' RE_OBJDESC = re.compile( '-- (?P(Data for )?)Name: (?P.*?); ' 'Type: (?P.*?); ' 'Schema: (?P.*?); ' 'Owner: (?P.*)' ) RE_SEQSET = re.compile("SELECT pg_catalog.setval\('(?P.*?)'.*") class state(Enum): EMPTY = 1 SETTINGS = 2 DEF = 3 DATA = 4 COPY = 5 INSERT = 6 SEQSET = 7 class buffer(list): destdir = None st = state.EMPTY fname = None title = None def __init__(self, destdir, fsorted, fsorted_args): self.destdir = destdir self.fsorted = fsorted self.fsorted_args = fsorted_args def flushto(self, st, fname, title): #print("EVICTING", self.st, "to", self.fname, "New state:", st) # Trim ellipsing comments and empty lines while self and ('' == self[0] or self[0].startswith('--')): del self[0] while self and ('' == self[-1] or self[-1].startswith('--')): del self[-1] if len(self): if self.st in (state.COPY, state.INSERT): self[:] = sort_datalines(self, self.fsorted, self.fsorted_args) self[:] = [ '--', self.title, '--', '', ] + self with open(os.path.join(self.destdir, self.fname), "w") as out: out.writelines([l + '\n' for l in self]) self.clear() self.st = st self.fname = fname self.title = title def proc_comment(self, line): # Returns True if the line is a comment, i.e. it has been processed if not line.startswith('--'): return False m = re.match(RE_OBJDESC, line) if not m: return True if 'SEQUENCE SET' == m.group('type'): st = state.SEQSET elif m.group('isdata'): st = state.DATA else: st = state.DEF fname = '%d-%s-%s-%s-%s' % ( st.value, m.group('type'), m.group('schema'), m.group('name'), m.group('owner') ) if 255 < len(fname): fname = fname[:255-3] + "..." self.flushto(st, fname, line) return True def sort_datalines(lines, fsorted, fsorted_args): pre = [] data = [] post = [] state = 0 ptr = pre isins = False for line in lines: if 0 == state: if line.startswith('COPY'): ptr.append(line) ptr = data state = 1 elif line.startswith('INSERT'): ptr = data ptr.append(line) isins = True state = 1 else: ptr.append(line) elif 1 == state: if isins and '\n' == line or not isins and '\\.\n' == line: ptr = post ptr.append(line) status = 2 else: ptr.append(line) else: ptr.append(line) return pre + fsorted(data, **fsorted_args) + post def dissect(dump, destdir, fsorted, fsorted_args): buf = buffer(destdir, fsorted, fsorted_args) for line in open(dump): # trim trailing newline (if any) if '\n' == line[-1]: line = line[:-1] #print(buf.st.name.ljust(10), "\t[%s]" % line) if buf.st == state.EMPTY: if buf.proc_comment(line): pass elif '' == line: pass else: buf.flushto(state.SETTINGS, "%d-%s" % (state.SETTINGS.value, "SETTINGS"), '-- Sorted PostgreSQL database dump') buf.append(line) elif buf.st in (state.SETTINGS, state.DEF, state.INSERT): if buf.proc_comment(line): pass else: buf.append(line) elif buf.st == state.DATA: if line.startswith('COPY '): buf.st = state.COPY elif line.startswith('INSERT '): buf.st = state.INSERT buf.append(line) elif buf.st == state.COPY: buf.append(line) if r'\.' == line: buf.flushto(state.EMPTY, None, None) elif buf.st == state.SEQSET: if buf.proc_comment(line): pass elif line.startswith('SELECT pg_catalog.setval'): m = re.match(RE_SEQSET, line) line = "SELECT pg_catalog.setval('%s', 1, false);" % m.group('name') buf.append(line) else: buf.append(line) else: print("This should not happen") buf.flushto(state.EMPTY, None, None) def recombine(destdir, dump, fsorted, fsorted_args): out = open(dump, 'w') first = True sorted_files = fsorted(os.listdir(destdir), **fsorted_args) for fname in sorted_files: if first: first = False else: out.write('\n') with open(os.path.join(destdir, fname)) as f: out.writelines(f.readlines()) if sorted_files: out.writelines([ '\n', '--\n', '-- Sorted dump complete\n', '--\n', ]) out.close() def pgdump_sort(dump, sdump, fsorted=sorted, **fsorted_args): destdir = tempfile.mkdtemp(suffix=os.path.basename(dump), prefix='pgdump-sort') try: dissect(dump, destdir, fsorted, fsorted_args) recombine(destdir, sdump, fsorted, fsorted_args) finally: shutil.rmtree(destdir) natsort_error = \ """In order to use natural sort you need to install natsort module: pip install natsort """ if __name__ == '__main__': args = docopt(__doc__, version=version) dump = args[''] sdump = args[''] if sdump is None: sdump = re.sub(r'\.sql$', '', dump) + '-sorted.sql' if args['-n']: if _has_natsort: fsorted = natsort.natsorted fsorted_args = {'alg': natsort.ns.IGNORECASE} else: print(natsort_error, file=sys.stderr) exit(1) else: fsorted = sorted fsorted_args = {} pgdump_sort(dump, sdump, fsorted, **fsorted_args)