270 lines
5.4 KiB
Python
270 lines
5.4 KiB
Python
#!/usr/bin/python3
# NOTE: the module docstring below is not just documentation: it is passed to
# docopt() as the command-line grammar. docopt separates an option from its
# description by two or more spaces, so the column layout is significant.

"""
Usage: pgdump-sort [options] <dump> [<sorted-dump>]
       pgdump-sort -h | --help | --version

Options:
    -n         Sort entries in natural order (requires python3 module natsort)

    -h --help  Show this usage and exit
    --version  Show version and exit
"""
|
|
|
|
from docopt import docopt
|
|
import os
|
|
import sys
|
|
import re
|
|
import tempfile
|
|
import shutil
|
|
from enum import Enum
|
|
|
|
# natsort is an optional dependency: remember whether it could be imported so
# that the command-line entry point can report a helpful error for -n.
try:
    import natsort
except ModuleNotFoundError:
    _has_natsort = False
else:
    _has_natsort = True

# Version string reported by --version.
version = '0.2'


# Matches the object-description comment found in a plain-text dump, e.g.
#   -- Name: foo; Type: TABLE; Schema: public; Owner: bob
#   -- Data for Name: foo; Type: TABLE DATA; Schema: public; Owner: bob
# The "isdata" group is non-empty only for the "Data for" form.
RE_OBJDESC = re.compile(
    '-- (?P<isdata>(Data for )?)Name: (?P<name>.*?); '
    'Type: (?P<type>.*?); '
    'Schema: (?P<schema>.*?); '
    'Owner: (?P<owner>.*)'
)

# Matches a sequence-value statement and captures the quoted sequence name.
RE_SEQSET = re.compile(r"SELECT pg_catalog.setval\('(?P<name>.*?)'.*")
|
|
|
|
|
|
class state(Enum):
    """Parser states used while splitting a dump into per-object files.

    The numeric value is also used as the leading component of the
    per-object file names, so it influences the order in which the pieces
    are recombined.
    """

    # No object is currently being collected.
    EMPTY = 1
    # Statements that precede the first described object.
    SETTINGS = 2
    # An object definition (header is neither "Data for" nor SEQUENCE SET).
    DEF = 3
    # A "Data for" header was seen; no COPY/INSERT line has been seen yet.
    DATA = 4
    # Inside a COPY block.
    COPY = 5
    # Inside a run of INSERT statements.
    INSERT = 6
    # An object whose header type is 'SEQUENCE SET'.
    SEQSET = 7
|
|
|
|
|
|
class buffer(list):
    """List of dump lines belonging to the object currently being collected.

    Besides the lines themselves the buffer tracks the parser state (`st`),
    the file name the lines will be written to (`fname`) and the header
    comment that introduces them (`title`).
    """

    # Directory that receives one file per dump object.
    destdir = None
    # Parser state of the object currently being collected.
    st = state.EMPTY
    # File name (inside destdir) for the object currently being collected.
    fname = None
    # Header comment line written on top of the object's file.
    title = None

    def __init__(self, destdir, fsorted, fsorted_args):
        """Create an empty buffer.

        destdir      -- directory the per-object files are written to
        fsorted      -- sorting callable applied to data lines
        fsorted_args -- dict of keyword arguments passed to fsorted
        """
        super().__init__()
        self.destdir = destdir
        self.fsorted = fsorted
        self.fsorted_args = fsorted_args

    def flushto(self, st, fname, title):
        """Write the collected lines to their file and start a new object.

        Leading and trailing empty lines and `--` comment lines are dropped.
        If anything remains, COPY/INSERT buffers are passed through
        sort_datalines(), a comment header carrying the current title is
        prepended and the result is written to destdir/fname. Afterwards the
        buffer is emptied and st, fname and title become the current values.
        """
        # Trim enclosing comments and empty lines
        while self and ('' == self[0] or self[0].startswith('--')):
            del self[0]
        while self and ('' == self[-1] or self[-1].startswith('--')):
            del self[-1]

        if self:
            if self.st in (state.COPY, state.INSERT):
                self[:] = sort_datalines(self, self.fsorted, self.fsorted_args)

            header = ['--', self.title, '--', '']
            with open(os.path.join(self.destdir, self.fname), "w") as out:
                out.writelines(l + '\n' for l in header + self)

        self.clear()
        self.st = st
        self.fname = fname
        self.title = title

    def proc_comment(self, line):
        """Process a comment line.

        Returns True if the line is a comment (it starts with `--`), i.e. it
        has been processed and must not be stored by the caller; returns
        False otherwise. When the comment is an object description the
        current buffer is flushed and a new object is started.
        """
        if not line.startswith('--'):
            return False

        m = RE_OBJDESC.match(line)
        if not m:
            return True

        if 'SEQUENCE SET' == m.group('type'):
            st = state.SEQSET
        elif m.group('isdata'):
            st = state.DATA
        else:
            st = state.DEF

        fname = '%d-%s-%s-%s-%s' % (
            st.value,
            m.group('type'),
            m.group('schema'),
            m.group('name'),
            m.group('owner')
        )

        # Object names may contain a path separator (e.g. an operator named
        # "/"); left as is, the name would point into a non-existent
        # sub-directory of destdir and opening the file would fail.
        for sep in (os.sep, os.altsep):
            if sep:
                fname = fname.replace(sep, '_')

        if 255 < len(fname):
            fname = fname[:255-3] + "..."

        self.flushto(st, fname, line)

        return True
|
|
|
|
|
|
def sort_datalines(lines, fsorted, fsorted_args):
    """Return `lines` with the data rows of a COPY/INSERT section sorted.

    lines        -- iterable of lines, with or without a trailing newline
    fsorted      -- sorting callable, called as fsorted(rows, **fsorted_args)
    fsorted_args -- dict of keyword arguments for fsorted

    Lines are split into three parts: everything up to and including the
    `COPY` statement, the data rows, and everything from the terminator on
    (`\\.` for COPY, an empty line for INSERT). Only the data rows are
    sorted; for INSERT the first `INSERT` line is itself a data row.
    """
    pre = []
    data = []
    post = []

    # 0: before the data, 1: inside the data, 2: after the terminator
    phase = 0
    terminator = None
    for line in lines:
        if 0 == phase:
            if line.startswith('COPY'):
                pre.append(line)
                terminator = '\\.'
                phase = 1
            elif line.startswith('INSERT'):
                data.append(line)
                terminator = ''
                phase = 1
            else:
                pre.append(line)
        elif 1 == phase:
            # Callers may pass lines with the newline already stripped, so
            # compare the terminator without it.
            content = line[:-1] if line.endswith('\n') else line
            if content == terminator:
                post.append(line)
                phase = 2
            else:
                data.append(line)
        else:
            post.append(line)

    return pre + fsorted(data, **fsorted_args) + post
|
|
|
|
|
|
def dissect(dump, destdir, fsorted, fsorted_args):
    """Split the dump file into one file per object inside destdir.

    dump         -- path of the plain-text dump to read
    destdir      -- existing directory that receives the per-object files
    fsorted      -- sorting callable applied to the data rows of each table
    fsorted_args -- dict of keyword arguments passed to fsorted

    The file is read line by line; object-description comments switch the
    buffer to a new object, all other lines are collected into the current
    one. Sequence values are normalised to `1, false`.
    """
    buf = buffer(destdir, fsorted, fsorted_args)

    with open(dump) as src:
        for line in src:
            # trim trailing newline (if any)
            if line.endswith('\n'):
                line = line[:-1]

            if buf.st == state.EMPTY:
                if buf.proc_comment(line):
                    pass
                elif '' == line:
                    pass
                else:
                    # First statement outside of any described object
                    buf.flushto(
                        state.SETTINGS,
                        "%d-%s" % (state.SETTINGS.value, "SETTINGS"),
                        '-- Sorted PostgreSQL database dump')
                    buf.append(line)

            elif buf.st in (state.SETTINGS, state.DEF, state.INSERT):
                if not buf.proc_comment(line):
                    buf.append(line)

            elif buf.st == state.DATA:
                # A data section without any COPY/INSERT statement would
                # otherwise swallow the header of the following object.
                if buf.proc_comment(line):
                    continue
                if line.startswith('COPY '):
                    buf.st = state.COPY
                elif line.startswith('INSERT '):
                    buf.st = state.INSERT
                buf.append(line)

            elif buf.st == state.COPY:
                buf.append(line)
                # End-of-data marker closes the COPY block
                if r'\.' == line:
                    buf.flushto(state.EMPTY, None, None)

            elif buf.st == state.SEQSET:
                if buf.proc_comment(line):
                    pass
                else:
                    if line.startswith('SELECT pg_catalog.setval'):
                        m = RE_SEQSET.match(line)
                        # Keep the line untouched if the name cannot be
                        # extracted instead of failing on m.group().
                        if m:
                            line = ("SELECT pg_catalog.setval('%s', 1, false);"
                                    % m.group('name'))
                    buf.append(line)

            else:
                print("This should not happen", file=sys.stderr)

    buf.flushto(state.EMPTY, None, None)
|
|
|
|
|
|
def recombine(destdir, dump, fsorted, fsorted_args):
    """Concatenate the per-object files of destdir into the file `dump`.

    destdir      -- directory holding the per-object files
    dump         -- path of the output file (overwritten)
    fsorted      -- sorting callable that orders the file names
    fsorted_args -- dict of keyword arguments passed to fsorted

    Files are written in sorted order, separated by one empty line. If at
    least one file was written, a closing comment block is appended.
    """
    with open(dump, 'w') as out:
        sorted_files = fsorted(os.listdir(destdir), **fsorted_args)
        for index, fname in enumerate(sorted_files):
            # Separate consecutive objects with an empty line
            if index:
                out.write('\n')
            with open(os.path.join(destdir, fname)) as part:
                out.writelines(part)

        if sorted_files:
            out.writelines([
                '\n',
                '--\n',
                '-- Sorted dump complete\n',
                '--\n',
            ])
|
|
|
|
|
|
def pgdump_sort(dump, sdump, fsorted=sorted, **fsorted_args):
    """Sort the dump file `dump` and write the result to `sdump`.

    dump         -- path of the dump to read
    sdump        -- path of the sorted dump to write
    fsorted      -- sorting callable (defaults to the builtin sorted)
    fsorted_args -- extra keyword arguments forwarded to fsorted

    The dump is split into per-object files in a temporary directory, which
    is removed again whether or not the processing succeeds.
    """
    workdir = tempfile.mkdtemp(
        prefix='pgdump-sort',
        suffix=os.path.basename(dump),
    )

    try:
        dissect(dump, workdir, fsorted, fsorted_args)
        recombine(workdir, sdump, fsorted, fsorted_args)
    finally:
        shutil.rmtree(workdir)
|
|
|
|
# Message printed when -n is requested but the natsort module is missing.
natsort_error = (
    "In order to use natural sort you need to install natsort module:\n"
    "pip install natsort\n"
)

if __name__ == '__main__':
    args = docopt(__doc__, version=version)

    dump = args['<dump>']
    sdump = args['<sorted-dump>']
    if sdump is None:
        # Default output name: "<dump without .sql>-sorted.sql"
        sdump = re.sub(r'\.sql$', '', dump) + '-sorted.sql'

    if args['-n']:
        if not _has_natsort:
            print(natsort_error, file=sys.stderr)
            # sys.exit rather than the site-provided exit(), which is not
            # guaranteed to exist (e.g. when run with python -S).
            sys.exit(1)
        fsorted = natsort.natsorted
        fsorted_args = {'alg': natsort.ns.IGNORECASE}
    else:
        fsorted = sorted
        fsorted_args = {}

    pgdump_sort(dump, sdump, fsorted, **fsorted_args)
|