Slightly modified from http://furius.ca/pubcode/pub/conf/bin/pg-split-dump to get a type/schema/name.sql directory structure, with all overloaded function definitions kept in the same file.
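For example (illustrative object names, not part of the gist): a table public.users would be written to table/public/users.sql, while every overload of a function public.add would land together in function/public/add.sql.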
#!/usr/bin/env python3
"""
Split a PostgreSQL plain dump file into one file for each of its objects.

This is used to compare the contents of two SQL databases: both are dumped to
a plain-text file, and each dump is then processed through this script, which
generates a file for every database object; the resulting directories can then
be compared with a directory diff program.
"""
# stdlib imports
import os, re
from os.path import join, exists

# Note: this code is pulled straight out of the xxdiff library.
# Copied here to avoid the dependency.
# Matches the section header comments that pg_dump writes before every object,
# capturing the object name, its type and its schema.
sec_re = re.compile(
    r'^--\n-- (?:Data for )?Name:\s+(?:DOMAIN|OPERATOR|SCHEMA|VIEW|COLUMN|EXTENSION|TABLE|FUNCTION|SEQUENCE)?\s*([^(;]+).*;\s*Type:\s+([^;]+);\s*Schema:\s+([^;]+);.*$',
    re.M)

def parse_dump(dbdump, sort_columns=False):
    """
    Parse a PostgreSQL database dump, extracting all its sections into a list
    of (name, type, schema, contents) tuples. The entries are built from the
    Name, Type and Schema fields in the header comments. Contents of data
    chunks for tables are returned as type 'TABLE DATA', so if you're going to
    turn this into a map, be careful to filter out those data entries or
    concatenate them to the schema (whichever is appropriate for your
    application).
    """
    # Class to contain info about chunks.
    class Chunk:
        def __init__(self, mo):
            self.mo = mo
            self.contents = None  # filled in below from the dump text
            self.name, self.typ, self.nspname = mo.group(1, 2, 3)

    # Parse chunks.
    chunks = [Chunk(mo) for mo in sec_re.finditer(dbdump)]
    if not chunks:
        return []
    # Each chunk's body runs from the end of its header to the start of the
    # next (differently named) chunk's header; the last one runs to the end.
    for c1, c2 in consepairs(chunks):
        c1.contents = dbdump[c1.mo.end():c2.mo.start()]
    chunks[-1].contents = dbdump[chunks[-1].mo.end():]
    for c in chunks:
        c.descline = c.mo.group(0)
        c.mo = None  # release the match objects
    return [(c.name, c.typ.replace(' ', '_').lower(), c.nspname,
             c.descline + c.contents if c.contents else None) for c in chunks]

# Note: this code is pulled straight out of the xxdiff library.
# Copied here to avoid the dependency.
def consepairs(seq):
    """
    Iterate over consecutive pairs of chunks. Consecutive chunks that share a
    name (e.g. overloaded functions) are skipped and their contents cleared,
    so that the whole group ends up in the first chunk's file.
    """
    it = iter(seq)
    prev = next(it)
    for el in it:
        if prev.name == el.name:
            el.contents = None
            continue
        yield prev, el
        prev = el

def main():
    import optparse
    parser = optparse.OptionParser(__doc__.strip())
    opts, args = parser.parse_args()
    if len(args) not in (1, 2):
        parser.error("You must specify a single dumpfile, "
                     "and optionally an output directory.")

    dumpfn = args[0]
    rootdir = args[1] if len(args) == 2 else os.getcwd()
    if not exists(rootdir):
        os.makedirs(rootdir)

    with open(dumpfn) as f:
        sections = parse_dump(f.read())
    for name, typ, nspname, body in sections:
        if not body:
            continue
        outdir = join(rootdir, typ, nspname)
        if not exists(outdir):
            os.makedirs(outdir)
        # Sanitize the object name for use as a filename: '[' and ']' become
        # '_' and double quotes are dropped.
        fn = '{name}.sql'.format(name=name.translate(str.maketrans('[]', '__', '"')))
        print(fn)
        with open(join(outdir, fn), 'w') as out:
            out.write(body.rstrip() + '\n')


if __name__ == '__main__':
    main()
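
A minimal usage sketch for driving the parser directly, assuming the script above has been saved as pg_split_dump.py and that mydb_schema.sql is a plain-format dump (e.g. produced with pg_dump --schema-only); both names are illustrative, not defined by the gist:

from pg_split_dump import parse_dump  # hypothetical module name, for illustration

with open('mydb_schema.sql') as f:
    sections = parse_dump(f.read())

# Each entry is (name, type, schema, contents); contents is None for chunks
# that were folded into an earlier same-named chunk, so those can be skipped.
for name, typ, nspname, body in sections:
    if body:
        print('{}/{}/{}.sql'.format(typ, nspname, name))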