12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697 |
- from future.utils import viewitems
- import argparse
- import json
- import logging
- import os
- import shutil
- import sys
# Module-level logger, named after this module; copy() reports failures here.
LOG = logging.getLogger(__name__)
def deserialize(fn):
    """Read the file named *fn* and return its contents parsed as JSON."""
    with open(fn) as stream:
        return json.load(stream)
def assert_exists(fn):
    """Raise ``Exception`` unless *fn* names an existing regular file."""
    if os.path.isfile(fn):
        return
    raise Exception('Does not exist: {!r}'.format(fn))
def mkdir(dirname):
    """
    Create *dirname* (and any missing parents) unless it already exists.

    Safe against a concurrent creator: if os.makedirs() fails but the
    directory is present afterwards, the failure is ignored. Any other
    OSError (e.g. a regular file is in the way) is re-raised.
    """
    if os.path.isdir(dirname):
        return
    try:
        os.makedirs(dirname)
    except OSError:
        # Another process may have created it between the check and the call.
        if not os.path.isdir(dirname):
            raise
- #def symlink(name, src):
- # msg = '{} -> {}'.format(name, src)
- # assert not os.path.lexists(name), msg
- # #print msg
- # os.symlink(src, name)
def copy(name, rel_src):
    """
    Copy the file *rel_src* to the path *name* using shutil.copy2().

    A relative *rel_src* is resolved against the directory containing
    *name*; an absolute one is used unchanged. On any failure the pair
    is logged as an error and the exception is re-raised.
    """
    try:
        if os.path.isabs(rel_src):
            source = rel_src
        else:
            # Anchor the relative source at the destination's directory.
            dest_dir = os.path.normpath(os.path.dirname(name))
            source = os.path.join(dest_dir, rel_src)
        shutil.copy2(source, name)
    except Exception:
        LOG.error('{} -> {}'.format(name, rel_src))
        raise
def run(special_split_fn, fn_patterns):
    """
    Copy each job's input files to names built from "fnkey=pattern" specs.

    special_split_fn: JSON file holding a list of jobs. Each job is read for
        an 'input' map (fnkey -> filename) and a 'wildcards' map. Relative
        input filenames are resolved against the directory of this file.
    fn_patterns: iterable of 'fnkey=pattern' strings. Only the first '='
        separates fnkey from pattern, so a pattern may itself contain '='.

    Relative output names are relative to cwd.
    For each pattern, each wildcard will be substituted everywhere, e.g.
        fn_pattern == 'top/{key}/input_{key}.txt'

    Raises ValueError for a spec without '=', KeyError if a job's 'input'
    lacks a requested fnkey, and Exception if an input file is missing or a
    pattern cannot be formatted with the job's wildcards.
    """
    fnkeypattdict = {}
    for fnkeypatt in fn_patterns:
        # partition() splits on the first '=' only; later ones stay in the pattern.
        fnkey, sep, fn_pattern = fnkeypatt.partition('=')
        if not sep:
            raise ValueError(
                'Expected "fnkey=pattern", got {!r}'.format(fnkeypatt))
        fnkeypattdict[fnkey] = fn_pattern

    jobs = deserialize(special_split_fn)
    mapdir = os.path.normpath(os.path.dirname(os.path.normpath(special_split_fn)))
    for job in jobs:
        inputs = job['input']
        wildcards = job['wildcards']
        for (fnkey, fn_pattern) in viewitems(fnkeypattdict):
            val = inputs[fnkey]
            # val is normally relative to the location of special_split_fn;
            # an absolute path is used as-is.
            if not os.path.isabs(val):
                mapped_input_fn = os.path.join(mapdir, val)
            else:
                mapped_input_fn = val
            assert_exists(mapped_input_fn)
            try:
                output_name = fn_pattern.format(**wildcards)
            except Exception as err:
                import pprint
                msg = str(err) + ': for pattern {!r} and wildcards\n{!r}'.format(
                    fn_pattern, pprint.pformat(wildcards))
                raise Exception(msg)
            outdir = os.path.normpath(os.path.dirname(output_name))
            mkdir(outdir)
            # copy() resolves a relative source against the output directory.
            target_name = os.path.relpath(mapped_input_fn, outdir)
            copy(output_name, target_name)
def parse_args(argv):
    """
    Parse the command line in *argv* (program name first) into a Namespace.

    The result has ``special_split_fn`` (required option) and
    ``fn_patterns`` (one or more positional "fnkey=pattern" strings).
    """
    parser = argparse.ArgumentParser(
        description='Create copies called "fn_pattern", of files named by values in "mapped_fn".',
    )
    split_help = 'Serialized split-file (in our special format), where "mapped_inputs" has a map with key to filename, relative to the directory of this file.'
    patterns_help = '"fnkey=pattern" Can appear multiple times. Each is a pattern for output filename, to be substituted with keys in special_split_fn. Each fnkey=filename must appear in the input section of each job listed in special-split.'
    parser.add_argument('--special-split-fn', required=True, help=split_help)
    parser.add_argument('fn_patterns', nargs='+', help=patterns_help)
    # argv[0] is the program name, which argparse must not see.
    cli_tokens = argv[1:]
    return parser.parse_args(cli_tokens)
def main(argv=sys.argv):
    """Command-line entry point: parse *argv* and pass the options to run()."""
    options = vars(parse_args(argv))
    run(**options)
# Executed as a script: configure default logging, then run the CLI.
if __name__ == "__main__":
    logging.basicConfig()
    main()
|