copy_mapped.py 3.5 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697
  1. from future.utils import viewitems
  2. import argparse
  3. import json
  4. import logging
  5. import os
  6. import shutil
  7. import sys
  8. LOG = logging.getLogger(__name__)
  9. def deserialize(fn):
  10. with open(fn) as ifs:
  11. return json.loads(ifs.read())
  12. def assert_exists(fn):
  13. if not os.path.isfile(fn):
  14. raise Exception('Does not exist: {!r}'.format(fn))
  15. def mkdir(dirname):
  16. if not os.path.isdir(dirname):
  17. # Possible race-condition, so dirs must be created serially.
  18. os.makedirs(dirname)
  19. #def symlink(name, src):
  20. # msg = '{} -> {}'.format(name, src)
  21. # assert not os.path.lexists(name), msg
  22. # #print msg
  23. # os.symlink(src, name)
  24. def copy(name, rel_src):
  25. try:
  26. if not os.path.isabs(rel_src):
  27. dn = os.path.normpath(os.path.dirname(name))
  28. src = os.path.join(dn, rel_src)
  29. else:
  30. src = rel_src
  31. shutil.copy2(src, name)
  32. except Exception:
  33. msg = '{} -> {}'.format(name, rel_src)
  34. LOG.error(msg)
  35. raise
  36. def run(special_split_fn, fn_patterns):
  37. """
  38. Symlink targets will be relative to cwd.
  39. For each pattern, each wildcard will be substituted everywhere, e.g.
  40. fn_pattern == 'top/{key}/input_{key}.txt'
  41. """
  42. fnkeypattdict = dict(fnkeypatt.split('=') for fnkeypatt in fn_patterns)
  43. jobs = deserialize(special_split_fn)
  44. mapdir = os.path.normpath(os.path.dirname(os.path.normpath(special_split_fn)))
  45. for job in jobs:
  46. inputs = job['input']
  47. wildcards = job['wildcards']
  48. for (fnkey, fn_pattern) in viewitems(fnkeypattdict):
  49. val = inputs[fnkey]
  50. # val should be relative to the location of the special_split_fn.
  51. #assert not os.path.isabs(val), 'mapped input (dynamic output) filename {!r} must be relative (to serialzed file location {!r})'.format(
  52. # val, special_split_fn)
  53. if not os.path.isabs(val):
  54. mapped_input_fn = os.path.join(mapdir, val)
  55. else:
  56. mapped_input_fn = val
  57. assert_exists(mapped_input_fn)
  58. try:
  59. symlink_name = fn_pattern.format(**wildcards)
  60. except Exception as err:
  61. import pprint
  62. msg = str(err) + ': for pattern {!r} and wildcards\n{!r}'.format(
  63. fn_pattern, pprint.pformat(wildcards))
  64. raise Exception(msg)
  65. outdir = os.path.normpath(os.path.dirname(symlink_name))
  66. mkdir(outdir)
  67. target_name = os.path.relpath(mapped_input_fn, outdir)
  68. copy(symlink_name, target_name)
  69. def parse_args(argv):
  70. description = 'Create copies called "fn_pattern", of files named by values in "mapped_fn".'
  71. parser = argparse.ArgumentParser(
  72. description=description,
  73. )
  74. parser.add_argument(
  75. '--special-split-fn', required=True,
  76. help='Serialized split-file (in our special format), where "mapped_inputs" has a map with key to filename, relative to the directory of this file.')
  77. parser.add_argument(
  78. 'fn_patterns', nargs='+',
  79. help='"fnkey=pattern" Can appear multiple times. Each is a pattern for output filename, to be substituted with keys in special_split_fn. Each fnkey=filename must appear in the input section of each job listed in special-split.')
  80. return parser.parse_args(argv[1:])
  81. def main(argv=sys.argv):
  82. args = parse_args(argv)
  83. run(**vars(args))
# Script entry point: set up default logging, then run main() with its
# default argv.
if __name__ == "__main__":
    logging.basicConfig()
    main()