symlink_mapped.py 3.0 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879
  1. from future.utils import viewitems
  2. import argparse
  3. import json
  4. import os
  5. import sys
  6. def deserialize(fn):
  7. with open(fn) as ifs:
  8. return json.loads(ifs.read())
  9. def assert_exists(fn):
  10. if not os.path.isfile(fn):
  11. raise Exception('Does not exist: {!r}'.format(fn))
  12. def mkdir(dirname):
  13. if not os.path.isdir(dirname):
  14. # Possible race-condition, so dirs must be created serially.
  15. os.makedirs(dirname)
  16. def symlink(name, target):
  17. msg = '{} -> {}'.format(name, target)
  18. assert not os.path.lexists(name), msg
  19. #print msg
  20. os.symlink(target, name)
  21. def run(special_split_fn, fn_patterns):
  22. """
  23. Symlink targets will be relative to cwd.
  24. For each pattern, each wildcard will be substituted everywhere, e.g.
  25. fn_pattern == 'top/{key}/input_{key}.txt'
  26. """
  27. fnkeypattdict = dict(fnkeypatt.split('=') for fnkeypatt in fn_patterns)
  28. jobs = deserialize(special_split_fn)
  29. mapdir = os.path.normpath(os.path.dirname(os.path.normpath(special_split_fn)))
  30. for job in jobs:
  31. inputs = job['input']
  32. wildcards = job['wildcards']
  33. for (fnkey, fn_pattern) in viewitems(fnkeypattdict):
  34. val = inputs[fnkey]
  35. # val should be relative to the location of the special_split_fn.
  36. #assert not os.path.isabs(val), 'mapped input (dynamic output) filename {!r} must be relative (to serialzed file location {!r})'.format(
  37. # val, special_split_fn)
  38. if not os.path.isabs(val):
  39. mapped_input_fn = os.path.join(mapdir, val)
  40. else:
  41. mapped_input_fn = val
  42. assert_exists(mapped_input_fn)
  43. try:
  44. symlink_name = fn_pattern.format(**wildcards)
  45. except Exception as err:
  46. import pprint
  47. msg = str(err) + ': for pattern {!r} and wildcards\n{!r}'.format(
  48. fn_pattern, pprint.pformat(wildcards))
  49. raise Exception(msg)
  50. outdir = os.path.normpath(os.path.dirname(symlink_name))
  51. mkdir(outdir)
  52. target_name = os.path.relpath(mapped_input_fn, outdir)
  53. symlink(symlink_name, target_name)
  54. def parse_args(argv):
  55. description = 'Create symlinks named after "fn_pattern", targeting values in "mapped_fn".'
  56. parser = argparse.ArgumentParser(
  57. description=description,
  58. )
  59. parser.add_argument(
  60. '--special-split-fn', required=True,
  61. help='Serialized split-file (in our special format), where "mapped_inputs" has a map with key to filename, relative to the directory of this file.')
  62. parser.add_argument(
  63. 'fn_patterns', nargs='+',
  64. help='"fnkey=pattern" Can appear multiple times. Each is a pattern for symlinks, to be substituted with keys in special_split_fn. Each fnkey=filename must appear in the input section of each job listed in special-split.')
  65. return parser.parse_args(argv[1:])
  66. def main(argv=sys.argv):
  67. args = parse_args(argv)
  68. run(**vars(args))
# Invoke main() only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()