pbs_fileutils.py 7.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256
  1. # coding: utf-8
  2. # Copyright (C) 1994-2018 Altair Engineering, Inc.
  3. # For more information, contact Altair at www.altair.com.
  4. #
  5. # This file is part of the PBS Professional ("PBS Pro") software.
  6. #
  7. # Open Source License Information:
  8. #
  9. # PBS Pro is free software. You can redistribute it and/or modify it under the
  10. # terms of the GNU Affero General Public License as published by the Free
  11. # Software Foundation, either version 3 of the License, or (at your option) any
  12. # later version.
  13. #
  14. # PBS Pro is distributed in the hope that it will be useful, but WITHOUT ANY
  15. # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
  16. # FOR A PARTICULAR PURPOSE.
  17. # See the GNU Affero General Public License for more details.
  18. #
  19. # You should have received a copy of the GNU Affero General Public License
  20. # along with this program. If not, see <http://www.gnu.org/licenses/>.
  21. #
  22. # Commercial License Information:
  23. #
  24. # For a copy of the commercial license terms and conditions,
  25. # go to: (http://www.pbspro.com/UserArea/agreement.html)
  26. # or contact the Altair Legal Department.
  27. #
  28. # Altair’s dual-license business model allows companies, individuals, and
  29. # organizations to create proprietary derivative works of PBS Pro and
  30. # distribute them - whether embedded or bundled with other software -
  31. # under a commercial license agreement.
  32. #
  33. # Use of Altair’s trademarks, including but not limited to "PBS™",
  34. # "PBS Professional®", and "PBS Pro™" and Altair’s logos is subject to Altair's
  35. # trademark licensing policies.
  36. FILE_HEAD = 'head'
  37. FILE_TAIL = 'tail'
  38. class FileUtils:
  39. """
  40. Utility to walk a file from ``'head'`` or ``'tail'`` on the local
  41. filesystem
  42. :param f: File to process
  43. :type f: str
  44. :param mode: One of FILE_HEAD or FILE_TAIL, which respectively set the
  45. file for processing from head or tail. Defaults to head.
  46. """
  47. def __init__(self, f, mode=FILE_HEAD):
  48. self.filename = f
  49. self.fd = open(f, 'rb')
  50. self._buf_size = 1024
  51. self._fseek_ptr = 0
  52. self._lines_cache = []
  53. self.set_mode(mode)
  54. self.num_lines = None
  55. def get_file_descriptor(self):
  56. """
  57. Return the file descriptor associated to the file being processed
  58. """
  59. return self.fd
  60. def set_buf_size(self, bs=1024):
  61. """
  62. Set the buffer size to read blocks of file into
  63. """
  64. self._buf_size = bs
  65. def set_mode(self, m=None):
  66. """
  67. :param m: ``FILE_TAIL`` if file to be tailed, and ``FILE_HEAD`` to read
  68. from head
  69. """
  70. if m == FILE_TAIL:
  71. self._backward = True
  72. self._bytes = self.get_size()
  73. else:
  74. self._backward = False
  75. self.fd.seek(0, 0)
  76. def tell(self):
  77. """
  78. :returns: The current file ``'cursor'``
  79. """
  80. return self.fd.tell()
  81. def get_size(self):
  82. """
  83. :returns: The size of the file
  84. """
  85. cur_pos = self.fd.tell()
  86. self.fd.seek(0, 2)
  87. size = self.fd.tell()
  88. self.fd.seek(cur_pos)
  89. return size
  90. def get_num_lines(self):
  91. """
  92. :returns: No of lines for the file
  93. """
  94. if self.num_lines is not None:
  95. return self.num_lines
  96. _c = self.fd.tell()
  97. self.num_lines = sum(1 for _ in self.fd)
  98. self.fd.seek(_c)
  99. return self.num_lines
  100. def next(self, n=1):
  101. """
  102. Get the next n lines of the file
  103. :param n: the numer of lines to retrieve
  104. :type n: int
  105. """
  106. if self._backward:
  107. return self.tail(n)
  108. else:
  109. return self.head(n)
  110. def get_line(self, n=1):
  111. """
  112. :returns: The nth line from file
  113. """
  114. self.fd.seek(0, 0)
  115. i = 0
  116. while i != (n - 1):
  117. try:
  118. self.fd.readline()
  119. except:
  120. return None
  121. i += 1
  122. return self.fd.readline()
  123. def get_block(self, from_n=1, to_n=1):
  124. """
  125. :returns: A block of lines between ``from_n`` and ``to_n``
  126. """
  127. if to_n < from_n:
  128. return None
  129. self.fd.seek(0, 0)
  130. i = 0
  131. block = []
  132. while i != (from_n - 1):
  133. try:
  134. self.fd.readline()
  135. i += 1
  136. except:
  137. return None
  138. while i != to_n:
  139. try:
  140. block.append(self.fd.readline())
  141. i += 1
  142. except:
  143. del block
  144. return None
  145. return block
  146. def next_head(self, n=1):
  147. """
  148. Next line(s) from head
  149. """
  150. return self.head(n)
  151. def next_tail(self, n=1):
  152. """
  153. Next line(s) from tail
  154. """
  155. if not self._backward:
  156. self.set_mode(FILE_TAIL)
  157. return self.tail(n)
  158. def head(self, n=1):
  159. """
  160. :returns: n lines of head
  161. """
  162. head_lines = []
  163. i = 0
  164. while i != n:
  165. line = self.fd.readline()
  166. if line == '':
  167. break
  168. head_lines.append(line.strip())
  169. i += 1
  170. return head_lines
  171. def tail(self, n=1):
  172. """
  173. :returns: n lines of tail
  174. """
  175. if not self._backward:
  176. self.set_mode(FILE_TAIL)
  177. ret = []
  178. n_read = n
  179. # Retrieve as many lines from the cache as available and as requested
  180. while len(self._lines_cache) > 0 and n_read > 0:
  181. ret.append(self._lines_cache.pop())
  182. n_read -= 1
  183. if n_read == 0:
  184. return ret
  185. # searching backwards, the line may be truncated too early, to avoid
  186. # returning an incomplete line we look for an additional line and will
  187. # not return that possibly truncated line as part of our result
  188. size = n + 1
  189. data = []
  190. while size > 0 and self._bytes > 0:
  191. if (self._bytes - self._buf_size > 0):
  192. self._fseek_ptr -= self._buf_size
  193. self.fd.seek(self._fseek_ptr, 2)
  194. data.append(self.fd.read(self._buf_size))
  195. else:
  196. # file too small, start from beginning
  197. self.fd.seek(0, 0)
  198. # only read what was not read
  199. data.append(self.fd.read(self._bytes))
  200. linesFound = data[-1].count('\n')
  201. size -= linesFound
  202. self._bytes -= self._buf_size
  203. data.reverse()
  204. tmp_ret = ''.join(data).splitlines()
  205. if len(tmp_ret) > 0:
  206. # If we have reached the beginning of the file, we need to
  207. # include the first line rather than bypass it
  208. if self._bytes <= 0:
  209. first_idx = 0
  210. else:
  211. first_idx = 1
  212. self._lines_cache = tmp_ret[first_idx:] + self._lines_cache
  213. # account for the possibly truncated first line. We will only
  214. # read back buf_size from that index
  215. self._fseek_ptr += len(tmp_ret[0])
  216. # adjust the number of bytes actually read
  217. self._bytes += len(tmp_ret[0])
  218. # if the number of lines requested is greater than what is 'there'
  219. # reset the total number of lines requested to what is available
  220. if self._bytes <= 0 and len(self._lines_cache) < n_read:
  221. n_read = len(self._lines_cache)
  222. while len(self._lines_cache) > 0 and n_read > 0:
  223. ret.append(self._lines_cache.pop())
  224. n_read -= 1
  225. if n_read == 0:
  226. return ret