pgzf.c 7.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253
  1. /*
  2. *
  3. * Copyright (c) 2018, Jue Ruan <ruanjue@gmail.com>
  4. *
  5. *
  6. * This program is free software: you can redistribute it and/or modify
  7. * it under the terms of the GNU General Public License as published by
  8. * the Free Software Foundation, either version 3 of the License, or
  9. * (at your option) any later version.
  10. *
  11. * This program is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. * GNU General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU General Public License
  17. * along with this program. If not, see <http://www.gnu.org/licenses/>.
  18. */
  19. #include "pgzf.h"
  20. int usage(int ret){
  21. fprintf(stdout,
  22. "PGZF: Parallel gzip file IO\n"
  23. "Author: Jue Ruan <ruanjue@gmail.com>\n"
  24. "Version: 1.1\n"
  25. "Usage: pgzf [options] file1 [file2 ...]\n"
  26. "Options:\n"
  27. " -d Decompress mode\n"
  28. " -t <int> Number of threads, [8]\n"
  29. " -f Force to overwrite\n"
  30. " -o <string> Output file name, support directory\n"
  31. " -x Delete input files after done\n"
  32. " -b <int> Block size in MB, 1 ~ 256 [16]\n"
  33. " -l <int> Compress level, 1-9, see gzip, [6]\n"
  34. " -h Show this document\n"
  35. " -V Print version information and exit\n"
  36. "\n"
  37. "File format:\n"
  38. " PGZF fellows standard GZIP format (rfc1952), and is blocked compressed.\n"
  39. " It defines two TAGs in each GZIP header, ZS: block size, ZX: random access index.\n"
  40. " Program pgzf can decompress .pgzf and .gz files. When decompressing .gz files,\n"
  41. " pgzf is in fact a buffered gzip reader. Also, .pgzf files can be decompressed\n"
  42. " by program gzip.\n"
  43. "\n"
  44. "In plan to support random access\n"
  45. );
  46. return ret;
  47. }
  48. int main(int argc, char **argv){
  49. PGZF *pz;
  50. char *outf, *ftag;
  51. FILE *in, *out;
  52. void *buff;
  53. u4i bufsize, nbyte;
  54. int c, rw, ncpu, level, overwrite, del, is_dir;
  55. rw = PGZF_MODE_W;
  56. ncpu = 8;
  57. bufsize = PGZF_DEFAULT_BUFF_SIZE;
  58. level = 6;
  59. overwrite = 0;
  60. del = 0;
  61. outf = NULL;
  62. while((c = getopt(argc, argv, "hdxft:b:l:o:V")) != -1){
  63. switch(c){
  64. case 'h': return usage(0);
  65. case 'd': rw = PGZF_MODE_R; break;
  66. case 't': ncpu = atoi(optarg); break;
  67. case 'b': bufsize = (atol(optarg) << 20); break;
  68. case 'l': level = atoi(optarg); break;
  69. case 'f': overwrite = 1; break;
  70. case 'o': outf = optarg; break;
  71. case 'x': del = 1; break;
  72. case 'V': fprintf(stdout, "pgzf 1.1\n"); return 0;
  73. default: return usage(1);
  74. }
  75. }
  76. if(optind == argc){
  77. return usage(1);
  78. }
  79. if(0 && del){
  80. if(outf == NULL && overwrite == 0){
  81. if(optind < argc){
  82. fprintf(stderr, " ** WARNNING: won't delete input files. To force delete input files, please specify -o or/and -f\n");
  83. }
  84. del = 0;
  85. }
  86. }
  87. is_dir = 0;
  88. out = NULL;
  89. if(outf){
  90. if(file_exists(outf)){
  91. if(overwrite == 0){
  92. fprintf(stderr, " ** ERROR: '%s' exists\n", outf);
  93. return 1;
  94. } else {
  95. for(c=optind;c<argc;c++){
  96. if(strcmp(outf, argv[c]) == 0){
  97. fprintf(stderr, " ** ERROR: The same file in INPUT and OUTPUT, '%s'\n", outf);
  98. return 1;
  99. }
  100. }
  101. }
  102. out = open_file_for_write(outf, NULL, overwrite);
  103. } else if(dir_exists(outf)){
  104. is_dir = 1;
  105. } else {
  106. out = open_file_for_write(outf, NULL, overwrite);
  107. }
  108. }
  109. buff = malloc(bufsize);
  110. if(rw == PGZF_MODE_R){
  111. if(outf == NULL || is_dir){
  112. for(c=optind;c<argc;c++){
  113. if(strlen(argv[c]) < 4 || strcasecmp(argv[c] + strlen(argv[c]) - 3, ".gz")){
  114. fprintf(stderr, " ** ERROR: cannot auto generate output file name for '%s'\n", argv[c]);
  115. return 1;
  116. } else if(is_dir){
  117. char *rtag;
  118. rtag = relative_filename(argv[c]);
  119. rtag[strlen(rtag) - 3] = 0;
  120. ftag = malloc(strlen(outf) + 1 + strlen(rtag) + 1);
  121. sprintf(ftag, "%s/%s", outf, rtag);
  122. free(rtag);
  123. if(overwrite == 0 && file_exists(ftag)){
  124. fprintf(stderr, " ** ERROR: '%s' exists\n", ftag);
  125. return 1;
  126. }
  127. free(ftag);
  128. } else {
  129. ftag = strdup(argv[optind]);
  130. ftag[strlen(ftag) - 3] = 0;
  131. if(overwrite == 0 && file_exists(ftag)){
  132. fprintf(stderr, " ** ERROR: '%s' exists\n", ftag);
  133. return 1;
  134. }
  135. free(ftag);
  136. }
  137. }
  138. }
  139. do {
  140. in = open_file_for_read(argv[optind], NULL);
  141. if(outf == NULL){
  142. ftag = strdup(argv[optind]);
  143. ftag[strlen(ftag) - 3] = 0;
  144. out = open_file_for_write(ftag, NULL, overwrite);
  145. free(ftag);
  146. } else if(is_dir){
  147. char *rtag;
  148. rtag = relative_filename(argv[optind]);
  149. rtag[strlen(rtag) - 3] = 0;
  150. ftag = malloc(strlen(outf) + 1 + strlen(rtag) + 1);
  151. sprintf(ftag, "%s/%s", outf, rtag);
  152. free(rtag);
  153. out = open_file_for_write(ftag, NULL, overwrite);
  154. free(ftag);
  155. }
  156. pz = open_pgzf_reader(in, bufsize, ncpu);
  157. while((nbyte = read_pgzf(pz, buff, bufsize))){
  158. fwrite(buff, 1, nbyte, out);
  159. }
  160. if(pz->error){
  161. fprintf(stderr, " ** ERROR: error code (%d)'\n", pz->error);
  162. return 1;
  163. }
  164. close_pgzf(pz);
  165. if(in != stdin){
  166. fclose(in);
  167. if(del){
  168. unlink(argv[optind]);
  169. }
  170. }
  171. optind ++;
  172. if(outf == NULL || is_dir){
  173. fclose(out);
  174. }
  175. } while(optind < argc);
  176. } else {
  177. if(outf && !is_dir){
  178. pz = open_pgzf_writer(out, bufsize, ncpu, level);
  179. } else {
  180. pz = NULL;
  181. for(c=optind;c<argc;c++){
  182. if(strlen(argv[c]) >= 4 && strcasecmp(argv[c] + strlen(argv[c]) - 3, ".gz") == 0){
  183. fprintf(stderr, " ** ERROR: file seems already compressed '%s'\n", argv[c]);
  184. return 1;
  185. } else if(strcmp(argv[c], "-") == 0){
  186. fprintf(stderr, " ** ERROR: Please specify output file when read from STDIN '%s'\n", argv[c]);
  187. return 1;
  188. } else if(is_dir){
  189. char *rtag;
  190. rtag = relative_filename(argv[c]);
  191. ftag = malloc(strlen(outf) + 1 + strlen(rtag) + 3 + 1);
  192. sprintf(ftag, "%s/%s.gz", outf, rtag);
  193. free(rtag);
  194. if(overwrite == 0 && file_exists(ftag)){
  195. fprintf(stderr, " ** ERROR: '%s' exists\n", ftag);
  196. return 1;
  197. }
  198. free(ftag);
  199. } else {
  200. ftag = malloc(strlen(argv[c]) + 4);
  201. sprintf(ftag, "%s.gz", argv[c]);
  202. if(overwrite == 0 && file_exists(ftag)){
  203. fprintf(stderr, " ** ERROR: '%s' exists\n", ftag);
  204. return 1;
  205. }
  206. free(ftag);
  207. }
  208. }
  209. }
  210. do {
  211. if(outf == NULL){
  212. ftag = malloc(strlen(argv[optind]) + 4);
  213. sprintf(ftag, "%s.gz", argv[optind]);
  214. out = open_file_for_write(ftag, NULL, overwrite);
  215. pz = open_pgzf_writer(out, bufsize, ncpu, level);
  216. free(ftag);
  217. } else if(is_dir){
  218. char *rtag;
  219. rtag = relative_filename(argv[optind]);
  220. ftag = malloc(strlen(outf) + 1 + strlen(rtag) + 3 + 1);
  221. sprintf(ftag, "%s/%s.gz", outf, rtag);
  222. free(rtag);
  223. out = open_file_for_write(ftag, NULL, overwrite);
  224. pz = open_pgzf_writer(out, bufsize, ncpu, level);
  225. free(ftag);
  226. }
  227. in = open_file_for_read(argv[optind], NULL);
  228. while((nbyte = fread(buff, 1, bufsize, in))){
  229. write_pgzf(pz, buff, nbyte);
  230. }
  231. if(in != stdin){
  232. fclose(in);
  233. if(del){
  234. unlink(argv[optind]);
  235. }
  236. }
  237. if(outf == NULL || is_dir){
  238. close_pgzf(pz);
  239. fclose(out);
  240. }
  241. optind ++;
  242. } while(optind < argc);
  243. if(outf && !is_dir){
  244. close_pgzf(pz);
  245. }
  246. }
  247. if(outf && !is_dir) fclose(out);
  248. free(buff);
  249. return 0;
  250. }