pgzf.h 22 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852
  1. /*
  2. *
  3. * Copyright (c) 2018, Jue Ruan <ruanjue@gmail.com>
  4. *
  5. *
  6. * This program is free software: you can redistribute it and/or modify
  7. * it under the terms of the GNU General Public License as published by
  8. * the Free Software Foundation, either version 3 of the License, or
  9. * (at your option) any later version.
  10. *
  11. * This program is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. * GNU General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU General Public License
  17. * along with this program. If not, see <http://www.gnu.org/licenses/>.
  18. */
  19. #ifndef __PGZF_RJ_H
  20. #define __PGZF_RJ_H
  21. #include <zlib.h>
  22. #include "mem_share.h"
  23. #include "list.h"
  24. #include "thread.h"
  25. #define PGZF_DEFAULT_BUFF_SIZE (1U << 24) // 16MB
  26. #define PGZF_MAX_BUFF_SIZE (1U << 28)
  27. #define PGZF_HEAD_SIZE 30
  28. #define PGZF_HEAD_ZS_OFFSET 16
  29. #define PGZF_HEAD_ZX_OFFSET 24
  30. #define PGZF_TAIL_SIZE 8
  31. #define PGZF_INDEX_BIN 64
  32. #define PGZF_MODE_W 1 // pgzf writer
  33. #define PGZF_MODE_R 2 // pgzf reader
  34. #define PGZF_MODE_R_GZ 3 // gz reader
  35. #define PGZF_MODE_R_UNKNOWN 4 // unknown file type
  36. #define PGZF_FILETYPE_UNKNOWN 0
  37. #define PGZF_FILETYPE_GZ 1
  38. #define PGZF_FILETYPE_PGZF 2
  39. struct PGZF;
  40. #define PGZF_TASK_NULL 0
  41. #define PGZF_TASK_DEFLATE 1
  42. #define PGZF_TASK_INFLATE 2
  43. thread_beg_def(pgz);
  44. struct PGZF *pz;
  45. u4i zsval, soff;
  46. u8i zxval, doff;
  47. u1v *dst, *src;
  48. u4i token;
  49. int level;
  50. int task;
  51. thread_end_def(pgz);
  52. typedef struct PGZF {
  53. u4i ncpu, ridx, widx;
  54. int rw_mode, seekable;
  55. u4i bufsize; // MUST be multiple of 1MB
  56. u8i xsize; // total uncompressed size
  57. u8v *boffs;
  58. u8v *xoffs;
  59. u8i tot_in, tot_out;
  60. u1v **dsts, **srcs, *tmp;
  61. z_stream *z;
  62. u8i offset;
  63. FILE *file;
  64. thread_def_shared_vars(pgz);
  65. int step; // used in decompress gzip file
  66. int eof, error;
  67. } PGZF;
  68. static inline void _num2bytes_pgzf(u1i *bs, u1i bl, u8i val){
  69. u1i i;
  70. for(i=0;i<bl;i++){
  71. bs[i] = (u1i)val;
  72. val >>= 8;
  73. }
  74. }
  75. static inline u8i _bytes2num_pgzf(u1i *bs, u1i bl){
  76. u8i val;
  77. u1i i;
  78. val = 0;
  79. for(i=0;i<bl;i++){
  80. val = val | (((u8i)bs[i]) << (8 * i));
  81. }
  82. return val;
  83. }
  84. /**
  85. * Please see https://tools.ietf.org/html/rfc1952
  86. */
  87. static inline void _gen_pgzf_header(u1i bs[30], u4i z_size){
  88. bs[0] = 0x1f; // GZIP ID1
  89. bs[1] = 0x8b; // GZIP ID2
  90. bs[2] = 8; // CM = deflate
  91. bs[3] = 0x4; // FLG = 0b00000100, FEXTRA is ture
  92. bs[4] = 0; // MTIME
  93. bs[5] = 0; // MTIME
  94. bs[6] = 0; // MTIME
  95. bs[7] = 0; // MTIME
  96. bs[8] = 0; // XFL
  97. bs[9] = 3; // OS = unix
  98. bs[10] = 18; // XLEN
  99. bs[11] = 0; // = 18
  100. bs[12] = 'Z'; // TAG ZS
  101. bs[13] = 'S'; // compressed size
  102. bs[14] = 4; // TAG LEN
  103. bs[15] = 0; // 4
  104. bs[16] = z_size >> 0; // compressed block size
  105. bs[17] = z_size >> 8; //
  106. bs[18] = z_size >> 16; //
  107. bs[19] = z_size >> 24; // = z_size
  108. bs[20] = 'Z'; //TAG ZX
  109. bs[21] = 'X'; // every 64 block size
  110. bs[22] = 6; // TAG LEN
  111. bs[23] = 0; // 6
  112. bs[24] = 0; // reserve to store the random access index
  113. bs[25] = 0; //
  114. bs[26] = 0; //
  115. bs[27] = 0; //
  116. bs[28] = 0; //
  117. bs[29] = 0; //
  118. }
  119. static inline void _gen_pgzf_tailer(u1i bs[8], u4i crc, u4i u_size){
  120. _num2bytes_pgzf(bs + 0, 4, crc);
  121. _num2bytes_pgzf(bs + 4, 4, u_size);
  122. }
  123. static inline u4i _zlib_raw_deflate_all(u1i *dst, u4i dlen, u1i *src, u4i slen, int level){
  124. z_stream Z, *z;
  125. u4i ret;
  126. z = &Z;
  127. ZEROS(z);
  128. deflateInit2(z, level, Z_DEFLATED, -15, 9, Z_DEFAULT_STRATEGY);
  129. z->avail_in = slen;
  130. z->next_in = src;
  131. z->avail_out = dlen;
  132. z->next_out = dst;
  133. deflate(z, Z_FINISH);
  134. ret = dlen - z->avail_out;
  135. deflateEnd(z);
  136. return ret;
  137. }
  138. static inline u4i _pgzf_deflate(u1v *dst, u1v *src, int level){
  139. u4i z_size;
  140. uLong crc;
  141. clear_u1v(dst);
  142. if(src->size == 0 || src->size >= MAX_U4) return 0;
  143. z_size = compressBound(src->size);
  144. encap_u1v(dst, z_size + PGZF_HEAD_SIZE + PGZF_TAIL_SIZE);
  145. z_size = _zlib_raw_deflate_all(dst->buffer + PGZF_HEAD_SIZE, z_size, src->buffer, src->size, level);
  146. _gen_pgzf_header(dst->buffer, z_size + PGZF_HEAD_SIZE + PGZF_TAIL_SIZE);
  147. crc = crc32(0L, Z_NULL, 0);
  148. crc = crc32(crc, src->buffer, src->size);
  149. _gen_pgzf_tailer(dst->buffer + PGZF_HEAD_SIZE + z_size, crc, src->size);
  150. dst->size = PGZF_HEAD_SIZE + z_size + PGZF_TAIL_SIZE;
  151. return dst->size;
  152. }
  153. static inline int _read_pgzf_header(FILE *in, u1v *src, u4i *hoff, u4i *zsval, u8i *zxval){
  154. u4i off, val, sl, end;
  155. int ch, is_pgzf, xflag;
  156. char si1, si2;
  157. is_pgzf = 0;
  158. off = *hoff;
  159. *zsval = 0;
  160. *zxval = 0;
  161. if(src->size < off + 10){
  162. encap_u1v(src, 10);
  163. src->size += fread(src->buffer + src->size, 1, off + 10 - src->size, in);
  164. }
  165. // At least give 10 bytes
  166. if(src->size < off + 10) return PGZF_FILETYPE_UNKNOWN;
  167. if(src->buffer[off + 0] != 0x1f) return PGZF_FILETYPE_UNKNOWN;
  168. if(src->buffer[off + 1] != 0x8b) return PGZF_FILETYPE_UNKNOWN;
  169. if((src->buffer[off + 2] != 0x08) || (src->buffer[off + 2] & 0xE0)) return PGZF_FILETYPE_UNKNOWN;
  170. xflag = src->buffer[off + 3];
  171. off += 10;
  172. if(xflag & 0x04){
  173. if(src->size < off + 2){
  174. encap_u1v(src, 2);
  175. sl = fread(src->buffer + src->size, 1, off + 2 - src->size, in);
  176. src->size += sl;
  177. }
  178. if(src->size < off + 2) return PGZF_FILETYPE_UNKNOWN;
  179. val = _bytes2num_pgzf(src->buffer + off, 2);
  180. off += 2;
  181. end = off + val;
  182. if(val > 0 && val < 4) return PGZF_FILETYPE_UNKNOWN;
  183. if(src->size < off + val){
  184. encap_u1v(src, val);
  185. sl = fread(src->buffer + src->size, 1, off + val - src->size, in);
  186. src->size += sl;
  187. if(src->size < off + val) return PGZF_FILETYPE_UNKNOWN;
  188. }
  189. //parse TAGs
  190. while(off < end){
  191. si1 = src->buffer[off + 0];
  192. si2 = src->buffer[off + 1];
  193. sl = _bytes2num_pgzf(src->buffer + off + 2, 2);
  194. off += 4;
  195. if(si1 == 'Z' && si2 == 'S' && sl == 4){
  196. is_pgzf = 1;
  197. *zsval = _bytes2num_pgzf(src->buffer + off, 4);
  198. } else if(is_pgzf && si1 == 'Z' && si2 == 'X' && sl == 6){
  199. *zxval = _bytes2num_pgzf(src->buffer + off, 6);
  200. }
  201. off += sl;
  202. }
  203. }
  204. if(xflag & 0x08){
  205. do {
  206. if(off < src->size){
  207. ch = src->buffer[off];
  208. } else {
  209. ch = getc(in);
  210. if(ch == -1){
  211. return PGZF_FILETYPE_UNKNOWN;
  212. }
  213. push_u1v(src, ch);
  214. }
  215. off ++;
  216. } while(ch);
  217. }
  218. if(xflag & 0x10){
  219. do {
  220. if(off < src->size){
  221. ch = src->buffer[off];
  222. } else {
  223. ch = getc(in);
  224. if(ch == -1){
  225. return PGZF_FILETYPE_UNKNOWN;
  226. }
  227. push_u1v(src, ch);
  228. }
  229. off ++;
  230. } while(ch);
  231. }
  232. if(xflag & 0x02){
  233. if(src->size < off + 2){
  234. encap_u1v(src, 2);
  235. sl = fread(src->buffer + src->size, 1, off + 2 - src->size, in);
  236. src->size += sl;
  237. }
  238. off += 2;
  239. if(src->size < off) return PGZF_FILETYPE_UNKNOWN;
  240. }
  241. *hoff = off;
  242. return is_pgzf? PGZF_FILETYPE_PGZF : PGZF_FILETYPE_GZ;
  243. }
  244. int pgzf_inflate_raw_core(z_stream *z, u1i *dst, u4i *dlen, u1i *src, u4i *slen, int flush){
  245. u4i dl, sl;
  246. int ret;
  247. ret = Z_OK;
  248. dl = *dlen;
  249. sl = *slen;
  250. z->avail_in = sl;
  251. z->next_in = src;
  252. z->avail_out = dl;
  253. z->next_out = dst;
  254. ret = inflate(z, flush);
  255. *dlen = dl - z->avail_out;
  256. *slen = sl - z->avail_in;
  257. return ret;
  258. }
  259. // src start just after gz_header, and include fz_tailer
  260. int pgzf_inflate_core(u1i *dst, u4i *dlen, u1i *src, u4i slen, int check){
  261. z_stream Z, *z;
  262. u4i soff, dsz;
  263. uLong crc, rcr;
  264. int ret;
  265. z = &Z;
  266. ZEROS(z);
  267. inflateInit2(z, -15);
  268. z->avail_in = slen - PGZF_TAIL_SIZE;
  269. z->next_in = src;
  270. z->avail_out = *dlen;
  271. z->next_out = dst;
  272. ret = inflate(z, Z_FINISH);
  273. *dlen -= z->avail_out;
  274. soff = slen - PGZF_TAIL_SIZE - z->avail_in;
  275. inflateEnd(z);
  276. if(check){
  277. if(soff + 8 > slen){
  278. fprintf(stderr, " -- something wrong in %s -- %s:%d --\n", __FUNCTION__, __FILE__, __LINE__); fflush(stderr);
  279. return 0;
  280. }
  281. rcr = _bytes2num_pgzf(src + soff, 4);
  282. dsz = _bytes2num_pgzf(src + soff + 4, 4);
  283. if(dsz != *dlen){
  284. fprintf(stderr, " -- something wrong in %s -- %s:%d --\n", __FUNCTION__, __FILE__, __LINE__); fflush(stderr);
  285. return 0;
  286. }
  287. crc = crc32(0L, Z_NULL, 0);
  288. crc = crc32(crc, dst, *dlen);
  289. if(crc != rcr){
  290. fprintf(stderr, " -- something wrong in %s -- %s:%d --\n", __FUNCTION__, __FILE__, __LINE__); fflush(stderr);
  291. return 0;
  292. }
  293. }
  294. return 1;
  295. }
  296. thread_beg_func(pgz);
  297. PGZF *pz;
  298. u1v *dst, *src;
  299. u4i bufsize, hsize, rsize, dsz, ssz, next;
  300. int ret;
  301. pz = pgz->pz;
  302. dst = pgz->dst;
  303. src = pgz->src;
  304. thread_beg_loop(pgz);
  305. if(pgz->task == PGZF_TASK_DEFLATE){
  306. if(src->size == 0) continue;
  307. clear_u1v(dst);
  308. _pgzf_deflate(dst, src, pgz->level);
  309. while(pz->ridx != pgz->token){
  310. nano_sleep(1);
  311. }
  312. {
  313. pz->tot_out += pgz->dst->size;
  314. push_u8v(pz->boffs, pz->tot_out);
  315. fwrite(pgz->dst->buffer, 1, pgz->dst->size, pz->file);
  316. clear_u1v(pgz->dst);
  317. clear_u1v(pgz->src);
  318. }
  319. pz->ridx ++;
  320. } else if(pgz->task == PGZF_TASK_INFLATE){
  321. pgz->doff = 0;
  322. clear_u1v(pgz->dst);
  323. while((pz->ridx % pz->ncpu) != UInt(pgz->t_idx)){
  324. nano_sleep(10);
  325. if(pz->error) break;
  326. //if(pz->eof){
  327. //if(pz->rw_mode != PGZF_MODE_R_GZ){
  328. //break;
  329. //}
  330. //}
  331. if(pgz->running == 0){
  332. break;
  333. }
  334. }
  335. if(pz->error) break;
  336. if(pz->rw_mode == PGZF_MODE_R){
  337. if(pgz->src->size){ // loaded header, had set zsval and zxval
  338. } else {
  339. pgz->zsval = pgz->zxval = 0;
  340. pgz->soff = pgz->src->size = 0;
  341. ret = _read_pgzf_header(pz->file, pgz->src, &pgz->soff, &pgz->zsval, &pgz->zxval);
  342. if(pgz->src->size == 0){
  343. pz->eof = 1;
  344. pz->ridx ++;
  345. break;
  346. }
  347. if(ret != PGZF_FILETYPE_PGZF){
  348. fprintf(stderr, " -- Error: not a PGZF format at %u block, in %s -- %s:%d --\n", pz->ridx, __FUNCTION__, __FILE__, __LINE__); fflush(stderr);
  349. pz->error = 1;
  350. pz->ridx ++;
  351. break;
  352. }
  353. }
  354. hsize = pgz->soff;
  355. encap_u1v(pgz->src, pgz->zsval - pgz->src->size);
  356. rsize = fread(pgz->src->buffer + hsize, 1, pgz->zsval - pgz->src->size, pz->file);
  357. if(rsize < pgz->zsval - pgz->src->size){
  358. fprintf(stderr, " -- Error: read %u < %u at %u block, in %s -- %s:%d --\n", UInt(pgz->src->size + rsize), pgz->zsval, pz->ridx, __FUNCTION__, __FILE__, __LINE__); fflush(stderr);
  359. pz->error = 1;
  360. pz->ridx ++;
  361. break;
  362. }
  363. pgz->src->size += rsize;
  364. pz->tot_in += pgz->zsval;
  365. pz->ridx ++;
  366. dsz = _bytes2num_pgzf(pgz->src->buffer + pgz->zsval - 4, 4);
  367. encap_u1v(pgz->dst, dsz);
  368. pgz->soff = 0;
  369. if(pgzf_inflate_core(pgz->dst->buffer, &dsz, pgz->src->buffer + hsize, pgz->zsval - hsize, 1) == 0){
  370. clear_u1v(pgz->src);
  371. pz->error = 1;
  372. break;
  373. }
  374. pgz->dst->size = dsz;
  375. clear_u1v(pgz->src);
  376. } else if(pz->rw_mode == PGZF_MODE_R_GZ){
  377. u4i bsz;
  378. bsz = 1024 * 1024;
  379. bufsize = pz->bufsize? pz->bufsize : PGZF_DEFAULT_BUFF_SIZE;
  380. encap_u1v(pgz->dst, bufsize);
  381. while(!pz->error){
  382. if(pgz->src->size == pgz->soff){
  383. pgz->soff = pgz->src->size = 0;
  384. }
  385. if(pgz->src->size < bsz){
  386. encap_u1v(pgz->src, bsz - pgz->src->size);
  387. rsize = fread(pgz->src->buffer + pgz->src->size, 1, bsz - pgz->src->size, pz->file);
  388. if(rsize < bsz - pgz->src->size){
  389. pz->eof = 1;
  390. }
  391. pz->tot_in += rsize;
  392. pgz->src->size += rsize;
  393. }
  394. if(pgz->src->size == pgz->soff){
  395. break;
  396. }
  397. if(pz->step == 0){
  398. u4i tsz;
  399. tsz = pgz->src->size;
  400. ret = _read_pgzf_header(pz->file, pgz->src, &pgz->soff, &pgz->zsval, &pgz->zxval);
  401. if(ret != PGZF_FILETYPE_GZ && ret != PGZF_FILETYPE_PGZF){
  402. if(pgz->src->size == pgz->soff){
  403. pz->eof = 1;
  404. } else {
  405. fprintf(stderr, " -- failed in read gzip header, ret = %d in %s -- %s:%d --\n", ret, __FUNCTION__, __FILE__, __LINE__); fflush(stderr);
  406. pz->error = 1;
  407. }
  408. break;
  409. } else {
  410. pz->tot_in += pgz->src->size - tsz;
  411. }
  412. pz->step = 1;
  413. continue;
  414. } else if(pz->step == 2){
  415. if(pgz->src->size >= pgz->soff + PGZF_TAIL_SIZE){
  416. pgz->soff += PGZF_TAIL_SIZE;
  417. pz->step = 0;
  418. inflateReset(pz->z);
  419. continue;
  420. } else if(pz->eof){
  421. pz->error = 2;
  422. break;
  423. } else {
  424. memmove(pgz->src->buffer, pgz->src->buffer + pgz->soff, pgz->src->size - pgz->soff);
  425. pgz->src->size -= pgz->soff;
  426. pgz->soff = 0;
  427. }
  428. }
  429. while(pgz->dst->size < bufsize && pgz->soff < pgz->src->size){
  430. dsz = bufsize - pgz->dst->size;
  431. ssz = pgz->src->size - pgz->soff;
  432. ret = pgzf_inflate_raw_core(pz->z, pgz->dst->buffer + pgz->dst->size, &dsz, pgz->src->buffer + pgz->soff, &ssz, Z_NO_FLUSH);
  433. pgz->dst->size += dsz;
  434. pgz->soff += ssz;
  435. if(ret == Z_STREAM_END){
  436. pz->step = 2;
  437. break;
  438. } else if(ret != Z_OK){
  439. fprintf(stderr, " -- ZERROR: %d in %s -- %s:%d --\n", ret, __FUNCTION__, __FILE__, __LINE__); fflush(stderr);
  440. pz->error = 1;
  441. break;
  442. }
  443. }
  444. if(pgz->dst->size == bufsize){
  445. if(pgz->soff < pgz->src->size){
  446. if(pz->ncpu > 1){
  447. next = (pz->ridx + 1) % pz->ncpu;
  448. if(pz->srcs[next]->size != 0){
  449. fprintf(stderr, " -- something wrong in %s -- %s:%d --\n", __FUNCTION__, __FILE__, __LINE__); fflush(stderr);
  450. abort();
  451. }
  452. append_array_u1v(pz->srcs[next], pgz->src->buffer + pgz->soff, pgz->src->size - pgz->soff);
  453. }
  454. }
  455. pgz->soff = pgz->src->size = 0;
  456. break;
  457. }
  458. }
  459. pz->ridx ++;
  460. } else if(pz->rw_mode == PGZF_MODE_R_UNKNOWN){
  461. bufsize = pz->bufsize? pz->bufsize : PGZF_DEFAULT_BUFF_SIZE;
  462. encap_u1v(pgz->dst, bufsize);
  463. if(pgz->src->size > bufsize){
  464. fprintf(stderr, " -- something wrong in %s -- %s:%d --\n", __FUNCTION__, __FILE__, __LINE__); fflush(stderr);
  465. pz->error = 1;
  466. pz->ridx ++;
  467. break;
  468. } else if(pgz->src->size){
  469. append_u1v(pgz->dst, pgz->src);
  470. clear_u1v(pgz->src);
  471. }
  472. rsize = fread(pgz->dst->buffer + pgz->dst->size, 1, bufsize - pgz->dst->size, pz->file);
  473. if(rsize < bufsize - pgz->dst->size){
  474. pz->eof = 1;
  475. }
  476. pgz->dst->size += rsize;
  477. pz->tot_in += pgz->dst->size;
  478. pz->ridx ++;
  479. }
  480. }
  481. thread_end_loop(pgz);
  482. thread_end_func(pgz);
  483. static inline PGZF* open_pgzf_writer(FILE *out, u4i buffer_size, int ncpu, int level){
  484. PGZF *pz;
  485. u4i i;
  486. b8i offset;
  487. thread_prepare(pgz);
  488. pz = malloc(sizeof(PGZF));
  489. if(ncpu < 1){
  490. get_linux_sys_info(NULL, NULL, &ncpu);
  491. if(ncpu < 1) ncpu = 8;
  492. }
  493. pz->ncpu = ncpu;
  494. pz->ridx = 0;
  495. pz->widx = 0;
  496. offset = ftell(out);
  497. if(offset == -1){
  498. pz->offset = 0;
  499. pz->seekable = 0;
  500. } else {
  501. pz->offset = offset;
  502. pz->seekable = 1;
  503. }
  504. pz->file = out;
  505. pz->error = 0;
  506. pz->eof = 0;
  507. pz->step = 0;
  508. pz->rw_mode = 1; // write
  509. if(buffer_size == 0) buffer_size = PGZF_DEFAULT_BUFF_SIZE;
  510. pz->bufsize = (buffer_size + 0xFFFFFU) & 0xFFF00000U;
  511. pz->xsize = 0;
  512. pz->boffs = init_u8v(32);
  513. pz->xoffs = init_u8v(32);
  514. pz->z = NULL;
  515. pz->dsts = calloc(pz->ncpu, sizeof(u1v*));
  516. for(i=0;i<pz->ncpu;i++){
  517. pz->dsts[i] = init_u1v(pz->bufsize);
  518. }
  519. pz->srcs = calloc(pz->ncpu, sizeof(u1v*));
  520. for(i=0;i<pz->ncpu;i++){
  521. pz->srcs[i] = init_u1v(pz->bufsize);
  522. }
  523. pz->tmp = init_u1v(32);
  524. pz->tot_in = 0;
  525. pz->tot_out = 0;
  526. if(level == 0) level = Z_DEFAULT_COMPRESSION; // disable level 0, set to default level 6
  527. thread_beg_init(pgz, pz->ncpu);
  528. pgz->pz = pz;
  529. pgz->zsval = 0;
  530. pgz->zxval = 0;
  531. pgz->soff = 0;
  532. pgz->doff = 0;
  533. pgz->dst = pz->dsts[pgz->t_idx];
  534. pgz->src = pz->srcs[pgz->t_idx];
  535. pgz->token = 0;
  536. pgz->level = level;
  537. pgz->task = PGZF_TASK_NULL;
  538. thread_end_init(pgz);
  539. thread_export(pgz, pz);
  540. return pz;
  541. }
  542. static inline size_t write_pgzf(PGZF *pz, void *dat, size_t len){
  543. size_t off, cnt;
  544. thread_prepare(pgz);
  545. thread_import(pgz, pz);
  546. off = 0;
  547. while(off < len){
  548. thread_beg_operate(pgz, pz->widx % pz->ncpu);
  549. thread_wait(pgz);
  550. /*
  551. if(pgz->dst->size){
  552. pz->tot_out += pgz->dst->size;
  553. push_u8v(pz->boffs, pz->tot_out);
  554. fwrite(pgz->dst->buffer, 1, pgz->dst->size, pz->file);
  555. clear_u1v(pgz->dst);
  556. clear_u1v(pgz->src);
  557. }
  558. */
  559. cnt = num_min(len - off, pz->bufsize - pgz->src->size);
  560. append_array_u1v(pgz->src, dat + off, cnt);
  561. off += cnt;
  562. if(pgz->src->size == pz->bufsize){
  563. pz->tot_in += pgz->src->size;
  564. pgz->task = PGZF_TASK_DEFLATE;
  565. pgz->token = pz->widx;
  566. thread_wake(pgz);
  567. pz->widx ++;
  568. }
  569. }
  570. thread_export(pgz, pz);
  571. return len;
  572. }
  573. static inline void _end_pgzf_writer(PGZF *pz){
  574. u4i i, widx;
  575. thread_prepare(pgz);
  576. thread_import(pgz, pz);
  577. widx = pz->widx;
  578. for(i=0;i<=pz->ncpu;i++){ // (pz->tidx + ncpu + 1) % ncpu
  579. thread_beg_operate(pgz, widx % pz->ncpu);
  580. thread_wait(pgz);
  581. /*
  582. if(pgz->dst->size){
  583. pz->tot_out += pgz->dst->size;
  584. push_u8v(pz->boffs, pz->tot_out);
  585. fwrite(pgz->dst->buffer, 1, pgz->dst->size, pz->file);
  586. clear_u1v(pgz->dst);
  587. clear_u1v(pgz->src);
  588. }
  589. */
  590. if(pgz->src->size){ // will force to write un-full block
  591. pz->tot_in += pgz->src->size;
  592. pgz->task = PGZF_TASK_DEFLATE;
  593. pgz->token = pz->widx;
  594. thread_wake(pgz);
  595. pz->widx ++;
  596. }
  597. widx ++;
  598. }
  599. thread_export(pgz, pz);
  600. }
  601. static inline int write_index_pgzf(PGZF *pz){
  602. u8i i, x;
  603. u1i bs[6];
  604. pz->xsize = pz->tot_in;
  605. if(!pz->seekable) return 0;
  606. if(fseek(pz->file, pz->offset + PGZF_HEAD_ZX_OFFSET, SEEK_SET) == -1){
  607. perror("fseek error in write_index_pgzf");
  608. return 0;
  609. }
  610. _num2bytes_pgzf(bs, 6, pz->xsize);
  611. fwrite(bs, 1, 6, pz->file);
  612. for(i=64,x=1;i+PGZF_INDEX_BIN<=pz->boffs->size;i+=PGZF_INDEX_BIN,x++){
  613. push_u8v(pz->xoffs, pz->boffs->buffer[i+PGZF_INDEX_BIN]);
  614. _num2bytes_pgzf(bs, 6, pz->boffs->buffer[i+PGZF_INDEX_BIN]);
  615. if(fseek(pz->file, pz->offset + pz->boffs->buffer[x] + PGZF_HEAD_ZX_OFFSET, SEEK_SET) == -1){
  616. perror("fseek error in write_index_pgzf");
  617. return 0;
  618. }
  619. fwrite(bs, 1, 6, pz->file);
  620. }
  621. fseek(pz->file, 0, SEEK_END);
  622. return 1;
  623. }
  624. static inline PGZF* open_pgzf_reader(FILE *in, u4i bufsize, int ncpu){
  625. PGZF *pz;
  626. u8i zxval;
  627. b8i offset;
  628. u4i i, zsval, hoff;
  629. int ftype;
  630. thread_prepare(pgz);
  631. pz = malloc(sizeof(PGZF));
  632. pz->ncpu = ncpu;
  633. pz->ridx = 0;
  634. pz->widx = 0;
  635. offset = ftell(in);
  636. if(offset == -1){
  637. pz->offset = 0;
  638. pz->seekable = 0;
  639. } else {
  640. pz->offset = offset;
  641. pz->seekable = 1;
  642. }
  643. pz->file = in;
  644. pz->eof = 0;
  645. pz->error = 0;
  646. pz->step = 0;
  647. pz->dsts = calloc(pz->ncpu, sizeof(u1v*));
  648. pz->srcs = calloc(pz->ncpu, sizeof(u1v*));
  649. pz->tmp = init_u1v(32);
  650. pz->tot_in = 0;
  651. pz->tot_out = 0;
  652. pz->boffs = init_u8v(32);
  653. pz->xoffs = init_u8v(32);
  654. // recognize PGZF
  655. zsval = zxval = 0;
  656. hoff = 0;
  657. pz->srcs[0] = init_u1v(1024);
  658. ftype = _read_pgzf_header(pz->file, pz->srcs[0], &hoff, &zsval, &zxval);
  659. pz->tot_in = pz->srcs[0]->size;
  660. switch(ftype){
  661. case PGZF_FILETYPE_GZ: pz->step = 1; pz->rw_mode = PGZF_MODE_R_GZ; break;
  662. case PGZF_FILETYPE_PGZF: pz->rw_mode = PGZF_MODE_R; break;
  663. default:
  664. fprintf(stderr, " ** WARNNING: input file is not in gzip format **\n");
  665. pz->rw_mode = PGZF_MODE_R_UNKNOWN; break;
  666. }
  667. if(pz->rw_mode == PGZF_MODE_R){
  668. pz->z = NULL;
  669. pz->xsize = zxval;
  670. push_u8v(pz->boffs, zsval);
  671. if(pz->seekable){
  672. u8i foff;
  673. foff = ftell(pz->file);
  674. if(fseek(pz->file, pz->offset + zsval - 4, SEEK_SET) == -1){
  675. fprintf(stderr, " ** ERROR: failed to read uncompress block size in the first block ERR(1) **\n");
  676. return NULL;
  677. }
  678. if(fread(&pz->bufsize, 4, 1, pz->file) == 0){
  679. fprintf(stderr, " ** ERROR: failed to read uncompress block size in the first block ERR(2) **\n");
  680. return NULL;
  681. }
  682. if(fseek(pz->file, foff, SEEK_SET) == -1){
  683. fprintf(stderr, " ** ERROR: failed to read uncompress block size in the first block ERR(3) **\n");
  684. return NULL;
  685. }
  686. } else {
  687. pz->bufsize = bufsize;
  688. }
  689. } else if(pz->rw_mode == PGZF_MODE_R_GZ){
  690. pz->z = calloc(1, sizeof(z_stream));
  691. inflateInit2(pz->z, -15);
  692. pz->bufsize = bufsize;
  693. } else {
  694. pz->z = NULL;
  695. pz->bufsize = bufsize;
  696. }
  697. if(pz->bufsize == 0) pz->bufsize = PGZF_DEFAULT_BUFF_SIZE;
  698. pz->bufsize = (pz->bufsize + 0xFFFFFU) & 0xFFF00000U;
  699. for(i=0;i<pz->ncpu;i++){
  700. pz->dsts[i] = init_u1v(pz->bufsize);
  701. }
  702. if(pz->bufsize > pz->srcs[0]->size){
  703. encap_u1v(pz->srcs[0], pz->bufsize - pz->srcs[0]->size);
  704. }
  705. for(i=1;i<pz->ncpu;i++){
  706. pz->srcs[i] = init_u1v(pz->bufsize);
  707. }
  708. thread_beg_init(pgz, pz->ncpu);
  709. pgz->pz = pz;
  710. pgz->zsval = pgz->t_idx? 0 : zsval;
  711. pgz->zxval = pgz->t_idx? 0 : zxval;
  712. pgz->soff = pgz->t_idx? 0 : hoff;
  713. pgz->doff = 0;
  714. pgz->src = pz->srcs[pgz->t_idx];
  715. pgz->dst = pz->dsts[pgz->t_idx];
  716. pgz->level = Z_DEFAULT_COMPRESSION; // useless in inflating
  717. pgz->task = PGZF_TASK_INFLATE;
  718. thread_end_init(pgz);
  719. thread_wake_all(pgz);
  720. thread_export(pgz, pz);
  721. return pz;
  722. }
  723. /*
  724. static inline _clear_pgzf_reader(PGZF *pz){
  725. UNUSED(pz);
  726. }
  727. static inline off_t seek_pgzf(PGZF *pz, u8i offset){
  728. u4i bidx, boff, xidx, xoff;
  729. if(offset > pz->xsize) return -1;
  730. else if(offset == pz->xsize){
  731. pz->eof = 1;
  732. return offset;
  733. }
  734. if(!pz->seekable) return -1;
  735. bidx = offset / pz->bufsize;
  736. boff = offset % pz->>bufsize;
  737. xidx = bidx / PGZF_INDEX_BIN;
  738. xoff = bidx % PGZF_INDEX_BIN;
  739. if(xidx > pz->xoffs->size){
  740. }
  741. return 0;
  742. }
  743. */
  744. static inline size_t read_pgzf(PGZF *pz, void *dat, size_t len){
  745. size_t off;
  746. u4i nrun;
  747. thread_prepare(pgz);
  748. thread_import(pgz, pz);
  749. nrun = 0;
  750. for(off=0;off<len;){
  751. thread_beg_operate(pgz, pz->widx % pz->ncpu);
  752. thread_wait(pgz);
  753. if(pz->error) break;
  754. if(len - off < pgz->dst->size - pgz->doff){
  755. memcpy(dat + off, pgz->dst->buffer + pgz->doff, len - off);
  756. pz->tot_out += len - off;
  757. pgz->doff += len - off;
  758. off = len;
  759. break;
  760. } else if(pgz->dst->size){
  761. memcpy(dat + off, pgz->dst->buffer + pgz->doff, pgz->dst->size - pgz->doff);
  762. pz->tot_out += pgz->dst->size - pgz->doff;
  763. off += pgz->dst->size - pgz->doff;
  764. pgz->doff = pgz->dst->size;
  765. pgz->task = PGZF_TASK_INFLATE;
  766. thread_wake(pgz);
  767. pz->widx ++;
  768. } else if(pz->eof){
  769. nrun ++;
  770. if(nrun >= pz->ncpu){
  771. break;
  772. }
  773. }
  774. }
  775. return off;
  776. }
  777. static inline void close_pgzf(PGZF *pz){
  778. thread_prepare(pgz);
  779. if(pz->rw_mode == PGZF_MODE_W){
  780. _end_pgzf_writer(pz);
  781. }
  782. thread_import(pgz, pz);
  783. thread_beg_close(pgz);
  784. free_u1v(pgz->dst);
  785. free_u1v(pgz->src);
  786. thread_end_close(pgz);
  787. free(pz->dsts);
  788. free(pz->srcs);
  789. free_u1v(pz->tmp);
  790. switch(pz->rw_mode){
  791. case PGZF_MODE_W: write_index_pgzf(pz); fflush(pz->file); break;
  792. case PGZF_MODE_R: break;
  793. case PGZF_MODE_R_GZ:
  794. if(pz->z){
  795. inflateEnd(pz->z);
  796. free(pz->z);
  797. }
  798. break;
  799. }
  800. free_u8v(pz->boffs);
  801. free_u8v(pz->xoffs);
  802. free(pz);
  803. }
  804. static inline size_t read_pgzf4filereader(void *obj, void *dat, size_t len){ return read_pgzf((PGZF*)obj, dat, len); }
  805. static inline void close_pgzf4filereader(void *obj){
  806. PGZF *pz;
  807. pz = (PGZF*)obj;
  808. if(pz->file != stdin){
  809. fclose(pz->file);
  810. }
  811. return close_pgzf(pz);
  812. }
  813. static inline size_t write_pgzf4filewriter(void *obj, void *dat, size_t len){ return write_pgzf((PGZF*)obj, dat, len); }
  814. static inline void close_pgzf4filewriter(void *obj){
  815. PGZF *pz;
  816. pz = (PGZF*)obj;
  817. return close_pgzf(pz);
  818. }
  819. #endif