diff options
author | Tor Andersson <tor.andersson@artifex.com> | 2020-10-14 11:41:23 +0200 |
---|---|---|
committer | Tor Andersson <tor.andersson@artifex.com> | 2020-11-06 16:48:18 +0100 |
commit | 0f7fd8a26b576e80f626dcd76103eabd1ebc7c83 (patch) | |
tree | 62eee5ed26f45e5dc14f637db8b1197bf1fd0f71 | |
parent | 6e31eb560d0a4b66f5567ec1a83aae2738c9cc3a (diff) |
Add JBIG2 support to pdf_add_image.
We need to parse the segment headers in order to rewrite the JBIG2 data
as a PDF-compatible JBIG2Decode stream. The rewrite does the following:
1) Strip the file header (if present).
2) Copy segments from 'globals' symbol table (if present).
3) Convert random access format to sequential access (if necessary).
4) Delete end-of-page and end-of-file segments.
5) Delete all segments that belong to pages other than the first.
TODO: Copy a given page from a multi-page JBIG2 stream. We currently only
support creating a PDF image from the first page of a JBIG2 file.
-rw-r--r-- | include/mupdf/fitz/filter.h | 5 | ||||
-rw-r--r-- | source/fitz/filter-jbig2.c | 10 | ||||
-rw-r--r-- | source/pdf/pdf-image.c | 202 |
3 files changed, 216 insertions, 1 deletion
```diff
diff --git a/include/mupdf/fitz/filter.h b/include/mupdf/fitz/filter.h
index a55514ff4..f93e0b920 100644
--- a/include/mupdf/fitz/filter.h
+++ b/include/mupdf/fitz/filter.h
@@ -192,6 +192,11 @@ void fz_drop_jbig2_globals(fz_context *ctx, fz_jbig2_globals *globals);
 */
 void fz_drop_jbig2_globals_imp(fz_context *ctx, fz_storable *globals);
 
+/**
+	Return buffer containing jbig2 globals data stream.
+*/
+fz_buffer * fz_jbig2_globals_data(fz_context *ctx, fz_jbig2_globals *globals);
+
 /* Extra filters for tiff */
 
 /**
diff --git a/source/fitz/filter-jbig2.c b/source/fitz/filter-jbig2.c
index 7af2e9bb1..95d8a5100 100644
--- a/source/fitz/filter-jbig2.c
+++ b/source/fitz/filter-jbig2.c
@@ -13,6 +13,7 @@ struct fz_jbig2_globals
 	fz_storable storable;
 	Jbig2GlobalCtx *gctx;
 	fz_jbig2_allocators alloc;
+	fz_buffer *data;
 };
 
 typedef struct
@@ -166,6 +167,8 @@ fz_load_jbig2_globals(fz_context *ctx, fz_buffer *buf)
 	FZ_INIT_STORABLE(globals, 1, fz_drop_jbig2_globals_imp);
 	globals->gctx = jbig2_make_global_ctx(jctx);
 
+	globals->data = fz_keep_buffer(ctx, buf);
+
 	return globals;
 }
 
@@ -175,6 +178,7 @@ fz_drop_jbig2_globals_imp(fz_context *ctx, fz_storable *globals_)
 	fz_jbig2_globals *globals = (fz_jbig2_globals *)globals_;
 	globals->alloc.ctx = ctx;
 	jbig2_global_ctx_free(globals->gctx);
+	fz_drop_buffer(ctx, globals->data);
 	fz_free(ctx, globals);
 }
 
@@ -211,3 +215,9 @@ fz_open_jbig2d(fz_context *ctx, fz_stream *chain, fz_jbig2_globals *globals, int
 
 	return fz_new_stream(ctx, state, next_jbig2d, close_jbig2d);
 }
+
+fz_buffer *
+fz_jbig2_globals_data(fz_context *ctx, fz_jbig2_globals *globals)
+{
+	return globals ? globals->data : NULL;
+}
diff --git a/source/pdf/pdf-image.c b/source/pdf/pdf-image.c
index f90925f47..8e3d36ba7 100644
--- a/source/pdf/pdf-image.c
+++ b/source/pdf/pdf-image.c
@@ -298,6 +298,196 @@ pdf_load_image(fz_context *ctx, pdf_document *doc, pdf_obj *dict)
 	return image;
 }
 
+struct jbig2_segment_header {
+	int number;
+	int flags;
+	/* referred-to-segment numbers */
+	int page;
+	int length;
+};
+
+static uint32_t getu32(const unsigned char *data)
+{
+	return ((uint32_t)data[0]<<24) | ((uint32_t)data[1]<<16) | ((uint32_t)data[2]<<8) | (uint32_t)data[3];
+}
+
+static size_t
+pdf_parse_jbig2_segment_header(fz_context *ctx,
+	const unsigned char *data, const unsigned char *end,
+	struct jbig2_segment_header *info)
+{
+	uint32_t rts;
+	size_t n = 5;
+
+	if (data + 11 > end) return 0;
+
+	info->number = getu32(data);
+	info->flags = data[4];
+
+	rts = data[5] >> 5;
+	if (rts == 7)
+	{
+		rts = getu32(data+5) & 0x1FFFFFFF;
+		n += 4 + (rts + 1) / 8;
+	}
+	else
+	{
+		n += 1;
+	}
+
+	if (info->number <= 256)
+		n += rts;
+	else if (info->number <= 65536)
+		n += rts * 2;
+	else
+		n += rts * 4;
+
+	if (info->flags & 0x40)
+	{
+		if (data + n + 4 > end) return 0;
+		info->page = getu32(data+n);
+		n += 4;
+	}
+	else
+	{
+		if (data + n + 1 > end) return 0;
+		info->page = data[n];
+		n += 1;
+	}
+
+	if (data + n + 4 > end) return 0;
+	info->length = getu32(data+n);
+	return n + 4;
+}
+
+static void
+pdf_copy_jbig2_segments(fz_context *ctx, fz_buffer *output, const unsigned char *data, size_t size, int page)
+{
+	struct jbig2_segment_header info;
+	const unsigned char *end = data + size;
+	size_t n;
+	int type;
+
+	while (data < end)
+	{
+		n = pdf_parse_jbig2_segment_header(ctx, data, end, &info);
+		if (n == 0)
+			fz_throw(ctx, FZ_ERROR_GENERIC, "truncated jbig2 segment header");
+
+		/* omit end of page, end of file, and segments for other pages */
+		type = (info.flags & 63);
+		if (type == 49 || type == 51 || (info.page > 0 && info.page != page))
+		{
+			data += n;
+			data += info.length;
+		}
+		else
+		{
+			fz_append_data(ctx, output, data, n);
+			data += n;
+			if (data + info.length > end)
+				fz_throw(ctx, FZ_ERROR_GENERIC, "truncated jbig2 segment data");
+			fz_append_data(ctx, output, data, info.length);
+			data += info.length;
+		}
+	}
+}
+
+static void
+pdf_copy_jbig2_random_segments(fz_context *ctx, fz_buffer *output, const unsigned char *data, size_t size, int page)
+{
+	struct jbig2_segment_header info;
+	const unsigned char *start = data;
+	const unsigned char *end = data + size;
+	size_t n;
+	int type;
+
+	/* Skip headers until end-of-file segment is found. */
+	while (data < end)
+	{
+		n = pdf_parse_jbig2_segment_header(ctx, data, end, &info);
+		if (n == 0)
+			fz_throw(ctx, FZ_ERROR_GENERIC, "truncated jbig2 segment header");
+		data += n;
+		if ((info.flags & 63) == 51)
+			break;
+	}
+	if (data >= end)
+		fz_throw(ctx, FZ_ERROR_GENERIC, "truncated jbig2 segment header");
+
+	/* Copy segment headers and segment data */
+	while (data < end)
+	{
+		n = pdf_parse_jbig2_segment_header(ctx, start, end, &info);
+
+		/* omit end of page, end of file, and segments for other pages */
+		type = (info.flags & 63);
+		if (type == 49 || type == 51 || (info.page > 0 && info.page != page))
+		{
+			start += n;
+			data += info.length;
+		}
+		else
+		{
+			fz_append_data(ctx, output, start, n);
+			start += n;
+			if (data + info.length > end)
+				fz_throw(ctx, FZ_ERROR_GENERIC, "truncated jbig2 segment data");
+			fz_append_data(ctx, output, data, info.length);
+			data += info.length;
+		}
+	}
+}
+
+static fz_buffer *
+pdf_jbig2_stream_from_file(fz_context *ctx, fz_buffer *input, fz_jbig2_globals *globals_, int embedded, int page)
+{
+	fz_buffer *globals = fz_jbig2_globals_data(ctx, globals_);
+	size_t globals_size = globals ? globals->len : 0;
+	fz_buffer *output;
+	int flags;
+	size_t header = 9;
+
+	if (globals_size == 0 && embedded)
+		return fz_keep_buffer(ctx, input);
+
+	if (!embedded)
+	{
+		if (input->len < 9)
+			return NULL; /* not enough data! */
+		flags = input->data[8];
+		if ((flags & 2) == 0)
+		{
+			if (input->len < 13)
+				return NULL; /* not enough data! */
+			header = 13;
+		}
+	}
+
+	output = fz_new_buffer(ctx, input->len + globals_size);
+	fz_try(ctx)
+	{
+		if (globals_size > 0)
+			fz_append_buffer(ctx, output, globals);
+		if (embedded)
+			fz_append_buffer(ctx, output, input);
+		else
+		{
+			if ((flags & 1) == 0)
+				pdf_copy_jbig2_random_segments(ctx, output, input->data + header, input->len - header, page);
+			else
+				pdf_copy_jbig2_segments(ctx, output, input->data + header, input->len - header, page);
+		}
+	}
+	fz_catch(ctx)
+	{
+		fz_drop_buffer(ctx, output);
+		fz_rethrow(ctx);
+	}
+
+	return output;
+}
+
 pdf_obj *
 pdf_add_image(fz_context *ctx, pdf_document *doc, fz_image *image)
 {
@@ -341,6 +531,15 @@ pdf_add_image(fz_context *ctx, pdf_document *doc, fz_image *image)
 				pdf_dict_put_int(ctx, dp, PDF_NAME(SMaskInData), cp->u.jpx.smask_in_data);
 				pdf_dict_put(ctx, imobj, PDF_NAME(Filter), PDF_NAME(JPXDecode));
 				break;
+			case FZ_IMAGE_JBIG2:
+				buffer = pdf_jbig2_stream_from_file(ctx, cbuffer->buffer,
+					cp->u.jbig2.globals,
+					cp->u.jbig2.embedded,
+					1);
+				if (!buffer)
+					goto unknown_compression;
+				pdf_dict_put(ctx, imobj, PDF_NAME(Filter), PDF_NAME(JBIG2Decode));
+				break;
 			case FZ_IMAGE_FAX:
 				if (cp->u.fax.columns)
 					pdf_dict_put_int(ctx, dp, PDF_NAME(Columns), cp->u.fax.columns);
@@ -396,7 +595,8 @@ pdf_add_image(fz_context *ctx, pdf_document *doc, fz_image *image)
 			pdf_dict_put_int(ctx, imobj, PDF_NAME(Width), image->w);
 			pdf_dict_put_int(ctx, imobj, PDF_NAME(Height), image->h);
 
-			buffer = fz_keep_buffer(ctx, cbuffer->buffer);
+			if (!buffer)
+				buffer = fz_keep_buffer(ctx, cbuffer->buffer);
 
 			if (image->use_decode)
 			{
```