summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTor Andersson <tor.andersson@artifex.com>2020-10-14 11:41:23 +0200
committerTor Andersson <tor.andersson@artifex.com>2020-11-06 16:48:18 +0100
commit0f7fd8a26b576e80f626dcd76103eabd1ebc7c83 (patch)
tree62eee5ed26f45e5dc14f637db8b1197bf1fd0f71
parent6e31eb560d0a4b66f5567ec1a83aae2738c9cc3a (diff)
Add JBIG2 support to pdf_add_image.
We need to parse the segment headers in order to rewrite the JBIG2 data into a PDF-compatible JBIG2Decode stream: 1) Strip the file header (if present). 2) Copy the segments from the 'globals' symbol table (if present). 3) Convert random-access organisation to sequential organisation (if necessary). 4) Delete end-of-page and end-of-file segments. 5) Delete all segments related to pages other than the first. TODO: Copy a given page from a multi-page JBIG2 stream. We currently only support creating a PDF image from the first page of a JBIG2 file.
-rw-r--r--include/mupdf/fitz/filter.h5
-rw-r--r--source/fitz/filter-jbig2.c10
-rw-r--r--source/pdf/pdf-image.c202
3 files changed, 216 insertions, 1 deletions
diff --git a/include/mupdf/fitz/filter.h b/include/mupdf/fitz/filter.h
index a55514ff4..f93e0b920 100644
--- a/include/mupdf/fitz/filter.h
+++ b/include/mupdf/fitz/filter.h
@@ -192,6 +192,11 @@ void fz_drop_jbig2_globals(fz_context *ctx, fz_jbig2_globals *globals);
*/
void fz_drop_jbig2_globals_imp(fz_context *ctx, fz_storable *globals);
+/**
+ Return buffer containing jbig2 globals data stream.
+
+ Returns NULL if globals is NULL. The returned pointer is the
+ buffer held by the globals object itself; no additional reference
+ is taken on behalf of the caller.
+*/
+fz_buffer * fz_jbig2_globals_data(fz_context *ctx, fz_jbig2_globals *globals);
+
/* Extra filters for tiff */
/**
diff --git a/source/fitz/filter-jbig2.c b/source/fitz/filter-jbig2.c
index 7af2e9bb1..95d8a5100 100644
--- a/source/fitz/filter-jbig2.c
+++ b/source/fitz/filter-jbig2.c
@@ -13,6 +13,7 @@ struct fz_jbig2_globals
fz_storable storable;
Jbig2GlobalCtx *gctx;
fz_jbig2_allocators alloc;
+ fz_buffer *data;
};
typedef struct
@@ -166,6 +167,8 @@ fz_load_jbig2_globals(fz_context *ctx, fz_buffer *buf)
FZ_INIT_STORABLE(globals, 1, fz_drop_jbig2_globals_imp);
globals->gctx = jbig2_make_global_ctx(jctx);
+ globals->data = fz_keep_buffer(ctx, buf);
+
return globals;
}
@@ -175,6 +178,7 @@ fz_drop_jbig2_globals_imp(fz_context *ctx, fz_storable *globals_)
fz_jbig2_globals *globals = (fz_jbig2_globals *)globals_;
globals->alloc.ctx = ctx;
jbig2_global_ctx_free(globals->gctx);
+ fz_drop_buffer(ctx, globals->data);
fz_free(ctx, globals);
}
@@ -211,3 +215,9 @@ fz_open_jbig2d(fz_context *ctx, fz_stream *chain, fz_jbig2_globals *globals, int
return fz_new_stream(ctx, state, next_jbig2d, close_jbig2d);
}
+
+/*
+	Give access to the raw globals stream that was kept when the
+	globals were loaded. A NULL globals object yields NULL. The
+	buffer is returned as-is, without taking a new reference.
+*/
+fz_buffer *
+fz_jbig2_globals_data(fz_context *ctx, fz_jbig2_globals *globals)
+{
+	if (!globals)
+		return NULL;
+	return globals->data;
+}
diff --git a/source/pdf/pdf-image.c b/source/pdf/pdf-image.c
index f90925f47..8e3d36ba7 100644
--- a/source/pdf/pdf-image.c
+++ b/source/pdf/pdf-image.c
@@ -298,6 +298,196 @@ pdf_load_image(fz_context *ctx, pdf_document *doc, pdf_obj *dict)
return image;
}
+/*
+	The parts of a JBIG2 segment header that the segment copying code
+	looks at. The number, page and length fields are read from the
+	stream as unsigned 32-bit values, so they are stored as uint32_t;
+	a plain int would turn values of 2^31 and above negative and
+	defeat the range and bounds comparisons made on them.
+*/
+struct jbig2_segment_header {
+	uint32_t number;	/* segment number */
+	int flags;	/* header flags byte; the low 6 bits are the segment type */
+	/* referred-to-segment numbers are skipped over, not stored */
+	uint32_t page;	/* page association; segments with page 0 are kept for every page */
+	uint32_t length;	/* size in bytes of the segment data */
+};
+
+/* Decode the four bytes at data as a big-endian unsigned 32-bit integer. */
+static uint32_t getu32(const unsigned char *data)
+{
+	uint32_t value = 0;
+	int i;
+
+	/* The most significant byte comes first. */
+	for (i = 0; i < 4; i++)
+		value = (value << 8) | (uint32_t)data[i];
+	return value;
+}
+
+/*
+	Parse one JBIG2 segment header starting at data.
+
+	data, end: the bytes available for parsing are [data, end).
+	info: receives the segment number, flags byte, page association
+	and segment data length.
+
+	Returns the size of the header in bytes (the segment data starts
+	that many bytes after data), or 0 if the header does not fit in
+	the available bytes. On a 0 return the contents of info are not
+	meaningful.
+
+	All bounds checks compare offsets against the number of bytes
+	remaining instead of forming pointers such as data + n, since the
+	referred-to segment count comes from the stream and can push n
+	far beyond the end of the buffer.
+*/
+static size_t
+pdf_parse_jbig2_segment_header(fz_context *ctx,
+	const unsigned char *data, const unsigned char *end,
+	struct jbig2_segment_header *info)
+{
+	size_t avail;
+	uint32_t number;
+	uint32_t rts;
+	size_t n = 5;
+
+	/* Smallest header: number (4) + flags (1) + count (1) + page (1) + length (4). */
+	if (end < data || (size_t)(end - data) < 11) return 0;
+	avail = (size_t)(end - data);
+
+	/* Keep the segment number unsigned so the size class tests below
+	 * are not fooled by numbers with the top bit set. */
+	number = getu32(data);
+	info->number = number;
+	info->flags = data[4];
+
+	/* Count of referred-to segments: short form in the top 3 bits,
+	 * or 7 to signal the long form with a 29 bit count. */
+	rts = data[5] >> 5;
+	if (rts == 7)
+	{
+		rts = getu32(data+5) & 0x1FFFFFFF;
+		/* NOTE(review): the number of retention flag bytes is
+		 * rounded down here; confirm against the JBIG2
+		 * specification and the decoder in use. */
+		n += 4 + (rts + 1) / 8;
+	}
+	else
+	{
+		n += 1;
+	}
+
+	/* The width of each referred-to segment number depends on the
+	 * number of the segment doing the referring. */
+	if (number <= 256)
+		n += rts;
+	else if (number <= 65536)
+		n += (size_t)rts * 2;
+	else
+		n += (size_t)rts * 4;
+
+	/* Flag 0x40 selects a 4 byte page association instead of 1 byte. */
+	if (info->flags & 0x40)
+	{
+		if (n > avail || avail - n < 4) return 0;
+		info->page = getu32(data+n);
+		n += 4;
+	}
+	else
+	{
+		if (n >= avail) return 0;
+		info->page = data[n];
+		n += 1;
+	}
+
+	if (n > avail || avail - n < 4) return 0;
+	info->length = getu32(data+n);
+	return n + 4;
+}
+
+/*
+	Copy segments (header followed directly by its data) from a
+	sequentially organised JBIG2 stream to output.
+
+	data, size: the segment stream to read.
+	page: segments associated with a non-zero page other than this
+	one are dropped, as are end-of-page (type 49) and end-of-file
+	(type 51) segments.
+
+	Throws FZ_ERROR_GENERIC if a header cannot be parsed, or if a
+	segment that is to be copied claims more data than remains. A
+	dropped segment that runs past the end simply ends the copy.
+
+	The segment length and page are treated as unsigned 32-bit values
+	and checked against the number of bytes left, so an oversized
+	length can neither slip past the truncation check nor move the
+	read position outside the input.
+*/
+static void
+pdf_copy_jbig2_segments(fz_context *ctx, fz_buffer *output, const unsigned char *data, size_t size, int page)
+{
+	struct jbig2_segment_header info;
+	const unsigned char *end = data + size;
+	size_t n, length, remain;
+	uint32_t pageno;
+	int type;
+
+	while (data < end)
+	{
+		n = pdf_parse_jbig2_segment_header(ctx, data, end, &info);
+		if (n == 0 || n > (size_t)(end - data))
+			fz_throw(ctx, FZ_ERROR_GENERIC, "truncated jbig2 segment header");
+
+		length = (uint32_t)info.length;
+		pageno = (uint32_t)info.page;
+		remain = (size_t)(end - data) - n;
+
+		/* omit end of page, end of file, and segments for other pages */
+		type = (info.flags & 63);
+		if (type == 49 || type == 51 || (pageno != 0 && pageno != (uint32_t)page))
+		{
+			/* Nothing after a truncated segment can be located. */
+			if (length > remain)
+				break;
+			data += n + length;
+		}
+		else
+		{
+			if (length > remain)
+				fz_throw(ctx, FZ_ERROR_GENERIC, "truncated jbig2 segment data");
+			fz_append_data(ctx, output, data, n);
+			fz_append_data(ctx, output, data + n, length);
+			data += n + length;
+		}
+	}
+}
+
+/*
+	Copy segments from a JBIG2 stream in random access organisation
+	(all segment headers first, then all segment data) to output,
+	writing each kept header immediately followed by its data.
+
+	data, size: the segment stream to read.
+	page: segments associated with a non-zero page other than this
+	one are dropped, as are end-of-page (type 49) and end-of-file
+	(type 51) segments.
+
+	Throws FZ_ERROR_GENERIC if a header cannot be parsed, if no data
+	follows the header table, or if a segment that is to be copied
+	claims more data than remains. A dropped segment whose data runs
+	past the end simply ends the copy.
+
+	The second pass is confined to the header table located by the
+	first pass and checks every parse result, so it can neither read
+	segment data as if it were headers nor spin on a header that
+	fails to parse.
+*/
+static void
+pdf_copy_jbig2_random_segments(fz_context *ctx, fz_buffer *output, const unsigned char *data, size_t size, int page)
+{
+	struct jbig2_segment_header info;
+	const unsigned char *start = data;
+	const unsigned char *end = data + size;
+	const unsigned char *headers_end;
+	size_t n, length;
+	uint32_t pageno;
+	int type;
+
+	/* Skip headers until end-of-file segment is found. */
+	while (data < end)
+	{
+		n = pdf_parse_jbig2_segment_header(ctx, data, end, &info);
+		if (n == 0 || n > (size_t)(end - data))
+			fz_throw(ctx, FZ_ERROR_GENERIC, "truncated jbig2 segment header");
+		data += n;
+		if ((info.flags & 63) == 51)
+			break;
+	}
+	if (data >= end)
+		fz_throw(ctx, FZ_ERROR_GENERIC, "truncated jbig2 segment header");
+
+	/* The header table ends where the segment data begins. */
+	headers_end = data;
+
+	/* Copy segment headers and segment data */
+	while (start < headers_end && data < end)
+	{
+		n = pdf_parse_jbig2_segment_header(ctx, start, headers_end, &info);
+		if (n == 0)
+			fz_throw(ctx, FZ_ERROR_GENERIC, "truncated jbig2 segment header");
+
+		length = (uint32_t)info.length;
+		pageno = (uint32_t)info.page;
+
+		/* omit end of page, end of file, and segments for other pages */
+		type = (info.flags & 63);
+		if (type == 49 || type == 51 || (pageno != 0 && pageno != (uint32_t)page))
+		{
+			start += n;
+			/* Nothing after a truncated segment can be located. */
+			if (length > (size_t)(end - data))
+				break;
+			data += length;
+		}
+		else
+		{
+			if (length > (size_t)(end - data))
+				fz_throw(ctx, FZ_ERROR_GENERIC, "truncated jbig2 segment data");
+			fz_append_data(ctx, output, start, n);
+			fz_append_data(ctx, output, data, length);
+			start += n;
+			data += length;
+		}
+	}
+}
+
+/*
+	Produce the data for a JBIG2Decode stream from a compressed JBIG2
+	image buffer.
+
+	input: the JBIG2 data, either an embedded stream or a complete
+	JBIG2 file, as indicated by embedded.
+	globals_: optional globals; their data, if any, is placed in
+	front of the image segments.
+	page: the page whose segments are kept when reading a JBIG2 file.
+
+	Returns a buffer owned by the caller: a new reference to input
+	when it is embedded data with no globals to prepend, otherwise a
+	newly created buffer. Returns NULL if a JBIG2 file is too short
+	to hold its file header. Errors thrown while copying are
+	rethrown after the partially built buffer has been dropped.
+*/
+static fz_buffer *
+pdf_jbig2_stream_from_file(fz_context *ctx, fz_buffer *input, fz_jbig2_globals *globals_, int embedded, int page)
+{
+	fz_buffer *gbuf = fz_jbig2_globals_data(ctx, globals_);
+	size_t glen = 0;
+	size_t skip = 9;
+	int flags = 0;
+	fz_buffer *out;
+
+	if (gbuf)
+		glen = gbuf->len;
+
+	if (embedded)
+	{
+		/* Embedded data with nothing to prepend can be used unchanged. */
+		if (glen == 0)
+			return fz_keep_buffer(ctx, input);
+	}
+	else
+	{
+		/* The flags byte sits at offset 8 of the file header. */
+		if (input->len < 9)
+			return NULL; /* not enough data! */
+		flags = input->data[8];
+		if (!(flags & 2))
+		{
+			/* With this flag clear the file header is 13 bytes, not 9. */
+			if (input->len < 13)
+				return NULL; /* not enough data! */
+			skip = 13;
+		}
+	}
+
+	out = fz_new_buffer(ctx, input->len + glen);
+	fz_try(ctx)
+	{
+		if (glen > 0)
+			fz_append_buffer(ctx, out, gbuf);
+
+		if (embedded)
+			fz_append_buffer(ctx, out, input);
+		else if (flags & 1)
+			pdf_copy_jbig2_segments(ctx, out, input->data + skip, input->len - skip, page);
+		else
+			pdf_copy_jbig2_random_segments(ctx, out, input->data + skip, input->len - skip, page);
+	}
+	fz_catch(ctx)
+	{
+		fz_drop_buffer(ctx, out);
+		fz_rethrow(ctx);
+	}
+
+	return out;
+}
+
pdf_obj *
pdf_add_image(fz_context *ctx, pdf_document *doc, fz_image *image)
{
@@ -341,6 +531,15 @@ pdf_add_image(fz_context *ctx, pdf_document *doc, fz_image *image)
pdf_dict_put_int(ctx, dp, PDF_NAME(SMaskInData), cp->u.jpx.smask_in_data);
pdf_dict_put(ctx, imobj, PDF_NAME(Filter), PDF_NAME(JPXDecode));
break;
+ case FZ_IMAGE_JBIG2:
+ buffer = pdf_jbig2_stream_from_file(ctx, cbuffer->buffer,
+ cp->u.jbig2.globals,
+ cp->u.jbig2.embedded,
+ 1);
+ if (!buffer)
+ goto unknown_compression;
+ pdf_dict_put(ctx, imobj, PDF_NAME(Filter), PDF_NAME(JBIG2Decode));
+ break;
case FZ_IMAGE_FAX:
if (cp->u.fax.columns)
pdf_dict_put_int(ctx, dp, PDF_NAME(Columns), cp->u.fax.columns);
@@ -396,7 +595,8 @@ pdf_add_image(fz_context *ctx, pdf_document *doc, fz_image *image)
pdf_dict_put_int(ctx, imobj, PDF_NAME(Width), image->w);
pdf_dict_put_int(ctx, imobj, PDF_NAME(Height), image->h);
- buffer = fz_keep_buffer(ctx, cbuffer->buffer);
+ if (!buffer)
+ buffer = fz_keep_buffer(ctx, cbuffer->buffer);
if (image->use_decode)
{