Extracting objects from a PDF

Tags: inspect PDF, stream object, rich media, extract content, iText 7

Example written in answer to the question "How to extract embedded streams?"

/*
 * This file is part of the iText (R) project.
 * Copyright (c) 1998-2016 iText Group NV
 */

/**
 * Example written by Bruno Lowagie in answer to the following question:
 * http://stackoverflow.com/questions/30286601/extracting-an-embedded-object-from-a-pdf
 */
package com.itextpdf.samples.sandbox.parse;
import com.itextpdf.kernel.PdfException;
import com.itextpdf.kernel.pdf.PdfDocument;
import com.itextpdf.kernel.pdf.PdfObject;
import com.itextpdf.kernel.pdf.PdfReader;
import com.itextpdf.kernel.pdf.PdfStream;
import com.itextpdf.test.annotations.type.SampleTest;
import org.junit.Assert;
import org.junit.BeforeClass;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
public class ExtractStreams {
    public static final String DEST = "./target/test/resources/sandbox/parse/extract_streams%s";
    public static final String SRC = "./src/test/resources/pdfs/image.pdf";
    public static void before() {
        new File(DEST).getParentFile().mkdirs();
    public static void main(String[] args) throws IOException {
        new ExtractStreams().manipulatePdf();
    public void manipulatePdf() throws IOException {
        PdfDocument pdfDoc = new PdfDocument(new PdfReader(SRC));
        PdfObject obj;
        List<Integer> streamLengths = new ArrayList<>();
        for (int i = 1; i <= pdfDoc.getNumberOfPdfObjects(); i++) {
            obj = pdfDoc.getPdfObject(i);
            if (obj != null && obj.isStream()) {
                byte[] b;
                try {
                    b = ((PdfStream) obj).getBytes();
                } catch (PdfException exc) {
                    b = ((PdfStream)obj).getBytes(false);
                FileOutputStream fos = new FileOutputStream(String.format(DEST, i));
        Assert.assertArrayEquals(new Integer[] {30965, 74}, streamLengths.toArray(new Integer[streamLengths.size()]));
File name | Raw URL | Updated
ExtractStreams.java | ExtractStreams.java | 2016-08-09 10:18 am
File name | Raw URL | Updated
image.pdf | image.pdf | 2016-08-08 1:24 pm