HTML Images

Tags: XHTML, images, XML Worker, iText 5

Some examples converting HTML to PDF involving images.

Files: 
/**
 * Example written by Bruno Lowagie in answer to the following question:
 * http://stackoverflow.com/questions/29194405/html-to-pdf-with-base64-image-throw-filenotfound
 */
package sandbox.xmlworker;
 
import com.itextpdf.text.BadElementException;
import com.itextpdf.text.Document;
import com.itextpdf.text.DocumentException;
import com.itextpdf.text.Image;
import com.itextpdf.text.pdf.PdfWriter;
import com.itextpdf.text.pdf.codec.Base64;
import com.itextpdf.tool.xml.XMLWorker;
import com.itextpdf.tool.xml.XMLWorkerHelper;
import com.itextpdf.tool.xml.html.Tags;
import com.itextpdf.tool.xml.parser.XMLParser;
import com.itextpdf.tool.xml.pipeline.css.CSSResolver;
import com.itextpdf.tool.xml.pipeline.css.CssResolverPipeline;
import com.itextpdf.tool.xml.pipeline.end.PdfWriterPipeline;
import com.itextpdf.tool.xml.pipeline.html.AbstractImageProvider;
import com.itextpdf.tool.xml.pipeline.html.HtmlPipeline;
import com.itextpdf.tool.xml.pipeline.html.HtmlPipelineContext;
 
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import sandbox.WrapToTest;
 
@WrapToTest
public class ParseHtml4 {
 
    class Base64ImageProvider extends AbstractImageProvider {
 
        @Override
        public Image retrieve(String src) {
            int pos = src.indexOf("base64,");
            try {
                if (src.startsWith("data") && pos > 0) {
                    byte[] img = Base64.decode(src.substring(pos + 7));
                    return Image.getInstance(img);
                }
                else {
                    return Image.getInstance(src);
                }
            } catch (BadElementException ex) {
                return null;
            } catch (IOException ex) {
                return null;
            }
        }
 
        @Override
        public String getImageRootPath() {
            return null;
        }
    }
 
    public static final String DEST = "results/xmlworker/html_4.pdf";
 
    public static void main(String[] args) throws IOException, DocumentException {
        File file = new File(DEST);
        file.getParentFile().mkdirs();
        new ParseHtml4().createPdf(DEST);
    }
 
    public void createPdf(String file) throws IOException, DocumentException {
        String str = "<html><head><title>Test PDF</title></head><body><div>" + 
            "<img src=\"\" />" +
            "</div><div>Hello world</div></body></html>";
 
 
                // step 1
        Document document = new Document();
        // step 2
        PdfWriter writer = PdfWriter.getInstance(document, new FileOutputStream(file));
        // step 3
        document.open();
        // step 4
 
        // CSS
        CSSResolver cssResolver =
                XMLWorkerHelper.getInstance().getDefaultCssResolver(true);
 
        // HTML
        HtmlPipelineContext htmlContext = new HtmlPipelineContext(null);
        htmlContext.setTagFactory(Tags.getHtmlTagProcessorFactory());
        htmlContext.setImageProvider(new Base64ImageProvider());
 
        // Pipelines
        PdfWriterPipeline pdf = new PdfWriterPipeline(document, writer);
        HtmlPipeline html = new HtmlPipeline(htmlContext, pdf);
        CssResolverPipeline css = new CssResolverPipeline(cssResolver, html);
 
        // XML Worker
        XMLWorker worker = new XMLWorker(css, true);
        XMLParser p = new XMLParser(worker);
        p.parse(new ByteArrayInputStream(str.getBytes()));
 
        // step 5
        document.close();
    }
}
/**
 * Example written by Bruno Lowagie in answer to the following question:
 * http://stackoverflow.com/questions/30206803/itext-html-to-pdf-for-non-english-content-images-are-very-small-in-pdf
 */
package sandbox.xmlworker;
 
import com.itextpdf.text.Document;
import com.itextpdf.text.DocumentException;
import com.itextpdf.text.pdf.PdfWriter;
import com.itextpdf.tool.xml.XMLWorkerHelper;
import sandbox.WrapToTest;
 
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.nio.charset.Charset;
 
/**
 *
 * @author iText
 */
@WrapToTest
public class ParseHtml6 {
 
    public static final String HTML = "resources/xml/brasil.html";
    public static final String DEST = "results/xmlworker/brasil.pdf";
 
    public static void main(String[] args) throws IOException, DocumentException {
        File file = new File(DEST);
        file.getParentFile().mkdirs();
        new ParseHtml6().createPdf(DEST);
    }
 
    /**
     * Creates a PDF with the words "Hello World"
     * @param file
     * @throws IOException
     * @throws DocumentException
     */
    public void createPdf(String file) throws IOException, DocumentException {
        // step 1
        Document document = new Document();
        // step 2
        PdfWriter writer = PdfWriter.getInstance(document, new FileOutputStream(file));
        // step 3
        document.open();
        // step 4
        XMLWorkerHelper.getInstance().parseXHtml(writer, document,
                new FileInputStream(HTML), Charset.forName("cp1252"));
        // step 5
        document.close();
    }
}
File name | Raw URL | Updated
ParseHtml4.java | ParseHtml4.java | 2015-11-08 7:40 pm
ParseHtml6.java | ParseHtml6.java | 2015-11-08 7:40 pm
Resources: 
<html>
<body>
<div><b>Brasil</b>, oficialmente República Federativa do Brasil, é o maior país da América do Sul
e da região da América Latina, sendo o quinto maior do mundo em área territorial (equivalente
a 47% do território sul-americano) e população (com mais de 202 milhões de habitantes).
É o único país na América onde se fala majoritariamente a língua portuguesa e o maior país lusófono
do planeta, além de ser uma das nações mais multiculturais e etnicamente diversas,
em decorrência da forte imigração oriunda de variados cantos do mundo.</div>
<img src="resources/images/brasil.png" />
</body>
</html>
File name | Raw URL | Updated
brasil.html | brasil.html | 2015-11-08 7:42 pm
brasil.png | brasil.png | 2015-11-08 7:44 pm
Results: 
File name | Raw URL | Updated
cmp_html_4.pdf | cmp_html_4.pdf | 2015-11-08 7:44 pm
cmp_brasil.pdf | cmp_brasil.pdf | 2015-11-08 7:44 pm