pdfHTML: Accessible PDF Creation

Tags: pdfHtml, XMLWorker, XML Worker, iText7, HTML to PDF, parsing HTML

A simple example showcasing the creation of a Tagged PDF with pdfHTML, including the configuration of the metadata that is necessary to obtain an Accessible PDF.

Files: 
/*
    This file is part of the iText (R) project.
    Copyright (c) 1998-2017 iText Group NV
    Authors: iText Software.
 
    For more information, please contact iText Software at this address:
    sales@itextpdf.com
 */
package com.itextpdf.samples.pdfHTML.AccessiblePDF;
 
import com.itextpdf.html2pdf.ConverterProperties;
import com.itextpdf.html2pdf.HtmlConverter;
import com.itextpdf.html2pdf.attach.impl.DefaultTagWorkerFactory;
import com.itextpdf.kernel.pdf.PdfDocument;
import com.itextpdf.kernel.pdf.PdfDocumentInfo;
import com.itextpdf.kernel.pdf.PdfName;
import com.itextpdf.kernel.pdf.PdfString;
import com.itextpdf.kernel.pdf.PdfViewerPreferences;
import com.itextpdf.kernel.pdf.PdfWriter;
import com.itextpdf.kernel.pdf.WriterProperties;
import com.itextpdf.layout.font.FontProvider;
import com.itextpdf.licensekey.LicenseKey;
import com.itextpdf.samples.pdfHTML.AccessiblePDF.HeaderTagging.AccessibilityTagWorkerFactory;
 
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
 
 
public class CreateAccessiblePDF {
 
    public static final String sourceFolder = "src/test/resources/pdfHTML/AccessiblePDF/";
    public static final String destinationFolder = "target/output/pdfHTML/AccessiblePDF/";
    public static final String[] files = {"Accessibility"};
    //License key path
    public static final String LICENSE = "src/test/resources/pdfHTML/itextkey-html2pdf_typography.xml";
 
 
    public static void main(String[] args) throws IOException, InterruptedException {
        LicenseKey.loadLicenseFile(LICENSE);
        for (String name : files) {
            String htmlSource = sourceFolder + name  + ".html";
            String resourceFolder = sourceFolder;
            String pdfDest = destinationFolder + name + ".pdf";
            File file = new File(pdfDest);
 
            System.out.println("Parsing: " + htmlSource);
            file.getParentFile().mkdirs();
 
            new CreateAccessiblePDF().createPdf(htmlSource,pdfDest,resourceFolder);
        }
    }
 
    public void createPdf(String src, String dest, String resources) throws IOException {
        try {
            FileOutputStream outputStream = new FileOutputStream(dest);
 
            WriterProperties writerProperties = new WriterProperties();
            //Add metadata
            writerProperties.addXmpMetadata();
 
            PdfWriter pdfWriter = new PdfWriter(outputStream, writerProperties);
 
            PdfDocument pdfDoc = new PdfDocument(pdfWriter);
            pdfDoc.getCatalog().setLang(new PdfString("en-US"));
            //Set the document to be tagged
            pdfDoc.setTagged();
            pdfDoc.getCatalog().setViewerPreferences(new PdfViewerPreferences().setDisplayDocTitle(true));
 
            //Set meta tags
            PdfDocumentInfo pdfMetaData = pdfDoc.getDocumentInfo();
            pdfMetaData.setAuthor("Samuel Huylebroeck");
            pdfMetaData.addCreationDate();
            pdfMetaData.getProducer();
            pdfMetaData.setCreator("iText Software");
            pdfMetaData.setKeywords("example, accessibility");
            pdfMetaData.setSubject("PDF accessibility");
            //Title is derived from html
 
            // pdf conversion
            ConverterProperties props = new ConverterProperties();
            FontProvider fp = new FontProvider();
            fp.addStandardPdfFonts();
            fp.addDirectory(resources);//The noto-nashk font file (.ttf extension) is placed in the resources
 
            props.setFontProvider(fp);
            props.setBaseUri(resources);
            //Setup custom tagworker factory for better tagging of headers
            DefaultTagWorkerFactory tagWorkerFactory = new AccessibilityTagWorkerFactory();
            props.setTagWorkerFactory(tagWorkerFactory);
 
            HtmlConverter.convertToPdf(new FileInputStream(src), pdfDoc, props);
            pdfDoc.close();
 
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
/*
    This file is part of the iText (R) project.
    Copyright (c) 1998-2017 iText Group NV
    Authors: iText Software.
 
    For more information, please contact iText Software at this address:
    sales@itextpdf.com
 */
package com.itextpdf.samples.pdfHTML.AccessiblePDF.HeaderTagging;
 
import com.itextpdf.html2pdf.attach.ITagWorker;
import com.itextpdf.html2pdf.attach.ProcessorContext;
import com.itextpdf.html2pdf.attach.impl.DefaultTagWorkerFactory;
import com.itextpdf.html2pdf.html.node.IElementNode;
 
public class AccessibilityTagWorkerFactory extends DefaultTagWorkerFactory {
 
    @Override
    public ITagWorker getCustomTagWorker(IElementNode tag, ProcessorContext context) {
        //This can probably replaced with a regex or string pattern
        if(tag.name().equals("h1")){
            return new HeaderTagWorker(tag, context,1);
        }
        if(tag.name().equals("h2")){
            return new HeaderTagWorker(tag, context,2);
        }
        if(tag.name().equals("h3")){
            return new HeaderTagWorker(tag, context,3);
        }
        if(tag.name().equals("h4")){
            return new HeaderTagWorker(tag, context,4);
        }
        if(tag.name().equals("h5")){
            return new HeaderTagWorker(tag, context,5);
        }
        if(tag.name().equals("h6")){
            return new HeaderTagWorker(tag, context,6);
        }
 
        if(tag.name().equals("th")){
            return new TableHeaderTagWorker(tag,context);
        }
 
        return null;
    }
}
/*
    This file is part of the iText (R) project.
    Copyright (c) 1998-2017 iText Group NV
    Authors: iText Software.
 
    For more information, please contact iText Software at this address:
    sales@itextpdf.com
 */
package com.itextpdf.samples.pdfHTML.AccessiblePDF.HeaderTagging;
 
import com.itextpdf.html2pdf.attach.ProcessorContext;
import com.itextpdf.html2pdf.attach.impl.tags.DivTagWorker;
import com.itextpdf.html2pdf.html.node.IElementNode;
import com.itextpdf.kernel.pdf.PdfName;
import com.itextpdf.layout.IPropertyContainer;
import com.itextpdf.layout.element.Div;
 
 
public class HeaderTagWorker extends DivTagWorker {
    private int i;
    public HeaderTagWorker(IElementNode element, ProcessorContext context, int i) {
        super(element, context);
        this.i = i;
    }
 
    @Override
    public IPropertyContainer getElementResult() {
        Div div =(Div) super.getElementResult();
        div.setRole(new PdfName("H"+i));
        return super.getElementResult();
    }
}
/*
    This file is part of the iText (R) project.
    Copyright (c) 1998-2017 iText Group NV
    Authors: iText Software.
 
    For more information, please contact iText Software at this address:
    sales@itextpdf.com
 */
package com.itextpdf.samples.pdfHTML.AccessiblePDF.HeaderTagging;
 
import com.itextpdf.html2pdf.attach.ProcessorContext;
import com.itextpdf.html2pdf.attach.impl.tags.TdTagWorker;
import com.itextpdf.html2pdf.html.node.IElementNode;
import com.itextpdf.kernel.pdf.PdfName;
import com.itextpdf.layout.IPropertyContainer;
import com.itextpdf.layout.element.Cell;
 
public class TableHeaderTagWorker extends TdTagWorker {
    public TableHeaderTagWorker(IElementNode element, ProcessorContext context) {
        super(element, context);
    }
 
    @Override
    public IPropertyContainer getElementResult() {
        Cell cell =(Cell) super.getElementResult();
        cell.setRole(PdfName.TH);
        return super.getElementResult();
    }
}
Resources: 
File name | Raw URL | Updated
Accessibility.html | Accessibility.html | 2017-06-07 10:56 am
accessibility.css | accessibility.css | 2017-06-07 10:56 am
dog.bmp | dog.bmp | 2017-06-07 10:56 am
fox.bmp | fox.bmp | 2017-06-07 10:56 am
Results: 
File name | Raw URL | Updated
Accessibility.pdf | Accessibility.pdf | 2017-06-07 12:13 pm