Blog Archive

Convert HTML to PDF using iText XMLWorker

iText is a free and open-source library for creating and manipulating PDF files in Java. XML Worker is an add-on for iText that lets developers convert XML files to PDF documents in a programmer-friendly way. In this example we load the contents of a web page from its URL and convert them to a PDF. You can also convert HTML files residing in your file system to PDF. XML Worker uses HTML TagProcessors in the HtmlPipeline to convert HTML to PDF.


The default configuration uses the following settings:
  • XML Worker will look for CSS styles in the head tag (external or internal), and for styles in individual tags (e.g. style="margin:15px"). If no styles are defined, the default CSS of Firefox 4 is applied.
  • XML Worker will automatically create bookmarks for header tags h1 to h6
  • XML Worker will only add pictures to the document if they are defined using a fully qualified URL.
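Add the iText core jar and the XML Worker jar (the libraries that provide the com.itextpdf.text and com.itextpdf.tool.xml packages) to the project classpath: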
Convert HTML to PDF using iText XMLWorker

Source Code

package com.as400samplecode;

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URL;
import java.nio.charset.StandardCharsets;

import com.itextpdf.text.Document;
import com.itextpdf.text.DocumentException;
import com.itextpdf.text.PageSize;
import com.itextpdf.text.pdf.PdfWriter;
import com.itextpdf.tool.xml.XMLWorkerHelper;

/**
 * Command-line sample that downloads an HTML page and renders it into a PDF
 * file with iText's XML Worker.
 *
 * <p>Usage: {@code java com.as400samplecode.ConvertHTMLToPDF PDF_Filename}.
 * The PDF is written to {@code docs/PDF_Filename}, resolved against the
 * current working directory.
 */
public class ConvertHTMLToPDF {

 /** Directory prefix prepended to the PDF file name given on the command line. */
 private static final String OUTPUT_DIR = "docs/";

 /** Address of the web page whose HTML is converted. */
 private static final String SOURCE_URL = "http://demo.mysamplecode.com/";

 /**
  * Program entry point.
  *
  * <p>Prints a usage message and exits with status 1 when the file name
  * argument is missing or blank; otherwise converts the page to
  * {@code docs/<file name>}.
  *
  * @param args command-line arguments; {@code args[0]} is the PDF file name
  */
 public static void main(String[] args) {

  // A blank name would make the output path point at the directory itself,
  // so treat it exactly like a missing argument.
  String pdfFilename = (args.length < 1) ? "" : args[0].trim();
  if (pdfFilename.isEmpty()) {
   System.err.println("Usage: java " + ConvertHTMLToPDF.class.getName() +
     " PDF_Filename");
   System.exit(1);
  }

  new ConvertHTMLToPDF().createPDF(pdfFilename);

 }

 /**
  * Fetches the page at {@link #SOURCE_URL} and writes it as a PDF to
  * {@code docs/pdfFilename}.
  *
  * <p>Failures (unreachable URL, unwritable output file, conversion errors)
  * are reported on standard error and not rethrown. Both the network stream
  * and the output file are released on every path.
  *
  * @param pdfFilename name of the PDF file to create inside {@code docs/}
  */
 private void createPDF (String pdfFilename){

  //path for the PDF file to be generated
  String path = OUTPUT_DIR + pdfFilename;

  //create a new document
  Document document = new Document();

  // To convert an HTML file from the file system instead, wrap a
  // FileInputStream (for example on "docs/SamplePDF.html") in the reader.
  //
  // The page is fetched before the output file is opened, so a download
  // failure does not leave an empty PDF behind.
  // NOTE(review): the page is assumed to be UTF-8 encoded; without an
  // explicit charset the decoding would depend on the platform default.
  try (InputStreamReader htmlReader = new InputStreamReader(
      new URL(SOURCE_URL).openStream(), StandardCharsets.UTF_8);
    FileOutputStream pdfOut = new FileOutputStream(path)) {

   //get Instance of the PDFWriter
   PdfWriter pdfWriter = PdfWriter.getInstance(document, pdfOut);

   //document header attributes
   document.addAuthor("betterThanZero");
   document.addCreationDate();
   document.addProducer();
   document.addCreator("MySampleCode.com");
   document.addTitle("Demo for iText XMLWorker");
   document.setPageSize(PageSize.LETTER);

   //open document
   document.open();

   //convert the HTML to PDF content
   XMLWorkerHelper.getInstance().parseXHtml(pdfWriter, document, htmlReader);

   //finish the PDF before the underlying streams are released
   document.close();
   pdfWriter.close();

  } catch (IOException | DocumentException e) {
   // FileNotFoundException is an IOException and is reported here as well.
   System.err.println("Could not create PDF " + path + " from " + SOURCE_URL);
   e.printStackTrace();
  }

 }

}

References