Thursday, January 16, 2014

PDF to TIFF in Java

Recently I came across some code to convert PDF to TIFF. I restructured it into a nice, clean utility class and provided some overloads for the main convert method.
Requires:

  • pdfbox-app-1.8.3.jar
  • jai_imageio-1.1.jar
  • log4j-1.2.16.jar
import java.awt.image.BufferedImage;
import java.io.File;
import java.io.IOException;
import java.util.List;

import javax.imageio.IIOImage;
import javax.imageio.ImageIO;
import javax.imageio.ImageTypeSpecifier;
import javax.imageio.ImageWriteParam;
import javax.imageio.ImageWriter;
import javax.imageio.metadata.IIOInvalidTreeException;
import javax.imageio.metadata.IIOMetadata;
import javax.imageio.stream.ImageOutputStream;

import org.apache.log4j.Logger;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;

import com.sun.media.imageio.plugins.tiff.BaselineTIFFTagSet;
import com.sun.media.imageio.plugins.tiff.TIFFDirectory;
import com.sun.media.imageio.plugins.tiff.TIFFField;
import com.sun.media.imageio.plugins.tiff.TIFFTag;
import com.sun.media.imageioimpl.plugins.tiff.TIFFImageWriterSpi;

public class PDFToTiff
{
   private static final Logger log = Logger.getLogger(PDFToTiff.class);
   
   public static void main(String[] args) throws IOException
   {
   
      try
      {
         //usage
         new PDFToTiff().convert("data/sample.pdf", null  , null );
         new PDFToTiff().convert("data/sample.pdf", "out/"    );
         new PDFToTiff().convert("data/sample.pdf", "out/" , "TestPrefix"   );
    
       
      }
      catch(Exception e)
      {
         // TODO Auto-generated catch block
         e.printStackTrace();
      }
   }
  
   /***
    * Converts pdfFile to Tiff images and places them in the same folder.
    * Tiff files will use same name of the input pdffile and a suffix of the page number
    * @param pdfFilePath
    * @throws Exception
    */
   public void convert(String pdfFilePath) throws Exception
   {
      convert(pdfFilePath,null,null);
   }
  
   /**
    * Converts pdfFile to Tiff images and places them in the output folder or the same folder if outFolder is null.
    * Tiff files will use same name of the input pdffile name and a suffix of the page number
    * @param pdfFilePath
    * @param outFolder
    * @throws Exception
    */
   public void convert(String pdfFilePath, String outFolder) throws Exception
   {
      convert(pdfFilePath,outFolder,null);
   }
   /***
    * Converts pdfFile to Tiff images and places them in the output folder or the same folder if outFolder is null.
    * Tiff file names will use outFilePrefix + page number or input pdffile name + page number if outFilePrefix is null
    * @param pdfFile
    * @param outFolder if null uses same location as pdfFile
    * @param outFilePrefix if null uses the name of the pdfFile
    * @throws Exception
    */
   public void convert(String pdfFilePath, String outFolder, String outFilePrefix) throws Exception
   {
      File pdfFile = new File(pdfFilePath);
      File outputFolder = outFolder==null? pdfFile.getParentFile() : new File (outFolder);
      if (!pdfFile.exists())
         throw new Exception("File {"+pdfFilePath+"} not found!");
   
      if (!outputFolder.exists())
      {
         log.debug("Creating output Folder " + outputFolder.getAbsolutePath());
         if (!outputFolder.mkdirs())
            throw new Exception("Could not created folder : " + outFolder);
      }
     
      if (outFilePrefix==null)
      {
         outFilePrefix = pdfFile.getName();
         //exclude the extension;
         if (outFilePrefix.endsWith(".pdf"))
            outFilePrefix = outFilePrefix.substring(0,outFilePrefix.lastIndexOf("."));
      }
      log.debug("Converting " + pdfFile.getName() + " to Tiff in " + outputFolder.getAbsolutePath() + " using prefix: " + outFilePrefix  );
     
      PDDocument document = null;
      int resolution = 300;
      try {
         // Set main options
         int imageType = BufferedImage.TYPE_BYTE_BINARY;
    
         int maxPage = Integer.MAX_VALUE;
     
         // Create TIFF writer
         ImageWriter writer = null;
         ImageWriteParam writerParams = null;
         try {
             TIFFImageWriterSpi tiffspi = new TIFFImageWriterSpi();
             writer = tiffspi.createWriterInstance();
             writerParams = writer.getDefaultWriteParam();
             writerParams.setCompressionMode(ImageWriteParam.MODE_EXPLICIT);
             writerParams.setCompressionType("CCITT T.6");
             writerParams.setCompressionQuality(1.0f);
         } catch (Exception ex) {
             throw new Exception("Could not load the TIFF writer", ex);
         }
     
         // Create metadata for Java IO ImageWriter
         IIOMetadata metadata = createMetadata(writer, writerParams, resolution);
     
         // Load the pdf file
         document = PDDocument.load(pdfFile);
         @SuppressWarnings("unchecked")
         List pages = (List) document.getDocumentCatalog().getAllPages();
         // Loop over the pages
         for (int i = 0; i < pages.size() && i <= maxPage; i++) {
             // Write page to image file
             File outputFile = null;
             ImageOutputStream ios = null;
             try {
                 outputFile = new File(outputFolder.getAbsolutePath()  + File.separator  +  outFilePrefix  + "_" + (i+1) + ".tiff");
                 ios = ImageIO.createImageOutputStream(outputFile);
                 writer.setOutput(ios);
                 writer.write(null, new IIOImage(pages.get(i).convertToImage(imageType, resolution), null, metadata), writerParams);
             } catch (Exception ex) {
                 throw new Exception("Could not write the TIFF file", ex);
             } finally {
                 ios.close();
             }
     
             // do something with TIF file
         }
         log.debug("Successfully converted " + pages.size() + " to Tiff.");
     } catch (Exception ex) {
         throw new Exception("Error while converting PDF to TIFF", ex);
     } finally {
         try {
             if (document != null) {
                 document.close();
             }
         } catch (Exception ex) {
             log.error("Error while closing PDF document", ex);
         }
     }
   }
  
  
   private IIOMetadata createMetadata(ImageWriter writer, ImageWriteParam writerParams, int resolution) throws IIOInvalidTreeException {
      // Get default metadata from writer
      ImageTypeSpecifier type = writerParams.getDestinationType();
      IIOMetadata meta = writer.getDefaultImageMetadata(type, writerParams);
  
      // Convert default metadata to TIFF metadata
      TIFFDirectory dir = TIFFDirectory.createFromMetadata(meta);
  
      // Get {X,Y} resolution tags
      BaselineTIFFTagSet base = BaselineTIFFTagSet.getInstance();
      TIFFTag tagXRes = base.getTag(BaselineTIFFTagSet.TAG_X_RESOLUTION);
      TIFFTag tagYRes = base.getTag(BaselineTIFFTagSet.TAG_Y_RESOLUTION);
  
      // Create {X,Y} resolution fields
      TIFFField fieldXRes = new TIFFField(tagXRes, TIFFTag.TIFF_RATIONAL, 1, new long[][] { { resolution, 1 } });
      TIFFField fieldYRes = new TIFFField(tagYRes, TIFFTag.TIFF_RATIONAL, 1, new long[][] { { resolution, 1 } });
  
      // Add {X,Y} resolution fields to TIFFDirectory
      dir.addTIFFField(fieldXRes);
      dir.addTIFFField(fieldYRes);
  
      // Return TIFF metadata so it can be picked up by the IIOImage
      return dir.getAsMetadata();
  }
  
  
}


No comments:

Post a Comment