java word to pdf converter
Posted: Mon Feb 05, 2018 6:37 pm
I am using docx4j to convert Word to PDF. My Word document contains Arabic and English characters, but in the output PDF the Arabic characters are messed up.
Why?
I have attached the Word document and the PDF document text.
While loading some other Word files (Arabic + English + Chinese), these exceptions are thrown:
Exceptions:
Exception in thread "main" org.docx4j.openpackaging.exceptions.Docx4JException: Exception exporting package
at org.docx4j.convert.out.common.AbstractExporter.export(AbstractExporter.java:109)
at org.docx4j.Docx4J.toFO(Docx4J.java:597)
at convertoutpdfviaxslfo.Conversion1.createPDF(Conversion1.java:55)
at convertoutpdfviaxslfo.ExampleFO2PDF.main(ExampleFO2PDF.java:19)
Caused by: org.docx4j.openpackaging.exceptions.Docx4JException: Exception writing Document to OutputStream: For TrueType collection you must specify which font to select (-ttcname)
at org.docx4j.utils.XmlSerializerUtil.serialize(XmlSerializerUtil.java:50)
at org.docx4j.utils.XmlSerializerUtil.serialize(XmlSerializerUtil.java:14)
at org.docx4j.convert.out.fo.renderers.FORendererApacheFOP.render(FORendererApacheFOP.java:209)
at org.docx4j.convert.out.fo.renderers.FORendererApacheFOP.render(FORendererApacheFOP.java:159)
at org.docx4j.convert.out.fo.AbstractFOExporter.postprocess(AbstractFOExporter.java:168)
at org.docx4j.convert.out.fo.AbstractFOExporter.postprocess(AbstractFOExporter.java:47)
at org.docx4j.convert.out.common.AbstractExporter.export(AbstractExporter.java:82)
... 3 more
Caused by: java.lang.IllegalArgumentException: For TrueType collection you must specify which font to select (-ttcname)
at org.apache.fop.fonts.truetype.TTFFile.readFont(TTFFile.java:476)
at org.apache.fop.fonts.truetype.TTFFontLoader.read(TTFFontLoader.java:58)
at org.apache.fop.fonts.FontLoader.getFont(FontLoader.java:164)
at org.apache.fop.fonts.FontLoader.loadFont(FontLoader.java:113)
at org.apache.fop.fonts.LazyFont.load(LazyFont.java:126)
at org.apache.fop.fonts.LazyFont.getAscender(LazyFont.java:233)
at org.apache.fop.fonts.Font.getAscender(Font.java:96)
at org.apache.fop.layoutmgr.inline.AlignmentContext.<init>(AlignmentContext.java:127)
at org.apache.fop.layoutmgr.inline.InlineLayoutManager.getNextKnuthElements(InlineLayoutManager.java:255)
at org.apache.fop.layoutmgr.inline.InlineLayoutManager.getNextKnuthElements(InlineLayoutManager.java:315)
at org.apache.fop.layoutmgr.inline.LineLayoutManager.collectInlineKnuthElements(LineLayoutManager.java:658)
at org.apache.fop.layoutmgr.inline.LineLayoutManager.getNextKnuthElements(LineLayoutManager.java:594)
at org.apache.fop.layoutmgr.BlockStackingLayoutManager.getNextKnuthElements(BlockStackingLayoutManager.java:294)
at org.apache.fop.layoutmgr.BlockLayoutManager.getNextKnuthElements(BlockLayoutManager.java:116)
at org.apache.fop.layoutmgr.FlowLayoutManager.getNextKnuthElements(FlowLayoutManager.java:107)
at org.apache.fop.layoutmgr.PageBreaker.getNextKnuthElements(PageBreaker.java:145)
at org.apache.fop.layoutmgr.AbstractBreaker.getNextBlockList(AbstractBreaker.java:552)
at org.apache.fop.layoutmgr.PageBreaker.getNextBlockList(PageBreaker.java:137)
at org.apache.fop.layoutmgr.AbstractBreaker.doLayout(AbstractBreaker.java:302)
at org.apache.fop.layoutmgr.AbstractBreaker.doLayout(AbstractBreaker.java:264)
at org.apache.fop.layoutmgr.PageSequenceLayoutManager.activateLayout(PageSequenceLayoutManager.java:106)
at org.apache.fop.area.AreaTreeHandler.endPageSequence(AreaTreeHandler.java:234)
at org.apache.fop.fo.pagination.PageSequence.endOfNode(PageSequence.java:123)
at org.apache.fop.fo.FOTreeBuilder$MainFOHandler.endElement(FOTreeBuilder.java:340)
at org.apache.fop.fo.FOTreeBuilder.endElement(FOTreeBuilder.java:169)
at org.docx4j.org.apache.xalan.transformer.TransformerIdentityImpl.endElement(TransformerIdentityImpl.java:1106)
at com.sun.org.apache.xerces.internal.parsers.AbstractSAXParser.endElement(AbstractSAXParser.java:609)
at com.sun.org.apache.xerces.internal.impl.XMLDocumentFragmentScannerImpl.scanEndElement(XMLDocumentFragmentScannerImpl.java:1782)
at com.sun.org.apache.xerces.internal.impl.XMLDocumentFragmentScannerImpl$FragmentContentDriver.next(XMLDocumentFragmentScannerImpl.java:2973)
at com.sun.org.apache.xerces.internal.impl.XMLDocumentScannerImpl.next(XMLDocumentScannerImpl.java:606)
at com.sun.org.apache.xerces.internal.impl.XMLNSDocumentScannerImpl.next(XMLNSDocumentScannerImpl.java:117)
at com.sun.org.apache.xerces.internal.impl.XMLDocumentFragmentScannerImpl.scanDocument(XMLDocumentFragmentScannerImpl.java:510)
at com.sun.org.apache.xerces.internal.parsers.XML11Configuration.parse(XML11Configuration.java:848)
at com.sun.org.apache.xerces.internal.parsers.XML11Configuration.parse(XML11Configuration.java:777)
at com.sun.org.apache.xerces.internal.parsers.XMLParser.parse(XMLParser.java:141)
at com.sun.org.apache.xerces.internal.parsers.AbstractSAXParser.parse(AbstractSAXParser.java:1213)
at org.docx4j.org.apache.xalan.transformer.TransformerIdentityImpl.transform(TransformerIdentityImpl.java:489)
at org.docx4j.utils.XmlSerializerUtil.serialize(XmlSerializerUtil.java:47)
... 9 more
Why?
I have attached the Word document and the PDF document text.
- Code: Select all
/**
 * Converts {@code Documents\Sampledoc.docx} to {@code Documents\output.pdf}
 * using docx4j's XSL-FO export.
 *
 * <p>Steps: load the document, discover the physical fonts and print each
 * one to standard output, map the document font "Arial" to the physical font
 * "Arial Unicode MS" (when that font was discovered), then run the export.
 *
 * <p>Both file streams are opened in try-with-resources blocks so they are
 * closed even when loading or exporting throws.
 *
 * @throws FileNotFoundException if the input file cannot be opened or the
 *                               output file cannot be created
 * @throws Docx4JException       if docx4j fails to load or export the package
 * @throws Exception             for any other failure (e.g. font discovery or
 *                               closing a stream)
 */
void createPDF() throws FileNotFoundException, Docx4JException, Exception {
    FOSettings foSettings = Docx4J.createFOSettings();

    // Scope the input stream to the load call so it is always closed.
    WordprocessingMLPackage wordMLPackage;
    try (InputStream is = new FileInputStream(new File("Documents\\Sampledoc.docx"))) {
        wordMLPackage = WordprocessingMLPackage.load(is);
    }

    // Print all available physical fonts
    PhysicalFonts.discoverPhysicalFonts();
    Map<String, PhysicalFont> physicalFonts = PhysicalFonts.getPhysicalFonts();
    for (Entry<String, PhysicalFont> available : physicalFonts.entrySet()) {
        System.out.println("Key is " + available.getKey() + ";; Name " + available.getValue().getName());
    }

    Mapper fontMapper = new IdentityPlusMapper();
    PhysicalFont font = PhysicalFonts.get("Arial Unicode MS");
    if (font != null) {
        fontMapper.put("Arial", font);
    } else {
        // The lookup yields null when the font was not discovered on this
        // machine; skip the override instead of registering a null mapping.
        System.err.println("Physical font 'Arial Unicode MS' not found; leaving 'Arial' mapping unchanged");
    }
    wordMLPackage.setFontMapper(fontMapper);
    foSettings.setWmlPackage(wordMLPackage);

    // Scope the output stream to the export so the PDF is flushed and closed
    // whether or not the export succeeds.
    try (OutputStream pdfOutputStream = new FileOutputStream("Documents\\output.pdf")) {
        System.out.println(foSettings.getSettings());
        Docx4J.toFO(foSettings, pdfOutputStream, Docx4J.FLAG_EXPORT_PREFER_XSL);
    }
    System.out.println(" Done !!!!");
}
While loading some other Word files (Arabic + English + Chinese), these exceptions are thrown:
Exceptions:
Exception in thread "main" org.docx4j.openpackaging.exceptions.Docx4JException: Exception exporting package
at org.docx4j.convert.out.common.AbstractExporter.export(AbstractExporter.java:109)
at org.docx4j.Docx4J.toFO(Docx4J.java:597)
at convertoutpdfviaxslfo.Conversion1.createPDF(Conversion1.java:55)
at convertoutpdfviaxslfo.ExampleFO2PDF.main(ExampleFO2PDF.java:19)
Caused by: org.docx4j.openpackaging.exceptions.Docx4JException: Exception writing Document to OutputStream: For TrueType collection you must specify which font to select (-ttcname)
at org.docx4j.utils.XmlSerializerUtil.serialize(XmlSerializerUtil.java:50)
at org.docx4j.utils.XmlSerializerUtil.serialize(XmlSerializerUtil.java:14)
at org.docx4j.convert.out.fo.renderers.FORendererApacheFOP.render(FORendererApacheFOP.java:209)
at org.docx4j.convert.out.fo.renderers.FORendererApacheFOP.render(FORendererApacheFOP.java:159)
at org.docx4j.convert.out.fo.AbstractFOExporter.postprocess(AbstractFOExporter.java:168)
at org.docx4j.convert.out.fo.AbstractFOExporter.postprocess(AbstractFOExporter.java:47)
at org.docx4j.convert.out.common.AbstractExporter.export(AbstractExporter.java:82)
... 3 more
Caused by: java.lang.IllegalArgumentException: For TrueType collection you must specify which font to select (-ttcname)
at org.apache.fop.fonts.truetype.TTFFile.readFont(TTFFile.java:476)
at org.apache.fop.fonts.truetype.TTFFontLoader.read(TTFFontLoader.java:58)
at org.apache.fop.fonts.FontLoader.getFont(FontLoader.java:164)
at org.apache.fop.fonts.FontLoader.loadFont(FontLoader.java:113)
at org.apache.fop.fonts.LazyFont.load(LazyFont.java:126)
at org.apache.fop.fonts.LazyFont.getAscender(LazyFont.java:233)
at org.apache.fop.fonts.Font.getAscender(Font.java:96)
at org.apache.fop.layoutmgr.inline.AlignmentContext.<init>(AlignmentContext.java:127)
at org.apache.fop.layoutmgr.inline.InlineLayoutManager.getNextKnuthElements(InlineLayoutManager.java:255)
at org.apache.fop.layoutmgr.inline.InlineLayoutManager.getNextKnuthElements(InlineLayoutManager.java:315)
at org.apache.fop.layoutmgr.inline.LineLayoutManager.collectInlineKnuthElements(LineLayoutManager.java:658)
at org.apache.fop.layoutmgr.inline.LineLayoutManager.getNextKnuthElements(LineLayoutManager.java:594)
at org.apache.fop.layoutmgr.BlockStackingLayoutManager.getNextKnuthElements(BlockStackingLayoutManager.java:294)
at org.apache.fop.layoutmgr.BlockLayoutManager.getNextKnuthElements(BlockLayoutManager.java:116)
at org.apache.fop.layoutmgr.FlowLayoutManager.getNextKnuthElements(FlowLayoutManager.java:107)
at org.apache.fop.layoutmgr.PageBreaker.getNextKnuthElements(PageBreaker.java:145)
at org.apache.fop.layoutmgr.AbstractBreaker.getNextBlockList(AbstractBreaker.java:552)
at org.apache.fop.layoutmgr.PageBreaker.getNextBlockList(PageBreaker.java:137)
at org.apache.fop.layoutmgr.AbstractBreaker.doLayout(AbstractBreaker.java:302)
at org.apache.fop.layoutmgr.AbstractBreaker.doLayout(AbstractBreaker.java:264)
at org.apache.fop.layoutmgr.PageSequenceLayoutManager.activateLayout(PageSequenceLayoutManager.java:106)
at org.apache.fop.area.AreaTreeHandler.endPageSequence(AreaTreeHandler.java:234)
at org.apache.fop.fo.pagination.PageSequence.endOfNode(PageSequence.java:123)
at org.apache.fop.fo.FOTreeBuilder$MainFOHandler.endElement(FOTreeBuilder.java:340)
at org.apache.fop.fo.FOTreeBuilder.endElement(FOTreeBuilder.java:169)
at org.docx4j.org.apache.xalan.transformer.TransformerIdentityImpl.endElement(TransformerIdentityImpl.java:1106)
at com.sun.org.apache.xerces.internal.parsers.AbstractSAXParser.endElement(AbstractSAXParser.java:609)
at com.sun.org.apache.xerces.internal.impl.XMLDocumentFragmentScannerImpl.scanEndElement(XMLDocumentFragmentScannerImpl.java:1782)
at com.sun.org.apache.xerces.internal.impl.XMLDocumentFragmentScannerImpl$FragmentContentDriver.next(XMLDocumentFragmentScannerImpl.java:2973)
at com.sun.org.apache.xerces.internal.impl.XMLDocumentScannerImpl.next(XMLDocumentScannerImpl.java:606)
at com.sun.org.apache.xerces.internal.impl.XMLNSDocumentScannerImpl.next(XMLNSDocumentScannerImpl.java:117)
at com.sun.org.apache.xerces.internal.impl.XMLDocumentFragmentScannerImpl.scanDocument(XMLDocumentFragmentScannerImpl.java:510)
at com.sun.org.apache.xerces.internal.parsers.XML11Configuration.parse(XML11Configuration.java:848)
at com.sun.org.apache.xerces.internal.parsers.XML11Configuration.parse(XML11Configuration.java:777)
at com.sun.org.apache.xerces.internal.parsers.XMLParser.parse(XMLParser.java:141)
at com.sun.org.apache.xerces.internal.parsers.AbstractSAXParser.parse(AbstractSAXParser.java:1213)
at org.docx4j.org.apache.xalan.transformer.TransformerIdentityImpl.transform(TransformerIdentityImpl.java:489)
at org.docx4j.utils.XmlSerializerUtil.serialize(XmlSerializerUtil.java:47)
... 9 more