- Code: Select all
/* CSS reset, see http://itumbcom.blogspot.com.au/2013/06/css-reset-how-complex-it-should-be.html
*
* Motivated by unwanted vertical space in tables in Firefox and Google Chrome.
* In Chrome this space may come from -webkit-margin-before and -webkit-margin-after
* (in Firefox, margin-top is set to 1em in html.css).
* Setting margin: 0 on p is enough to fix it.
* See further http://www.css-101.org/articles/base-styles-sheet-for-webkit-based-browsers/
*/
// Reset rule: zero margin/padding/border on the listed elements, plus line-height 1 on body.
String userCSS = "html, body, div, span, h1, h2, h3, h4, h5, h6, p, a, img, ol, ul, li, table, caption, tbody, tfoot, thead, tr, th, td " +
"{ margin: 0; padding: 0; border: 0;}" +
"body {line-height: 1;} ";
htmlSettings.setUserCSS(userCSS);
// List numbering: comment out one or the other of the following, depending on whether
// you want list numbering hardcoded, or done using <li>.
//SdtWriter.registerTagHandler("HTML_ELEMENT", new SdtToListSdtTagHandler());
htmlSettings.getFeatures().remove(ConversionFeatures.PP_HTML_COLLECT_LISTS);
// Output goes to an OutputStream: a file named <inputfilepath>.html when `save` is true,
// otherwise an in-memory buffer that is printed further down.
// NOTE(review): `os` is not closed anywhere in the visible lines; confirm whether
// Docx4J.toHTML closes it, otherwise the FileOutputStream should be closed explicitly.
OutputStream os;
if (save) {
os = new FileOutputStream(inputfilepath + ".html");
} else {
os = new ByteArrayOutputStream();
}
// If you want XHTML output, enable the property below.
// NOTE(review): it is left disabled here, so presumably the result is HTML rather than
// well-formed XHTML; confirm if the output is later fed to an XML parser.
//Docx4jProperties.setProperty("docx4j.Convert.Out.HTML.OutputMethodXML", true);
// Option 1: don't care which type of exporter is used
// Docx4J.toHTML(htmlSettings, os, Docx4J.FLAG_NONE);
// Option 2 (active): prefer the exporter that uses an XSL transformation
Docx4J.toHTML(htmlSettings, os, Docx4J.FLAG_EXPORT_PREFER_XSL);
// Option 3: prefer the exporter that doesn't use an XSL transformation (= uses a visitor)
// Docx4J.toHTML(htmlSettings, os, Docx4J.FLAG_EXPORT_PREFER_NONXSL);
// Report the result: the file name when saved, otherwise the buffered markup itself.
if (save) {
System.out.println("Saved: " + inputfilepath + ".html ");
} else {
// toString() without a charset argument decodes with the platform default charset.
System.out.println( ((ByteArrayOutputStream)os).toString() );
}
// Clean up, so any ObfuscatedFontPart temp files can be deleted
if (wordMLPackage.getMainDocumentPart().getFontTablePart()!=null) {
wordMLPackage.getMainDocumentPart().getFontTablePart().deleteEmbeddedFontTempFiles();
}
// This would also do it, via finalize() methods
htmlSettings = null;
wordMLPackage = null;
}
After that, we try to convert the generated HTML back to docx this way:
- Code: Select all
// Build a new, empty package and import the XHTML file into its main document part.
WordprocessingMLPackage wordMLPackage = WordprocessingMLPackage.createPackage();
// NOTE(review): wordDocumentPart is only referenced by the commented-out line further down.
MainDocumentPart wordDocumentPart = wordMLPackage.getMainDocumentPart();
// String outputfilepath = "c:\\style-example-OUT.docx";
// NOTE(review): `text` is not read in the visible lines; the importer is given a File instead.
String text = data;
XHTMLImporter xHTMLImporter = new XHTMLImporterImpl(wordMLPackage);
// Start: registering fonts for the conversion
RFonts arialRFonts = Context.getWmlObjectFactory().createRFonts();
arialRFonts.setAscii("Arial");
arialRFonts.setHint(org.docx4j.wml.STHint.DEFAULT);
arialRFonts.setHAnsi("Arial");
XHTMLImporterImpl.addFontMapping("Arial", arialRFonts);
// NOTE(review): the key "Times New Roman" is mapped to RFonts naming "Times"; confirm this is intended.
RFonts timesRFonts = Context.getWmlObjectFactory().createRFonts();
timesRFonts.setAscii("Times");
timesRFonts.setHint(org.docx4j.wml.STHint.DEFAULT);
timesRFonts.setHAnsi("Times");
XHTMLImporterImpl.addFontMapping("Times New Roman", timesRFonts);
// End: registering fonts
xHTMLImporter.setParagraphFormatting(FormattingOption.CLASS_PLUS_OTHER);
xHTMLImporter.setRunFormatting(FormattingOption.CLASS_PLUS_OTHER);
// Input is the file at PATH_PRUEBAS + FICHERO_ENTRADA; the converted content is appended
// to the main document part.
// NOTE(review): "Content is not allowed in prolog" is an XML parse error, so presumably this
// file must be well-formed XML (XHTML) with nothing before the root/declaration; confirm.
File file = new File(PATH_PRUEBAS+FICHERO_ENTRADA);
wordMLPackage.getMainDocumentPart().getContent().addAll(xHTMLImporter.convert(file, null));
//wordDocumentPart.addParagraphOfText("Manipulating Word document with docx4j");
// Save it
wordMLPackage.save(new File("C:/pruebas/out.docx"));
}catch(Exception e){
// Only the exception message is printed; the stack trace and cause are not reported.
System.out.print(e.getMessage());
// LOGGER.debug(e.getMessage());
}
and we are getting the following error:
- Code: Select all
org.docx4j.org.xhtmlrenderer.load INFO:: SAX XMLReader in use (parser): com.sun.org.apache.xerces.internal.parsers.SAXParser
org.docx4j.org.xhtmlrenderer.load INFO:: SAX XMLReader in use (parser): com.sun.org.apache.xerces.internal.parsers.SAXParser
org.docx4j.org.xhtmlrenderer.load INFO:: SAX XMLReader in use (parser): com.sun.org.apache.xerces.internal.parsers.SAXParser
org.docx4j.org.xhtmlrenderer.load INFO:: SAX XMLReader in use (parser): com.sun.org.apache.xerces.internal.parsers.SAXParser
org.docx4j.org.xhtmlrenderer.load INFO:: SAX XMLReader in use (parser): com.sun.org.apache.xerces.internal.parsers.SAXParser
org.docx4j.org.xhtmlrenderer.load INFO:: SAX XMLReader in use (parser): com.sun.org.apache.xerces.internal.parsers.SAXParser
org.docx4j.org.xhtmlrenderer.load INFO:: Content is not allowed in prolog.
org.docx4j.org.xhtmlrenderer.exception WARNING:: Unhandled exception. Can't load the XML resource (using TRaX transformer). org.xml.sax.SAXParseException: Content is not allowed in prolog.
Can't load the XML resource (using TRaX transformer). org.xml.sax.SAXParseException: Content is not allowed in prolog.
Can anyone help us find the cause?
We are using the following libraries:
· docx-ImportXHTML-3.0.0.jar
· docx4j-3.2.2.jar
Thanks.