package com.test;
import java.io.File;
import java.util.List;
import org.apache.commons.io.FileUtils;
import org.docx4j.XmlUtils;
import org.docx4j.dml.wordprocessingDrawing.Inline;
import org.docx4j.jaxb.Context;
import org.docx4j.model.structure.SectionWrapper;
import org.docx4j.openpackaging.contenttype.ContentType;
import org.docx4j.openpackaging.exceptions.InvalidFormatException;
import org.docx4j.openpackaging.packages.WordprocessingMLPackage;
import org.docx4j.openpackaging.parts.Part;
import org.docx4j.openpackaging.parts.WordprocessingML.AltChunkType;
import org.docx4j.openpackaging.parts.WordprocessingML.AlternativeFormatInputPart;
import org.docx4j.openpackaging.parts.WordprocessingML.BinaryPartAbstractImage;
import org.docx4j.openpackaging.parts.WordprocessingML.HeaderPart;
import org.docx4j.relationships.Relationship;
import org.docx4j.utils.BufferUtil;
import org.docx4j.wml.CTAltChunk;
import org.docx4j.wml.Hdr;
import org.docx4j.wml.HdrFtrRef;
import org.docx4j.wml.HeaderReference;
import org.docx4j.wml.Jc;
import org.docx4j.wml.JcEnumeration;
import org.docx4j.wml.ObjectFactory;
import org.docx4j.wml.PPr;
import org.docx4j.wml.SectPr;
/**
 * Builds a WordprocessingML package from an HTML string using docx4j.
 *
 * <p>The flow implemented here is: create an empty package, attach a header part that holds a
 * centered image, reference that header from the last section, attach the HTML as an altChunk,
 * ask docx4j to convert the altChunks, and save the resulting package to
 * {@link #locationOfFile}.
 */
public class HTMLToDoc {

    /** Path handed to the package's {@code save} call in {@link #convertHTMLtoDOC(String)}. */
    // NOTE(review): the name ends in ".doc" although the package is built with
    // WordprocessingMLPackage - confirm the extension is what consumers expect.
    static String locationOfFile = "C:/Users/priteshs/Desktop/Test" + "/test2.doc";

    /** Factory used for the header, section-properties and header-reference elements. */
    private static final ObjectFactory objectFactory = new ObjectFactory();

    /**
     * Reads a fixed HTML file as UTF-8 and converts it.
     *
     * @param args command-line arguments; not used
     * @throws Exception if the file cannot be read or the conversion fails
     */
    public static void main(final String[] args) throws Exception {
        String xhtml = FileUtils.readFileToString(
                new File("C:\\Users\\priteshs\\Desktop\\Test\\html_text.html"), "UTF-8");
        convertHTMLtoDOC(xhtml);
    }

    /**
     * Wraps the given HTML in a new package (with an image header), converts the altChunk,
     * saves the converted package to {@link #locationOfFile} and prints the main document XML
     * to standard output.
     *
     * @param html the HTML markup to embed; encoded as UTF-8 before being attached
     * @throws Exception if any docx4j operation or the save fails
     */
    static void convertHTMLtoDOC(final String html) throws Exception {
        WordprocessingMLPackage wordMLPackage = WordprocessingMLPackage.createPackage();

        // Drop the relationship from the main document part to the style definitions part
        // of the freshly created package.
        Relationship styleRel = wordMLPackage.getMainDocumentPart()
                .getStyleDefinitionsPart()
                .getSourceRelationships()
                .get(0);
        wordMLPackage.getMainDocumentPart().getRelationshipsPart().removeRelationship(styleRel);

        // 1. the Header part
        Relationship relationship = createHeaderPart(wordMLPackage);
        // 2. an entry in SectPr
        createHeaderReference(wordMLPackage, relationship);

        // Attach the HTML bytes as an alternative-format input part.
        AlternativeFormatInputPart afiPart = new AlternativeFormatInputPart(AltChunkType.Html);
        afiPart.setBinaryData(html.getBytes("UTF-8"));
        afiPart.setContentType(new ContentType("text/html"));
        Relationship altChunkRel = wordMLPackage.getMainDocumentPart().addTargetPart(afiPart);

        // .. the bit in document body
        CTAltChunk ac = Context.getWmlObjectFactory().createCTAltChunk();
        ac.setId(altChunkRel.getId());
        wordMLPackage.getMainDocumentPart().addObject(ac);
        wordMLPackage.getContentTypeManager().addDefaultContentType("html", "text/html");

        // Converting altChunks yields a new package; that one is saved and printed.
        WordprocessingMLPackage pkgOut = wordMLPackage.getMainDocumentPart().convertAltChunks();
        pkgOut.save(new java.io.File(locationOfFile));
        System.out.println(XmlUtils.marshaltoString(
                pkgOut.getMainDocumentPart().getJaxbElement(), true, true));
    }

    /**
     * Creates a header part, attaches it to the main document part and fills it with the
     * content produced by {@link #getHdr(WordprocessingMLPackage, Part)}.
     *
     * @param wordprocessingMLPackage the package receiving the header part
     * @return the relationship created when the header part was added
     * @throws Exception if the part cannot be added or the header content cannot be built
     */
    public static Relationship createHeaderPart(WordprocessingMLPackage wordprocessingMLPackage)
            throws Exception {
        HeaderPart headerPart = new HeaderPart();
        Relationship rel = wordprocessingMLPackage.getMainDocumentPart().addTargetPart(headerPart);
        // After addTargetPart, so image can be added properly
        headerPart.setJaxbElement(getHdr(wordprocessingMLPackage, headerPart));
        return rel;
    }

    /**
     * Adds a default header reference pointing at {@code relationship} to the section
     * properties of the last section, creating those section properties when absent.
     *
     * @param wordprocessingMLPackage the package whose last section is updated
     * @param relationship the relationship whose id the header reference carries
     * @throws InvalidFormatException declared for callers; propagated from docx4j
     */
    public static void createHeaderReference(
            WordprocessingMLPackage wordprocessingMLPackage,
            Relationship relationship) throws InvalidFormatException {
        List<SectionWrapper> sections =
                wordprocessingMLPackage.getDocumentModel().getSections();
        SectionWrapper lastSection = sections.get(sections.size() - 1);

        SectPr sectPr = lastSection.getSectPr();
        // There is always a section wrapper, but it might not contain a sectPr
        if (sectPr == null) {
            sectPr = objectFactory.createSectPr();
            wordprocessingMLPackage.getMainDocumentPart().addObject(sectPr);
            lastSection.setSectPr(sectPr);
        }

        HeaderReference headerReference = objectFactory.createHeaderReference();
        headerReference.setId(relationship.getId());
        headerReference.setType(HdrFtrRef.DEFAULT);
        // add header or footer references
        sectPr.getEGHdrFtrReferences().add(headerReference);
    }

    /**
     * Builds header content consisting of a single paragraph that holds an image read from a
     * fixed file on disk.
     *
     * @param wordprocessingMLPackage the package the image part is created in
     * @param sourcePart the part the image is attached to
     * @return a header element containing the image paragraph
     * @throws Exception if the image file cannot be read or the image cannot be created
     */
    public static Hdr getHdr(WordprocessingMLPackage wordprocessingMLPackage, Part sourcePart)
            throws Exception {
        Hdr hdr = objectFactory.createHdr();
        File file = new File("C:\\Users\\priteshs\\Desktop\\temp\\google_logo.png");

        // Read the image fully, then always release the file handle here instead of
        // depending on the helper to close the stream.
        byte[] imageBytes;
        java.io.InputStream is = new java.io.FileInputStream(file);
        try {
            imageBytes = BufferUtil.getBytesFromInputStream(is);
        } finally {
            is.close();
        }

        hdr.getContent().add(
                newImage(wordprocessingMLPackage, sourcePart, imageBytes,
                        "filename", "alttext", 1, 2));
        return hdr;
    }

    /**
     * Creates an image part from {@code bytes} and returns a centered paragraph of the form
     * w:p/w:r/w:drawing that contains the image inline.
     *
     * @param wordMLPackage the package the image part is created in
     * @param sourcePart the part the image is attached to
     * @param bytes the raw image data
     * @param filenameHint passed through to {@code createImageInline}
     * @param altText passed through to {@code createImageInline}
     * @param id1 passed through to {@code createImageInline}
     * @param id2 passed through to {@code createImageInline}
     * @return the paragraph wrapping the image
     * @throws Exception if the image part or the inline cannot be created
     */
    public static org.docx4j.wml.P newImage(
            WordprocessingMLPackage wordMLPackage,
            Part sourcePart,
            byte[] bytes,
            String filenameHint,
            String altText,
            int id1,
            int id2) throws Exception {
        BinaryPartAbstractImage imagePart =
                BinaryPartAbstractImage.createImagePart(wordMLPackage, sourcePart, bytes);
        // NOTE(review): the trailing 'false' is presumably docx4j's "link" flag - confirm.
        Inline inline = imagePart.createImageInline(filenameHint, altText, id1, id2, false);

        // Now add the inline in w:p/w:r/w:drawing
        org.docx4j.wml.ObjectFactory factory = Context.getWmlObjectFactory();
        org.docx4j.wml.P p = factory.createP();
        org.docx4j.wml.R run = factory.createR();

        // Center the paragraph.
        PPr pPr = factory.createPPr();
        Jc jc = factory.createJc();
        jc.setVal(JcEnumeration.CENTER);
        pPr.setJc(jc);
        p.setPPr(pPr);

        p.getContent().add(run);
        org.docx4j.wml.Drawing drawing = factory.createDrawing();
        run.getContent().add(drawing);
        drawing.getAnchorOrInline().add(inline);
        return p;
    }
}
// Parsed in 0.026 seconds, using GeSHi 1.0.8.4