Hi
Could someone please tell me how I can get the number of characters in a Word (.docx) file?
thanks!
/* Print the character count (whitespace included) of a .docx file using docx4j. */
try {
    // Load the package and fetch the JAXB model of its main document part.
    WordprocessingMLPackage pkg = WordprocessingMLPackage.load(new File("c:\\sample1.docx"));
    org.docx4j.wml.Document documentModel =
            (org.docx4j.wml.Document) pkg.getMainDocumentPart().getJaxbElement();

    // TextUtils.extractText fills the writer; its contents are what gets counted.
    StringWriter extracted = new StringWriter();
    org.docx4j.TextUtils.extractText(documentModel, extracted);

    System.out.println("Count....." + extracted.toString().length());
    extracted.close();
} catch (Docx4JException e) {
    // Failure reported by docx4j itself.
    e.printStackTrace();
} catch (Exception e) {
    // Anything else raised while extracting or closing the writer.
    e.printStackTrace();
}
/**
 * Loads a tracked-changes .docx with docx4j and prints several character
 * counts: the total length of all {@code DelText} values, the length of the
 * extracted text, the difference between the two, and the length of the
 * extracted text with newlines, tabs and spaces removed.
 *
 * <p>Any exception is printed to standard error via {@code printStackTrace()}
 * rather than propagated.
 */
public void testWordCountInsertedDeletedText() {
    try {
        WordprocessingMLPackage pkg = WordprocessingMLPackage.load(
                new File("eksempler/Samledokument_2015-01-23_track_changes.docx"));
        MainDocumentPart mainPart = pkg.getMainDocumentPart();
        org.docx4j.wml.Document documentModel =
                (org.docx4j.wml.Document) mainPart.getJaxbElement();

        // Gather every DelText found under the main part and sum the lengths of their values.
        ClassFinder delTextFinder = new ClassFinder(DelText.class);
        new TraversalUtil(mainPart, delTextFinder);
        int deletedChars = 0;
        for (Object found : delTextFinder.results) {
            if (!(found instanceof DelText)) {
                continue;
            }
            String value = ((DelText) found).getValue();
            if (value != null) {
                deletedChars += value.length();
            }
        }
        System.out.println("deleted chars in total: " + deletedChars);

        // Extract the document text once, then derive the remaining figures from it.
        StringWriter extracted = new StringWriter();
        org.docx4j.TextUtils.extractText(documentModel, extracted);
        String fullText = extracted.toString();
        int totalLength = fullText.length();
        // Strip newlines, tabs and spaces to get a whitespace-free count.
        String withoutWhitespace = fullText.replaceAll("[\\n\\t ]", "");

        System.out.println(totalLength + " count with whitespace..... ");
        System.out.println(deletedChars + " deleted chars..... ");
        System.out.println((totalLength - deletedChars)
                + " Count without deleted text (TrackChanges) still with whitespace..... ");
        System.out.println(withoutWhitespace.length() + " count ..... ");
        extracted.close();
    } catch (Docx4JException e) {
        e.printStackTrace();
    } catch (Exception e) {
        e.printStackTrace();
    }
}
Users browsing this forum: Google [Bot] and 36 guests