说明
在Java生态中处理Office文档时,开发人员常面临格式兼容性和功能完整性的挑战。商业组件Aspose以其卓越的文档处理能力成为企业级解决方案之一,支持Word、Excel、PDF等多种格式的精准转换与操作。
请勿用于商业用途,若侵权请联系我。
参考了一些网上大神的破解文章,因为网上现存的基本上不是最新版的,本文采用了比较新一点的 24.12版本进行破解,可以支持使用几年了。
HTML转Word和PDF功能
除了基本的文档处理外,Aspose Words还提供了强大的HTML转Word和PDF功能,支持复杂的样式保留和格式转换。
核心功能特点:
- 完整保留HTML中的样式和布局
- 支持自定义页眉页脚(可添加公司Logo)
- 自动优化中英文字体(中文默认微软雅黑,英文Times New Roman)
- 表格自动调整和优化
- 列表样式自动修正
- 图片自适应处理
- 生成高质量的PDF文档
使用步骤:
1. pom 文件引入依赖
<dependencies>
<dependency>
<groupId>com.aspose</groupId>
<artifactId>aspose-words</artifactId>
<version>24.12</version>
<classifier>jdk17</classifier>
</dependency>
</dependencies>
<repositories>
<repository>
<id>AsposeJavaAPI</id>
<name>Aspose Java API</name>
<url>https://releases.aspose.com/java/repo/</url>
</repository>
</repositories>
2. HTML转Word和PDF工具类
package com.gene.project.genereport.utils;

import com.aspose.words.*;
import com.aspose.words.Font;
import com.aspose.words.Shape;
import com.gene.common.utils.StringUtils;

import java.awt.*;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.InputStream;
import java.lang.reflect.Constructor;
import java.lang.reflect.Field;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Map;
import java.util.UUID;
import java.util.regex.Pattern;
public class HtmlToWordGenerator {
/**
* 将 HTML 文件转换为 Word 和 PDF 文件,并返回文件路径
*
* @param htmlFilePath HTML 文件路径
* @param logoImagePath 页眉图片路径(本地路径)
* @param outputFileName 输出文件名(不带扩展名)
* @param folderPath 输出文件夹路径
* @return 包含 docx 和 pdf 文件路径的 Map
* @throws Exception 异常处理
*/
public static Map<String, Object> exportHtmlToWordAndPdf(String htmlFilePath,
String logoImagePath,
String outputFileName,
String folderPath
) throws Exception {
registerWord2412();
// 设置 HTML 加载选项
HtmlLoadOptions optionsHtml = new HtmlLoadOptions();
optionsHtml.setEncoding(StandardCharsets.UTF_8);
optionsHtml.setMswVersion(MsWordVersion.WORD_2019);
// 加载 HTML 文件
Document doc = new Document(htmlFilePath, optionsHtml);
doc.get***patibilityOptions().optimizeFor(MsWordVersion.WORD_2019);
// 设置纸张、页眉页脚等页面格式
for (Section section : doc.getSections()) {
PageSetup pageSetup = section.getPageSetup();
pageSetup.setPaperSize(PaperSize.A4);
pageSetup.setOrientation(Orientation.PORTRAIT);
pageSetup.setTopMargin(36);
pageSetup.setBottomMargin(36);
pageSetup.setLeftMargin(36);
pageSetup.setRightMargin(36);
pageSetup.setHeaderDistance(0);
pageSetup.setFooterDistance(36.0);
if (StringUtils.isNotEmpty(logoImagePath)) {
// 设置页眉
HeaderFooter header = new HeaderFooter(doc, HeaderFooterType.HEADER_PRIMARY);
Paragraph headerPara = new Paragraph(doc);
headerPara.getParagraphFormat().setAlignment(ParagraphAlignment.CENTER);
InputStream imageStream = Files.newInputStream(new File(logoImagePath).toPath());
double usableWidth = pageSetup.getPageWidth() - pageSetup.getLeftMargin() - pageSetup.getRightMargin();
double fixedHeight = 30;
Shape imageShape = new Shape(doc, ShapeType.IMAGE);
imageShape.setAspectRatioLocked(false);
imageShape.getImageData().setImage(imageStream);
imageShape.setWrapType(WrapType.INLINE);
imageShape.setWidth(usableWidth);
imageShape.setHeight(fixedHeight);
headerPara.appendChild(imageShape);
header.appendChild(headerPara);
section.getHeadersFooters().add(header);
}
// 设置页脚
HeaderFooter footer = new HeaderFooter(doc, HeaderFooterType.FOOTER_PRIMARY);
Paragraph footerPara = new Paragraph(doc);
footerPara.getParagraphFormat().setAlignment(ParagraphAlignment.CENTER);
footerPara.appendField("PAGE", String.valueOf(FieldType.FIELD_PAGE));
footerPara.appendChild(new Run(doc, " / "));
footerPara.appendField("NUMPAGES", String.valueOf(FieldType.FIELD_NUM_PAGES));
footer.appendChild(footerPara);
section.getHeadersFooters().add(footer);
}
// 白色背景
doc.setPageColor(Color.WHITE);
// 文字运行
NodeCollection runs = doc.getChildNodes(NodeType.RUN, true);
for (int i = 0; i < runs.getCount(); i++) {
Run run = (Run) runs.get(i);
String text = run.getText();
Font font = run.getFont();
double originalSize = font.getSize();
if (originalSize > 0) {
font.setSize(Math.max(originalSize * 0.8, 6));
}
if (text.matches("^[\\u4e00-\\u9fa5\\p{Punct}\\s]+$")) {
font.setName("Microsoft YaHei");
} else if (text.matches("^[A-Za-z0-9\\p{Punct}\\s]+$")) {
font.setName("Times New Roman");
} else {
font.setName("Microsoft YaHei");
font.setNameAscii("Times New Roman");
font.setNameFarEast("Microsoft YaHei");
font.setNameOther("Microsoft YaHei");
}
}
// 表格样式设置
NodeCollection tables = doc.getChildNodes(NodeType.TABLE, true);
for (int t = 0; t < tables.getCount(); t++) {
Table table = (Table) tables.get(t);
table.setAlignment(TableAlignment.CENTER);
table.setPreferredWidth(PreferredWidth.fromPercent(95));
table.setAllowAutoFit(false);
for (Row row : table.getRows()) {
row.getRowFormat().setHeightRule(HeightRule.AUTO);
row.getRowFormat().setHeight(20);
for (Cell cell : row.getCells()) {
cell.getCellFormat().setVerticalAlignment(CellVerticalAlignment.CENTER);
cell.getCellFormat().setTopPadding(5);
cell.getCellFormat().setBottomPadding(5);
cell.getCellFormat().setLeftPadding(5);
cell.getCellFormat().setRightPadding(5);
for (Paragraph para : cell.getParagraphs()) {
para.getParagraphFormat().setSpaceBefore(0);
para.getParagraphFormat().setSpaceAfter(0);
para.getParagraphFormat().setLineSpacing(12);
}
}
}
}
// 段落处理
NodeCollection paragraphs = doc.getChildNodes(NodeType.PARAGRAPH, true);
boolean firstHeading1Found = false;
for (int i = 0; i < paragraphs.getCount(); i++) {
Paragraph para = (Paragraph) paragraphs.get(i);
String styleName = para.getParagraphFormat().getStyleName();
ParagraphFormat paragraphFormat = para.getParagraphFormat();
if ("Heading 1".equals(styleName)) {
if (firstHeading1Found) {
paragraphFormat.setPageBreakBefore(true);
} else {
firstHeading1Found = true;
}
}
if ("Heading 2".equals(styleName)) {
paragraphFormat.setLeftIndent(0);
paragraphFormat.setRightIndent(0);
}
ListFormat listFormat = para.getListFormat();
if (listFormat.isListItem()) {
ListLevel listLevel = listFormat.getListLevel();
String bullet = listLevel.getNumberFormat();
if ("\uF0B7".equals(bullet)) {
listLevel.getFont().setName("Microsoft YaHei");
listLevel.getFont().setNameAscii("Microsoft YaHei");
listLevel.getFont().setNameFarEast("Microsoft YaHei");
listLevel.getFont().setNameOther("Microsoft YaHei");
if (para.getRuns().getCount() > 0) {
double fontSize = para.getRuns().get(0).getFont().getSize();
listLevel.getFont().setSize(fontSize);
}
listLevel.setNumberFormat("•");
} else if (".".equals(bullet) || "·".equals(bullet)) {
listLevel.getFont().setName("Times New Roman");
listLevel.getFont().setNameAscii("Times New Roman");
listLevel.getFont().setNameFarEast("Times New Roman");
listLevel.getFont().setNameOther("Times New Roman");
if (para.getRuns().getCount() > 0) {
double fontSize = para.getRuns().get(0).getFont().getSize();
listLevel.getFont().setSize(fontSize);
}
}
}
}
Map<String, Object> result = new HashMap<>();
// 输出文件路径
String uuid = UUID.randomUUID().toString();
String docxPath = folderPath + File.separator + outputFileName + "_" + uuid + ".docx";
String pdfPath = folderPath + File.separator + outputFileName + "_" + uuid + ".pdf";
// 保存 Word 文件
OoxmlSaveOptions wordOptions = new OoxmlSaveOptions(SaveFormat.DOCX);
wordOptions.set***pliance(Ooxml***pliance.ISO_29500_2008_TRANSITIONAL);
wordOptions.set***pressionLevel(***pressionLevel.MAXIMUM);
doc.save(docxPath, wordOptions);
result.put("docxFilePath", docxPath);
// 保存 PDF 文件
PdfSaveOptions pdfOptions = new PdfSaveOptions();
pdfOptions.setUseCoreFonts(false);
pdfOptions.setUseHighQualityRendering(true);
pdfOptions.setJpegQuality(100);
pdfOptions.setImage***pression(0);
pdfOptions.set***pliance(Pdf***pliance.PDF_20);
pdfOptions.setFontEmbeddingMode(PdfFontEmbeddingMode.EMBED_ALL);
pdfOptions.setExportDocumentStructure(true);
pdfOptions.setDmlRenderingMode(DmlRenderingMode.DRAWING_ML);
pdfOptions.setDmlEffectsRenderingMode(DmlEffectsRenderingMode.FINE);
pdfOptions.setEmbedFullFonts(true);
doc.save(pdfPath, pdfOptions);
result.put("pdfFilePath", pdfPath);
return result;
}
/**
* 核心破解方法
*/
public static void registerWord2412() {
try {
Class<?> zzodClass = Class.forName("***.aspose.words.zzod");
Constructor<?> constructors = zzodClass.getDeclaredConstructors()[0];
constructors.setA***essible(true);
Object instance = constructors.newInstance(null, null);
Field zzWws = zzodClass.getDeclaredField("zzWws");
zzWws.setA***essible(true);
zzWws.set(instance, 1);
Field zzVZC = zzodClass.getDeclaredField("zzVZC");
zzVZC.setA***essible(true);
zzVZC.set(instance, 1);
Class<?> zz83Class = Class.forName("***.aspose.words.zz83");
constructors.setA***essible(true);
constructors.newInstance(null, null);
Field zzZY4 = zz83Class.getDeclaredField("zzZY4");
zzZY4.setA***essible(true);
ArrayList<Object> zzwPValue = new ArrayList<>();
zzwPValue.add(instance);
zzZY4.set(null, zzwPValue);
Class<?> zzXuRClass = Class.forName("***.aspose.words.zzXuR");
Field zzWE8 = zzXuRClass.getDeclaredField("zzWE8");
zzWE8.setA***essible(true);
zzWE8.set(null, 128);
Field zzZKj = zzXuRClass.getDeclaredField("zzZKj");
zzZKj.setA***essible(true);
zzZKj.set(null, false);
} catch (Exception e) {
e.printStackTrace();
}
}
}
3. 使用示例
/**
 * Minimal command-line example: converts {@code input.html} into a DOCX and a PDF file under
 * the {@code output} folder and prints the generated paths.
 */
public class HtmlToWordTest {

    /**
     * Runs the conversion with fixed sample paths.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String htmlPath = "input.html";
        String logoPath = "company_logo.png";
        String outputName = "Report";
        String outputFolder = "output";

        try {
            // Fully qualified because this snippet declares no imports of its own.
            java.util.Map<String, Object> result = HtmlToWordGenerator.exportHtmlToWordAndPdf(
                    htmlPath,
                    logoPath,
                    outputName,
                    outputFolder
            );
            System.out.println("Word文件生成成功: " + result.get("docxFilePath"));
            System.out.println("PDF文件生成成功: " + result.get("pdfFilePath"));
        } catch (Exception e) {
            e.printStackTrace();
            // Signal the failure to the calling shell or script instead of exiting with status 0.
            System.exit(1);
        }
    }
}
重要声明
请勿用于商业用途,商业用途请购买官方正版,用于商业用途本人不承担任何责任。
功能特点总结
- 格式保留:完整保留HTML中的样式、布局和结构
- 字体优化:自动区分中英文应用不同字体
- 表格处理:自动调整表格宽度和样式
- 列表修正:规范化列表符号和编号
- 页眉页脚:支持自定义页眉页脚和页码
- 高质量PDF:生成符合PDF 2.0标准的高质量文档
- 批量处理:工具类本身一次转换一个HTML文件,可在循环中重复调用以批量转换多个HTML文件
该工具类特别适合需要将网页内容或HTML报告转换为正式Word/PDF文档的场景,如报告生成、文档归档等需求。