C# 将Word文档转换为HTML

时间:2022-11-11 21:02:10

       日常生活中,我们总是在Word中进行文字的编辑,它不仅能够保存Text文本,还可以保存文本的格式等等。那么如果我要将一Word文档上的内容展示在网页上,该怎么做呢?这里我提供了一个小工具,你可以将Word转换为Html,需要显示的话,可以直接访问该Html,废话不多说,下面看代码。

页面代码:

<div>
<input id="File1" type="file" runat="server"/>
<asp:Button ID="btnConvert" runat="server" Text="转换" OnClick="btnConvert_Click" />
</div>
C#代码:

using System;
using System.Data;
using System.Configuration;
using System.Collections;
using System.Collections.Generic;
using System.Linq;
using System.Web;
using System.Web.Security;
using System.Web.UI;
using System.Web.UI.WebControls;
using System.Web.UI.WebControls.WebParts;
using System.Web.UI.HtmlControls;
using System.IO;

protected void Page_Load(object sender, EventArgs e)
{

}

/// <summary>
/// 将word转换为Html
/// </summary>
/// <param name="sender"></param>
/// <param name="e"></param>
protected void btnConvert_Click(object sender, EventArgs e)
{
try
{

//上传
//uploadWord(File1);
//转换
wordToHtml(File1);
}
catch (Exception ex)
{
throw ex;
}
finally
{
Response.Write("恭喜,转换成功!");
}

}

//上传文件并转换为html wordToHtml(wordFilePath)
///<summary>
///上传文件并转存为html
///</summary>
///<param name="wordFilePath">word文档在客户机的位置</param>
///<returns>上传的html文件的地址</returns>
public string wordToHtml(System.Web.UI.HtmlControls.HtmlInputFile wordFilePath)
{
Microsoft.Office.Interop.Word.ApplicationClass word = new Microsoft.Office.Interop.Word.ApplicationClass();
Type wordType = word.GetType();
Microsoft.Office.Interop.Word.Documents docs = word.Documents;

// 打开文件
Type docsType = docs.GetType();

//应当先把文件上传至服务器然后再解析文件为html
string filePath = uploadWord(wordFilePath);

//判断是否上传文件成功
if (filePath == "0")
return "0";
//判断是否为word文件
if (filePath == "1")
return "1";

object fileName = filePath;

Microsoft.Office.Interop.Word.Document doc = (Microsoft.Office.Interop.Word.Document)docsType.InvokeMember("Open",
System.Reflection.BindingFlags.InvokeMethod, null, docs, new Object[] { fileName, true, true });

// 转换格式,另存为html
Type docType = doc.GetType();

string filename = System.DateTime.Now.Year.ToString() + System.DateTime.Now.Month.ToString() + System.DateTime.Now.Day.ToString() +
System.DateTime.Now.Hour.ToString() + System.DateTime.Now.Minute.ToString() + System.DateTime.Now.Second.ToString();

// 判断指定目录下是否存在文件夹,如果不存在,则创建
if (!Directory.Exists(Server.MapPath("~\\html")))
{
// 创建up文件夹
Directory.CreateDirectory(Server.MapPath("~\\html"));
}

//被转换的html文档保存的位置
string ConfigPath = HttpContext.Current.Server.MapPath("html/" + filename + ".html");
object saveFileName = ConfigPath;

/*下面是Microsoft Word 9 Object Library的写法,如果是10,可能写成:
* docType.InvokeMember("SaveAs", System.Reflection.BindingFlags.InvokeMethod,
* null, doc, new object[]{saveFileName, Word.WdSaveFormat.wdFormatFilteredHTML});
* 其它格式:
* wdFormatHTML
* wdFormatDocument
* wdFormatDOSText
* wdFormatDOSTextLineBreaks
* wdFormatEncodedText
* wdFormatRTF
* wdFormatTemplate
* wdFormatText
* wdFormatTextLineBreaks
* wdFormatUnicodeText
*/
docType.InvokeMember("SaveAs", System.Reflection.BindingFlags.InvokeMethod,
null, doc, new object[] { saveFileName, Microsoft.Office.Interop.Word.WdSaveFormat.wdFormatFilteredHTML });

//关闭文档
docType.InvokeMember("Close", System.Reflection.BindingFlags.InvokeMethod,
null, doc, new object[] { null, null, null });

// 退出 Word
wordType.InvokeMember("Quit", System.Reflection.BindingFlags.InvokeMethod, null, word, null);
//转到新生成的页面
return ("/" + filename + ".html");

}


public string uploadWord(System.Web.UI.HtmlControls.HtmlInputFile uploadFiles)
{
if (uploadFiles.PostedFile != null)
{
string fileName = uploadFiles.PostedFile.FileName;

int extendNameIndex = fileName.LastIndexOf(".");
string extendName = fileName.Substring(extendNameIndex);
string newName = "";
try
{
//验证是否为word格式
if (extendName == ".doc" || extendName == ".docx")
{

DateTime now = DateTime.Now;
newName = now.DayOfYear.ToString() + uploadFiles.PostedFile.ContentLength.ToString();

// 判断指定目录下是否存在文件夹,如果不存在,则创建
if (!Directory.Exists(Server.MapPath("~\\wordTmp")))
{
// 创建up文件夹
Directory.CreateDirectory(Server.MapPath("~\\wordTmp"));
}

//上传路径 指当前上传页面的同一级的目录下面的wordTmp路径
uploadFiles.PostedFile.SaveAs(System.Web.HttpContext.Current.Server.MapPath("wordTmp/" + newName + extendName));
}
else
{
return "1";
}
}
catch
{
return "0";
}
//return "http://" + HttpContext.Current.Request.Url.Host + HttpContext.Current.Request.ApplicationPath + "/wordTmp/" + newName + extendName;
return System.Web.HttpContext.Current.Server.MapPath("wordTmp/" + newName + extendName);
}
else
{
return "0";
}
}
效果图:

转换后的Html文件

C# 将Word文档转换为HTML

C# 将Word文档转换为HTML

       这样就可以简单的在Html中展示word文档中的内容,而不需要在自己进行编辑了。当然,如果有需要的话,可以将转换的Html的路径存入数据库,根据不同的条件直接进行访问。