在C#中将MS字表转换为html

时间:2022-10-30 15:03:24

I'm writing a Word add-in in C# that replaces all formatting with XML tags. Now I want to convert the tables in a Word document to HTML with standard tags. The tables may differ in their row and column counts; that is, a table may contain merged cells that span several rows or columns.

我正在编写一个带有C#的Word Addin,它将所有格式替换为xml标签,现在我希望将word转换为带有标准标签的html表,行数和列数可能不同,我的意思是表包含合并的单元格或列

For example:

-------------------------
|  1  |  2  |  3  |  4  |
|     -------------------
|     |  5  |  6  |  7  |
|     -------------------
|     |        8        |
|     -------------------
|     |  9  | 10  | 11  |
|------------------------
| 12  | 13  | 14  | 15  |
-------------------------

Here cell 1 is a merge of four rows in one column, and cell 8 is a merge of three columns in one row.

单元格1在一列中合并四行,单元格8在一行中合并三列

How can I convert it?

我怎么转换它?

3 个解决方案

#1


1  

We ran into a similar project a while ago and hope the code below can give you a start.

HTML part:

我们前一段时间遇到过类似的项目,希望下面的代码可以给你一个开始。 HTML部分

<span style="font-size:18px;"><div>  
    <input id="File1" type="file" runat="server"/>  
    <asp:Button ID="btnConvert" runat="server" Text="Convert" OnClick="btnConvert_Click" />  
</div></span> 

C# part:

using System;
using System.Data;
using System.Configuration;
using System.Collections;
using System.Collections.Generic;
using System.Linq;
using System.Web;
using System.Web.Security;
using System.Web.UI;
using System.Web.UI.WebControls;
using System.Web.UI.WebControls.WebParts;
using System.Web.UI.HtmlControls;
using System.IO;

C#部分使用系统;使用System.Data;使用System.Configuration;使用System.Collections;使用System.Collections.Generic;使用System.Linq;使用System.Web;使用System.Web.Security;使用System.Web.UI;使用System.Web.UI.WebControls;使用System.Web.UI.WebControls.WebParts;使用System.Web.UI.HtmlControls;使用System.IO;

protected void Page_Load(object sender, EventArgs e)  
        {  

        }  

        /// <summary>  
        /// word to Html  
        /// </summary>  
        /// <param name="sender"></param>  
        /// <param name="e"></param>  
        protected void btnConvert_Click(object sender, EventArgs e)  
        {  
            try  
            {  

                //upload  
                //uploadWord(File1);  
                //convert  
                wordToHtml(File1);  
            }  
            catch (Exception ex)  
            {  
                throw ex;  
            }  
            finally  
            {  
                Response.Write("Convert successfully!");  
            }  

        }  

        //upload and convert to html wordToHtml(wordFilePath)  
        ///<summary>  
        ///upload and save as html  
        ///</summary>  
        ///<param name="wordFilePath">word doc file path on client machine</param>  
        ///<returns>html save path</returns>  
        public string wordToHtml(System.Web.UI.HtmlControls.HtmlInputFile wordFilePath)  
        {  
            Microsoft.Office.Interop.Word.ApplicationClass word = new Microsoft.Office.Interop.Word.ApplicationClass();  
            Type wordType = word.GetType();  
            Microsoft.Office.Interop.Word.Documents docs = word.Documents;  

            // open doc file  
            Type docsType = docs.GetType();  

            //upload to server and parse as html  
            string filePath = uploadWord(wordFilePath);  

            //if upload is success  
            if (filePath == "0")  
                return "0";  
            //if file is word  
            if (filePath == "1")  
                return "1";  

            object fileName = filePath;  

            Microsoft.Office.Interop.Word.Document doc = (Microsoft.Office.Interop.Word.Document)docsType.InvokeMember("Open",  
            System.Reflection.BindingFlags.InvokeMethod, null, docs, new Object[] { fileName, true, true });  

            // convert and save  
            Type docType = doc.GetType();  

            string filename = System.DateTime.Now.Year.ToString() + System.DateTime.Now.Month.ToString() + System.DateTime.Now.Day.ToString() +  
            System.DateTime.Now.Hour.ToString() + System.DateTime.Now.Minute.ToString() + System.DateTime.Now.Second.ToString();  

            // if directory exist, create if not  
            if (!Directory.Exists(Server.MapPath("~\\html")))  
            {  
                // create upload directory  
                Directory.CreateDirectory(Server.MapPath("~\\html"));  
            }  

            //html save location  
            string ConfigPath = HttpContext.Current.Server.MapPath("html/" + filename + ".html");  
            object saveFileName = ConfigPath;  

            /*For Microsoft Word 9 Object Library, if using 10, then use below: 
         * docType.InvokeMember("SaveAs", System.Reflection.BindingFlags.InvokeMethod, 
         * null, doc, new object[]{saveFileName, Word.WdSaveFormat.wdFormatFilteredHTML}); 
         * other formats: 
         * wdFormatHTML 
         * wdFormatDocument 
         * wdFormatDOSText 
         * wdFormatDOSTextLineBreaks 
         * wdFormatEncodedText 
         * wdFormatRTF 
         * wdFormatTemplate 
         * wdFormatText 
         * wdFormatTextLineBreaks 
         * wdFormatUnicodeText 
         */  
            docType.InvokeMember("SaveAs", System.Reflection.BindingFlags.InvokeMethod,  
            null, doc, new object[] { saveFileName, Microsoft.Office.Interop.Word.WdSaveFormat.wdFormatFilteredHTML });  

            //close document 
            docType.InvokeMember("Close", System.Reflection.BindingFlags.InvokeMethod,  
            null, doc, new object[] { null, null, null });  

            // exit Word  
            wordType.InvokeMember("Quit", System.Reflection.BindingFlags.InvokeMethod, null, word, null);  
            //goto converted html page  
            return ("/" + filename + ".html");  

        }  


        public string uploadWord(System.Web.UI.HtmlControls.HtmlInputFile uploadFiles)  
        {  
            if (uploadFiles.PostedFile != null)  
            {  
                string fileName = uploadFiles.PostedFile.FileName;  

                int extendNameIndex = fileName.LastIndexOf(".");  
                string extendName = fileName.Substring(extendNameIndex);  
                string newName = "";  
                try  
                {  
                    //check if is word format  
                    if (extendName == ".doc" || extendName == ".docx")  
                    {  

                        DateTime now = DateTime.Now;  
                        newName = now.DayOfYear.ToString() + uploadFiles.PostedFile.ContentLength.ToString();  

                        // check if directory exist, create one if not  
                        if (!Directory.Exists(Server.MapPath("~\\wordTmp")))  
                        {  
                            // create upload directory  
                            Directory.CreateDirectory(Server.MapPath("~\\wordTmp"));  
                        }  

                        //upload path, wordTemp relative to parrent  
                        uploadFiles.PostedFile.SaveAs(System.Web.HttpContext.Current.Server.MapPath("wordTmp/" + newName + extendName));  
                    }  
                    else  
                    {  
                        return "1";  
                    }  
                }  
                catch  
                {  
                    return "0";  
                }  
                //return "http://" + HttpContext.Current.Request.Url.Host + HttpContext.Current.Request.ApplicationPath + "/wordTmp/" + newName + extendName;  
                return System.Web.HttpContext.Current.Server.MapPath("wordTmp/" + newName + extendName);  
            }  
            else  
            {  
                return "0";  
            }  
        }</span>  

#2


0  

This is a simple trick: just copy the Word table and paste it into Dreamweaver in Design mode. When you switch to Code mode you will see all the HTML tags; from there you can copy that code and paste it into your C# file.

这是一个简单的技巧,只需在Dreamweaver中复制word表和粘贴 - >设计模式。当您在代码模式中看到时,您将从那里获得所有html标记,您只需将该代码复制并粘贴到c#文件中即可。

Enjoy.

#3


0  

Thanks for the reply. I found a way to convert Word tables to HTML.
I wrote this code:

谢谢回复,我找到了一种将Word表格转换为HTML的方法。我写了这段代码:

private static void ConvertTableToHTML()
    {
        try
        {
            foreach (Table tb in Common.WordApplication.ActiveDocument.Tables)
            {

                for (int r = 1; r <= tb.Rows.Count; r++)
                {
                    for (int c = 1; c <= tb.Columns.Count; c++)
                    {
                        try
                        {
                            Cell cell = tb.Cell(r, c);
                            foreach (Paragraph paragraph in cell.Range.Paragraphs)
                            {
                                Tagging(paragraph.Range, "P");
                            }
                            Tagging(cell.Range, "TD");                                
                        }
                        catch (Exception e)
                        {
                            if (e.Message.Contains("The requested member of the collection does not exist."))
                            {
                                //Most likely a part of a merged cell, so skip over.
                            }
                            else throw;
                        }
                    }
                    try
                    {
                        Row row = tb.Rows[r];
                        Tagging(row.Range, "TR");                            
                    }
                    catch (Exception ex)
                    {
                        bool initialTrTagInserted = false;
                        int columnsIndex = 1;
                        int columnsCount = tb.Columns.Count;
                        while (!initialTrTagInserted && columnsIndex <= columnsCount)
                        {
                            try
                            {
                                Cell cell = tb.Cell(r, columnsIndex);
                                cell.Range.InsertBefore("<TR>");
                                initialTrTagInserted = true;
                            }
                            catch (Exception e)
                            {
                            }
                            columnsIndex++;
                        }

                        columnsIndex = tb.Columns.Count;
                        bool endTrTagInserted = false;
                        while (!endTrTagInserted && columnsIndex >= 1)
                        {
                            try
                            {
                                Cell cell = tb.Cell(r, columnsIndex);
                                cell.Range.InsertAfter("</TR>");
                                endTrTagInserted = true;
                            }
                            catch (Exception e)
                            {
                            }
                            columnsIndex--;
                        }
                    }
                }
                Common.Tagging2(tb.Range, "Table");                    

                object separator = "";
                object nestedTable = true;
                tb.ConvertToText(separator, nestedTable);
            }
        }
        catch (Exception ex) { MessageBox.Show(ex.Message); }
    }


public static void Tagging(Range range, string TagName)
    {
        try
        {
            range.InsertBefore("<" + TagName + ">");
            range.InsertAfter("</" + TagName + ">");
        }
        catch (Exception ex) { throw new Exception(ex.Message); }
    }

#1


1  

We ran into a similar project a while ago and hope the code below can give you a start.

HTML part:

我们前一段时间遇到过类似的项目,希望下面的代码可以给你一个开始。 HTML部分

<span style="font-size:18px;"><div>  
    <input id="File1" type="file" runat="server"/>  
    <asp:Button ID="btnConvert" runat="server" Text="Convert" OnClick="btnConvert_Click" />  
</div></span> 

C# part:

using System;
using System.Data;
using System.Configuration;
using System.Collections;
using System.Collections.Generic;
using System.Linq;
using System.Web;
using System.Web.Security;
using System.Web.UI;
using System.Web.UI.WebControls;
using System.Web.UI.WebControls.WebParts;
using System.Web.UI.HtmlControls;
using System.IO;

C#部分使用系统;使用System.Data;使用System.Configuration;使用System.Collections;使用System.Collections.Generic;使用System.Linq;使用System.Web;使用System.Web.Security;使用System.Web.UI;使用System.Web.UI.WebControls;使用System.Web.UI.WebControls.WebParts;使用System.Web.UI.HtmlControls;使用System.IO;

protected void Page_Load(object sender, EventArgs e)  
        {  

        }  

        /// <summary>  
        /// word to Html  
        /// </summary>  
        /// <param name="sender"></param>  
        /// <param name="e"></param>  
        protected void btnConvert_Click(object sender, EventArgs e)  
        {  
            try  
            {  

                //upload  
                //uploadWord(File1);  
                //convert  
                wordToHtml(File1);  
            }  
            catch (Exception ex)  
            {  
                throw ex;  
            }  
            finally  
            {  
                Response.Write("Convert successfully!");  
            }  

        }  

        //upload and convert to html wordToHtml(wordFilePath)  
        ///<summary>  
        ///upload and save as html  
        ///</summary>  
        ///<param name="wordFilePath">word doc file path on client machine</param>  
        ///<returns>html save path</returns>  
        public string wordToHtml(System.Web.UI.HtmlControls.HtmlInputFile wordFilePath)  
        {  
            Microsoft.Office.Interop.Word.ApplicationClass word = new Microsoft.Office.Interop.Word.ApplicationClass();  
            Type wordType = word.GetType();  
            Microsoft.Office.Interop.Word.Documents docs = word.Documents;  

            // open doc file  
            Type docsType = docs.GetType();  

            //upload to server and parse as html  
            string filePath = uploadWord(wordFilePath);  

            //if upload is success  
            if (filePath == "0")  
                return "0";  
            //if file is word  
            if (filePath == "1")  
                return "1";  

            object fileName = filePath;  

            Microsoft.Office.Interop.Word.Document doc = (Microsoft.Office.Interop.Word.Document)docsType.InvokeMember("Open",  
            System.Reflection.BindingFlags.InvokeMethod, null, docs, new Object[] { fileName, true, true });  

            // convert and save  
            Type docType = doc.GetType();  

            string filename = System.DateTime.Now.Year.ToString() + System.DateTime.Now.Month.ToString() + System.DateTime.Now.Day.ToString() +  
            System.DateTime.Now.Hour.ToString() + System.DateTime.Now.Minute.ToString() + System.DateTime.Now.Second.ToString();  

            // if directory exist, create if not  
            if (!Directory.Exists(Server.MapPath("~\\html")))  
            {  
                // create upload directory  
                Directory.CreateDirectory(Server.MapPath("~\\html"));  
            }  

            //html save location  
            string ConfigPath = HttpContext.Current.Server.MapPath("html/" + filename + ".html");  
            object saveFileName = ConfigPath;  

            /*For Microsoft Word 9 Object Library, if using 10, then use below: 
         * docType.InvokeMember("SaveAs", System.Reflection.BindingFlags.InvokeMethod, 
         * null, doc, new object[]{saveFileName, Word.WdSaveFormat.wdFormatFilteredHTML}); 
         * other formats: 
         * wdFormatHTML 
         * wdFormatDocument 
         * wdFormatDOSText 
         * wdFormatDOSTextLineBreaks 
         * wdFormatEncodedText 
         * wdFormatRTF 
         * wdFormatTemplate 
         * wdFormatText 
         * wdFormatTextLineBreaks 
         * wdFormatUnicodeText 
         */  
            docType.InvokeMember("SaveAs", System.Reflection.BindingFlags.InvokeMethod,  
            null, doc, new object[] { saveFileName, Microsoft.Office.Interop.Word.WdSaveFormat.wdFormatFilteredHTML });  

            //close document 
            docType.InvokeMember("Close", System.Reflection.BindingFlags.InvokeMethod,  
            null, doc, new object[] { null, null, null });  

            // exit Word  
            wordType.InvokeMember("Quit", System.Reflection.BindingFlags.InvokeMethod, null, word, null);  
            //goto converted html page  
            return ("/" + filename + ".html");  

        }  


        public string uploadWord(System.Web.UI.HtmlControls.HtmlInputFile uploadFiles)  
        {  
            if (uploadFiles.PostedFile != null)  
            {  
                string fileName = uploadFiles.PostedFile.FileName;  

                int extendNameIndex = fileName.LastIndexOf(".");  
                string extendName = fileName.Substring(extendNameIndex);  
                string newName = "";  
                try  
                {  
                    //check if is word format  
                    if (extendName == ".doc" || extendName == ".docx")  
                    {  

                        DateTime now = DateTime.Now;  
                        newName = now.DayOfYear.ToString() + uploadFiles.PostedFile.ContentLength.ToString();  

                        // check if directory exist, create one if not  
                        if (!Directory.Exists(Server.MapPath("~\\wordTmp")))  
                        {  
                            // create upload directory  
                            Directory.CreateDirectory(Server.MapPath("~\\wordTmp"));  
                        }  

                        //upload path, wordTemp relative to parrent  
                        uploadFiles.PostedFile.SaveAs(System.Web.HttpContext.Current.Server.MapPath("wordTmp/" + newName + extendName));  
                    }  
                    else  
                    {  
                        return "1";  
                    }  
                }  
                catch  
                {  
                    return "0";  
                }  
                //return "http://" + HttpContext.Current.Request.Url.Host + HttpContext.Current.Request.ApplicationPath + "/wordTmp/" + newName + extendName;  
                return System.Web.HttpContext.Current.Server.MapPath("wordTmp/" + newName + extendName);  
            }  
            else  
            {  
                return "0";  
            }  
        }</span>  

#2


0  

This is a simple trick: just copy the Word table and paste it into Dreamweaver in Design mode. When you switch to Code mode you will see all the HTML tags; from there you can copy that code and paste it into your C# file.

这是一个简单的技巧,只需在Dreamweaver中复制word表和粘贴 - >设计模式。当您在代码模式中看到时,您将从那里获得所有html标记,您只需将该代码复制并粘贴到c#文件中即可。

Enjoy.

#3


0  

Thanks for the reply. I found a way to convert Word tables to HTML.
I wrote this code:

谢谢回复,我找到了一种将Word表格转换为HTML的方法。我写了这段代码:

private static void ConvertTableToHTML()
    {
        try
        {
            foreach (Table tb in Common.WordApplication.ActiveDocument.Tables)
            {

                for (int r = 1; r <= tb.Rows.Count; r++)
                {
                    for (int c = 1; c <= tb.Columns.Count; c++)
                    {
                        try
                        {
                            Cell cell = tb.Cell(r, c);
                            foreach (Paragraph paragraph in cell.Range.Paragraphs)
                            {
                                Tagging(paragraph.Range, "P");
                            }
                            Tagging(cell.Range, "TD");                                
                        }
                        catch (Exception e)
                        {
                            if (e.Message.Contains("The requested member of the collection does not exist."))
                            {
                                //Most likely a part of a merged cell, so skip over.
                            }
                            else throw;
                        }
                    }
                    try
                    {
                        Row row = tb.Rows[r];
                        Tagging(row.Range, "TR");                            
                    }
                    catch (Exception ex)
                    {
                        bool initialTrTagInserted = false;
                        int columnsIndex = 1;
                        int columnsCount = tb.Columns.Count;
                        while (!initialTrTagInserted && columnsIndex <= columnsCount)
                        {
                            try
                            {
                                Cell cell = tb.Cell(r, columnsIndex);
                                cell.Range.InsertBefore("<TR>");
                                initialTrTagInserted = true;
                            }
                            catch (Exception e)
                            {
                            }
                            columnsIndex++;
                        }

                        columnsIndex = tb.Columns.Count;
                        bool endTrTagInserted = false;
                        while (!endTrTagInserted && columnsIndex >= 1)
                        {
                            try
                            {
                                Cell cell = tb.Cell(r, columnsIndex);
                                cell.Range.InsertAfter("</TR>");
                                endTrTagInserted = true;
                            }
                            catch (Exception e)
                            {
                            }
                            columnsIndex--;
                        }
                    }
                }
                Common.Tagging2(tb.Range, "Table");                    

                object separator = "";
                object nestedTable = true;
                tb.ConvertToText(separator, nestedTable);
            }
        }
        catch (Exception ex) { MessageBox.Show(ex.Message); }
    }


public static void Tagging(Range range, string TagName)
    {
        try
        {
            range.InsertBefore("<" + TagName + ">");
            range.InsertAfter("</" + TagName + ">");
        }
        catch (Exception ex) { throw new Exception(ex.Message); }
    }