EXCEL VBA调用adobe的api识别电子PDF发票里内容并登记台账

时间:2024-01-27 09:03:44

EXCEL VBA调用adobe的api识别电子PDF发票里内容并登记台账

实现效果如上
代码如下


使用须知:
1、工具--引用里勾选[Adobe Acrobat 10.0 Type Library]
2、安装Adobe Acrobat pro软件

Dim sht As Worksheet
Function BrowseFolders() As String  '浏览目录
    Dim objshell As Object
    Dim objFolder As Object
    Set objshell = CreateObject("Shell.Application")
    Set objFolder = objshell.BrowseForFolder(0, "请指定发票文件所在的文件夹", 0, 0)
    BrowseFolders = ""
    If Not objFolder Is Nothing Then
        BrowseFolders = objFolder.Self.Path
    End If
    Set objFolder = Nothing
    Set objshell = Nothing
End Function

Sub cmd_getpdf_Click()
    Dim Pth As String '文件路径
    Dim PDFName As String, Wapp As Object, Mstr As String
    Application.ScreenUpdating = False
    '============================================
    Pth = BrowseFolders
    If Pth = "" Then
        Pth = Sheet1.Range("A9").Text
    End If
    If Pth = "" Then
        Pth = ThisWorkbook.Path
    End If
    If Right(Pth, 1) <> "\" Then Pth = Pth & "\"
    Sheet1.Range("A8") = "上次路径:"
    Sheet1.Range("A9") = Pth
    Sheet1.Range("a15:a10000") = ""
    If Dir(Pth & "*.pdf") = "" Then
        MsgBox "指定目录没有找到发票PDF文件!"
        Sheet1.Range("A9") = ""
        Exit Sub
    End If
    'Debug.Print Pth
    '============================================
    For Each sht In ThisWorkbook.Sheets
        Application.DisplayAlerts = False
        If sht.Name = "发票资料读取到Excel" Then sht.Delete
        Application.DisplayAlerts = True
    Next
    Set sht = Worksheets.Add(, Worksheets(Sheets.Count))
    sht.Name = "发票资料读取到Excel"
    sht.Range("A1:J1") = Array("发票号码", "发票日期", "货物或*名称", "规格型号", "单位", "数量", "单价", "金额", "税率", "税额")
    '============================================定义表头字段
    PDFName = Dir(Pth & "*.pdf")
    Do While PDFName <> ""
          Call Imp_Into_XL(Pth & PDFName)
          PDFName = Dir
    Loop
    sht.Columns.AutoFit
    MsgBox "操作完成!"
    '============================================
    Application.ScreenUpdating = True
End Sub

Sub Imp_Into_XL(PDF_File As String)
    Dim AC_PD As Acrobat.AcroPDDoc
    Dim AC_Hi As Acrobat.AcroHiliteList
    Dim AC_PG As Acrobat.AcroPDPage
    Dim AC_PGTxt As Acrobat.AcroPDTextSelect
    Dim Yes_Fir As Boolean
    Dim Ct_Page As Long
    Dim i As Long, j As Long, k As Long, m As Integer
    Dim T_Str As String
    Dim Hld, XL, Brr(), RowNo%, Arr As Variant, sss%
    Dim Hld_Txt As Variant
    Dim FPHM As String   '发票号码
    Dim FPRQ As String   '发票日期
    Dim GGXH As String   '规格型号
    Dim HWMC As String   '货物名称
    Dim SL_SV As String  '数量-税率
    Dim SL_SV_Temp As String  '数量-税率的临时存变量
    Dim HWDW As String   '货物单位
    Dim SL As String     '数量
    Dim DW As String     '单位
    Dim XH As String     '型号
    '====================================================定义字段类型
    Set AC_PD = New Acrobat.AcroPDDoc
    Set AC_Hi = New Acrobat.AcroHiliteList
    AC_Hi.Add 0, 32767
    With AC_PD
        .Open PDF_File
        Ct_Page = .GetNumPages
        If Ct_Page = -1 Then
            MsgBox "请确认发票文件 '" & PDF_File & "'"
            .Close
            GoTo h_end
        End If
        For i = 1 To 1 ' Ct_Page    '只考虑一个文档有一张发票的情形
            T_Str = ""
            Set AC_PG = .AcquirePage(i - 1)
            Set AC_PGTxt = AC_PG.CreateWordHilite(AC_Hi)
            If Not AC_PGTxt Is Nothing Then
                With AC_PGTxt
                    For j = 0 To .GetNumText - 1
                        T_Str = T_Str & .GetText(j)
                    Next j
                End With
            End If
            '==========================================================
            If T_Str <> "" Then
                    Hld_Txt = Split(T_Str, vbCrLf)
                    FPHM = "": FPRQ = "":: GGXH = "": HWMC = ""
                    For j = 0 To UBound(Hld_Txt)
                          If InStr(Hld_Txt(j), "年月日") = 0 Then
                          If InStr(Hld_Txt(j), "年") > 0 And InStr(Hld_Txt(j), "月") > 0 And InStr(Hld_Txt(j), "日") > 0 Then   '当字符串里含有年月日时
                             Hld_Txt(j) = Repce2(Hld_Txt(j))
                             Hld_Txt(j) = Trim(Replace(Hld_Txt(j), "开票日期:", ""))            '如果有"开票日期:"几个字,将其替换掉
                             FPRQ = Left(Hld_Txt(j), 4) & "-" & Mid(Hld_Txt(j), 6, 2) & "-" & Mid(Hld_Txt(j), 9, 2)
                             Exit For
                          End If
                          End If
                    Next j
                    For j = 0 To UBound(Hld_Txt)
                      If TestNumber(Hld_Txt(j)) Then   '测试是否含有数字并以数字结尾的类型,加以判断
                        If Len(Hld_Txt(j)) = 10 And TestCH(Hld_Txt(j)) = False Then '当字符串里没有年月日,但是以"2023 06 30"有空格,共有10个字符串位置形式存在时取得发票日期
                          If InStr(Hld_Txt(j), " ") > 0 And UBound(Split(Hld_Txt(j), " ")) > 0 Then
                             FPRQ = "'" & RegR(Hld_Txt(j))    '取得发票日期
                             Exit For
                          End If
                        End If
                      End If
                    Next j
                    For j = 0 To UBound(Hld_Txt)
                       If TestNumber(Hld_Txt(j)) Then   '测试是否含有数字并以数字结尾的类型,加以判断
                        Hld_Txt(j) = Trim(Replace(Hld_Txt(j), "发票号码:", ""))            '如果有"发票号码:"几个字,将其替换掉
                        If Len(Hld_Txt(j)) = 8 Or Len(Hld_Txt(j)) = 20 Then          '//***限定要取出的发票号码为8位或者20位数字,否则发票号码取不出来
                          If IsNumeric(Hld_Txt(j)) Then
                             If InStr(Hld_Txt(j), ".") = 0 And InStr(Hld_Txt(j), ChrW(165)) = 0 Then
                               FPHM = Regs(Hld_Txt(j)) '取得8位或者20位的发票号码
                               Exit For
                             End If
                          End If
                        End If
                       End If
                   Next j
                   k = 0
                   For j = 0 To UBound(Hld_Txt)
                        If Len(Trim(Hld_Txt(j))) > 2 Then        '//***当字符数大于2,有的只有一个*,这种情形需要排除
                        If Left(Trim(Hld_Txt(j)), 1) = "*" Or InStr(Hld_Txt(j), "详见") > 0 Then  '////当货物名称前面第一个字符是*号或者含有(详见)时
                        Arr = Array("+", "<", ">")   '/***密码区有许多有这几个符号,遇到了就避开它
                        sss = 0
                        For m = LBound(Arr) To UBound(Arr)               '//***避免遇到密码区以*号开头,并且有Arr数组里符号的情形
                          If InStr(Hld_Txt(j), Arr(m)) > 0 Then sss = sss + 1
                        Next m
                        If sss = 0 Then
                           Hld_Txt(j) = Trim(Hld_Txt(j))                '清除前后空格
                           Hld_Txt(j) = StrConv(Hld_Txt(j), vbNarrow)   '全角转为半角
                           Hld_Txt(j) = Repce(Hld_Txt(j))               '将字符串中多个空格变成一个
                           If InStr(Hld_Txt(j), "%") > 0 Or Right(Trim(Hld_Txt(j)), 1) = "*" Then
                              For m = UBound(Split(Hld_Txt(j), " ")) To 0 Step -1
                                  If TestCHNum(Split(Hld_Txt(j), " ")(m)) = False Or Trim(Split(Hld_Txt(j), " ")(m)) = "*" Then   '循环判定,取出有数字的数量-税额部分//有部分的金额和税额是*号
                                     If TestCH(Split(Hld_Txt(j), " ")(m)) = True And InStr(Hld_Txt(j), "不征税") = 0 Then Exit For
                                     SL_SV = Split(Hld_Txt(j), " ")(m) & " " & SL_SV
                                     SL_SV_Temp = Split(Hld_Txt(j), " ")(m) & " " & SL_SV_Temp       '增加这个变量,存下原始的数量金额部分
                                     If InStr(SL_SV, "不征税") > 0 And Len(SL_SV) > 3 Then SL_SV = Left(SL_SV, InStr(SL_SV, "税")) & " " & Right(SL_SV, Len(SL_SV) - InStr(SL_SV, "税"))
                                     SL_SV = Trim(SL_SV)
                                     SL_SV_Temp = Trim(SL_SV_Temp)
                                     If m < UBound(Split(Hld_Txt(j), " ")) And Split(Hld_Txt(j), " ")(m) < 0 Then Exit For
                                  ElseIf TestCHNum(Split(Hld_Txt(j), " ")(m)) = True Then    '循环判定,如含有中文+数字,则需拆分
                                     SL_SV = RegSL(Split(Hld_Txt(j), " ")(m)) & " " & SL_SV
                                     SL_SV_Temp = RegSL(Split(Hld_Txt(j), " ")(m)) & " " & SL_SV_Temp
                                     SL_SV = Trim(SL_SV)
                                     SL_SV_Temp = Trim(SL_SV_Temp)
                                     Exit For
                                  End If
                              Next m
                              SL_SV = Repce(SL_SV): SL_SV_Temp = Repce(SL_SV_Temp)  '用原始的数量金额部分来方便取出GGXH
                              GGXH = Trim(Replace(Hld_Txt(j), SL_SV_Temp, ""))  '去掉数量-税额部分,下余的是规格型号   ////***前面做过变动后,这里用replace取不出余下的规格型号
                              SL_SV_Temp = ""
                              SL_SV = SL_JE(SL_SV)                         '数量-税额部分,不能用trim去掉前面空格
                              If InStr(GGXH, "费") > 0 Then
                                 HWMC = Left(GGXH, InStr(GGXH, "费"))         '货物名称,有费字的取费字前面字符(含费字)作为货物名称
                                 GGXH = Trim(Replace(GGXH, HWMC, ""))         '费字后面的是规格型号+单位
                              Else
                                 If InStr(GGXH, " ") = 0 Then
                                    HWMC = GGXH: GGXH = ""                   '规格型号没有包含空格时,货物名称就取ggxh,将原来的ggxh置空
                                 Else
                                    HWMC = Split(GGXH, " ")(0)               '规格型号有包含空格时,货物名称取ggxh的第一个空格前的字符
                                    GGXH = Trim(Replace(GGXH, HWMC, ""))     '规格型号取除了货物名称后的余下的值
                                 End If
                              End If
                              If InStr(GGXH, " ") = 0 Then          '当规格型号没有空格时********
                                 Select Case Len(GGXH)
                                     Case Is = 0                    '当费后面的字符数量为0时
                                        If Split(SL_SV, " ")(0) = "" Then     '当数据部分第一个字符为空时,货物名称就只为货物名称
                                           HWMC = HWMC & " " & " "
                                        Else                                '当数据部分第一个字符不为空时,货物名称取最后一个值为单位,次一个值为规格型号
                                           If Mid(HWMC, Len(HWMC) - 1, 2) = "服务" Or InStr(HWMC, "费") > 0 Then
                                              HWMC = HWMC & " " & " "                '当货物名称最后两个字是"服务"时或含有"费",已经不能拆开了.
                                           ElseIf InStr(HWMC, "费") = 0 Then
                                              DW = Right(HWMC, 1)                      '取右边一位做单位*****
                                              XH = Mid(HWMC, Len(HWMC) - 1, 1)
                                              HWMC = Left(HWMC, Len(HWMC) - 2)
                                              If InStr(HWMC, XH & DW) > 0 Or InStr(HWMC, XH) > 0 Or InStr(HWMC, DW) > 0 Then
                                                 HWMC = HWMC & XH & DW & " " & " "
                                              Else
                                                 HWMC = HWMC & " " & XH & " " & DW
                                              End If
                                           End If
                                        End If
                                     Case Is >= 1                   '当费后面的字符数量为1或者大于1时
                                        DW = Right(GGXH, 1)                         '取右边一位做单位
                                        XH = Replace(GGXH, DW, "")               '余下的是型号
                                        If Split(SL_SV, " ")(0) = "" Then
                                           HWMC = HWMC & " " & " "
                                        Else
                                           If XH <> "" Then
                                              HWMC = HWMC & " " & XH & " " & DW
                                           Else
                                              HWMC = HWMC & " " & " " & DW
                                           End If
                                        End If
                                 End Select
                              ElseIf InStr(GGXH, " ") > 0 Then       '当规格型号有空格时
                                 If Split(SL_SV, " ")(0) <> "" Then
                                    HWDW = Split(GGXH, " ")(UBound(Split(GGXH, " ")))     '单位
                                    If Len(HWDW) > 1 Then
                                       HWDW = Right(HWDW, 1)
                                       GGXH = Replace(GGXH, HWDW, "")
                                       GGXH = Replace(GGXH, " ", "_")
                                       HWMC = HWMC & " " & GGXH & " " & HWDW
                                    Else
                                       XH = Trim(Replace(GGXH, HWDW, ""))             '规格型号
                                       If XH = "" Then
                                          If Len(HWDW) > 1 Then
                                             DW = Right(HWDW, 1)
                                             XH = Replace(HWDW, DW, "")
                                             HWMC = HWMC & " " & XH & " " & DW
                                          ElseIf Len(HWDW) = 1 Then
                                             HWMC = HWMC & " " & " " & DW
                                          End If
                                       Else
                                          DW = HWDW
                                          XH = Trim(Replace(XH, " ", "_"))                 '去掉规格型号中的空格,用下横线连接
                                          HWMC = HWMC & " " & XH & " " & DW
                                       End If
                                     End If
                                 ElseIf Split(SL_SV, " ")(0) = "" Then
                                     XH = Replace(GGXH, " ", "_")                 '去掉规格型号中的空格,用下横线连接
                                     HWMC = HWMC & " " & XH & " "                 '没有单位,要加上表示单位的空格
                                 End If
                              End If
                           ElseIf UBound(Split(Hld_Txt(j), " ")) <= 2 And InStr(Hld_Txt(j), "%") = 0 Then           '当品名与数量金额等不在同一行时
                              HWMC = Hld_Txt(j)
                              For m = j To UBound(Hld_Txt