diff --git a/kernel/go.mod b/kernel/go.mod index f52c69d5b..97bf04e72 100644 --- a/kernel/go.mod +++ b/kernel/go.mod @@ -7,7 +7,7 @@ require ( github.com/88250/css v0.1.2 github.com/88250/gulu v1.2.3-0.20221117052724-cd06804db798 github.com/88250/lute v1.7.6-0.20230220030205-b0f64d7ba66e - github.com/88250/pdfcpu v0.3.14-0.20230223031826-d2ae187e1c38 + github.com/88250/pdfcpu v0.3.14-0.20230223050947-68dec81c7661 github.com/88250/vitess-sqlparser v0.0.0-20210205111146-56a2ded2aba1 github.com/ClarkThan/ahocorasick v0.0.0-20230216061320-bccdb98581a3 github.com/ConradIrwin/font v0.0.0-20210318200717-ce8d41cc0732 diff --git a/kernel/go.sum b/kernel/go.sum index cfc52a2ba..ce5cbe5ea 100644 --- a/kernel/go.sum +++ b/kernel/go.sum @@ -12,6 +12,8 @@ github.com/88250/lute v1.7.6-0.20230220030205-b0f64d7ba66e h1:7UgFzsksh+z6IX2z+B github.com/88250/lute v1.7.6-0.20230220030205-b0f64d7ba66e/go.mod h1:cEoBGi0zArPqAsp0MdG9SKinvH/xxZZWXU7sRx8vHSA= github.com/88250/pdfcpu v0.3.14-0.20230223031826-d2ae187e1c38 h1:MaFRabDTXOpLBrdP4qkZnjFBIUTu/rk8S6fu7hC6jCY= github.com/88250/pdfcpu v0.3.14-0.20230223031826-d2ae187e1c38/go.mod h1:S5YT38L/GCjVjmB4PB84PymA1qfopjEhfhTNQilLpv4= +github.com/88250/pdfcpu v0.3.14-0.20230223050947-68dec81c7661 h1:s8YOfk7TpajM8SBivP0ReIHmNfMQu20hWgEBc98D14w= +github.com/88250/pdfcpu v0.3.14-0.20230223050947-68dec81c7661/go.mod h1:S5YT38L/GCjVjmB4PB84PymA1qfopjEhfhTNQilLpv4= github.com/88250/vitess-sqlparser v0.0.0-20210205111146-56a2ded2aba1 h1:48T899JQDwyyRu9yXHePYlPdHtpJfrJEUGBMH3SMBWY= github.com/88250/vitess-sqlparser v0.0.0-20210205111146-56a2ded2aba1/go.mod h1:U3pckKQIgxxkmZjV5yXQjHdGxQK0o/vEZeZ6cQsxfHw= github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo= diff --git a/kernel/model/export.go b/kernel/model/export.go index b1a14c1c9..7fb32fc4c 100644 --- a/kernel/model/export.go +++ b/kernel/model/export.go @@ -655,38 +655,11 @@ func processIFrame(tree *parse.Tree) { } func ProcessPDF(id, p string, merge, removeAssets bool) (err error) { - inFile := p - links, err := api.ListToCLinks(inFile) - if nil != err { - return - } - - sort.Slice(links, func(i, j int) bool { - return links[i].Page < links[j].Page - }) - - bms := map[string]*pdfcpu.Bookmark{} - for _, link := range links { - linkID := link.URI[strings.LastIndex(link.URI, "/")+1:] - b := sql.GetBlock(linkID) - if nil == b { - logging.LogWarnf("pdf outline block [%s] not found", linkID) - continue - } - title := b.Content - title, _ = url.QueryUnescape(title) - bm := &pdfcpu.Bookmark{ - Title: title, - PageFrom: link.Page, - AbsPos: link.Rect.UR.Y, - } - bms[linkID] = bm - } - tree, _ := loadTreeByBlockID(id) if nil == tree { return } + if merge { var mergeErr error tree, mergeErr = mergeSubDocs(tree) @@ -721,239 +694,271 @@ func ProcessPDF(id, p string, merge, removeAssets bool) (err error) { return ast.WalkContinue }) - if 0 < len(bms) { - var topBms []*pdfcpu.Bookmark - stack := linkedliststack.New() - for _, h := range headings { - L: - for ; ; stack.Pop() { - cur, ok := stack.Peek() - if !ok { - bm := bms[h.ID] - if nil == bm { - break L - } - bm.Level = h.HeadingLevel - stack.Push(bm) - topBms = append(topBms, bm) - break L - } - - tip := cur.(*pdfcpu.Bookmark) - if tip.Level < h.HeadingLevel { - bm := bms[h.ID] - bm.Level = h.HeadingLevel - bm.Parent = tip - tip.Children = append(tip.Children, bm) - stack.Push(bm) - break L - } - } - } - - outFile := inFile + ".tmp" - err = api.AddBookmarksFile(inFile, outFile, topBms, nil) - if nil != err { - logging.LogErrorf("add bookmark failed: %s", err) - return - } - - err = os.Rename(outFile, inFile) - if nil != err { - return - } - } - - var assetAbsPaths []string - for _, dest := range assetDests { - absPath, _ := GetAssetAbsPath(dest) - if "" != absPath { - assetAbsPaths = append(assetAbsPaths, absPath) - } - } - - pdfCtx, ctxErr := api.ReadContextFile(inFile) + pdfCtx, ctxErr := api.ReadContextFile(p) if nil != ctxErr { logging.LogErrorf("read pdf context failed: %s", ctxErr) return } - if 0 < len(assetAbsPaths) { - assetLinks, otherLinks, listErr := api.ListLinks(inFile) - if nil != listErr { - logging.LogErrorf("list asset links failed: %s", listErr) - return - } + processPDFBookmarks(pdfCtx, headings) + processPDFLinkEmbedAssets(pdfCtx, assetDests, removeAssets) - if _, removeErr := pdfCtx.RemoveAnnotations(nil, nil, nil, false); nil != removeErr { - logging.LogWarnf("remove annotations failed: %s", removeErr) - } + pdfcpu.VersionStr = "SiYuan v" + util.Ver + if writeErr := api.WriteContextFile(pdfCtx, p); nil != writeErr { + logging.LogErrorf("write pdf context failed: %s", writeErr) + return + } + return +} - linkMap := map[int][]pdfcpu.AnnotationRenderer{} - for _, link := range otherLinks { - link.URI, _ = url.PathUnescape(link.URI) +func processPDFBookmarks(pdfCtx *pdfcpu.Context, headings []*ast.Node) { + links, err := api.ListToCLinks(pdfCtx) + if nil != err { + return + } + + sort.Slice(links, func(i, j int) bool { + return links[i].Page < links[j].Page + }) + + bms := map[string]*pdfcpu.Bookmark{} + for _, link := range links { + linkID := link.URI[strings.LastIndex(link.URI, "/")+1:] + b := sql.GetBlock(linkID) + if nil == b { + logging.LogWarnf("pdf outline block [%s] not found", linkID) + continue + } + title := b.Content + title, _ = url.QueryUnescape(title) + bm := &pdfcpu.Bookmark{ + Title: title, + PageFrom: link.Page, + AbsPos: link.Rect.UR.Y, + } + bms[linkID] = bm + } + + if 1 > len(bms) { + return + } + + var topBms []*pdfcpu.Bookmark + stack := linkedliststack.New() + for _, h := range headings { + L: + for ; ; stack.Pop() { + cur, ok := stack.Peek() + if !ok { + bm := bms[h.ID] + if nil == bm { + break L + } + bm.Level = h.HeadingLevel + stack.Push(bm) + topBms = append(topBms, bm) + break L + } + + tip := cur.(*pdfcpu.Bookmark) + if tip.Level < h.HeadingLevel { + bm := bms[h.ID] + bm.Level = h.HeadingLevel + bm.Parent = tip + tip.Children = append(tip.Children, bm) + stack.Push(bm) + break L + } + } + } + + err = pdfCtx.AddBookmarks(topBms) + if nil != err { + logging.LogErrorf("add bookmark failed: %s", err) + return + } +} + +// processPDFLinkEmbedAssets 处理资源文件超链接,根据 removeAssets 参数决定是否将资源文件嵌入到 PDF 中。 +// 导出 PDF 时支持将资源文件作为附件嵌入 https://github.com/siyuan-note/siyuan/issues/7414 +func processPDFLinkEmbedAssets(pdfCtx *pdfcpu.Context, assetDests []string, removeAssets bool) { + var assetAbsPaths []string + for _, dest := range assetDests { + if absPath, _ := GetAssetAbsPath(dest); "" != absPath { + assetAbsPaths = append(assetAbsPaths, absPath) + } + } + + if 1 > len(assetAbsPaths) { + return + } + + assetLinks, otherLinks, listErr := api.ListLinks(pdfCtx) + if nil != listErr { + logging.LogErrorf("list asset links failed: %s", listErr) + return + } + + if _, removeErr := pdfCtx.RemoveAnnotations(nil, nil, nil, false); nil != removeErr { + logging.LogWarnf("remove annotations failed: %s", removeErr) + } + + linkMap := map[int][]pdfcpu.AnnotationRenderer{} + for _, link := range otherLinks { + link.URI, _ = url.PathUnescape(link.URI) + if 1 > len(linkMap[link.Page]) { + linkMap[link.Page] = []pdfcpu.AnnotationRenderer{link} + } else { + linkMap[link.Page] = append(linkMap[link.Page], link) + } + } + + attachmentMap := map[int][]*pdfcpu.IndirectRef{} + now := pdfcpu.StringLiteral(pdfcpu.DateString(time.Now())) + for _, link := range assetLinks { + link.URI = strings.ReplaceAll(link.URI, "http://127.0.0.1:6806/export/temp/", "") + link.URI, _ = url.PathUnescape(link.URI) + + if !removeAssets { + // 不移除资源文件夹的话将超链接指向资源文件夹 if 1 > len(linkMap[link.Page]) { linkMap[link.Page] = []pdfcpu.AnnotationRenderer{link} } else { linkMap[link.Page] = append(linkMap[link.Page], link) } + + continue } - attachmentMap := map[int][]*pdfcpu.IndirectRef{} - now := pdfcpu.StringLiteral(pdfcpu.DateString(time.Now())) - for _, link := range assetLinks { - link.URI = strings.ReplaceAll(link.URI, "http://127.0.0.1:6806/export/temp/", "") - link.URI, _ = url.PathUnescape(link.URI) + // 移除资源文件夹的话使用内嵌附件 - if !removeAssets { - // 不移除资源文件夹的话将超链接指向资源文件夹 - if 1 > len(linkMap[link.Page]) { - linkMap[link.Page] = []pdfcpu.AnnotationRenderer{link} - } else { - linkMap[link.Page] = append(linkMap[link.Page], link) - } - - continue - } - - // 移除资源文件夹的话使用内嵌附件 - - absPath, getErr := GetAssetAbsPath(link.URI) - if nil != getErr { - continue - } - - ir, newErr := pdfCtx.XRefTable.NewEmbeddedFileStreamDict(absPath) - if nil != newErr { - logging.LogWarnf("new embedded file stream dict failed: %s", newErr) - continue - } - - fn := filepath.Base(absPath) - fileSpecDict, newErr := pdfCtx.XRefTable.NewFileSpecDict(fn, pdfcpu.EncodeUTF16String(fn), "attached by SiYuan", *ir) - if nil != newErr { - logging.LogWarnf("new file spec dict failed: %s", newErr) - continue - } - - ir, indErr := pdfCtx.XRefTable.IndRefForNewObject(fileSpecDict) - if nil != indErr { - logging.LogWarnf("ind ref for new object failed: %s", indErr) - continue - } - - lx := link.Rect.LL.X + link.Rect.Width() - ly := link.Rect.LL.Y + link.Rect.Height()/2 - ux := lx + link.Rect.Height()/2 - uy := ly + link.Rect.Height()/2 - - d := pdfcpu.Dict( - map[string]pdfcpu.Object{ - "Type": pdfcpu.Name("Annot"), - "Subtype": pdfcpu.Name("FileAttachment"), - "Contents": pdfcpu.StringLiteral(""), - "Rect": pdfcpu.Rect(lx, ly, ux, uy).Array(), - "P": link.P, - "M": now, - "F": pdfcpu.Integer(0), - "Border": pdfcpu.NewIntegerArray(0, 0, 1), - "C": pdfcpu.NewNumberArray(0.5, 0.0, 0.5), - "CA": pdfcpu.Float(0.95), - "CreationDate": now, - "Name": pdfcpu.Name("FileAttachment"), - "FS": *ir, - "NM": pdfcpu.StringLiteral(""), - }, - ) - - ann, indErr := pdfCtx.XRefTable.IndRefForNewObject(d) - if nil != indErr { - logging.LogWarnf("ind ref for new object failed: %s", indErr) - continue - } - - pageDictIndRef, pageErr := pdfCtx.PageDictIndRef(link.Page) - if nil != pageErr { - logging.LogWarnf("page dict ind ref failed: %s", pageErr) - continue - } - - d, defErr := pdfCtx.DereferenceDict(*pageDictIndRef) - if nil != defErr { - logging.LogWarnf("dereference dict failed: %s", defErr) - continue - } - - if 1 > len(attachmentMap[link.Page]) { - attachmentMap[link.Page] = []*pdfcpu.IndirectRef{ann} - } else { - attachmentMap[link.Page] = append(attachmentMap[link.Page], ann) - } + absPath, getErr := GetAssetAbsPath(link.URI) + if nil != getErr { + continue } - if 0 < len(linkMap) { - if _, addErr := pdfCtx.AddAnnotationsMap(linkMap, false); nil != addErr { - logging.LogErrorf("add annotations map failed: %s", addErr) - } + ir, newErr := pdfCtx.XRefTable.NewEmbeddedFileStreamDict(absPath) + if nil != newErr { + logging.LogWarnf("new embedded file stream dict failed: %s", newErr) + continue } - // 添加附件注解指向内嵌的附件 - for page, anns := range attachmentMap { - pageDictIndRef, pageErr := pdfCtx.PageDictIndRef(page) - if nil != pageErr { - logging.LogWarnf("page dict ind ref failed: %s", pageErr) - continue - } + fn := filepath.Base(absPath) + fileSpecDict, newErr := pdfCtx.XRefTable.NewFileSpecDict(fn, pdfcpu.EncodeUTF16String(fn), "attached by SiYuan", *ir) + if nil != newErr { + logging.LogWarnf("new file spec dict failed: %s", newErr) + continue + } - pageDict, defErr := pdfCtx.DereferenceDict(*pageDictIndRef) - if nil != defErr { - logging.LogWarnf("dereference dict failed: %s", defErr) - continue - } + ir, indErr := pdfCtx.XRefTable.IndRefForNewObject(fileSpecDict) + if nil != indErr { + logging.LogWarnf("ind ref for new object failed: %s", indErr) + continue + } - array := pdfcpu.Array{} - for _, ann := range anns { - array = append(array, *ann) - } + lx := link.Rect.LL.X + link.Rect.Width() + ly := link.Rect.LL.Y + link.Rect.Height()/2 + ux := lx + link.Rect.Height()/2 + uy := ly + link.Rect.Height()/2 - obj, found := pageDict.Find("Annots") - if !found { - pageDict.Insert("Annots", array) - pdfCtx.EnsureVersionForWriting() - continue - } + d := pdfcpu.Dict( + map[string]pdfcpu.Object{ + "Type": pdfcpu.Name("Annot"), + "Subtype": pdfcpu.Name("FileAttachment"), + "Contents": pdfcpu.StringLiteral(""), + "Rect": pdfcpu.Rect(lx, ly, ux, uy).Array(), + "P": link.P, + "M": now, + "F": pdfcpu.Integer(0), + "Border": pdfcpu.NewIntegerArray(0, 0, 1), + "C": pdfcpu.NewNumberArray(0.5, 0.0, 0.5), + "CA": pdfcpu.Float(0.95), + "CreationDate": now, + "Name": pdfcpu.Name("FileAttachment"), + "FS": *ir, + "NM": pdfcpu.StringLiteral(""), + }, + ) - ir, ok := obj.(pdfcpu.IndirectRef) - if !ok { - pageDict.Update("Annots", append(obj.(pdfcpu.Array), array...)) - pdfCtx.EnsureVersionForWriting() - continue - } + ann, indErr := pdfCtx.XRefTable.IndRefForNewObject(d) + if nil != indErr { + logging.LogWarnf("ind ref for new object failed: %s", indErr) + continue + } - // Annots array is an IndirectReference. + pageDictIndRef, pageErr := pdfCtx.PageDictIndRef(link.Page) + if nil != pageErr { + logging.LogWarnf("page dict ind ref failed: %s", pageErr) + continue + } - o, err := pdfCtx.Dereference(ir) - if err != nil || o == nil { - continue - } + d, defErr := pdfCtx.DereferenceDict(*pageDictIndRef) + if nil != defErr { + logging.LogWarnf("dereference dict failed: %s", defErr) + continue + } - annots, _ := o.(pdfcpu.Array) - entry, ok := pdfCtx.FindTableEntryForIndRef(&ir) - if !ok { - continue - } - entry.Object = append(annots, array...) + if 1 > len(attachmentMap[link.Page]) { + attachmentMap[link.Page] = []*pdfcpu.IndirectRef{ann} + } else { + attachmentMap[link.Page] = append(attachmentMap[link.Page], ann) + } + } + + if 0 < len(linkMap) { + if _, addErr := pdfCtx.AddAnnotationsMap(linkMap, false); nil != addErr { + logging.LogErrorf("add annotations map failed: %s", addErr) + } + } + + // 添加附件注解指向内嵌的附件 + for page, anns := range attachmentMap { + pageDictIndRef, pageErr := pdfCtx.PageDictIndRef(page) + if nil != pageErr { + logging.LogWarnf("page dict ind ref failed: %s", pageErr) + continue + } + + pageDict, defErr := pdfCtx.DereferenceDict(*pageDictIndRef) + if nil != defErr { + logging.LogWarnf("dereference dict failed: %s", defErr) + continue + } + + array := pdfcpu.Array{} + for _, ann := range anns { + array = append(array, *ann) + } + + obj, found := pageDict.Find("Annots") + if !found { + pageDict.Insert("Annots", array) pdfCtx.EnsureVersionForWriting() + continue } - } - pdfcpu.VersionStr = "SiYuan v" + util.Ver - if writeErr := api.WriteContextFile(pdfCtx, inFile); nil != writeErr { - logging.LogErrorf("write pdf context failed: %s", writeErr) - return - } + ir, ok := obj.(pdfcpu.IndirectRef) + if !ok { + pageDict.Update("Annots", append(obj.(pdfcpu.Array), array...)) + pdfCtx.EnsureVersionForWriting() + continue + } - return + // Annots array is an IndirectReference. + + o, err := pdfCtx.Dereference(ir) + if err != nil || o == nil { + continue + } + + annots, _ := o.(pdfcpu.Array) + entry, ok := pdfCtx.FindTableEntryForIndRef(&ir) + if !ok { + continue + } + entry.Object = append(annots, array...) + pdfCtx.EnsureVersionForWriting() + } } func annotRect(i int, w, h, d, l float64) *pdfcpu.Rectangle {