This commit is contained in:
Daniel 2024-11-17 01:33:21 +08:00
parent 93b687a407
commit 6a442ad85d
No known key found for this signature in database
GPG key ID: 86211BA83DF03017
23 changed files with 259 additions and 15 deletions

View file

@ -24,6 +24,7 @@ import (
"errors"
"fmt"
"math"
mathRand "math/rand"
"mime"
"net/http"
"os"
@ -59,6 +60,116 @@ import (
"github.com/studio-b12/gowebdav"
)
// AutoPurgeRepoJob 自动清理数据仓库 https://github.com/siyuan-note/siyuan/issues/13091
func AutoPurgeRepoJob() {
task.AppendTaskWithTimeout(task.RepoAutoPurge, 12*time.Hour, autoPurgeRepo, true)
}
var (
autoPurgeRepoAfterFirstSync = false
lastAutoPurgeRepo = time.Time{}
)
func autoPurgeRepo(cron bool) {
if cron && !autoPurgeRepoAfterFirstSync {
return
}
if time.Since(lastAutoPurgeRepo) < 6*time.Hour {
return
}
autoPurgeRepoAfterFirstSync = true
defer func() {
lastAutoPurgeRepo = time.Now()
}()
if 1 > len(Conf.Repo.Key) {
return
}
repo, err := newRepository()
if err != nil {
return
}
now := time.Now()
dateGroupedIndexes := map[string][]*dejavu.Log{} // 按照日期分组
// 收集指定日期内需要保留的索引
var date string
page := 1
for {
indexLogs, pageCount, _, err := repo.GetIndexLogs(page, 512)
if nil != err {
logging.LogErrorf("get data repo index logs failed: %s", err)
return
}
if 1 > len(indexLogs) {
break
}
tooOld := false
for _, index := range indexLogs {
if now.UnixMilli()-index.Created <= int64(Conf.Repo.IndexRetentionDays)*24*60*60*1000 {
date = time.UnixMilli(index.Created).Format("2006-01-02")
if _, ok := dateGroupedIndexes[date]; !ok {
dateGroupedIndexes[date] = []*dejavu.Log{}
}
dateGroupedIndexes[date] = append(dateGroupedIndexes[date], index)
} else {
tooOld = true
break
}
}
if tooOld {
break
}
page++
if page > pageCount {
break
}
}
// 筛选出每日需要保留的索引
var retentionIndexIDs []string
for _, indexes := range dateGroupedIndexes {
if len(indexes) <= Conf.Repo.RetentionIndexesDaily {
continue
}
keepIndexes := hashset.New()
keepIndexes.Add(indexes[0]) // 每天最后一个固定保留
// 随机保留指定数量的索引
for i := 0; i < Conf.Repo.RetentionIndexesDaily*7; i++ {
keepIndexes.Add(indexes[mathRand.Intn(len(indexes)-1)])
if keepIndexes.Size() >= Conf.Repo.RetentionIndexesDaily {
break
}
}
for _, keepIndex := range keepIndexes.Values() {
retentionIndexIDs = append(retentionIndexIDs, keepIndex.(*dejavu.Log).ID)
}
}
retentionIndexIDs = gulu.Str.RemoveDuplicatedElem(retentionIndexIDs)
if 1 > len(retentionIndexIDs) {
logging.LogInfof("no index to purge [ellapsed=%.2fs]", time.Since(now).Seconds())
return
}
stat, err := repo.Purge(retentionIndexIDs...)
if err != nil {
return
}
deletedIndexes := stat.Indexes
deletedObjects := stat.Objects
deletedSize := humanize.BytesCustomCeil(uint64(stat.Size), 2)
logging.LogInfof("purge data repo completed [ellapsed=%.2fs, indexes=%d, objects=%d, size=%s]",
time.Since(now).Seconds(), deletedIndexes, deletedObjects, deletedSize)
}
func GetRepoFile(fileID string) (ret []byte, p string, err error) {
if 1 > len(Conf.Repo.Key) {
err = errors.New(Conf.Language(26))
@ -518,6 +629,7 @@ func PurgeRepo() (err error) {
return
}
now := time.Now()
stat, err := repo.Purge()
if err != nil {
return
@ -526,6 +638,8 @@ func PurgeRepo() (err error) {
deletedIndexes := stat.Indexes
deletedObjects := stat.Objects
deletedSize := humanize.BytesCustomCeil(uint64(stat.Size), 2)
logging.LogInfof("purge data repo completed [ellapsed=%.2fs, indexes=%d, objects=%d, size=%s]",
time.Since(now).Seconds(), deletedIndexes, deletedObjects, deletedSize)
msg = fmt.Sprintf(Conf.Language(203), deletedIndexes, deletedObjects, deletedSize)
util.PushMsg(msg, 7000)
return
@ -1348,8 +1462,12 @@ func syncRepo(exit, byHand bool) (dataChanged bool, err error) {
processSyncMergeResult(exit, byHand, mergeResult, trafficStat, "a", elapsed)
if !exit {
// 首次数据同步执行完成后再执行索引订正 Index fixing should not be performed before data synchronization https://github.com/siyuan-note/siyuan/issues/10761
go checkIndex()
go func() {
// 首次数据同步执行完成后再执行索引订正 Index fixing should not be performed before data synchronization https://github.com/siyuan-note/siyuan/issues/10761
checkIndex()
// 索引订正结束后执行数据仓库清理 Automatic purge for local data repo https://github.com/siyuan-note/siyuan/issues/13091
autoPurgeRepo(false)
}()
}
return
}