This commit is contained in:
Daniel 2024-11-17 01:33:21 +08:00
parent 93b687a407
commit 6a442ad85d
No known key found for this signature in database
GPG key ID: 86211BA83DF03017
23 changed files with 259 additions and 15 deletions

View file

@ -29,6 +29,40 @@ import (
"github.com/siyuan-note/siyuan/kernel/util"
)
// setRepoIndexRetentionDays stores the number of days data repo indexes are
// retained (Conf.Repo.IndexRetentionDays) and saves the configuration.
//
// The JSON request body must carry a numeric "days" field. Values below 1 fall
// back to 180. A missing or non-numeric "days" yields an error result instead
// of panicking on the type assertion.
func setRepoIndexRetentionDays(c *gin.Context) {
	ret := gulu.Ret.NewResult()
	defer c.JSON(http.StatusOK, ret)

	arg, ok := util.JsonArg(c, ret)
	if !ok {
		return
	}

	// Numbers in the decoded argument map are read as float64; use a checked
	// assertion so a malformed request cannot crash the handler.
	daysArg, ok := arg["days"].(float64)
	if !ok {
		ret.Code = -1
		ret.Msg = "days must be a number"
		return
	}

	days := int(daysArg)
	if 1 > days {
		days = 180
	}

	model.Conf.Repo.IndexRetentionDays = days
	model.Conf.Save()
}
// setRetentionIndexesDaily stores how many data repo indexes are retained per
// day (Conf.Repo.RetentionIndexesDaily) and saves the configuration.
//
// The JSON request body must carry a numeric "indexes" field. Values below 1
// fall back to 2, the same default this setting gets when the configuration is
// created or initialized. A missing or non-numeric "indexes" yields an error
// result instead of panicking on the type assertion.
func setRetentionIndexesDaily(c *gin.Context) {
	ret := gulu.Ret.NewResult()
	defer c.JSON(http.StatusOK, ret)

	arg, ok := util.JsonArg(c, ret)
	if !ok {
		return
	}

	// Numbers in the decoded argument map are read as float64; use a checked
	// assertion so a malformed request cannot crash the handler.
	indexesArg, ok := arg["indexes"].(float64)
	if !ok {
		ret.Code = -1
		ret.Msg = "indexes must be a number"
		return
	}

	indexes := int(indexesArg)
	if 1 > indexes {
		// 2 is the per-day default; 180 is the default of the retention-days
		// setting and must not be reused here.
		indexes = 2
	}

	model.Conf.Repo.RetentionIndexesDaily = indexes
	model.Conf.Save()
}
func getRepoFile(c *gin.Context) {
// Add internal kernel API `/api/repo/getRepoFile` https://github.com/siyuan-note/siyuan/issues/10101

View file

@ -390,6 +390,8 @@ func ServeAPI(ginServer *gin.Engine) {
ginServer.Handle("POST", "/api/repo/diffRepoSnapshots", model.CheckAuth, model.CheckAdminRole, diffRepoSnapshots)
ginServer.Handle("POST", "/api/repo/openRepoSnapshotDoc", model.CheckAuth, model.CheckAdminRole, openRepoSnapshotDoc)
ginServer.Handle("POST", "/api/repo/getRepoFile", model.CheckAuth, model.CheckAdminRole, getRepoFile)
ginServer.Handle("POST", "/api/repo/setRepoIndexRetentionDays", model.CheckAuth, model.CheckAdminRole, setRepoIndexRetentionDays)
ginServer.Handle("POST", "/api/repo/setRetentionIndexesDaily", model.CheckAuth, model.CheckAdminRole, setRetentionIndexesDaily)
ginServer.Handle("POST", "/api/riff/createRiffDeck", model.CheckAuth, model.CheckAdminRole, model.CheckReadonly, createRiffDeck)
ginServer.Handle("POST", "/api/riff/renameRiffDeck", model.CheckAuth, model.CheckAdminRole, model.CheckReadonly, renameRiffDeck)

View file

@ -29,11 +29,17 @@ type Repo struct {
// If the data repo indexing time is greater than 12s, prompt user to purge the data repo https://github.com/siyuan-note/siyuan/issues/9613
// Supports configuring data sync index time-consuming prompts https://github.com/siyuan-note/siyuan/issues/9698
SyncIndexTiming int64 `json:"syncIndexTiming"`
// 自动清理数据仓库 Automatic purge for local data repo https://github.com/siyuan-note/siyuan/issues/13091
IndexRetentionDays int `json:"indexRetentionDays"` // 索引保留天数
RetentionIndexesDaily int `json:"retentionIndexesDaily"` // 每日保留索引数
}
// NewRepo returns a Repo populated with the default settings: a sync index
// timing of 12 * 1000, 180 index retention days and 2 retained indexes per day.
func NewRepo() *Repo {
	return &Repo{
		// Each field may appear only once in a composite literal; a duplicated
		// SyncIndexTiming entry would not compile.
		SyncIndexTiming:       12 * 1000,
		IndexRetentionDays:    180,
		RetentionIndexesDaily: 2,
	}
}

View file

@ -56,7 +56,7 @@ require (
github.com/sabhiram/go-gitignore v0.0.0-20210923224102-525f6e181f06
github.com/sashabaranov/go-openai v1.29.1
github.com/shirou/gopsutil/v3 v3.24.5
github.com/siyuan-note/dejavu v0.0.0-20241116085410-899893f6ae97
github.com/siyuan-note/dejavu v0.0.0-20241116172754-506198a10836
github.com/siyuan-note/encryption v0.0.0-20231219001248-1e028a4d13b4
github.com/siyuan-note/eventbus v0.0.0-20240627125516-396fdb0f0f97
github.com/siyuan-note/filelock v0.0.0-20240724034355-d1ed7bf21d04

View file

@ -340,8 +340,8 @@ github.com/shopspring/decimal v1.4.0/go.mod h1:gawqmDU56v4yIKSwfBSFip1HdCCXN8/+D
github.com/shurcooL/gofontwoff v0.0.0-20181114050219-180f79e6909d h1:lvCTyBbr36+tqMccdGMwuEU+hjux/zL6xSmf5S9ITaA=
github.com/shurcooL/gofontwoff v0.0.0-20181114050219-180f79e6909d/go.mod h1:05UtEgK5zq39gLST6uB0cf3NEHjETfB4Fgr3Gx5R9Vw=
github.com/simplereach/timeutils v1.2.0/go.mod h1:VVbQDfN/FHRZa1LSqcwo4kNZ62OOyqLLGQKYB3pB0Q8=
github.com/siyuan-note/dejavu v0.0.0-20241116085410-899893f6ae97 h1:JkJG+dYDHSPCrzKXfi/yczlobfgiDSDHoS6ozRMNRNI=
github.com/siyuan-note/dejavu v0.0.0-20241116085410-899893f6ae97/go.mod h1:sVINGoilQS1l5ZQJJBHzUwKmyHhx8qdQps7gKqZVbgU=
github.com/siyuan-note/dejavu v0.0.0-20241116172754-506198a10836 h1:+hHN29Ifk2KqdXFgFIj++3bgqEOv8SjHClmFcSvj0eE=
github.com/siyuan-note/dejavu v0.0.0-20241116172754-506198a10836/go.mod h1:sVINGoilQS1l5ZQJJBHzUwKmyHhx8qdQps7gKqZVbgU=
github.com/siyuan-note/encryption v0.0.0-20231219001248-1e028a4d13b4 h1:kJaw5L/evyW6LcB9IQT8PR4ppx8JVqOFP9Ix3rfwSrc=
github.com/siyuan-note/encryption v0.0.0-20231219001248-1e028a4d13b4/go.mod h1:UYcCCY+0wh+GmUoDOaO63j1sV5lgy7laLAk1XhEiUis=
github.com/siyuan-note/eventbus v0.0.0-20240627125516-396fdb0f0f97 h1:lM5v8BfNtbOL5jYwhCdMYBcYtr06IYBKjjSLAPMKTM8=

View file

@ -42,6 +42,7 @@ func StartCron() {
go every(30*time.Second, model.OCRAssetsJob)
go every(30*time.Second, model.FlushAssetsTextsJob)
go every(30*time.Second, model.HookDesktopUIProcJob)
go every(24*time.Hour, model.AutoPurgeRepoJob)
}
func every(interval time.Duration, f func()) {

View file

@ -385,6 +385,12 @@ func InitConf() {
if 12000 > Conf.Repo.SyncIndexTiming {
Conf.Repo.SyncIndexTiming = 12 * 1000
}
if 1 > Conf.Repo.IndexRetentionDays {
Conf.Repo.IndexRetentionDays = 180
}
if 1 > Conf.Repo.RetentionIndexesDaily {
Conf.Repo.RetentionIndexesDaily = 2
}
if nil == Conf.Search {
Conf.Search = conf.NewSearch()

View file

@ -24,6 +24,7 @@ import (
"errors"
"fmt"
"math"
mathRand "math/rand"
"mime"
"net/http"
"os"
@ -59,6 +60,116 @@ import (
"github.com/studio-b12/gowebdav"
)
// AutoPurgeRepoJob queues the automatic data repo purge as a
// task.RepoAutoPurge task with a 12-hour timeout.
// See https://github.com/siyuan-note/siyuan/issues/13091
func AutoPurgeRepoJob() {
	const purgeTimeout = 12 * time.Hour

	// The trailing true is handed to the task queue as the handler argument;
	// presumably it becomes autoPurgeRepo's cron flag (confirm against the task package).
	task.AppendTaskWithTimeout(task.RepoAutoPurge, purgeTimeout, autoPurgeRepo, true)
}
// Bookkeeping shared by all autoPurgeRepo invocations.
var (
	// autoPurgeRepoAfterFirstSync starts out false and is set by autoPurgeRepo
	// once a run gets past its throttling checks; cron-triggered runs are
	// skipped while it is still false.
	autoPurgeRepoAfterFirstSync bool

	// lastAutoPurgeRepo is the time the most recent autoPurgeRepo attempt
	// finished; the zero value means no attempt has been made yet.
	lastAutoPurgeRepo time.Time
)
// autoPurgeRepo thins out the index history of the local data repo.
//
// Indexes created within the last Conf.Repo.IndexRetentionDays days are grouped
// by calendar day (formatted in local time). For a day holding at most
// Conf.Repo.RetentionIndexesDaily indexes every index is retained; for a busier
// day the first listed index plus a random sample are retained, up to the daily
// limit. The retained IDs are then handed to repo.Purge.
//
// cron marks calls made by the scheduled job. Scheduled calls are ignored until
// a non-cron call has passed the throttle once, and any call is ignored when the
// previous attempt finished less than 6 hours ago.
func autoPurgeRepo(cron bool) {
	if cron && !autoPurgeRepoAfterFirstSync {
		return
	}
	if time.Since(lastAutoPurgeRepo) < 6*time.Hour {
		return
	}

	autoPurgeRepoAfterFirstSync = true
	defer func() {
		// Recorded on every exit path, so a failed attempt is throttled as well.
		lastAutoPurgeRepo = time.Now()
	}()

	if 1 > len(Conf.Repo.Key) {
		return
	}

	repo, err := newRepository()
	if err != nil {
		logging.LogErrorf("create data repo failed: %s", err)
		return
	}

	now := time.Now()
	retentionMillis := int64(Conf.Repo.IndexRetentionDays) * 24 * 60 * 60 * 1000
	dateGroupedIndexes := map[string][]*dejavu.Log{} // indexes inside the retention window, keyed by day

	// Collect the indexes that fall inside the retention window.
collect:
	for page := 1; ; page++ {
		indexLogs, pageCount, _, err := repo.GetIndexLogs(page, 512)
		if nil != err {
			logging.LogErrorf("get data repo index logs failed: %s", err)
			return
		}
		if 1 > len(indexLogs) {
			break
		}

		for _, index := range indexLogs {
			if now.UnixMilli()-index.Created > retentionMillis {
				// Stop at the first index outside the window.
				// NOTE(review): this relies on GetIndexLogs listing newest first — confirm.
				break collect
			}

			date := time.UnixMilli(index.Created).Format("2006-01-02")
			dateGroupedIndexes[date] = append(dateGroupedIndexes[date], index)
		}

		if page >= pageCount {
			break
		}
	}

	// Pick the indexes to retain for each day.
	var retentionIndexIDs []string
	for _, indexes := range dateGroupedIndexes {
		if len(indexes) <= Conf.Repo.RetentionIndexesDaily {
			// The day is within the daily limit, so retain all of its indexes.
			// Leaving them out of the retention list would expose them to the purge
			// (assuming Purge removes unreferenced indexes not listed — confirm in dejavu).
			for _, index := range indexes {
				retentionIndexIDs = append(retentionIndexIDs, index.ID)
			}
			continue
		}

		keepIndexes := hashset.New()
		keepIndexes.Add(indexes[0]) // the first listed index of the day is always retained

		// Randomly retain further indexes until the daily limit is reached. The
		// attempt count is bounded because a random pick may repeat an earlier one.
		// len(indexes) >= 2 whenever this loop runs, so Intn never receives 0.
		for i := 0; i < Conf.Repo.RetentionIndexesDaily*7; i++ {
			if keepIndexes.Size() >= Conf.Repo.RetentionIndexesDaily {
				break
			}
			// Sample from positions 1..len-1: position 0 is already retained and
			// the last position must be selectable too.
			keepIndexes.Add(indexes[1+mathRand.Intn(len(indexes)-1)])
		}

		for _, keepIndex := range keepIndexes.Values() {
			retentionIndexIDs = append(retentionIndexIDs, keepIndex.(*dejavu.Log).ID)
		}
	}

	retentionIndexIDs = gulu.Str.RemoveDuplicatedElem(retentionIndexIDs)
	if 1 > len(retentionIndexIDs) {
		logging.LogInfof("no index to purge [ellapsed=%.2fs]", time.Since(now).Seconds())
		return
	}

	stat, err := repo.Purge(retentionIndexIDs...)
	if err != nil {
		logging.LogErrorf("purge data repo failed: %s", err)
		return
	}

	deletedIndexes := stat.Indexes
	deletedObjects := stat.Objects
	deletedSize := humanize.BytesCustomCeil(uint64(stat.Size), 2)
	logging.LogInfof("purge data repo completed [ellapsed=%.2fs, indexes=%d, objects=%d, size=%s]",
		time.Since(now).Seconds(), deletedIndexes, deletedObjects, deletedSize)
}
func GetRepoFile(fileID string) (ret []byte, p string, err error) {
if 1 > len(Conf.Repo.Key) {
err = errors.New(Conf.Language(26))
@ -518,6 +629,7 @@ func PurgeRepo() (err error) {
return
}
now := time.Now()
stat, err := repo.Purge()
if err != nil {
return
@ -526,6 +638,8 @@ func PurgeRepo() (err error) {
deletedIndexes := stat.Indexes
deletedObjects := stat.Objects
deletedSize := humanize.BytesCustomCeil(uint64(stat.Size), 2)
logging.LogInfof("purge data repo completed [ellapsed=%.2fs, indexes=%d, objects=%d, size=%s]",
time.Since(now).Seconds(), deletedIndexes, deletedObjects, deletedSize)
msg = fmt.Sprintf(Conf.Language(203), deletedIndexes, deletedObjects, deletedSize)
util.PushMsg(msg, 7000)
return
@ -1348,8 +1462,12 @@ func syncRepo(exit, byHand bool) (dataChanged bool, err error) {
processSyncMergeResult(exit, byHand, mergeResult, trafficStat, "a", elapsed)
if !exit {
// 首次数据同步执行完成后再执行索引订正 Index fixing should not be performed before data synchronization https://github.com/siyuan-note/siyuan/issues/10761
go checkIndex()
go func() {
// 首次数据同步执行完成后再执行索引订正 Index fixing should not be performed before data synchronization https://github.com/siyuan-note/siyuan/issues/10761
checkIndex()
// 索引订正结束后执行数据仓库清理 Automatic purge for local data repo https://github.com/siyuan-note/siyuan/issues/13091
autoPurgeRepo(false)
}()
}
return
}

View file

@ -119,6 +119,7 @@ func getCurrentTasks() (ret []*Task) {
const (
RepoCheckout = "task.repo.checkout" // 从快照中检出
RepoAutoPurge = "task.repo.autoPurge" // 自动清理数据仓库
DatabaseIndexFull = "task.database.index.full" // 重建索引
DatabaseIndex = "task.database.index" // 数据库索引
DatabaseIndexCommit = "task.database.index.commit" // 数据库索引提交
@ -143,6 +144,7 @@ const (
// uniqueActions 描述了唯一的任务,即队列中只能存在一个在执行的任务。
var uniqueActions = []string{
RepoCheckout,
RepoAutoPurge,
DatabaseIndexFull,
DatabaseIndexCommit,
OCRImage,