mirror of
https://github.com/siyuan-note/siyuan.git
synced 2026-01-21 15:56:10 +01:00
🎨 Automatic purge for local data repo https://github.com/siyuan-note/siyuan/issues/13091
This commit is contained in:
parent
93b687a407
commit
6a442ad85d
23 changed files with 259 additions and 15 deletions
|
|
@ -29,6 +29,40 @@ import (
|
|||
"github.com/siyuan-note/siyuan/kernel/util"
|
||||
)
|
||||
|
||||
func setRepoIndexRetentionDays(c *gin.Context) {
|
||||
ret := gulu.Ret.NewResult()
|
||||
defer c.JSON(http.StatusOK, ret)
|
||||
|
||||
arg, ok := util.JsonArg(c, ret)
|
||||
if !ok {
|
||||
return
|
||||
}
|
||||
days := int(arg["days"].(float64))
|
||||
if 1 > days {
|
||||
days = 180
|
||||
}
|
||||
|
||||
model.Conf.Repo.IndexRetentionDays = days
|
||||
model.Conf.Save()
|
||||
}
|
||||
|
||||
func setRetentionIndexesDaily(c *gin.Context) {
|
||||
ret := gulu.Ret.NewResult()
|
||||
defer c.JSON(http.StatusOK, ret)
|
||||
|
||||
arg, ok := util.JsonArg(c, ret)
|
||||
if !ok {
|
||||
return
|
||||
}
|
||||
indexes := int(arg["indexes"].(float64))
|
||||
if 1 > indexes {
|
||||
indexes = 180
|
||||
}
|
||||
|
||||
model.Conf.Repo.RetentionIndexesDaily = indexes
|
||||
model.Conf.Save()
|
||||
}
|
||||
|
||||
func getRepoFile(c *gin.Context) {
|
||||
// Add internal kernel API `/api/repo/getRepoFile` https://github.com/siyuan-note/siyuan/issues/10101
|
||||
|
||||
|
|
|
|||
|
|
@ -390,6 +390,8 @@ func ServeAPI(ginServer *gin.Engine) {
|
|||
ginServer.Handle("POST", "/api/repo/diffRepoSnapshots", model.CheckAuth, model.CheckAdminRole, diffRepoSnapshots)
|
||||
ginServer.Handle("POST", "/api/repo/openRepoSnapshotDoc", model.CheckAuth, model.CheckAdminRole, openRepoSnapshotDoc)
|
||||
ginServer.Handle("POST", "/api/repo/getRepoFile", model.CheckAuth, model.CheckAdminRole, getRepoFile)
|
||||
ginServer.Handle("POST", "/api/repo/setRepoIndexRetentionDays", model.CheckAuth, model.CheckAdminRole, setRepoIndexRetentionDays)
|
||||
ginServer.Handle("POST", "/api/repo/setRetentionIndexesDaily", model.CheckAuth, model.CheckAdminRole, setRetentionIndexesDaily)
|
||||
|
||||
ginServer.Handle("POST", "/api/riff/createRiffDeck", model.CheckAuth, model.CheckAdminRole, model.CheckReadonly, createRiffDeck)
|
||||
ginServer.Handle("POST", "/api/riff/renameRiffDeck", model.CheckAuth, model.CheckAdminRole, model.CheckReadonly, renameRiffDeck)
|
||||
|
|
|
|||
|
|
@ -29,11 +29,17 @@ type Repo struct {
|
|||
// If the data repo indexing time is greater than 12s, prompt user to purge the data repo https://github.com/siyuan-note/siyuan/issues/9613
|
||||
// Supports configuring data sync index time-consuming prompts https://github.com/siyuan-note/siyuan/issues/9698
|
||||
SyncIndexTiming int64 `json:"syncIndexTiming"`
|
||||
|
||||
// 自动清理数据仓库 Automatic purge for local data repo https://github.com/siyuan-note/siyuan/issues/13091
|
||||
IndexRetentionDays int `json:"indexRetentionDays"` // 索引保留天数
|
||||
RetentionIndexesDaily int `json:"retentionIndexesDaily"` // 每日保留索引数
|
||||
}
|
||||
|
||||
func NewRepo() *Repo {
|
||||
return &Repo{
|
||||
SyncIndexTiming: 12 * 1000,
|
||||
SyncIndexTiming: 12 * 1000,
|
||||
IndexRetentionDays: 180,
|
||||
RetentionIndexesDaily: 2,
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -56,7 +56,7 @@ require (
|
|||
github.com/sabhiram/go-gitignore v0.0.0-20210923224102-525f6e181f06
|
||||
github.com/sashabaranov/go-openai v1.29.1
|
||||
github.com/shirou/gopsutil/v3 v3.24.5
|
||||
github.com/siyuan-note/dejavu v0.0.0-20241116085410-899893f6ae97
|
||||
github.com/siyuan-note/dejavu v0.0.0-20241116172754-506198a10836
|
||||
github.com/siyuan-note/encryption v0.0.0-20231219001248-1e028a4d13b4
|
||||
github.com/siyuan-note/eventbus v0.0.0-20240627125516-396fdb0f0f97
|
||||
github.com/siyuan-note/filelock v0.0.0-20240724034355-d1ed7bf21d04
|
||||
|
|
|
|||
|
|
@ -340,8 +340,8 @@ github.com/shopspring/decimal v1.4.0/go.mod h1:gawqmDU56v4yIKSwfBSFip1HdCCXN8/+D
|
|||
github.com/shurcooL/gofontwoff v0.0.0-20181114050219-180f79e6909d h1:lvCTyBbr36+tqMccdGMwuEU+hjux/zL6xSmf5S9ITaA=
|
||||
github.com/shurcooL/gofontwoff v0.0.0-20181114050219-180f79e6909d/go.mod h1:05UtEgK5zq39gLST6uB0cf3NEHjETfB4Fgr3Gx5R9Vw=
|
||||
github.com/simplereach/timeutils v1.2.0/go.mod h1:VVbQDfN/FHRZa1LSqcwo4kNZ62OOyqLLGQKYB3pB0Q8=
|
||||
github.com/siyuan-note/dejavu v0.0.0-20241116085410-899893f6ae97 h1:JkJG+dYDHSPCrzKXfi/yczlobfgiDSDHoS6ozRMNRNI=
|
||||
github.com/siyuan-note/dejavu v0.0.0-20241116085410-899893f6ae97/go.mod h1:sVINGoilQS1l5ZQJJBHzUwKmyHhx8qdQps7gKqZVbgU=
|
||||
github.com/siyuan-note/dejavu v0.0.0-20241116172754-506198a10836 h1:+hHN29Ifk2KqdXFgFIj++3bgqEOv8SjHClmFcSvj0eE=
|
||||
github.com/siyuan-note/dejavu v0.0.0-20241116172754-506198a10836/go.mod h1:sVINGoilQS1l5ZQJJBHzUwKmyHhx8qdQps7gKqZVbgU=
|
||||
github.com/siyuan-note/encryption v0.0.0-20231219001248-1e028a4d13b4 h1:kJaw5L/evyW6LcB9IQT8PR4ppx8JVqOFP9Ix3rfwSrc=
|
||||
github.com/siyuan-note/encryption v0.0.0-20231219001248-1e028a4d13b4/go.mod h1:UYcCCY+0wh+GmUoDOaO63j1sV5lgy7laLAk1XhEiUis=
|
||||
github.com/siyuan-note/eventbus v0.0.0-20240627125516-396fdb0f0f97 h1:lM5v8BfNtbOL5jYwhCdMYBcYtr06IYBKjjSLAPMKTM8=
|
||||
|
|
|
|||
|
|
@ -42,6 +42,7 @@ func StartCron() {
|
|||
go every(30*time.Second, model.OCRAssetsJob)
|
||||
go every(30*time.Second, model.FlushAssetsTextsJob)
|
||||
go every(30*time.Second, model.HookDesktopUIProcJob)
|
||||
go every(24*time.Hour, model.AutoPurgeRepoJob)
|
||||
}
|
||||
|
||||
func every(interval time.Duration, f func()) {
|
||||
|
|
|
|||
|
|
@ -385,6 +385,12 @@ func InitConf() {
|
|||
if 12000 > Conf.Repo.SyncIndexTiming {
|
||||
Conf.Repo.SyncIndexTiming = 12 * 1000
|
||||
}
|
||||
if 1 > Conf.Repo.IndexRetentionDays {
|
||||
Conf.Repo.IndexRetentionDays = 180
|
||||
}
|
||||
if 1 > Conf.Repo.RetentionIndexesDaily {
|
||||
Conf.Repo.RetentionIndexesDaily = 2
|
||||
}
|
||||
|
||||
if nil == Conf.Search {
|
||||
Conf.Search = conf.NewSearch()
|
||||
|
|
|
|||
|
|
@ -24,6 +24,7 @@ import (
|
|||
"errors"
|
||||
"fmt"
|
||||
"math"
|
||||
mathRand "math/rand"
|
||||
"mime"
|
||||
"net/http"
|
||||
"os"
|
||||
|
|
@ -59,6 +60,116 @@ import (
|
|||
"github.com/studio-b12/gowebdav"
|
||||
)
|
||||
|
||||
// AutoPurgeRepoJob 自动清理数据仓库 https://github.com/siyuan-note/siyuan/issues/13091
|
||||
func AutoPurgeRepoJob() {
|
||||
task.AppendTaskWithTimeout(task.RepoAutoPurge, 12*time.Hour, autoPurgeRepo, true)
|
||||
}
|
||||
|
||||
// Bookkeeping shared by successive autoPurgeRepo calls.
var (
	// autoPurgeRepoAfterFirstSync starts false and is set by autoPurgeRepo once
	// a call gets past its guards; cron-triggered calls are ignored until then.
	autoPurgeRepoAfterFirstSync bool
	// lastAutoPurgeRepo is the time the previous autoPurgeRepo run finished;
	// the zero time means it has not run yet.
	lastAutoPurgeRepo time.Time
)
|
||||
|
||||
func autoPurgeRepo(cron bool) {
|
||||
if cron && !autoPurgeRepoAfterFirstSync {
|
||||
return
|
||||
}
|
||||
if time.Since(lastAutoPurgeRepo) < 6*time.Hour {
|
||||
return
|
||||
}
|
||||
|
||||
autoPurgeRepoAfterFirstSync = true
|
||||
defer func() {
|
||||
lastAutoPurgeRepo = time.Now()
|
||||
}()
|
||||
|
||||
if 1 > len(Conf.Repo.Key) {
|
||||
return
|
||||
}
|
||||
|
||||
repo, err := newRepository()
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
now := time.Now()
|
||||
|
||||
dateGroupedIndexes := map[string][]*dejavu.Log{} // 按照日期分组
|
||||
// 收集指定日期内需要保留的索引
|
||||
var date string
|
||||
page := 1
|
||||
for {
|
||||
indexLogs, pageCount, _, err := repo.GetIndexLogs(page, 512)
|
||||
if nil != err {
|
||||
logging.LogErrorf("get data repo index logs failed: %s", err)
|
||||
return
|
||||
}
|
||||
if 1 > len(indexLogs) {
|
||||
break
|
||||
}
|
||||
|
||||
tooOld := false
|
||||
for _, index := range indexLogs {
|
||||
if now.UnixMilli()-index.Created <= int64(Conf.Repo.IndexRetentionDays)*24*60*60*1000 {
|
||||
date = time.UnixMilli(index.Created).Format("2006-01-02")
|
||||
if _, ok := dateGroupedIndexes[date]; !ok {
|
||||
dateGroupedIndexes[date] = []*dejavu.Log{}
|
||||
}
|
||||
dateGroupedIndexes[date] = append(dateGroupedIndexes[date], index)
|
||||
} else {
|
||||
tooOld = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if tooOld {
|
||||
break
|
||||
}
|
||||
page++
|
||||
if page > pageCount {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
// 筛选出每日需要保留的索引
|
||||
var retentionIndexIDs []string
|
||||
for _, indexes := range dateGroupedIndexes {
|
||||
if len(indexes) <= Conf.Repo.RetentionIndexesDaily {
|
||||
continue
|
||||
}
|
||||
|
||||
keepIndexes := hashset.New()
|
||||
keepIndexes.Add(indexes[0]) // 每天最后一个固定保留
|
||||
// 随机保留指定数量的索引
|
||||
for i := 0; i < Conf.Repo.RetentionIndexesDaily*7; i++ {
|
||||
keepIndexes.Add(indexes[mathRand.Intn(len(indexes)-1)])
|
||||
if keepIndexes.Size() >= Conf.Repo.RetentionIndexesDaily {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
for _, keepIndex := range keepIndexes.Values() {
|
||||
retentionIndexIDs = append(retentionIndexIDs, keepIndex.(*dejavu.Log).ID)
|
||||
}
|
||||
}
|
||||
|
||||
retentionIndexIDs = gulu.Str.RemoveDuplicatedElem(retentionIndexIDs)
|
||||
if 1 > len(retentionIndexIDs) {
|
||||
logging.LogInfof("no index to purge [ellapsed=%.2fs]", time.Since(now).Seconds())
|
||||
return
|
||||
}
|
||||
|
||||
stat, err := repo.Purge(retentionIndexIDs...)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
deletedIndexes := stat.Indexes
|
||||
deletedObjects := stat.Objects
|
||||
deletedSize := humanize.BytesCustomCeil(uint64(stat.Size), 2)
|
||||
logging.LogInfof("purge data repo completed [ellapsed=%.2fs, indexes=%d, objects=%d, size=%s]",
|
||||
time.Since(now).Seconds(), deletedIndexes, deletedObjects, deletedSize)
|
||||
}
|
||||
|
||||
func GetRepoFile(fileID string) (ret []byte, p string, err error) {
|
||||
if 1 > len(Conf.Repo.Key) {
|
||||
err = errors.New(Conf.Language(26))
|
||||
|
|
@ -518,6 +629,7 @@ func PurgeRepo() (err error) {
|
|||
return
|
||||
}
|
||||
|
||||
now := time.Now()
|
||||
stat, err := repo.Purge()
|
||||
if err != nil {
|
||||
return
|
||||
|
|
@ -526,6 +638,8 @@ func PurgeRepo() (err error) {
|
|||
deletedIndexes := stat.Indexes
|
||||
deletedObjects := stat.Objects
|
||||
deletedSize := humanize.BytesCustomCeil(uint64(stat.Size), 2)
|
||||
logging.LogInfof("purge data repo completed [ellapsed=%.2fs, indexes=%d, objects=%d, size=%s]",
|
||||
time.Since(now).Seconds(), deletedIndexes, deletedObjects, deletedSize)
|
||||
msg = fmt.Sprintf(Conf.Language(203), deletedIndexes, deletedObjects, deletedSize)
|
||||
util.PushMsg(msg, 7000)
|
||||
return
|
||||
|
|
@ -1348,8 +1462,12 @@ func syncRepo(exit, byHand bool) (dataChanged bool, err error) {
|
|||
processSyncMergeResult(exit, byHand, mergeResult, trafficStat, "a", elapsed)
|
||||
|
||||
if !exit {
|
||||
// 首次数据同步执行完成后再执行索引订正 Index fixing should not be performed before data synchronization https://github.com/siyuan-note/siyuan/issues/10761
|
||||
go checkIndex()
|
||||
go func() {
|
||||
// 首次数据同步执行完成后再执行索引订正 Index fixing should not be performed before data synchronization https://github.com/siyuan-note/siyuan/issues/10761
|
||||
checkIndex()
|
||||
// 索引订正结束后执行数据仓库清理 Automatic purge for local data repo https://github.com/siyuan-note/siyuan/issues/13091
|
||||
autoPurgeRepo(false)
|
||||
}()
|
||||
}
|
||||
return
|
||||
}
|
||||
|
|
|
|||
|
|
@ -119,6 +119,7 @@ func getCurrentTasks() (ret []*Task) {
|
|||
|
||||
const (
|
||||
RepoCheckout = "task.repo.checkout" // 从快照中检出
|
||||
RepoAutoPurge = "task.repo.autoPurge" // 自动清理数据仓库
|
||||
DatabaseIndexFull = "task.database.index.full" // 重建索引
|
||||
DatabaseIndex = "task.database.index" // 数据库索引
|
||||
DatabaseIndexCommit = "task.database.index.commit" // 数据库索引提交
|
||||
|
|
@ -143,6 +144,7 @@ const (
|
|||
// uniqueActions 描述了唯一的任务,即队列中只能存在一个在执行的任务。
|
||||
var uniqueActions = []string{
|
||||
RepoCheckout,
|
||||
RepoAutoPurge,
|
||||
DatabaseIndexFull,
|
||||
DatabaseIndexCommit,
|
||||
OCRImage,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue