2019-06-22 18:35:34 +01:00
// Copyright 2019 The Gitea Authors.
// All rights reserved.
2022-11-27 13:20:29 -05:00
// SPDX-License-Identifier: MIT
2019-06-22 18:35:34 +01:00
package pull
import (
"bufio"
2022-01-19 23:26:57 +00:00
"context"
2026-01-22 14:04:26 +08:00
"errors"
2019-06-22 18:35:34 +01:00
"io"
"strconv"
2022-06-12 23:51:54 +08:00
git_model "code.gitea.io/gitea/models/git"
2022-06-13 17:37:59 +08:00
issues_model "code.gitea.io/gitea/models/issues"
2026-01-22 14:04:26 +08:00
"code.gitea.io/gitea/modules/git/gitcmd"
2019-10-28 18:31:55 +00:00
"code.gitea.io/gitea/modules/git/pipeline"
2019-06-22 18:35:34 +01:00
"code.gitea.io/gitea/modules/lfs"
"code.gitea.io/gitea/modules/log"
2026-01-22 14:04:26 +08:00
"code.gitea.io/gitea/modules/util"
"golang.org/x/sync/errgroup"
2019-06-22 18:35:34 +01:00
)
// LFSPush pushes lfs objects referred to in new commits in the head repository from the base repository
2022-06-13 17:37:59 +08:00
func LFSPush ( ctx context . Context , tmpBasePath , mergeHeadSHA , mergeBaseSHA string , pr * issues_model . PullRequest ) error {
2019-06-22 18:35:34 +01:00
// Now we have to implement git lfs push
// git rev-list --objects --filter=blob:limit=1k HEAD --not base
// pass blob shas in to git cat-file --batch-check (possibly unnecessary)
// ensure only blobs and <=1k size then pass in to git cat-file --batch
// to read each sha and check each as a pointer
// Then if they are lfs -> add them to the baseRepo
2026-01-22 14:04:26 +08:00
cmd1RevList , cmd3BathCheck , cmd5BatchContent := gitcmd . NewCommand ( ) , gitcmd . NewCommand ( ) , gitcmd . NewCommand ( )
cmd1RevListOut , cmd1RevListClose := cmd1RevList . MakeStdoutPipe ( )
defer cmd1RevListClose ( )
cmd3BatchCheckIn , cmd3BatchCheckOut , cmd3BatchCheckClose := cmd3BathCheck . MakeStdinStdoutPipe ( )
defer cmd3BatchCheckClose ( )
cmd5BatchContentIn , cmd5BatchContentOut , cmd5BatchContentClose := cmd5BatchContent . MakeStdinStdoutPipe ( )
defer cmd5BatchContentClose ( )
// Create the go-routines in reverse order (update: the order is not needed any more, the pipes are properly prepared)
wg := & errgroup . Group { }
2019-06-22 18:35:34 +01:00
// 6. Take the output of cat-file --batch and check if each file in turn
// to see if they're pointers to files in the LFS store associated with
// the head repo and add them to the base repo if so
2026-01-22 14:04:26 +08:00
wg . Go ( func ( ) error {
return createLFSMetaObjectsFromCatFileBatch ( ctx , cmd5BatchContentOut , pr )
} )
2019-06-22 18:35:34 +01:00
// 5. Take the shas of the blobs and batch read them
2026-01-22 14:04:26 +08:00
wg . Go ( func ( ) error {
return pipeline . CatFileBatch ( ctx , cmd5BatchContent , tmpBasePath )
} )
2019-06-22 18:35:34 +01:00
// 4. From the provided objects restrict to blobs <=1k
2026-01-22 14:04:26 +08:00
wg . Go ( func ( ) error {
return pipeline . BlobsLessThan1024FromCatFileBatchCheck ( cmd3BatchCheckOut , cmd5BatchContentIn )
} )
2019-06-22 18:35:34 +01:00
// 3. Run batch-check on the objects retrieved from rev-list
2026-01-22 14:04:26 +08:00
wg . Go ( func ( ) error {
return pipeline . CatFileBatchCheck ( ctx , cmd3BathCheck , tmpBasePath )
} )
2019-06-22 18:35:34 +01:00
// 2. Check each object retrieved rejecting those without names as they will be commits or trees
2026-01-22 14:04:26 +08:00
wg . Go ( func ( ) error {
return pipeline . BlobsFromRevListObjects ( cmd1RevListOut , cmd3BatchCheckIn )
} )
2019-06-22 18:35:34 +01:00
// 1. Run rev-list objects from mergeHead to mergeBase
2026-01-22 14:04:26 +08:00
wg . Go ( func ( ) error {
return pipeline . RevListObjects ( ctx , cmd1RevList , tmpBasePath , mergeHeadSHA , mergeBaseSHA )
} )
2019-06-22 18:35:34 +01:00
2026-01-22 14:04:26 +08:00
return wg . Wait ( )
2019-06-22 18:35:34 +01:00
}
2026-01-22 14:04:26 +08:00
func createLFSMetaObjectsFromCatFileBatch ( ctx context . Context , catFileBatchReader io . ReadCloser , pr * issues_model . PullRequest ) error {
2019-06-22 18:35:34 +01:00
defer catFileBatchReader . Close ( )
2021-04-09 00:25:57 +02:00
contentStore := lfs . NewContentStore ( )
2019-06-22 18:35:34 +01:00
bufferedReader := bufio . NewReader ( catFileBatchReader )
buf := make ( [ ] byte , 1025 )
for {
// File descriptor line: sha
_ , err := bufferedReader . ReadString ( ' ' )
if err != nil {
2026-01-22 14:04:26 +08:00
return util . Iif ( errors . Is ( err , io . EOF ) , nil , err )
2019-06-22 18:35:34 +01:00
}
// Throw away the blob
if _ , err := bufferedReader . ReadString ( ' ' ) ; err != nil {
2026-01-22 14:04:26 +08:00
return err
2019-06-22 18:35:34 +01:00
}
sizeStr , err := bufferedReader . ReadString ( '\n' )
if err != nil {
2026-01-22 14:04:26 +08:00
return err
2019-06-22 18:35:34 +01:00
}
size , err := strconv . Atoi ( sizeStr [ : len ( sizeStr ) - 1 ] )
if err != nil {
2026-01-22 14:04:26 +08:00
return err
2019-06-22 18:35:34 +01:00
}
pointerBuf := buf [ : size + 1 ]
if _ , err := io . ReadFull ( bufferedReader , pointerBuf ) ; err != nil {
2026-01-22 14:04:26 +08:00
return err
2019-06-22 18:35:34 +01:00
}
pointerBuf = pointerBuf [ : size ]
// Now we need to check if the pointerBuf is an LFS pointer
2021-04-09 00:25:57 +02:00
pointer , _ := lfs . ReadPointerFromBuffer ( pointerBuf )
if ! pointer . IsValid ( ) {
2019-06-22 18:35:34 +01:00
continue
}
2021-04-09 00:25:57 +02:00
exist , _ := contentStore . Exists ( pointer )
if ! exist {
continue
}
2019-06-22 18:35:34 +01:00
// Then we need to check that this pointer is in the db
2023-10-14 10:37:24 +02:00
if _ , err := git_model . GetLFSMetaObjectByOid ( ctx , pr . HeadRepoID , pointer . Oid ) ; err != nil {
2022-06-12 23:51:54 +08:00
if err == git_model . ErrLFSObjectNotExist {
2019-06-22 18:35:34 +01:00
log . Warn ( "During merge of: %d in %-v, there is a pointer to LFS Oid: %s which although present in the LFS store is not associated with the head repo %-v" , pr . Index , pr . BaseRepo , pointer . Oid , pr . HeadRepo )
continue
}
2026-01-22 14:04:26 +08:00
return err
2019-06-22 18:35:34 +01:00
}
// OK we have a pointer that is associated with the head repo
// and is actually a file in the LFS
// Therefore it should be associated with the base repo
2023-12-07 15:27:36 +08:00
if _ , err := git_model . NewLFSMetaObject ( ctx , pr . BaseRepoID , pointer ) ; err != nil {
2026-01-22 14:04:26 +08:00
return err
2019-06-22 18:35:34 +01:00
}
}
}