improve performance of create tag and commit APIs (#946)

This commit is contained in:
Marko Gacesa 2024-01-26 16:46:23 +00:00 committed by Harness
parent 47f69ea42a
commit f02781b4d8
3 changed files with 386 additions and 169 deletions

View File

@ -19,7 +19,12 @@ import (
"context"
"fmt"
"io"
"io/fs"
"os"
"path"
"path/filepath"
"regexp"
"sort"
"strings"
"time"
@ -51,6 +56,7 @@ func NewSharedRepo(
if err != nil {
return nil, err
}
t := &SharedRepo{
adapter: adapter,
repoUID: repoUID,
@ -76,6 +82,168 @@ func (r *SharedRepo) Close(ctx context.Context) {
}
}
// filePriority is based on https://github.com/git/git/blob/master/tmp-objdir.c#L168
func filePriority(name string) int {
switch {
case !strings.HasPrefix(name, "pack"):
return 0
case strings.HasSuffix(name, ".keep"):
return 1
case strings.HasSuffix(name, ".pack"):
return 2
case strings.HasSuffix(name, ".rev"):
return 3
case strings.HasSuffix(name, ".idx"):
return 4
default:
return 5
}
}
type fileEntry struct {
fileName string
fullPath string
relPath string
priority int
}
func (r *SharedRepo) MoveObjects(ctx context.Context) error {
srcDir := path.Join(r.tmpPath, "objects")
dstDir := path.Join(r.remoteRepoPath, "objects")
var files []fileEntry
err := filepath.WalkDir(srcDir, func(path string, d fs.DirEntry, err error) error {
if err != nil {
return err
}
if d.IsDir() {
return nil
}
relPath, err := filepath.Rel(srcDir, path)
if err != nil {
return fmt.Errorf("failed to get relative path: %w", err)
}
// avoid coping anything in the info/
if strings.HasPrefix(relPath, "info/") {
return nil
}
fileName := filepath.Base(relPath)
files = append(files, fileEntry{
fileName: fileName,
fullPath: path,
relPath: relPath,
priority: filePriority(fileName),
})
return nil
})
if err != nil {
return fmt.Errorf("failed to walk source directory: %w", err)
}
sort.Slice(files, func(i, j int) bool {
return files[i].priority < files[j].priority // 0 is top priority, 5 is lowest priority
})
for _, f := range files {
dstPath := filepath.Join(dstDir, f.relPath)
err = os.MkdirAll(filepath.Dir(dstPath), os.ModePerm)
if err != nil {
return err
}
// Try to move the file
errRename := os.Rename(f.fullPath, dstPath)
if errRename == nil {
log.Ctx(ctx).Debug().
Str("object", f.relPath).
Msg("moved git object")
continue
}
// Try to copy the file
copyError := func() error {
srcFile, err := os.Open(f.fullPath)
if err != nil {
return fmt.Errorf("failed to open source file: %w", err)
}
defer func() { _ = srcFile.Close() }()
dstFile, err := os.Create(dstPath)
if err != nil {
return fmt.Errorf("failed to create target file: %w", err)
}
defer func() { _ = dstFile.Close() }()
_, err = io.Copy(dstFile, srcFile)
if err != nil {
return err
}
return nil
}()
if copyError != nil {
log.Ctx(ctx).Err(copyError).
Str("object", f.relPath).
Str("renameErr", errRename.Error()).
Msg("failed to move or copy git object")
return copyError
}
log.Ctx(ctx).Warn().
Str("object", f.relPath).
Str("renameErr", errRename.Error()).
Msg("copied git object")
}
return nil
}
func (r *SharedRepo) InitAsShared(ctx context.Context) error {
args := []string{"init", "--bare"}
if _, stderr, err := gitea.NewCommand(ctx, args...).RunStdString(&gitea.RunOpts{
Dir: r.tmpPath,
}); err != nil {
return errors.Internal(err, "error while creating empty repository: %s", stderr)
}
if err := func() error {
alternates := filepath.Join(r.tmpPath, "objects", "info", "alternates")
f, err := os.OpenFile(alternates, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0o600)
if err != nil {
return fmt.Errorf("failed to open alternates file '%s': %w", alternates, err)
}
defer func() { _ = f.Close() }()
data := filepath.Join(r.remoteRepoPath, "objects")
if _, err = fmt.Fprintln(f, data); err != nil {
return fmt.Errorf("failed to write alternates file '%s': %w", alternates, err)
}
return nil
}(); err != nil {
return errors.Internal(err, "failed to create alternate in empty repository: %s", err.Error())
}
gitRepo, err := gitea.OpenRepository(ctx, r.tmpPath)
if err != nil {
return processGiteaErrorf(err, "failed to open repo")
}
r.repo = gitRepo
return nil
}
// Clone the base repository to our path and set branch as the HEAD.
func (r *SharedRepo) Clone(ctx context.Context, branchName string) error {
args := []string{"clone", "-s", "--bare"}
@ -117,7 +285,15 @@ func (r *SharedRepo) Init(ctx context.Context) error {
// SetDefaultIndex sets the git index to our HEAD.
func (r *SharedRepo) SetDefaultIndex(ctx context.Context) error {
if _, _, err := gitea.NewCommand(ctx, "read-tree", "HEAD").RunStdString(&gitea.RunOpts{Dir: r.tmpPath}); err != nil {
return fmt.Errorf("SetDefaultIndex: %w", err)
return fmt.Errorf("failed to git read-tree HEAD: %w", err)
}
return nil
}
// SetIndex sets the git index to the provided treeish.
func (r *SharedRepo) SetIndex(ctx context.Context, treeish string) error {
if _, _, err := gitea.NewCommand(ctx, "read-tree", treeish).RunStdString(&gitea.RunOpts{Dir: r.tmpPath}); err != nil {
return fmt.Errorf("failed to git read-tree %s: %w", treeish, err)
}
return nil
}

View File

@ -91,6 +91,7 @@ type CommitFilesResponse struct {
CommitID string
}
//nolint:gocognit
func (s *Service) CommitFiles(ctx context.Context, params *CommitFilesParams) (CommitFilesResponse, error) {
if err := params.Validate(); err != nil {
return CommitFilesResponse{}, err
@ -140,84 +141,123 @@ func (s *Service) CommitFiles(ctx context.Context, params *CommitFilesParams) (C
log.Debug().Msg("validate and prepare input")
// ensure input data is valid
if err = s.validateAndPrepareHeader(repo, isEmpty, params); err != nil {
// the commit will be nil for empty repositories
commit, err := s.validateAndPrepareHeader(repo, isEmpty, params)
if err != nil {
return CommitFilesResponse{}, err
}
var oldCommitSHA string
if commit != nil {
oldCommitSHA = commit.ID.String()
}
log.Debug().Msg("create shared repo")
// create a new shared repo
shared, err := s.adapter.SharedRepository(s.tmpDir, params.RepoUID, repo.Path)
if err != nil {
return CommitFilesResponse{}, fmt.Errorf("failed to create shared repository: %w", err)
}
defer shared.Close(ctx)
log.Debug().Msgf("prepare tree (empty: %t)", isEmpty)
// handle empty repo separately (as branch doesn't exist, no commit exists, ...)
var parentCommitSHA string
if isEmpty {
err = s.prepareTreeEmptyRepo(ctx, shared, params.Actions)
newCommitSHA, err := func() (string, error) {
// Create a directory for the temporary shared repository.
shared, err := s.adapter.SharedRepository(s.tmpDir, params.RepoUID, repo.Path)
if err != nil {
return CommitFilesResponse{}, err
return "", fmt.Errorf("failed to create shared repository: %w", err)
}
} else {
parentCommitSHA, err = s.prepareTree(ctx, shared, params.Branch, params.Actions)
defer shared.Close(ctx)
// Create bare repository with alternates pointing to the original repository.
err = shared.InitAsShared(ctx)
if err != nil {
return CommitFilesResponse{}, err
return "", fmt.Errorf("failed to create temp repo with alternates: %w", err)
}
}
log.Debug().Msg("write tree")
log.Debug().Msgf("prepare tree (empty: %t)", isEmpty)
// Now write the tree
treeHash, err := shared.WriteTree(ctx)
// handle empty repo separately (as branch doesn't exist, no commit exists, ...)
if isEmpty {
err = s.prepareTreeEmptyRepo(ctx, shared, params.Actions)
} else {
err = shared.SetIndex(ctx, oldCommitSHA)
if err != nil {
return "", fmt.Errorf("failed to set index to temp repo: %w", err)
}
err = s.prepareTree(ctx, shared, params.Actions, commit)
}
if err != nil {
return "", fmt.Errorf("failed to prepare tree: %w", err)
}
log.Debug().Msg("write tree")
// Now write the tree
treeHash, err := shared.WriteTree(ctx)
if err != nil {
return "", fmt.Errorf("failed to write tree object: %w", err)
}
message := strings.TrimSpace(params.Title)
if len(params.Message) > 0 {
message += "\n\n" + strings.TrimSpace(params.Message)
}
log.Debug().Msg("commit tree")
// Now commit the tree
commitSHA, err := shared.CommitTreeWithDate(
ctx,
oldCommitSHA,
&types.Identity{
Name: author.Name,
Email: author.Email,
},
&types.Identity{
Name: committer.Name,
Email: committer.Email,
},
treeHash,
message,
false,
authorDate,
committerDate,
)
if err != nil {
return "", fmt.Errorf("failed to commit the tree: %w", err)
}
err = shared.MoveObjects(ctx)
if err != nil {
return "", fmt.Errorf("failed to move git objects: %w", err)
}
return commitSHA, nil
}()
if err != nil {
return CommitFilesResponse{}, fmt.Errorf("failed to write tree object: %w", err)
return CommitFilesResponse{}, fmt.Errorf("CommitFiles: failed to create commit in shared repository: %w", err)
}
message := strings.TrimSpace(params.Title)
if len(params.Message) > 0 {
message += "\n\n" + strings.TrimSpace(params.Message)
log.Debug().Msg("update ref")
branchRef := adapter.GetReferenceFromBranchName(params.Branch)
if params.Branch != params.NewBranch {
// we are creating a new branch, rather than updating the existing one
oldCommitSHA = types.NilSHA
branchRef = adapter.GetReferenceFromBranchName(params.NewBranch)
}
log.Debug().Msg("commit tree")
// Now commit the tree
commitSHA, err := shared.CommitTreeWithDate(
err = s.adapter.UpdateRef(
ctx,
parentCommitSHA,
&types.Identity{
Name: author.Name,
Email: author.Email,
},
&types.Identity{
Name: committer.Name,
Email: committer.Email,
},
treeHash,
message,
false,
authorDate,
committerDate,
params.EnvVars,
repoPath,
branchRef,
oldCommitSHA,
newCommitSHA,
)
if err != nil {
return CommitFilesResponse{}, fmt.Errorf("failed to commit the tree: %w", err)
}
log.Debug().Msg("push branch to original repo")
env := CreateEnvironmentForPush(ctx, params.WriteParams)
if err = shared.PushCommitToBranch(ctx, commitSHA, params.NewBranch, false, env...); err != nil {
return CommitFilesResponse{}, fmt.Errorf("failed to push commits to remote repository: %w", err)
return CommitFilesResponse{}, fmt.Errorf("CommitFiles: failed to update ref %s: %w", branchRef, err)
}
log.Debug().Msg("get commit")
commit, err := shared.GetCommit(commitSHA)
commit, err = repo.GetCommit(newCommitSHA)
if err != nil {
return CommitFilesResponse{}, fmt.Errorf("failed to get commit for SHA %s: %w", commitSHA, err)
return CommitFilesResponse{}, fmt.Errorf("failed to get commit for SHA %s: %w", newCommitSHA, err)
}
log.Debug().Msg("done")
@ -227,38 +267,27 @@ func (s *Service) CommitFiles(ctx context.Context, params *CommitFilesParams) (C
}, nil
}
func (s *Service) prepareTree(ctx context.Context, shared SharedRepo,
branchName string, actions []CommitFileAction) (string, error) {
// clone original branch from repo
if err := s.clone(ctx, shared, branchName); err != nil {
return "", err
}
// Get the latest commit of the original branch
commit, err := shared.GetBranchCommit(branchName)
if err != nil {
return "", fmt.Errorf("failed to get latest commit of the branch %s: %w", branchName, err)
}
func (s *Service) prepareTree(
ctx context.Context,
shared *adapter.SharedRepo,
actions []CommitFileAction,
commit *git.Commit,
) error {
// execute all actions
for _, action := range actions {
action := action
if err = s.processAction(ctx, shared, &action, commit); err != nil {
return "", err
for i := range actions {
if err := s.processAction(ctx, shared, &actions[i], commit); err != nil {
return err
}
}
return commit.ID.String(), nil
return nil
}
func (s *Service) prepareTreeEmptyRepo(ctx context.Context, shared *adapter.SharedRepo,
actions []CommitFileAction) error {
// init a new repo (full clone would cause risk that by time of push someone wrote to the remote repo!)
err := shared.Init(ctx)
if err != nil {
return fmt.Errorf("failed to init shared tmp repository: %w", err)
}
func (s *Service) prepareTreeEmptyRepo(
ctx context.Context,
shared *adapter.SharedRepo,
actions []CommitFileAction,
) error {
for _, action := range actions {
if action.Action != CreateAction {
return errors.PreconditionFailed("action not allowed on empty repository")
@ -270,7 +299,7 @@ func (s *Service) prepareTreeEmptyRepo(ctx context.Context, shared *adapter.Shar
}
reader := bytes.NewReader(action.Payload)
if err = createFile(ctx, shared, nil, filePath, defaultFilePermission, reader); err != nil {
if err := createFile(ctx, shared, nil, filePath, defaultFilePermission, reader); err != nil {
return errors.Internal(err, "failed to create file '%s'", action.Path)
}
}
@ -278,12 +307,15 @@ func (s *Service) prepareTreeEmptyRepo(ctx context.Context, shared *adapter.Shar
return nil
}
func (s *Service) validateAndPrepareHeader(repo *git.Repository, isEmpty bool,
params *CommitFilesParams) error {
func (s *Service) validateAndPrepareHeader(
repo *git.Repository,
isEmpty bool,
params *CommitFilesParams,
) (*git.Commit, error) {
if params.Branch == "" {
defaultBranchRef, err := repo.GetDefaultBranch()
if err != nil {
return fmt.Errorf("failed to get default branch: %w", err)
return nil, fmt.Errorf("failed to get default branch: %w", err)
}
params.Branch = defaultBranchRef
}
@ -298,41 +330,32 @@ func (s *Service) validateAndPrepareHeader(repo *git.Repository, isEmpty bool,
// if the repo is empty then we can skip branch existence checks
if isEmpty {
return nil
return nil, nil //nolint:nilnil // an empty repository has no commit and there's no error
}
// ensure source branch exists
if _, err := repo.GetBranch(params.Branch); err != nil {
return fmt.Errorf("failed to get source branch '%s': %w", params.Branch, err)
branch, err := repo.GetBranch(params.Branch)
if err != nil {
return nil, fmt.Errorf("failed to get source branch '%s': %w", params.Branch, err)
}
// ensure new branch doesn't exist yet (if new branch creation was requested)
if params.Branch != params.NewBranch {
existingBranch, err := repo.GetBranch(params.NewBranch)
if existingBranch != nil {
return errors.Conflict("branch %s already exists", existingBranch.Name)
return nil, errors.Conflict("branch %s already exists", existingBranch.Name)
}
if err != nil && !git.IsErrBranchNotExist(err) {
return fmt.Errorf("failed to create new branch '%s': %w", params.NewBranch, err)
return nil, fmt.Errorf("failed to create new branch '%s': %w", params.NewBranch, err)
}
}
return nil
}
func (s *Service) clone(
ctx context.Context,
shared SharedRepo,
branch string,
) error {
if err := shared.Clone(ctx, branch); err != nil {
return fmt.Errorf("failed to clone branch '%s': %w", branch, err)
commit, err := branch.GetCommit()
if err != nil {
return nil, fmt.Errorf("failed to get branch commit: %w", err)
}
if err := shared.SetDefaultIndex(ctx); err != nil {
return fmt.Errorf("failed to set default index: %w", err)
}
return nil
return commit, nil
}
func (s *Service) processAction(

View File

@ -17,14 +17,12 @@ package git
import (
"context"
"fmt"
"strings"
"time"
"github.com/harness/gitness/errors"
"github.com/harness/gitness/git/adapter"
"github.com/harness/gitness/git/types"
"code.gitea.io/gitea/modules/git"
"github.com/rs/zerolog/log"
)
@ -226,6 +224,7 @@ func (s *Service) ListCommitTags(
}, nil
}
//nolint:gocognit
func (s *Service) CreateCommitTag(ctx context.Context, params *CreateCommitTagParams) (*CreateCommitTagOutput, error) {
if err := params.Validate(); err != nil {
return nil, err
@ -233,74 +232,101 @@ func (s *Service) CreateCommitTag(ctx context.Context, params *CreateCommitTagPa
repoPath := getFullPathForRepo(s.reposRoot, params.RepoUID)
repo, err := git.OpenRepository(ctx, repoPath)
if err != nil {
return nil, fmt.Errorf("CreateCommitTag: failed to open repo: %w", err)
}
sharedRepo, err := s.adapter.SharedRepository(s.tmpDir, params.RepoUID, repo.Path)
if err != nil {
return nil, fmt.Errorf("CreateCommitTag: failed to create new shared repo: %w", err)
}
defer sharedRepo.Close(ctx)
// clone repo (with HEAD branch - target might be anything)
err = sharedRepo.Clone(ctx, "")
if err != nil {
return nil, fmt.Errorf("CreateCommitTag: failed to clone shared repo: %w", err)
}
// get target commit (as target could be branch/tag/commit, and tag can't be pushed using source:destination syntax)
// NOTE: in case the target is an annotated tag, the targetCommit title and message are that of the tag, not the commit
targetCommit, err := sharedRepo.GetCommit(strings.TrimSpace(params.Target))
if git.IsErrNotExist(err) {
targetCommit, err := s.adapter.GetCommit(ctx, repoPath, params.Target)
if errors.IsNotFound(err) {
return nil, errors.NotFound("target '%s' doesn't exist", params.Target)
}
if err != nil {
return nil, fmt.Errorf("failed to get commit id for target '%s': %w", params.Target, err)
return nil, fmt.Errorf("CreateCommitTag: failed to get commit id for target '%s': %w", params.Target, err)
}
tagger := params.Actor
if params.Tagger != nil {
tagger = *params.Tagger
tagName := params.Name
tagRef := adapter.GetReferenceFromTagName(tagName)
var tag *types.Tag
sha, err := s.adapter.GetRef(ctx, repoPath, tagRef)
// TODO: Change GetRef to use errors.NotFound and then remove types.IsNotFoundError(err) below.
if err != nil && !types.IsNotFoundError(err) && !errors.IsNotFound(err) {
return nil, fmt.Errorf("CreateCommitTag: failed to verify tag existence: %w", err)
}
taggerDate := time.Now().UTC()
if params.TaggerDate != nil {
taggerDate = *params.TaggerDate
if err == nil && sha != "" {
return nil, errors.Conflict("tag '%s' already exists", tagName)
}
createTagRequest := &types.CreateTagOptions{
Message: params.Message,
Tagger: types.Signature{
Identity: types.Identity{
Name: tagger.Name,
Email: tagger.Email,
err = func() error {
// Create a directory for the temporary shared repository.
sharedRepo, err := s.adapter.SharedRepository(s.tmpDir, params.RepoUID, repoPath)
if err != nil {
return fmt.Errorf("failed to create new shared repo: %w", err)
}
defer sharedRepo.Close(ctx)
// Create bare repository with alternates pointing to the original repository.
err = sharedRepo.InitAsShared(ctx)
if err != nil {
return fmt.Errorf("failed to create temp repo with alternates: %w", err)
}
tagger := params.Actor
if params.Tagger != nil {
tagger = *params.Tagger
}
taggerDate := time.Now().UTC()
if params.TaggerDate != nil {
taggerDate = *params.TaggerDate
}
createTagRequest := &types.CreateTagOptions{
Message: params.Message,
Tagger: types.Signature{
Identity: types.Identity{
Name: tagger.Name,
Email: tagger.Email,
},
When: taggerDate,
},
When: taggerDate,
},
}
err = s.adapter.CreateTag(
ctx,
sharedRepo.Path(),
params.Name,
targetCommit.ID.String(),
createTagRequest)
if errors.Is(err, types.ErrAlreadyExists) {
return nil, errors.Conflict("tag '%s' already exists", params.Name)
}
}
err = s.adapter.CreateTag(
ctx,
sharedRepo.Path(),
tagName,
targetCommit.SHA,
createTagRequest)
if err != nil {
return fmt.Errorf("failed to create tag '%s': %w", tagName, err)
}
tag, err = s.adapter.GetAnnotatedTag(ctx, sharedRepo.Path(), tagName)
if err != nil {
return fmt.Errorf("failed to read annotated tag after creation: %w", err)
}
err = sharedRepo.MoveObjects(ctx)
if err != nil {
return fmt.Errorf("failed to move git objects: %w", err)
}
return nil
}()
if err != nil {
return nil, fmt.Errorf("CreateCommitTag: failed to create tag '%s': %w", params.Name, err)
return nil, fmt.Errorf("CreateCommitTag: failed to create tag in shared repository: %w", err)
}
envs := CreateEnvironmentForPush(ctx, params.WriteParams)
if err = sharedRepo.PushTag(ctx, params.Name, true, envs...); err != nil {
return nil, fmt.Errorf("CreateCommitTag: failed to push the tag to remote")
err = s.adapter.UpdateRef(
ctx,
params.EnvVars,
repoPath,
tagRef,
types.NilSHA,
tag.Sha,
)
if err != nil {
return nil, fmt.Errorf("failed to create tag reference: %w", err)
}
var commitTag *CommitTag
if params.Message != "" {
tag, err := s.adapter.GetAnnotatedTag(ctx, repoPath, params.Name)
tag, err = s.adapter.GetAnnotatedTag(ctx, repoPath, params.Name)
if err != nil {
return nil, fmt.Errorf("failed to read annotated tag after creation: %w", err)
}
@ -309,19 +335,11 @@ func (s *Service) CreateCommitTag(ctx context.Context, params *CreateCommitTagPa
commitTag = &CommitTag{
Name: params.Name,
IsAnnotated: false,
SHA: targetCommit.ID.String(),
SHA: targetCommit.SHA,
}
}
// gitea overwrites some commit details in case getCommit(ref) was called with ref being a tag
// To avoid this issue, let's get the commit again using the actual id of the commit
// TODO: can we do this nicer?
rawCommit, err := s.adapter.GetCommit(ctx, repoPath, targetCommit.ID.String())
if err != nil {
return nil, fmt.Errorf("failed to get the raw commit '%s' after tag creation: %w", targetCommit.ID.String(), err)
}
c, err := mapCommit(rawCommit)
c, err := mapCommit(targetCommit)
if err != nil {
return nil, err
}