Files
infisical/cli/detect/git.go
2025-07-03 00:09:28 -04:00

217 lines
5.9 KiB
Go

// MIT License
// Copyright (c) 2019 Zachary Rice
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
package detect
import (
"bytes"
"errors"
"fmt"
"net/url"
"os/exec"
"regexp"
"strings"
"time"
"github.com/Infisical/infisical-merge/detect/cmd/scm"
"github.com/gitleaks/go-gitdiff/gitdiff"
"github.com/Infisical/infisical-merge/detect/logging"
"github.com/Infisical/infisical-merge/detect/report"
"github.com/Infisical/infisical-merge/detect/sources"
)
// DetectGit consumes the diff files produced by cmd and returns the findings
// accumulated on the detector.
//
// Diff files are read from cmd.DiffFilesCh() and errors from cmd.ErrCh() until
// both channels have been closed. Binary and deleted files are skipped, and so
// is every file whose commit is allowed by one of d.Config.Allowlists. Each
// remaining file is inspected in a task scheduled on d.Sema: the gitdiff.OpAdd
// content of every text fragment is passed to d.Detect, and each resulting
// finding is recorded with d.AddFinding after augmentGitFinding has been
// applied to it with remote.
//
// The first error received from cmd.ErrCh() aborts the scan and is returned
// together with the findings collected so far. An error reported by
// d.Sema.Wait() is returned the same way.
func (d *Detector) DetectGit(cmd *sources.GitCmd, remote *RemoteInfo) ([]report.Finding, error) {
	defer cmd.Wait()
	var (
		diffFilesCh = cmd.DiffFilesCh()
		errCh       = cmd.ErrCh()
	)

	// loop to range over both DiffFiles (stdout) and ErrCh (stderr)
	for diffFilesCh != nil || errCh != nil {
		select {
		case gitdiffFile, open := <-diffFilesCh:
			if !open {
				// Receiving from a nil channel blocks forever, so this
				// disables the case for the remaining iterations.
				diffFilesCh = nil
				break
			}

			// skip binary and deleted files
			if gitdiffFile.IsBinary || gitdiffFile.IsDelete {
				continue
			}

			// Check if commit is allowed
			commitSHA := ""
			if gitdiffFile.PatchHeader != nil {
				commitSHA = gitdiffFile.PatchHeader.SHA
				allowed := false
				for _, a := range d.Config.Allowlists {
					if ok, c := a.CommitAllowed(commitSHA); ok {
						logging.Trace().Str("allowed-commit", c).Msg("skipping commit: global allowlist")
						allowed = true
						break
					}
				}
				// The skip has to happen out here: a continue inside the
				// allowlist loop would only move on to the next allowlist
				// and the commit would still be scanned.
				if allowed {
					continue
				}
			}
			d.addCommit(commitSHA)

			// Abbreviate the SHA for log output without assuming it has at
			// least 7 characters; it is empty when there is no patch header.
			shortSHA := commitSHA
			if len(shortSHA) > 7 {
				shortSHA = shortSHA[:7]
			}

			d.Sema.Go(func() error {
				for _, textFragment := range gitdiffFile.TextFragments {
					if textFragment == nil {
						return nil
					}

					fragment := Fragment{
						Raw:       textFragment.Raw(gitdiff.OpAdd),
						CommitSHA: commitSHA,
						FilePath:  gitdiffFile.NewName,
					}

					// Emit a debug message if inspecting this fragment takes
					// longer than SlowWarningThreshold.
					timer := time.AfterFunc(SlowWarningThreshold, func() {
						logging.Debug().
							Str("commit", shortSHA).
							Str("path", fragment.FilePath).
							Msgf("Taking longer than %s to inspect fragment", SlowWarningThreshold.String())
					})
					for _, finding := range d.Detect(fragment) {
						d.AddFinding(augmentGitFinding(remote, finding, textFragment, gitdiffFile))
					}
					timer.Stop()
				}
				return nil
			})
		case err, open := <-errCh:
			if !open {
				errCh = nil
				break
			}
			return d.findings, err
		}
	}

	if err := d.Sema.Wait(); err != nil {
		return d.findings, err
	}
	logging.Info().Msgf("%d commits scanned.", len(d.commitMap))
	logging.Debug().Msg("Note: this number might be smaller than expected due to commits with no additions")
	return d.findings, nil
}
// RemoteInfo describes the remote of a scanned git repository: the SCM
// platform it is associated with and the URL of the remote.
type RemoteInfo struct {
// Platform is the SCM platform, either supplied by the caller or derived
// from the remote's host by NewRemoteInfo.
Platform scm.Platform
// Url is the string form of the remote URL as set by NewRemoteInfo; it is
// empty when no remote URL was resolved.
Url string
}
// NewRemoteInfo builds the RemoteInfo for the repository located at source.
//
// When platform is scm.NoPlatform the remote is not looked up at all.
// Otherwise the remote URL is resolved with getRemoteUrl:
//   - if the lookup fails because no remote is configured, the platform is
//     downgraded to scm.NoPlatform;
//   - if it fails for any other reason, the error is logged and the platform
//     is left as given;
//   - if it succeeds and platform is scm.UnknownPlatform, the platform is
//     derived from the remote's host with platformFromHost.
//
// The returned Url is the resolved remote URL, or empty if none was resolved.
func NewRemoteInfo(platform scm.Platform, source string) *RemoteInfo {
	info := &RemoteInfo{Platform: platform}
	if platform == scm.NoPlatform {
		return info
	}

	remoteUrl, err := getRemoteUrl(source)
	switch {
	case err != nil && strings.Contains(err.Error(), "No remote configured"):
		logging.Debug().Msg("skipping finding links: repository has no configured remote.")
		info.Platform = scm.NoPlatform

	case err != nil:
		logging.Error().Err(err).Msg("skipping finding links: unable to parse remote URL")

	case platform == scm.UnknownPlatform:
		// No platform was requested explicitly, so try to infer it.
		info.Platform = platformFromHost(remoteUrl)
		host := remoteUrl.Hostname()
		if info.Platform != scm.UnknownPlatform {
			logging.Debug().
				Str("host", host).
				Str("platform", info.Platform.String()).
				Msg("SCM platform parsed from host")
		} else {
			logging.Info().
				Str("host", host).
				Msg("Unknown SCM platform. Use --platform to include links in findings.")
		}
	}

	if remoteUrl != nil {
		info.Url = remoteUrl.String()
	}
	return info
}
// sshUrlpat matches scp-style SSH remotes of the form git@<host>:<path>, with
// an optional trailing ".git". Capture group 1 is the host and capture group 2
// is the path without the ".git" suffix.
var sshUrlpat = regexp.MustCompile(`^git@([a-zA-Z0-9.-]+):([\w/.-]+?)(?:\.git)?$`)
// getRemoteUrl runs `git ls-remote --quiet --get-url` and returns the reported
// remote URL in parsed form. The command runs in source unless source is ".",
// in which case the working directory is left unset.
//
// Remotes matching sshUrlpat (git@host:path) are rewritten to
// https://host/path, a trailing ".git" is removed, and any user info is
// cleared from the parsed URL.
//
// If git exits with a failure status, the returned error carries the exit code
// and the trimmed stderr output and wraps the underlying error. Any other
// execution error is returned unchanged, and a URL that cannot be parsed
// yields a wrapped parse error.
func getRemoteUrl(source string) (*url.URL, error) {
	// This will return the first remote — typically, "origin".
	gitCmd := exec.Command("git", "ls-remote", "--quiet", "--get-url")
	if source != "." {
		gitCmd.Dir = source
	}

	out, err := gitCmd.Output()
	if err != nil {
		var exitErr *exec.ExitError
		if !errors.As(err, &exitErr) {
			return nil, err
		}
		stderr := string(bytes.TrimSpace(exitErr.Stderr))
		return nil, fmt.Errorf("command failed (%d): %w, stderr: %s", exitErr.ExitCode(), err, stderr)
	}

	raw := string(bytes.TrimSpace(out))
	if m := sshUrlpat.FindStringSubmatch(raw); m != nil {
		// m[1] is the host, m[2] the repository path.
		raw = "https://" + m[1] + "/" + m[2]
	}

	parsed, err := url.Parse(strings.TrimSuffix(raw, ".git"))
	if err != nil {
		return nil, fmt.Errorf("unable to parse remote URL: %w", err)
	}

	// Remove any user info.
	parsed.User = nil
	return parsed, nil
}
// platformFromHost maps the host name of u to a known SCM platform. The
// comparison is case-insensitive. It returns scm.UnknownPlatform when u is nil
// or its host is not recognized.
//
// Besides the bare "visualstudio.com" host, any subdomain of it is treated as
// Azure DevOps, because legacy Azure DevOps organization URLs have the form
// {organization}.visualstudio.com.
func platformFromHost(u *url.URL) scm.Platform {
	if u == nil {
		return scm.UnknownPlatform
	}

	host := strings.ToLower(u.Hostname())
	switch {
	case host == "github.com":
		return scm.GitHubPlatform
	case host == "gitlab.com":
		return scm.GitLabPlatform
	case host == "dev.azure.com",
		host == "visualstudio.com",
		strings.HasSuffix(host, ".visualstudio.com"):
		return scm.AzureDevOpsPlatform
	case host == "bitbucket.org":
		return scm.BitBucketPlatform
	default:
		return scm.UnknownPlatform
	}
}