Skip to content
Open
Prev Previous commit
Next Next commit
Added --include-unreachable flag and related processing and output.
  • Loading branch information
Scott Arbeit committed Apr 16, 2025
commit d9da39ee5482e7b63977492f5720c1f8846bbabb
14 changes: 14 additions & 0 deletions git-sizer.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@

"github.com/spf13/pflag"

"github.com/github/git-sizer/counts"
"github.com/github/git-sizer/git"
"github.com/github/git-sizer/internal/refopts"
"github.com/github/git-sizer/isatty"
Expand Down Expand Up @@ -46,6 +47,7 @@
gitconfig: 'sizer.jsonVersion'.
--[no-]progress report (don't report) progress to stderr. Can
be set via gitconfig: 'sizer.progress'.
--include-unreachable include unreachable objects
--version only report the git-sizer version number

Object selection:
Expand Down Expand Up @@ -131,6 +133,7 @@
var progress bool
var version bool
var showRefs bool
var includeUnreachable bool

// Try to open the repository, but it's not an error yet if this
// fails, because the user might only be asking for `--help`.
Expand Down Expand Up @@ -207,6 +210,7 @@
rgb.AddRefopts(flags)

flags.BoolVar(&showRefs, "show-refs", false, "list the references being processed")
flags.BoolVar(&includeUnreachable, "include-unreachable", false, "include unreachable objects")

flags.SortFlags = false

Expand Down Expand Up @@ -344,6 +348,16 @@

historySize.GitDirSize = gitDirSize

// Get unreachable object stats and add to output if requested
if includeUnreachable {
historySize.ShowUnreachable = true
unreachableStats, err := repo.GetUnreachableStats()
if err == nil {
historySize.UnreachableObjectCount = counts.Count32(unreachableStats.Count)

Check failure on line 356 in git-sizer.go

View workflow job for this annotation

GitHub Actions / test (macos-latest)

cannot use counts.Count32(unreachableStats.Count) (type counts.Count32) as type counts.Count64 in assignment

Check failure on line 356 in git-sizer.go

View workflow job for this annotation

GitHub Actions / lint

cannot use counts.Count32(unreachableStats.Count) (value of type counts.Count32) as counts.Count64 value in assignment (typecheck)

Check failure on line 356 in git-sizer.go

View workflow job for this annotation

GitHub Actions / test (windows-latest)

cannot use counts.Count32(unreachableStats.Count) (type counts.Count32) as type counts.Count64 in assignment

Check failure on line 356 in git-sizer.go

View workflow job for this annotation

GitHub Actions / test (ubuntu-latest)

cannot use counts.Count32(unreachableStats.Count) (type counts.Count32) as type counts.Count64 in assignment
historySize.UnreachableObjectSize = counts.Count64(unreachableStats.Size)
}
}

if jsonOutput {
var j []byte
var err error
Expand Down
97 changes: 96 additions & 1 deletion git/git.go
Original file line number Diff line number Diff line change
@@ -1,13 +1,16 @@
package git

import (
"bufio"
"bytes"
"errors"
"fmt"
"io"
"io/fs"
"os"
"os/exec"
"path/filepath"
"strings"
)

// ObjectType represents the type of a Git object ("blob", "tree",
Expand Down Expand Up @@ -157,7 +160,7 @@
return repo.gitDir, nil
}

// GitPath returns that path of a file within the git repository, by
// GitPath returns the path of a file within the git repository, by
// calling `git rev-parse --git-path $relPath`. The returned path is
// relative to the current directory.
func (repo *Repository) GitPath(relPath string) (string, error) {
Expand All @@ -173,3 +176,95 @@
// current directory, we can use it as-is:
return string(bytes.TrimSpace(out)), nil
}

// UnreachableStats summarizes the unreachable objects found in a
// repository.
type UnreachableStats struct {
	// Count is the number of unreachable objects; Size is the sum of the
	// object sizes reported for them by 'git cat-file --batch-check'.
	Count, Size int64
}

// GetUnreachableStats runs 'git fsck --unreachable --no-reflogs --full'
// and returns the count and total size of unreachable objects.
// This implementation collects all OIDs from fsck output and then uses
// batch mode to efficiently retrieve their sizes.
func (repo *Repository) GetUnreachableStats() (UnreachableStats, error) {
// Run git fsck. Using CombinedOutput captures both stdout and stderr.
cmd := exec.Command(repo.gitBin, "-C", repo.gitDir, "fsck", "--unreachable", "--no-reflogs", "--full")

Check failure on line 192 in git/git.go

View workflow job for this annotation

GitHub Actions / lint

G204: Subprocess launched with a potential tainted input or cmd arguments (gosec)
Comment thread
ScottArbeit marked this conversation as resolved.
Outdated
cmd.Env = os.Environ()
output, err := cmd.CombinedOutput()
if err != nil {
fmt.Fprintln(os.Stderr)
fmt.Fprintln(os.Stderr, "An error occurred trying to process unreachable objects.")
os.Stderr.Write(output)

Check failure on line 198 in git/git.go

View workflow job for this annotation

GitHub Actions / lint

Error return value of `os.Stderr.Write` is not checked (errcheck)
fmt.Fprintln(os.Stderr)
return UnreachableStats{Count: 0, Size: 0}, err
}

var oids []string
count := int64(0)
for _, line := range bytes.Split(output, []byte{'\n'}) {
fields := bytes.Fields(line)
// Expected line format: "unreachable <type> <oid> ..."
if len(fields) >= 3 && string(fields[0]) == "unreachable" {
count++
oid := string(fields[2])
oids = append(oids, oid)
}
}

// Retrieve the total size using batch mode.
totalSize, err := repo.getTotalSizeFromOids(oids)
if err != nil {
return UnreachableStats{}, fmt.Errorf("failed to get sizes via batch mode: %w", err)
}

return UnreachableStats{Count: count, Size: totalSize}, nil
}

// getTotalSizeFromOids uses 'git cat-file --batch-check' to retrieve sizes for
// the provided OIDs. It writes each OID to stdin and reads back lines in the
// format: "<oid> <type> <size>".
func (repo *Repository) getTotalSizeFromOids(oids []string) (int64, error) {
cmd := exec.Command(repo.gitBin, "-C", repo.gitDir, "cat-file", "--batch-check")

Check failure on line 228 in git/git.go

View workflow job for this annotation

GitHub Actions / lint

G204: Subprocess launched with a potential tainted input or cmd arguments (gosec)
stdinPipe, err := cmd.StdinPipe()
if err != nil {
return 0, fmt.Errorf("failed to get stdin pipe: %w", err)
}
stdoutPipe, err := cmd.StdoutPipe()
if err != nil {
return 0, fmt.Errorf("failed to get stdout pipe: %w", err)
}

if err := cmd.Start(); err != nil {
return 0, fmt.Errorf("failed to start git cat-file batch: %w", err)
}

// Write all OIDs to the batch process.
go func() {
defer stdinPipe.Close()

Check failure on line 244 in git/git.go

View workflow job for this annotation

GitHub Actions / lint

Error return value of `stdinPipe.Close` is not checked (errcheck)
for _, oid := range oids {
io.WriteString(stdinPipe, oid+"\n")

Check failure on line 246 in git/git.go

View workflow job for this annotation

GitHub Actions / lint

Error return value of `io.WriteString` is not checked (errcheck)
}
}()

var totalSize int64
scanner := bufio.NewScanner(stdoutPipe)
// Each line is expected to be: "<oid> <type> <size>"
for scanner.Scan() {
parts := strings.Fields(scanner.Text())
if len(parts) == 3 {
var size int64
fmt.Sscanf(parts[2], "%d", &size)

Check failure on line 257 in git/git.go

View workflow job for this annotation

GitHub Actions / lint

G104: Errors unhandled. (gosec)
totalSize += size
} else {
return 0, fmt.Errorf("unexpected output format: %s", scanner.Text())
}
}
if err := scanner.Err(); err != nil {
return 0, fmt.Errorf("error reading git cat-file output: %w", err)
}
if err := cmd.Wait(); err != nil {
return 0, fmt.Errorf("git cat-file batch process error: %w", err)
}
return totalSize, nil
}
20 changes: 16 additions & 4 deletions sizes/output.go
Original file line number Diff line number Diff line change
Expand Up @@ -489,8 +489,7 @@ func (s *HistorySize) contents(refGroups []RefGroup) tableContents {
rgis = append(rgis, rgi.Indented(indent))
}

return S(
"",
sections := []tableContents{
S(
"Repository statistics",
S(
Expand Down Expand Up @@ -532,7 +531,6 @@ func (s *HistorySize) contents(refGroups []RefGroup) tableContents {
"The actual on-disk size of the .git directory",
nil, s.GitDirSize, binary, "B", 1e9),
),

S(
"Annotated tags",
I("uniqueTagCount", "Count",
Expand Down Expand Up @@ -610,5 +608,19 @@ func (s *HistorySize) contents(refGroups []RefGroup) tableContents {
"The maximum number of submodules in any checkout",
s.MaxExpandedSubmoduleCountTree, s.MaxExpandedSubmoduleCount, metric, "", 100),
),
)
}

if s.ShowUnreachable {
sections = append(sections, S(
"Unreachable objects",
I("unreachableObjectCount", "Count",
"The total number of unreachable objects in the repository",
nil, s.UnreachableObjectCount, metric, "", 1e7),
I("unreachableObjectSize", "Uncompressed total size",
"The total size of unreachable objects in the repository",
nil, s.UnreachableObjectSize, binary, "B", 1e9),
))
}

return S("", sections...)
}
8 changes: 8 additions & 0 deletions sizes/sizes.go
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,14 @@ type HistorySize struct {

// The actual size of the .git directory on disk.
GitDirSize counts.Count64 `json:"git_dir_size"`

// The total number of unreachable objects in the repository.
UnreachableObjectCount counts.Count64 `json:"unreachable_object_count"`

// The total size of unreachable objects in the repository.
UnreachableObjectSize counts.Count64 `json:"unreachable_object_size"`

ShowUnreachable bool `json:"-"`
}

// Convenience function: forget `*path` if it is non-nil and overwrite
Expand Down
Loading