From 1e1315d5e587d481e73e295dd18e50026af48c79 Mon Sep 17 00:00:00 2001 From: Jeremy Stribling Date: Thu, 11 Oct 2018 16:24:25 -0700 Subject: [PATCH 1/4] object: get object size without reading whole object Signed-off-by: Jeremy Stribling --- plumbing/format/packfile/packfile.go | 16 ++++++ plumbing/storer/object.go | 2 + plumbing/storer/object_test.go | 10 ++++ storage/filesystem/object.go | 75 ++++++++++++++++++++++++++++ storage/filesystem/object_test.go | 24 +++++++++ storage/memory/storage.go | 10 ++++ 6 files changed, 137 insertions(+) diff --git a/plumbing/format/packfile/packfile.go b/plumbing/format/packfile/packfile.go index 852a8344b..dbd5d4bbd 100644 --- a/plumbing/format/packfile/packfile.go +++ b/plumbing/format/packfile/packfile.go @@ -90,6 +90,22 @@ func (p *Packfile) GetByOffset(o int64) (plumbing.EncodedObject, error) { return p.nextObject() } +func (p *Packfile) GetSizeByOffset(o int64) (size int64, err error) { + if _, err := p.s.SeekFromStart(o); err != nil { + if err == io.EOF || isInvalid(err) { + return 0, plumbing.ErrObjectNotFound + } + + return 0, err + } + + h, err := p.nextObjectHeader() + if err != nil { + return 0, err + } + return h.Length, nil +} + func (p *Packfile) nextObjectHeader() (*ObjectHeader, error) { h, err := p.s.NextObjectHeader() p.s.pendingObject = nil diff --git a/plumbing/storer/object.go b/plumbing/storer/object.go index 92aa62918..2ac9b091e 100644 --- a/plumbing/storer/object.go +++ b/plumbing/storer/object.go @@ -40,6 +40,8 @@ type EncodedObjectStorer interface { // HasEncodedObject returns ErrObjNotFound if the object doesn't // exist. If the object does exist, it returns nil. HasEncodedObject(plumbing.Hash) error + // EncodedObjectSize returns the plaintext size of the encoded object. 
+ EncodedObjectSize(plumbing.Hash) (int64, error) } // DeltaObjectStorer is an EncodedObjectStorer that can return delta diff --git a/plumbing/storer/object_test.go b/plumbing/storer/object_test.go index 6b4fe0fb6..bc22f7b06 100644 --- a/plumbing/storer/object_test.go +++ b/plumbing/storer/object_test.go @@ -141,6 +141,16 @@ func (o *MockObjectStorage) HasEncodedObject(h plumbing.Hash) error { return plumbing.ErrObjectNotFound } +func (o *MockObjectStorage) EncodedObjectSize(h plumbing.Hash) ( + size int64, err error) { + for _, o := range o.db { + if o.Hash() == h { + return o.Size(), nil + } + } + return 0, plumbing.ErrObjectNotFound +} + func (o *MockObjectStorage) EncodedObject(t plumbing.ObjectType, h plumbing.Hash) (plumbing.EncodedObject, error) { for _, o := range o.db { if o.Hash() == h { diff --git a/storage/filesystem/object.go b/storage/filesystem/object.go index 68bd140fb..d2ba411f6 100644 --- a/storage/filesystem/object.go +++ b/storage/filesystem/object.go @@ -160,6 +160,81 @@ func (s *ObjectStorage) HasEncodedObject(h plumbing.Hash) (err error) { return nil } +func (s *ObjectStorage) encodedObjectSizeFromUnpacked(h plumbing.Hash) ( + size int64, err error) { + f, err := s.dir.Object(h) + if err != nil { + if os.IsNotExist(err) { + return 0, plumbing.ErrObjectNotFound + } + + return 0, err + } + + r, err := objfile.NewReader(f) + if err != nil { + return 0, err + } + defer ioutil.CheckClose(r, &err) + + _, size, err = r.Header() + return size, err +} + +func (s *ObjectStorage) encodedObjectSizeFromPackfile(h plumbing.Hash) ( + size int64, err error) { + if err := s.requireIndex(); err != nil { + return 0, err + } + + pack, _, offset := s.findObjectInPackfile(h) + if offset == -1 { + return 0, plumbing.ErrObjectNotFound + } + + f, err := s.dir.ObjectPack(pack) + if err != nil { + return 0, err + } + defer ioutil.CheckClose(f, &err) + + idx := s.index[pack] + hash, err := idx.FindHash(offset) + if err == nil { + obj, ok := s.deltaBaseCache.Get(hash) + 
if ok { + return obj.Size(), nil + } + } + + if err != nil && err != plumbing.ErrObjectNotFound { + return 0, err + } + + var p *packfile.Packfile + if s.deltaBaseCache != nil { + p = packfile.NewPackfileWithCache(idx, s.dir.Fs(), f, s.deltaBaseCache) + } else { + p = packfile.NewPackfile(idx, s.dir.Fs(), f) + } + + return p.GetSizeByOffset(offset) +} + +// EncodedObjectSize returns the plaintext size of the given object, +// without actually reading the full object data from storage. +func (s *ObjectStorage) EncodedObjectSize(h plumbing.Hash) ( + size int64, err error) { + size, err = s.encodedObjectSizeFromUnpacked(h) + if err != nil && err != plumbing.ErrObjectNotFound { + return 0, err + } else if err == nil { + return size, nil + } + + return s.encodedObjectSizeFromPackfile(h) +} + // EncodedObject returns the object with the given hash, by searching for it in // the packfile and the git object directories. func (s *ObjectStorage) EncodedObject(t plumbing.ObjectType, h plumbing.Hash) (plumbing.EncodedObject, error) { diff --git a/storage/filesystem/object_test.go b/storage/filesystem/object_test.go index 407abf29c..3a0cc4f23 100644 --- a/storage/filesystem/object_test.go +++ b/storage/filesystem/object_test.go @@ -83,6 +83,30 @@ func (s *FsSuite) TestGetFromPackfileKeepDescriptors(c *C) { }) } +func (s *FsSuite) TestGetSizeOfObjectFile(c *C) { + fs := fixtures.ByTag(".git").ByTag("unpacked").One().DotGit() + o := NewObjectStorage(dotgit.New(fs), cache.NewObjectLRUDefault()) + + // Get the size of `tree_walker.go`. + expected := plumbing.NewHash("cbd81c47be12341eb1185b379d1c82675aeded6a") + size, err := o.EncodedObjectSize(expected) + c.Assert(err, IsNil) + c.Assert(size, Equals, int64(2412)) +} + +func (s *FsSuite) TestGetSizeFromPackfile(c *C) { + fixtures.Basic().ByTag(".git").Test(c, func(f *fixtures.Fixture) { + fs := f.DotGit() + o := NewObjectStorage(dotgit.New(fs), cache.NewObjectLRUDefault()) + + // Get the size of `binary.jpg`. 
+ expected := plumbing.NewHash("d5c0f4ab811897cadf03aec358ae60d21f91c50d") + size, err := o.EncodedObjectSize(expected) + c.Assert(err, IsNil) + c.Assert(size, Equals, int64(76110)) + }) +} + func (s *FsSuite) TestGetFromPackfileMultiplePackfiles(c *C) { fs := fixtures.ByTag(".git").ByTag("multi-packfile").One().DotGit() o := NewObjectStorage(dotgit.New(fs), cache.NewObjectLRUDefault()) diff --git a/storage/memory/storage.go b/storage/memory/storage.go index 2e3250905..6e1174240 100644 --- a/storage/memory/storage.go +++ b/storage/memory/storage.go @@ -122,6 +122,16 @@ func (o *ObjectStorage) HasEncodedObject(h plumbing.Hash) (err error) { return nil } +func (o *ObjectStorage) EncodedObjectSize(h plumbing.Hash) ( + size int64, err error) { + obj, ok := o.Objects[h] + if !ok { + return 0, plumbing.ErrObjectNotFound + } + + return obj.Size(), nil +} + func (o *ObjectStorage) EncodedObject(t plumbing.ObjectType, h plumbing.Hash) (plumbing.EncodedObject, error) { obj, ok := o.Objects[h] if !ok || (plumbing.AnyObject != t && obj.Type() != t) { From 5c471c34813577a420c1a5af61dd855f70badce1 Mon Sep 17 00:00:00 2001 From: Jeremy Stribling Date: Thu, 11 Oct 2018 16:28:19 -0700 Subject: [PATCH 2/4] tree: add a Size() method for getting plaintext size Without reading the entire object into memory. Signed-off-by: Jeremy Stribling --- plumbing/object/tree.go | 11 +++++++++++ plumbing/object/tree_test.go | 6 ++++++ 2 files changed, 17 insertions(+) diff --git a/plumbing/object/tree.go b/plumbing/object/tree.go index c36a1370f..78d61a1fb 100644 --- a/plumbing/object/tree.go +++ b/plumbing/object/tree.go @@ -87,6 +87,17 @@ func (t *Tree) File(path string) (*File, error) { return NewFile(path, e.Mode, blob), nil } +// Size returns the plaintext size of an object, without reading it +// into memory. 
+func (t *Tree) Size(path string) (int64, error) { + e, err := t.FindEntry(path) + if err != nil { + return 0, ErrEntryNotFound + } + + return t.s.EncodedObjectSize(e.Hash) +} + // Tree returns the tree identified by the `path` argument. // The path is interpreted as relative to the tree receiver. func (t *Tree) Tree(path string) (*Tree, error) { diff --git a/plumbing/object/tree_test.go b/plumbing/object/tree_test.go index 736642186..889c63ae8 100644 --- a/plumbing/object/tree_test.go +++ b/plumbing/object/tree_test.go @@ -98,6 +98,12 @@ func (s *TreeSuite) TestFileFailsWithExistingTrees(c *C) { c.Assert(err, Equals, ErrFileNotFound) } +func (s *TreeSuite) TestSize(c *C) { + size, err := s.Tree.Size("LICENSE") + c.Assert(err, IsNil) + c.Assert(size, Equals, int64(1072)) +} + func (s *TreeSuite) TestFiles(c *C) { var count int err := s.Tree.Files().ForEach(func(f *File) error { From b3a3f0ab953608fb283392d4431a963622643b4c Mon Sep 17 00:00:00 2001 From: Jeremy Stribling Date: Fri, 12 Oct 2018 13:00:37 -0700 Subject: [PATCH 3/4] filesystem: add a new test for EncodedObjectSize Suggested by taruti. 
Signed-off-by: Jeremy Stribling --- storage/filesystem/object.go | 4 +--- storage/filesystem/object_test.go | 14 ++++++++++++++ 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/storage/filesystem/object.go b/storage/filesystem/object.go index d2ba411f6..6cd2d4c8e 100644 --- a/storage/filesystem/object.go +++ b/storage/filesystem/object.go @@ -205,9 +205,7 @@ func (s *ObjectStorage) encodedObjectSizeFromPackfile(h plumbing.Hash) ( if ok { return obj.Size(), nil } - } - - if err != nil && err != plumbing.ErrObjectNotFound { + } else if err != nil && err != plumbing.ErrObjectNotFound { return 0, err } diff --git a/storage/filesystem/object_test.go b/storage/filesystem/object_test.go index 3a0cc4f23..4e6bbfb50 100644 --- a/storage/filesystem/object_test.go +++ b/storage/filesystem/object_test.go @@ -107,6 +107,20 @@ func (s *FsSuite) TestGetSizeFromPackfile(c *C) { }) } +func (s *FsSuite) TestGetSizeOfAllObjectFiles(c *C) { + fs := fixtures.ByTag(".git").One().DotGit() + o := NewObjectStorage(dotgit.New(fs), cache.NewObjectLRUDefault()) + + // Every hash yielded by ForEachObjectHash must report a non-zero size. + err := o.ForEachObjectHash(func(h plumbing.Hash) error { + size, err := o.EncodedObjectSize(h) + c.Assert(err, IsNil) + c.Assert(size, Not(Equals), int64(0)) + return nil + }) + c.Assert(err, IsNil) +} + func (s *FsSuite) TestGetFromPackfileMultiplePackfiles(c *C) { fs := fixtures.ByTag(".git").ByTag("multi-packfile").One().DotGit() o := NewObjectStorage(dotgit.New(fs), cache.NewObjectLRUDefault()) From 6faf286b97ff2e13fbdaf2c6179f8aef36b4498c Mon Sep 17 00:00:00 2001 From: Jeremy Stribling Date: Mon, 15 Oct 2018 10:17:37 -0700 Subject: [PATCH 4/4] packfile: add comment on GetSizeByOffset Suggested by mcuadros. 
Issue: src-d/go-git#982 Signed-off-by: Jeremy Stribling --- plumbing/format/packfile/packfile.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/plumbing/format/packfile/packfile.go b/plumbing/format/packfile/packfile.go index dbd5d4bbd..0d13066b9 100644 --- a/plumbing/format/packfile/packfile.go +++ b/plumbing/format/packfile/packfile.go @@ -90,6 +90,8 @@ func (p *Packfile) GetByOffset(o int64) (plumbing.EncodedObject, error) { return p.nextObject() } +// GetSizeByOffset retrieves the size of the encoded object from the +// packfile with the given offset. func (p *Packfile) GetSizeByOffset(o int64) (size int64, err error) { if _, err := p.s.SeekFromStart(o); err != nil { if err == io.EOF || isInvalid(err) {