Files

308 lines
6.1 KiB
Go

package gitnaturalapi
import (
"bytes"
"compress/zlib"
"crypto/sha1"
"encoding/binary"
"encoding/hex"
"fmt"
"io"
)
// Object type codes as they appear in the 3-bit type field of a packfile
// entry header (see parsePackObject). The value 5 is not assigned.
const (
	// ObjectTypeCommit marks an entry holding a commit.
	ObjectTypeCommit = 1
	// ObjectTypeTree marks an entry holding a tree.
	ObjectTypeTree = 2
	// ObjectTypeBlob marks an entry holding a blob.
	ObjectTypeBlob = 3
	// ObjectTypeTag marks an entry holding a tag.
	ObjectTypeTag = 4
	// ObjectTypeOfsDelta marks a delta whose base is identified by a
	// backwards byte offset within the same packfile.
	ObjectTypeOfsDelta = 6
	// ObjectTypeRefDelta marks a delta whose base is identified by its
	// 20-byte object name.
	ObjectTypeRefDelta = 7
)
// ParsedObject is one fully resolved entry read from a packfile.
type ParsedObject struct {
	// Type is the object's type code. For delta entries it is the type of
	// the base object the delta was applied to, not the delta type itself.
	Type int
	// Size is the size value stored in the entry header, recorded as-is; it
	// is not recomputed from Data.
	Size int
	// Data is the inflated content, or for delta entries the content
	// reconstructed by applying the delta to its base.
	Data []byte
	// Offset is the byte position in the packfile where the entry starts.
	Offset int
	// Hash is the hex-encoded SHA-1 object name computed from Type and Data.
	Hash string
}
// PackfileResult is what ParsePackfile returns for a successfully parsed
// packfile.
type PackfileResult struct {
	// Version is the format version read from the packfile header.
	Version int
	// Count is the number of entries the packfile header declares.
	Count int
	// Objects holds every parsed object, keyed by its Hash. Entries that
	// resolve to the same hash share a single map slot.
	Objects map[string]*ParsedObject
}
// ParsePackfile parses an in-memory version 2 packfile.
//
// The 12-byte header is validated first: the 4-byte "PACK" magic, then a
// big-endian uint32 version (only 2 is accepted) and a big-endian uint32
// object count. The declared number of entries is then decoded back to back,
// starting immediately after the header. Delta entries are resolved while
// parsing, so every object in the result carries its final type and content.
//
// An error is returned if the header is invalid or any entry fails to parse.
// Bytes that follow the last entry are not inspected.
func ParsePackfile(data []byte) (*PackfileResult, error) {
	const headerSize = 12

	if len(data) < headerSize {
		return nil, fmt.Errorf("packfile too short")
	}
	if magic := string(data[:4]); magic != "PACK" {
		return nil, fmt.Errorf("invalid packfile header: %s", magic)
	}

	version := int(binary.BigEndian.Uint32(data[4:8]))
	if version != 2 {
		return nil, fmt.Errorf("unsupported packfile version: %d", version)
	}
	count := int(binary.BigEndian.Uint32(data[8:12]))

	result := &PackfileResult{
		Version: version,
		Count:   count,
		Objects: make(map[string]*ParsedObject),
	}

	// Each entry reports where the next one begins.
	cursor := headerSize
	for n := 1; n <= count; n++ {
		obj, next, err := parsePackObject(data, cursor, result.Objects)
		if err != nil {
			return nil, fmt.Errorf("error parsing object %d/%d: %w", n, count, err)
		}
		result.Objects[obj.Hash] = obj
		cursor = next
	}
	return result, nil
}
// parsePackObject decodes the single packfile entry that begins at startPos.
//
// The entry header is a variable-length field: the first byte carries the
// object type in bits 4-6 and the low four bits of the size; while a byte has
// its high bit set, another byte follows that contributes seven more size
// bits, least significant group first. The payload after the header depends
// on the type: plain objects are a zlib stream, delta objects are resolved
// through parseOfsDelta / parseRefDelta and take on the type of their base.
//
// objects is the set of objects parsed so far, keyed by hash; it is only
// consulted when resolving ref-deltas.
//
// It returns the resolved object, the position of the first byte after the
// entry, and an error if the entry is truncated, has an unknown type, or its
// payload cannot be decoded. Malformed input yields an error, never a panic.
func parsePackObject(data []byte, startPos int, objects map[string]*ParsedObject) (*ParsedObject, int, error) {
	if startPos < 0 || startPos >= len(data) {
		return nil, 0, fmt.Errorf("object offset %d out of range (packfile is %d bytes)", startPos, len(data))
	}

	pos := startPos
	b := data[pos]
	pos++
	objType := int((b >> 4) & 0x07)
	size := int(b & 0x0f)
	for shift := 4; b&0x80 != 0; shift += 7 {
		if pos >= len(data) {
			return nil, 0, fmt.Errorf("truncated object header at offset %d", startPos)
		}
		// Seven more bits starting at this shift would no longer fit in a
		// 64-bit int; such a header cannot describe a real object.
		if shift > 56 {
			return nil, 0, fmt.Errorf("object size overflow at offset %d", startPos)
		}
		b = data[pos]
		pos++
		size |= int(b&0x7f) << shift
	}

	var (
		objData []byte
		err     error
	)
	switch objType {
	case ObjectTypeOfsDelta:
		var actualType int
		objData, pos, actualType, err = parseOfsDelta(data, pos, startPos, objects)
		if err != nil {
			return nil, 0, err
		}
		// Report the type of the reconstructed object, not "delta".
		objType = actualType
	case ObjectTypeRefDelta:
		var actualType int
		objData, pos, actualType, err = parseRefDelta(data, pos, objects)
		if err != nil {
			return nil, 0, err
		}
		objType = actualType
	case ObjectTypeCommit, ObjectTypeTree, ObjectTypeBlob, ObjectTypeTag:
		objData, pos, err = zlibDecompress(data, pos)
		if err != nil {
			return nil, 0, err
		}
	default:
		return nil, 0, fmt.Errorf("unknown object type: %d", objType)
	}

	hash, err := computeObjectHash(objType, objData)
	if err != nil {
		return nil, 0, err
	}
	return &ParsedObject{
		Type:   objType,
		Size:   size, // header value as-is; not recomputed from objData
		Data:   objData,
		Offset: startPos,
		Hash:   hash,
	}, pos, nil
}
// parseOfsDelta resolves an offset-delta entry.
//
// pos points just past the entry header, at the encoded base offset;
// currentOffset is where the delta entry itself starts. The base offset is a
// big-endian base-128 number in which every continuation step adds one before
// shifting, and it is measured backwards from currentOffset. The base entry
// is parsed again from that position, the zlib-compressed delta following the
// offset is inflated, and the delta is applied to the base content.
//
// It returns the reconstructed content, the position of the first byte after
// the delta's compressed data, and the base object's type. An error is
// returned if the header is truncated, the offset does not point strictly
// before this entry, or the base or delta cannot be decoded.
func parseOfsDelta(data []byte, pos int, currentOffset int, objects map[string]*ParsedObject) ([]byte, int, int, error) {
	if pos < 0 || pos >= len(data) {
		return nil, 0, 0, fmt.Errorf("truncated ofs-delta header at offset %d", currentOffset)
	}
	b := data[pos]
	pos++
	offset := int(b & 0x7f)
	for b&0x80 != 0 {
		if pos >= len(data) {
			return nil, 0, 0, fmt.Errorf("truncated ofs-delta header at offset %d", currentOffset)
		}
		// The value only grows from here, so once it exceeds currentOffset
		// it is already invalid; stopping now also rules out int overflow.
		if offset > currentOffset {
			return nil, 0, 0, fmt.Errorf("invalid ofs-delta base offset at offset %d", currentOffset)
		}
		offset++
		offset <<= 7
		b = data[pos]
		pos++
		offset += int(b & 0x7f)
	}

	// The base must lie strictly before this entry. A zero offset would make
	// the entry its own base (endless recursion), and an offset larger than
	// currentOffset would point before the start of the data.
	if offset <= 0 || offset > currentOffset {
		return nil, 0, 0, fmt.Errorf("invalid ofs-delta base offset %d at offset %d", offset, currentOffset)
	}
	baseOffset := currentOffset - offset

	baseObject, _, err := parsePackObject(data, baseOffset, objects)
	if err != nil {
		return nil, 0, 0, fmt.Errorf("failed to parse base object at offset %d: %w", baseOffset, err)
	}
	delta, newPos, err := zlibDecompress(data, pos)
	if err != nil {
		return nil, 0, 0, err
	}
	fullObj, err := applyDelta(delta, baseObject.Data)
	if err != nil {
		return nil, 0, 0, err
	}
	return fullObj, newPos, baseObject.Type, nil
}
// parseRefDelta resolves a reference-delta entry.
//
// pos points just past the entry header, at the 20-byte object name of the
// base. The zlib-compressed delta that follows the name is inflated and
// applied to the base, which must already be present in objects (keyed by
// hex object name).
//
// It returns the reconstructed content, the position of the first byte after
// the delta's compressed data, and the base object's type. An error is
// returned if the name is truncated, the delta cannot be inflated or applied,
// or the base has not been parsed yet.
func parseRefDelta(data []byte, pos int, objects map[string]*ParsedObject) ([]byte, int, int, error) {
	// Also covers pos > len(data), where the difference is negative.
	if pos < 0 || len(data)-pos < sha1.Size {
		return nil, 0, 0, fmt.Errorf("truncated ref-delta base name at offset %d", pos)
	}
	baseName := hex.EncodeToString(data[pos : pos+sha1.Size])
	pos += sha1.Size

	delta, newPos, err := zlibDecompress(data, pos)
	if err != nil {
		return nil, 0, 0, err
	}
	baseObject, ok := objects[baseName]
	if !ok {
		return nil, 0, 0, fmt.Errorf("base object not found with name %s", baseName)
	}
	fullObj, err := applyDelta(delta, baseObject.Data)
	if err != nil {
		return nil, 0, 0, err
	}
	return fullObj, newPos, baseObject.Type, nil
}
// computeObjectHash returns the hex-encoded SHA-1 object name for content of
// the given type. The digest covers the header "<type> <length>" terminated
// by a NUL byte, followed by data itself, where <type> is one of "commit",
// "tree", "blob" or "tag" and <length> is len(data) in decimal.
//
// Only the four base object types are accepted; any other type code yields
// an error.
func computeObjectHash(objType int, data []byte) (string, error) {
	var kind string
	switch objType {
	case ObjectTypeCommit:
		kind = "commit"
	case ObjectTypeTree:
		kind = "tree"
	case ObjectTypeBlob:
		kind = "blob"
	case ObjectTypeTag:
		kind = "tag"
	default:
		return "", fmt.Errorf("unknown type when computing object hash: %d", objType)
	}

	digest := sha1.New()
	// Stream the header straight into the hash instead of building it first.
	fmt.Fprintf(digest, "%s %d\x00", kind, len(data))
	digest.Write(data)
	return hex.EncodeToString(digest.Sum(nil)), nil
}
// applyDelta reconstructs an object by applying delta to base.
//
// A delta starts with two variable-length integers (seven bits per byte,
// least significant group first, high bit set on all but the last byte): the
// expected size of base and the size of the result. Instructions follow until
// the end of the delta:
//
//   - High bit set: copy from base. Bits 0-3 select which of the four offset
//     bytes follow and bits 4-6 which of the three size bytes follow, both
//     little-endian; absent bytes are zero. A size of zero means 0x10000.
//   - High bit clear, non-zero: insert that many literal bytes, which follow
//     the instruction byte.
//   - Zero: invalid.
//
// Unlike a naive implementation, malformed input never panics. An error is
// returned when the delta is truncated, a copy reaches outside base, the
// declared base size does not match len(base), or the output does not end up
// exactly as long as the declared result size.
func applyDelta(delta []byte, base []byte) ([]byte, error) {
	baseSize, n := binary.Uvarint(delta)
	if n <= 0 {
		return nil, fmt.Errorf("invalid delta: malformed base size")
	}
	if baseSize != uint64(len(base)) {
		return nil, fmt.Errorf("invalid delta: expects a %d-byte base, got %d bytes", baseSize, len(base))
	}
	pos := n

	resultSize, n := binary.Uvarint(delta[pos:])
	if n <= 0 {
		return nil, fmt.Errorf("invalid delta: malformed result size")
	}
	pos += n

	// resultSize comes from untrusted input, so it is only used as a
	// capacity hint up to a bound derived from the real inputs; append grows
	// the buffer if a legitimate result turns out larger.
	capHint := len(base) + len(delta)
	if resultSize < uint64(capHint) {
		capHint = int(resultSize)
	}
	result := make([]byte, 0, capHint)

	for pos < len(delta) {
		cmd := delta[pos]
		pos++

		switch {
		case cmd&0x80 != 0:
			var copyOffset, copySize int
			// Flags 0-3 are offset bytes, flags 4-6 are size bytes; each
			// operand byte is present only when its flag is set.
			for i := uint(0); i < 7; i++ {
				if cmd&(1<<i) == 0 {
					continue
				}
				if pos >= len(delta) {
					return nil, fmt.Errorf("invalid delta: truncated copy instruction")
				}
				v := int(delta[pos])
				pos++
				if i < 4 {
					copyOffset |= v << (8 * i)
				} else {
					copySize |= v << (8 * (i - 4))
				}
			}
			if copySize == 0 {
				copySize = 0x10000
			}
			if copyOffset < 0 || copyOffset > len(base) || copySize > len(base)-copyOffset {
				return nil, fmt.Errorf("invalid delta: copy of %d bytes at offset %d exceeds base of %d bytes", copySize, copyOffset, len(base))
			}
			if uint64(len(result))+uint64(copySize) > resultSize {
				return nil, fmt.Errorf("invalid delta: output exceeds declared size %d", resultSize)
			}
			result = append(result, base[copyOffset:copyOffset+copySize]...)

		case cmd != 0:
			count := int(cmd)
			if count > len(delta)-pos {
				return nil, fmt.Errorf("invalid delta: truncated insert instruction")
			}
			if uint64(len(result))+uint64(count) > resultSize {
				return nil, fmt.Errorf("invalid delta: output exceeds declared size %d", resultSize)
			}
			result = append(result, delta[pos:pos+count]...)
			pos += count

		default:
			return nil, fmt.Errorf("invalid delta command")
		}
	}

	if uint64(len(result)) != resultSize {
		return nil, fmt.Errorf("invalid delta: produced %d bytes, expected %d", len(result), resultSize)
	}
	return result, nil
}
// zlibDecompress inflates the zlib stream that starts at data[pos].
//
// It returns the decompressed bytes and the position in data of the first
// byte after the stream, so the caller can continue with whatever follows.
// An error is returned if pos lies outside data or the stream is malformed
// or truncated.
func zlibDecompress(data []byte, pos int) ([]byte, int, error) {
	if pos < 0 || pos > len(data) {
		return nil, 0, fmt.Errorf("zlib stream offset %d out of range (data is %d bytes)", pos, len(data))
	}

	// bytes.Reader implements io.ByteReader, so the decompressor does not
	// read past the end of the stream; the bytes left unread afterwards tell
	// us exactly where the stream ended.
	br := bytes.NewReader(data[pos:])
	r, err := zlib.NewReader(br)
	if err != nil {
		return nil, 0, fmt.Errorf("zlib init error: %w", err)
	}
	decompressed, err := io.ReadAll(r)
	closeErr := r.Close()
	if err != nil {
		return nil, 0, fmt.Errorf("zlib decompress error: %w", err)
	}
	if closeErr != nil {
		return nil, 0, fmt.Errorf("zlib close error: %w", closeErr)
	}
	return decompressed, len(data) - br.Len(), nil
}
// readVariableInt decodes a variable-length integer starting at data[pos].
//
// Each byte contributes its low seven bits, least significant group first;
// a set high bit means another byte follows. It returns the decoded value and
// the number of bytes consumed.
//
// data is indexed without bounds checks, so the sequence must be terminated
// (a byte with a clear high bit) within data; otherwise the function panics.
func readVariableInt(data []byte, pos int) (int, int) {
	var value int
	start := pos
	for shift := 0; ; shift += 7 {
		cur := data[pos]
		pos++
		value |= int(cur&0x7f) << shift
		if cur < 0x80 {
			// High bit clear: this was the final byte.
			return value, pos - start
		}
	}
}