Under-the-hood of Git

1: Overview

> s='abc'
> printf "$s" | git hash-object --stdin
> printf "blob $(printf "$s" | wc -c)\0$s" | sha1sum

Components

> ls .git/HEAD ref: refs/heads/master > ls .git/refs/heads/master 2e1803ee08fa9aa36e4c5918220e283380a4c385

2: Building our own Git

// imports excluded, see linked repo for details
const init = () => {
const workingDirectory = workingDir();
const files = glob.sync("**/*.txt", { cwd: workingDirectory }); // (1)

const indexData = files.reduce((acc, curr) => { // (2)
const hash = hashFileStats(curr);
acc[curr] = {
cwd: hash,
staging: "",
repository: "",
};
return acc;
}, {});

fs.mkdirSync(`${workingDirectory}/.repo`); // (3)
updateIndex(indexData);
fs.writeFileSync(`${workingDirectory}/.repo/HEAD`); // (4)
fs.mkdirSync(`${workingDirectory}/.repo/objects`); // (4)
}
// imports excluded, see linked repo for details
const status = () => {
const indexData = getIndexData(); // (1)

const notStaged = [];
const notComitted = [];
const updatedIndexData = Object.keys(indexData).reduce((acc, curr) => { // (2)
const hash = hashFileStats(curr); // (2a)
if (hash !== indexData[curr].cwd) { // (2b)
acc[curr] = {
cwd: hash,
staging: indexData[curr].staging,
repository: indexData[curr].repository,
};
notStaged.push(curr);
} else {
if (indexData[curr].cwd !== indexData[curr].staging) {
notStaged.push(curr); // (2c)
} else if (indexData[curr].staging !== indexData[curr].repository) {
notComitted.push(curr); // (2d)
}
acc[curr] = indexData[curr];
}

return acc;
}, {});

updateIndex(updatedIndexData); // (3)

console.log("\nChanged locally but not staged:");
notStaged.map((message) => console.log(`- ${message}`)); // (4)
console.log("\nStaged but not comitted:");
notComitted.map((message) => console.log(`- ${message}`)); // (5)
}
// imports excluded, see linked repo for details
const add = () => {
const workingDirectory = workingDir();

const files = process.argv.slice(2); // (1)

const indexData = getIndexData();

console.log("[add] - write blob objects");
const updatedFiles = files.map((file) => {
const blobHash = hashBlobContentsInFile(file); // (2)
const blobDir = blobHash.substring(0, 2);
const blobObject = blobHash.substring(2);

// TODO - check exists first - for re-adding file with earlier contents
fs.mkdirSync(`${workingDirectory}/.repo/objects/${blobDir}`);

const blobCompressed = compressBlobContentsInFile(file); // (3)
fs.writeFileSync(
`${workingDirectory}/.repo/objects/${blobDir}/${blobObject}`,
blobCompressed
);

const hash = hashFileStats(file); // (4)

return {
file,
hash,
};
});

const updatedIndexData = Object.keys(indexData).reduce((acc, curr) => { // (5)
if (!updatedFiles.find((item) => item.file === curr)) { // (5a)
acc[curr] = {
cwd: indexData[curr].cwd,
staging: indexData[curr].staging,
repository: indexData[curr].repository,
};
return acc;
}
acc[curr] = {
cwd: indexData[curr].cwd,
staging: updatedFiles.find((item) => item.file === curr).hash, // (5b)
repository: indexData[curr].repository,
};
return acc;
}, {});

updateIndex(updatedIndexData); // (6)
}
// imports excluded, see linked repo for details

// array of dir (name) and files (children), ordered by bottom-up
const _buildTree = (paths) => {
return paths.reduce(
(parent, path, key) => {
path.split("/").reduce((r, name, i, { length }) => {
if (!r.children) {
r.children = [];
}
let temp = r.children.find((q) => q.name === name);
if (!temp) {
temp = { name };
if (i + 1 === length) {
temp.type = "blob";
temp.hash = hashBlobContentsInFile(path);
} else {
temp.type = "tree";
}
r.children.push(temp);
}
return temp;
}, parent);

return parent;
},
{ children: [] }
).children;
};

const commit = () => {
const workingDirectory = workingDir();
const indexData = getIndexData();
// TODO - if comitted already then dont recreate tree?? PROB chek first
const paths = Object.keys(indexData).filter( // (1)
(item) => indexData[item].staging || indexData[item].repository
);

const rootTrees = _buildTree(paths); // (2)

const flattenedTrees = rootTrees.reverse().reduce((acc, curr, key) => { // (3)
if (curr.children) {
const hash = createTreeObject(curr.children); // (3a)
const clone = Object.assign({}, curr);
delete clone.children;
clone.hash = hash;
acc.push(curr.children); // (3b)
acc.push([clone]);
} else {
acc[key].push(curr); (3c)
}
return acc;
}, []);

const rootTree = flattenedTrees.reverse()[0];
const treeForCommit = createTreeObject(rootTree); // (4)

const parent = getParentCommit();

const commit = { // (5)
tree: treeForCommit,
parent: parent === "undefined" ? null : parent,
author: "CRAIG", // hardcoded for now
committor: "CRAIG",
message: "Initial commit",
};

const commitHash = createCommitObject(commit); // (6)

const updatedIndexData = Object.keys(indexData).reduce((acc, curr) => { // (7)
const { cwd, staging, repository } = indexData[curr];
let updatedRepo = repository;
if (staging !== repository) { (7a)
updatedRepo = staging;
}
acc[curr] = {
cwd: indexData[curr].cwd,
staging: indexData[curr].staging,
repository: updatedRepo,
};
return acc;
}, {});
updateIndex(updatedIndexData);

fs.writeFileSync(`${workingDirectory}/.repo/HEAD`, commitHash); // (8)
}
import fs from "fs";
import crypto from "crypto";
import zlib from "zlib";

export const workingDir = () => {
const cwd = process.cwd();
return cwd + "/src";
};

export const sha1 = (object) => {
const string = JSON.stringify(object);
return crypto.createHash("sha1").update(string).digest("hex");
};

const getFilePath = (file) => {
const workingDirectory = workingDir();
return `${workingDirectory}/${file}`;
};
const getContentsInFile = (file) => {
const path = getFilePath(file);
return fs.readFileSync(path, { encoding: "utf-8" });
};

export const compressBlobContentsInFile = (file) => {
const contents = getContentsInFile(file);
return zlib.deflateSync(contents);
};

// always same based on contents
export const hashBlobContentsInFile = (file) => {
const contents = getContentsInFile(file);
return sha1({ type: "blob", contents });
};

// different based on midified time
// remove atime + atimeMs which are different each stat() call
export const hashFileStats = (file) => {
const path = getFilePath(file);
const contents = fs.statSync(path);
delete contents["atime"];
delete contents["atimeMs"];
return sha1(contents);
};

export const getIndexData = () => {
const workingDirectory = workingDir();
return JSON.parse(
fs.readFileSync(`${workingDirectory}/.repo/index`, { encoding: "utf-8" })
);
};

export const updateIndex = (indexData) => {
const workingDirectory = workingDir();
fs.writeFileSync(
`${workingDirectory}/.repo/index`,
JSON.stringify(indexData)
);
};

// hash contents, create tree, return hash
export const createTreeObject = (contents) => {
const contentsClone = Object.assign([], contents);
const flatContents = contentsClone.map((item) => {
delete item.children; // dont need full children depth
return item;
});
const workingDirectory = workingDir();
const stringContents = JSON.stringify(flatContents);
const treeHash = sha1(stringContents);
const treeDir = treeHash.substring(0, 2);
const treeObject = treeHash.substring(2);
const treeCompressed = zlib.deflateSync(stringContents);
// create tree object
fs.mkdirSync(`${workingDirectory}/.repo/objects/${treeDir}`);
fs.writeFileSync(
`${workingDirectory}/.repo/objects/${treeDir}/${treeObject}`,
treeCompressed
);
return treeHash;
};

export const createCommitObject = (contents) => {
const workingDirectory = workingDir();
const stringContents = JSON.stringify(contents);
const commitHash = sha1(stringContents);
const commitDir = commitHash.substring(0, 2);
const commitObject = commitHash.substring(2);
const commitCompressed = zlib.deflateSync(stringContents);
// create commit object
fs.mkdirSync(`${workingDirectory}/.repo/objects/${commitDir}`);
fs.writeFileSync(
`${workingDirectory}/.repo/objects/${commitDir}/${commitObject}`,
commitCompressed
);
return commitHash;
};

export const getParentCommit = () => {
const workingDirectory = workingDir();
return fs.readFileSync(`${workingDirectory}/.repo/HEAD`, {
encoding: "utf-8",
});
};

What have we missed?

--

--

Get the Medium app

A button that says 'Download on the App Store', and if clicked it will lead you to the iOS App store
A button that says 'Get it on, Google Play', and if clicked it will lead you to the Google Play store