2063 lines
73 KiB
HTML
2063 lines
73 KiB
HTML
<?xml version="1.0" encoding="UTF-8"?>
|
|
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"
|
|
"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
|
|
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
|
|
<head>
|
|
<meta http-equiv="Content-Type" content="application/xhtml+xml; charset=UTF-8" />
|
|
<meta name="generator" content="AsciiDoc 10.2.0" />
|
|
<title></title>
|
|
<style type="text/css">
|
|
/* Shared CSS for AsciiDoc xhtml11 and html5 backends */
|
|
|
|
/* Default font. */
|
|
body {
|
|
font-family: Georgia,serif;
|
|
}
|
|
|
|
/* Title font. */
|
|
h1, h2, h3, h4, h5, h6,
|
|
div.title, caption.title,
|
|
thead, p.table.header,
|
|
#toctitle,
|
|
#author, #revnumber, #revdate, #revremark,
|
|
#footer {
|
|
font-family: Arial,Helvetica,sans-serif;
|
|
}
|
|
|
|
body {
|
|
margin: 1em 5% 1em 5%;
|
|
}
|
|
|
|
a {
|
|
color: blue;
|
|
text-decoration: underline;
|
|
}
|
|
a:visited {
|
|
color: fuchsia;
|
|
}
|
|
|
|
em {
|
|
font-style: italic;
|
|
color: navy;
|
|
}
|
|
|
|
strong {
|
|
font-weight: bold;
|
|
color: #083194;
|
|
}
|
|
|
|
h1, h2, h3, h4, h5, h6 {
|
|
color: #527bbd;
|
|
margin-top: 1.2em;
|
|
margin-bottom: 0.5em;
|
|
line-height: 1.3;
|
|
}
|
|
|
|
h1, h2, h3 {
|
|
border-bottom: 2px solid silver;
|
|
}
|
|
h2 {
|
|
padding-top: 0.5em;
|
|
}
|
|
h3 {
|
|
float: left;
|
|
}
|
|
h3 + * {
|
|
clear: left;
|
|
}
|
|
h5 {
|
|
font-size: 1.0em;
|
|
}
|
|
|
|
div.sectionbody {
|
|
margin-left: 0;
|
|
}
|
|
|
|
hr {
|
|
border: 1px solid silver;
|
|
}
|
|
|
|
p {
|
|
margin-top: 0.5em;
|
|
margin-bottom: 0.5em;
|
|
}
|
|
|
|
ul, ol, li > p {
|
|
margin-top: 0;
|
|
}
|
|
ul > li { color: #aaa; }
|
|
ul > li > * { color: black; }
|
|
|
|
.monospaced, code, pre {
|
|
font-family: "Courier New", Courier, monospace;
|
|
font-size: inherit;
|
|
color: navy;
|
|
padding: 0;
|
|
margin: 0;
|
|
}
|
|
pre {
|
|
white-space: pre-wrap;
|
|
}
|
|
|
|
#author {
|
|
color: #527bbd;
|
|
font-weight: bold;
|
|
font-size: 1.1em;
|
|
}
|
|
#email {
|
|
}
|
|
#revnumber, #revdate, #revremark {
|
|
}
|
|
|
|
#footer {
|
|
font-size: small;
|
|
border-top: 2px solid silver;
|
|
padding-top: 0.5em;
|
|
margin-top: 4.0em;
|
|
}
|
|
#footer-text {
|
|
float: left;
|
|
padding-bottom: 0.5em;
|
|
}
|
|
#footer-badges {
|
|
float: right;
|
|
padding-bottom: 0.5em;
|
|
}
|
|
|
|
#preamble {
|
|
margin-top: 1.5em;
|
|
margin-bottom: 1.5em;
|
|
}
|
|
div.imageblock, div.exampleblock, div.verseblock,
|
|
div.quoteblock, div.literalblock, div.listingblock, div.sidebarblock,
|
|
div.admonitionblock {
|
|
margin-top: 1.0em;
|
|
margin-bottom: 1.5em;
|
|
}
|
|
div.admonitionblock {
|
|
margin-top: 2.0em;
|
|
margin-bottom: 2.0em;
|
|
margin-right: 10%;
|
|
color: #606060;
|
|
}
|
|
|
|
div.content { /* Block element content. */
|
|
padding: 0;
|
|
}
|
|
|
|
/* Block element titles. */
|
|
div.title, caption.title {
|
|
color: #527bbd;
|
|
font-weight: bold;
|
|
text-align: left;
|
|
margin-top: 1.0em;
|
|
margin-bottom: 0.5em;
|
|
}
|
|
div.title + * {
|
|
margin-top: 0;
|
|
}
|
|
|
|
td div.title:first-child {
|
|
margin-top: 0.0em;
|
|
}
|
|
div.content div.title:first-child {
|
|
margin-top: 0.0em;
|
|
}
|
|
div.content + div.title {
|
|
margin-top: 0.0em;
|
|
}
|
|
|
|
div.sidebarblock > div.content {
|
|
background: #ffffee;
|
|
border: 1px solid #dddddd;
|
|
border-left: 4px solid #f0f0f0;
|
|
padding: 0.5em;
|
|
}
|
|
|
|
div.listingblock > div.content {
|
|
border: 1px solid #dddddd;
|
|
border-left: 5px solid #f0f0f0;
|
|
background: #f8f8f8;
|
|
padding: 0.5em;
|
|
}
|
|
|
|
div.quoteblock, div.verseblock {
|
|
padding-left: 1.0em;
|
|
margin-left: 1.0em;
|
|
margin-right: 10%;
|
|
border-left: 5px solid #f0f0f0;
|
|
color: #888;
|
|
}
|
|
|
|
div.quoteblock > div.attribution {
|
|
padding-top: 0.5em;
|
|
text-align: right;
|
|
}
|
|
|
|
div.verseblock > pre.content {
|
|
font-family: inherit;
|
|
font-size: inherit;
|
|
}
|
|
div.verseblock > div.attribution {
|
|
padding-top: 0.75em;
|
|
text-align: left;
|
|
}
|
|
/* DEPRECATED: Pre version 8.2.7 verse style literal block. */
|
|
div.verseblock + div.attribution {
|
|
text-align: left;
|
|
}
|
|
|
|
div.admonitionblock .icon {
|
|
vertical-align: top;
|
|
font-size: 1.1em;
|
|
font-weight: bold;
|
|
text-decoration: underline;
|
|
color: #527bbd;
|
|
padding-right: 0.5em;
|
|
}
|
|
div.admonitionblock td.content {
|
|
padding-left: 0.5em;
|
|
border-left: 3px solid #dddddd;
|
|
}
|
|
|
|
div.exampleblock > div.content {
|
|
border-left: 3px solid #dddddd;
|
|
padding-left: 0.5em;
|
|
}
|
|
|
|
div.imageblock div.content { padding-left: 0; }
|
|
span.image img { border-style: none; vertical-align: text-bottom; }
|
|
a.image:visited { color: white; }
|
|
|
|
dl {
|
|
margin-top: 0.8em;
|
|
margin-bottom: 0.8em;
|
|
}
|
|
dt {
|
|
margin-top: 0.5em;
|
|
margin-bottom: 0;
|
|
font-style: normal;
|
|
color: navy;
|
|
}
|
|
dd > *:first-child {
|
|
margin-top: 0.1em;
|
|
}
|
|
|
|
ul, ol {
|
|
list-style-position: outside;
|
|
}
|
|
ol.arabic {
|
|
list-style-type: decimal;
|
|
}
|
|
ol.loweralpha {
|
|
list-style-type: lower-alpha;
|
|
}
|
|
ol.upperalpha {
|
|
list-style-type: upper-alpha;
|
|
}
|
|
ol.lowerroman {
|
|
list-style-type: lower-roman;
|
|
}
|
|
ol.upperroman {
|
|
list-style-type: upper-roman;
|
|
}
|
|
|
|
div.compact ul, div.compact ol,
|
|
div.compact p, div.compact p,
|
|
div.compact div, div.compact div {
|
|
margin-top: 0.1em;
|
|
margin-bottom: 0.1em;
|
|
}
|
|
|
|
tfoot {
|
|
font-weight: bold;
|
|
}
|
|
td > div.verse {
|
|
white-space: pre;
|
|
}
|
|
|
|
div.hdlist {
|
|
margin-top: 0.8em;
|
|
margin-bottom: 0.8em;
|
|
}
|
|
div.hdlist tr {
|
|
padding-bottom: 15px;
|
|
}
|
|
dt.hdlist1.strong, td.hdlist1.strong {
|
|
font-weight: bold;
|
|
}
|
|
td.hdlist1 {
|
|
vertical-align: top;
|
|
font-style: normal;
|
|
padding-right: 0.8em;
|
|
color: navy;
|
|
}
|
|
td.hdlist2 {
|
|
vertical-align: top;
|
|
}
|
|
div.hdlist.compact tr {
|
|
margin: 0;
|
|
padding-bottom: 0;
|
|
}
|
|
|
|
.comment {
|
|
background: yellow;
|
|
}
|
|
|
|
.footnote, .footnoteref {
|
|
font-size: 0.8em;
|
|
}
|
|
|
|
span.footnote, span.footnoteref {
|
|
vertical-align: super;
|
|
}
|
|
|
|
#footnotes {
|
|
margin: 20px 0 20px 0;
|
|
padding: 7px 0 0 0;
|
|
}
|
|
|
|
#footnotes div.footnote {
|
|
margin: 0 0 5px 0;
|
|
}
|
|
|
|
#footnotes hr {
|
|
border: none;
|
|
border-top: 1px solid silver;
|
|
height: 1px;
|
|
text-align: left;
|
|
margin-left: 0;
|
|
width: 20%;
|
|
min-width: 100px;
|
|
}
|
|
|
|
div.colist td {
|
|
padding-right: 0.5em;
|
|
padding-bottom: 0.3em;
|
|
vertical-align: top;
|
|
}
|
|
div.colist td img {
|
|
margin-top: 0.3em;
|
|
}
|
|
|
|
@media print {
|
|
#footer-badges { display: none; }
|
|
}
|
|
|
|
#toc {
|
|
margin-bottom: 2.5em;
|
|
}
|
|
|
|
#toctitle {
|
|
color: #527bbd;
|
|
font-size: 1.1em;
|
|
font-weight: bold;
|
|
margin-top: 1.0em;
|
|
margin-bottom: 0.1em;
|
|
}
|
|
|
|
div.toclevel0, div.toclevel1, div.toclevel2, div.toclevel3, div.toclevel4 {
|
|
margin-top: 0;
|
|
margin-bottom: 0;
|
|
}
|
|
div.toclevel2 {
|
|
margin-left: 2em;
|
|
font-size: 0.9em;
|
|
}
|
|
div.toclevel3 {
|
|
margin-left: 4em;
|
|
font-size: 0.9em;
|
|
}
|
|
div.toclevel4 {
|
|
margin-left: 6em;
|
|
font-size: 0.9em;
|
|
}
|
|
|
|
span.aqua { color: aqua; }
|
|
span.black { color: black; }
|
|
span.blue { color: blue; }
|
|
span.fuchsia { color: fuchsia; }
|
|
span.gray { color: gray; }
|
|
span.green { color: green; }
|
|
span.lime { color: lime; }
|
|
span.maroon { color: maroon; }
|
|
span.navy { color: navy; }
|
|
span.olive { color: olive; }
|
|
span.purple { color: purple; }
|
|
span.red { color: red; }
|
|
span.silver { color: silver; }
|
|
span.teal { color: teal; }
|
|
span.white { color: white; }
|
|
span.yellow { color: yellow; }
|
|
|
|
span.aqua-background { background: aqua; }
|
|
span.black-background { background: black; }
|
|
span.blue-background { background: blue; }
|
|
span.fuchsia-background { background: fuchsia; }
|
|
span.gray-background { background: gray; }
|
|
span.green-background { background: green; }
|
|
span.lime-background { background: lime; }
|
|
span.maroon-background { background: maroon; }
|
|
span.navy-background { background: navy; }
|
|
span.olive-background { background: olive; }
|
|
span.purple-background { background: purple; }
|
|
span.red-background { background: red; }
|
|
span.silver-background { background: silver; }
|
|
span.teal-background { background: teal; }
|
|
span.white-background { background: white; }
|
|
span.yellow-background { background: yellow; }
|
|
|
|
span.big { font-size: 2em; }
|
|
span.small { font-size: 0.6em; }
|
|
|
|
span.underline { text-decoration: underline; }
|
|
span.overline { text-decoration: overline; }
|
|
span.line-through { text-decoration: line-through; }
|
|
|
|
div.unbreakable { page-break-inside: avoid; }
|
|
|
|
|
|
/*
|
|
* xhtml11 specific
|
|
*
|
|
* */
|
|
|
|
div.tableblock {
|
|
margin-top: 1.0em;
|
|
margin-bottom: 1.5em;
|
|
}
|
|
div.tableblock > table {
|
|
border: 3px solid #527bbd;
|
|
}
|
|
thead, p.table.header {
|
|
font-weight: bold;
|
|
color: #527bbd;
|
|
}
|
|
p.table {
|
|
margin-top: 0;
|
|
}
|
|
/* Because the table frame attribute is overridden by CSS in most browsers. */
|
|
div.tableblock > table[frame="void"] {
|
|
border-style: none;
|
|
}
|
|
div.tableblock > table[frame="hsides"] {
|
|
border-left-style: none;
|
|
border-right-style: none;
|
|
}
|
|
div.tableblock > table[frame="vsides"] {
|
|
border-top-style: none;
|
|
border-bottom-style: none;
|
|
}
|
|
|
|
|
|
/*
|
|
* html5 specific
|
|
*
|
|
* */
|
|
|
|
table.tableblock {
|
|
margin-top: 1.0em;
|
|
margin-bottom: 1.5em;
|
|
}
|
|
thead, p.tableblock.header {
|
|
font-weight: bold;
|
|
color: #527bbd;
|
|
}
|
|
p.tableblock {
|
|
margin-top: 0;
|
|
}
|
|
table.tableblock {
|
|
border-width: 3px;
|
|
border-spacing: 0px;
|
|
border-style: solid;
|
|
border-color: #527bbd;
|
|
border-collapse: collapse;
|
|
}
|
|
th.tableblock, td.tableblock {
|
|
border-width: 1px;
|
|
padding: 4px;
|
|
border-style: solid;
|
|
border-color: #527bbd;
|
|
}
|
|
|
|
table.tableblock.frame-topbot {
|
|
border-left-style: hidden;
|
|
border-right-style: hidden;
|
|
}
|
|
table.tableblock.frame-sides {
|
|
border-top-style: hidden;
|
|
border-bottom-style: hidden;
|
|
}
|
|
table.tableblock.frame-none {
|
|
border-style: hidden;
|
|
}
|
|
|
|
th.tableblock.halign-left, td.tableblock.halign-left {
|
|
text-align: left;
|
|
}
|
|
th.tableblock.halign-center, td.tableblock.halign-center {
|
|
text-align: center;
|
|
}
|
|
th.tableblock.halign-right, td.tableblock.halign-right {
|
|
text-align: right;
|
|
}
|
|
|
|
th.tableblock.valign-top, td.tableblock.valign-top {
|
|
vertical-align: top;
|
|
}
|
|
th.tableblock.valign-middle, td.tableblock.valign-middle {
|
|
vertical-align: middle;
|
|
}
|
|
th.tableblock.valign-bottom, td.tableblock.valign-bottom {
|
|
vertical-align: bottom;
|
|
}
|
|
|
|
|
|
/*
|
|
* manpage specific
|
|
*
|
|
* */
|
|
|
|
body.manpage h1 {
|
|
padding-top: 0.5em;
|
|
padding-bottom: 0.5em;
|
|
border-top: 2px solid silver;
|
|
border-bottom: 2px solid silver;
|
|
}
|
|
body.manpage h2 {
|
|
border-style: none;
|
|
}
|
|
body.manpage div.sectionbody {
|
|
margin-left: 3em;
|
|
}
|
|
|
|
@media print {
|
|
body.manpage div#toc { display: none; }
|
|
}
|
|
|
|
|
|
</style>
|
|
<script type="text/javascript">
|
|
/*<+'])');
|
|
// Function that scans the DOM tree for header elements (the DOM2
|
|
// nodeIterator API would be a better technique but not supported by all
|
|
// browsers).
|
|
var iterate = function (el) {
|
|
for (var i = el.firstChild; i != null; i = i.nextSibling) {
|
|
if (i.nodeType == 1 /* Node.ELEMENT_NODE */) {
|
|
var mo = re.exec(i.tagName);
|
|
if (mo && (i.getAttribute("class") || i.getAttribute("className")) != "float") {
|
|
result[result.length] = new TocEntry(i, getText(i), mo[1]-1);
|
|
}
|
|
iterate(i);
|
|
}
|
|
}
|
|
}
|
|
iterate(el);
|
|
return result;
|
|
}
|
|
|
|
var toc = document.getElementById("toc");
|
|
if (!toc) {
|
|
return;
|
|
}
|
|
|
|
// Delete existing TOC entries in case we're reloading the TOC.
|
|
var tocEntriesToRemove = [];
|
|
var i;
|
|
for (i = 0; i < toc.childNodes.length; i++) {
|
|
var entry = toc.childNodes[i];
|
|
if (entry.nodeName.toLowerCase() == 'div'
|
|
&& entry.getAttribute("class")
|
|
&& entry.getAttribute("class").match(/^toclevel/))
|
|
tocEntriesToRemove.push(entry);
|
|
}
|
|
for (i = 0; i < tocEntriesToRemove.length; i++) {
|
|
toc.removeChild(tocEntriesToRemove[i]);
|
|
}
|
|
|
|
// Rebuild TOC entries.
|
|
var entries = tocEntries(document.getElementById("content"), toclevels);
|
|
for (var i = 0; i < entries.length; ++i) {
|
|
var entry = entries[i];
|
|
if (entry.element.id == "")
|
|
entry.element.id = "_toc_" + i;
|
|
var a = document.createElement("a");
|
|
a.href = "#" + entry.element.id;
|
|
a.appendChild(document.createTextNode(entry.text));
|
|
var div = document.createElement("div");
|
|
div.appendChild(a);
|
|
div.className = "toclevel" + entry.toclevel;
|
|
toc.appendChild(div);
|
|
}
|
|
if (entries.length == 0)
|
|
toc.parentNode.removeChild(toc);
|
|
},
|
|
|
|
|
|
/////////////////////////////////////////////////////////////////////
|
|
// Footnotes generator
|
|
/////////////////////////////////////////////////////////////////////
|
|
|
|
/* Based on footnote generation code from:
|
|
* http://www.brandspankingnew.net/archive/2005/07/format_footnote.html
|
|
*/
|
|
|
|
footnotes: function () {
|
|
// Delete existing footnote entries in case we're reloading the footnodes.
|
|
var i;
|
|
var noteholder = document.getElementById("footnotes");
|
|
if (!noteholder) {
|
|
return;
|
|
}
|
|
var entriesToRemove = [];
|
|
for (i = 0; i < noteholder.childNodes.length; i++) {
|
|
var entry = noteholder.childNodes[i];
|
|
if (entry.nodeName.toLowerCase() == 'div' && entry.getAttribute("class") == "footnote")
|
|
entriesToRemove.push(entry);
|
|
}
|
|
for (i = 0; i < entriesToRemove.length; i++) {
|
|
noteholder.removeChild(entriesToRemove[i]);
|
|
}
|
|
|
|
// Rebuild footnote entries.
|
|
var cont = document.getElementById("content");
|
|
var spans = cont.getElementsByTagName("span");
|
|
var refs = {};
|
|
var n = 0;
|
|
for (i=0; i<spans.length; i++) {
|
|
if (spans[i].className == "footnote") {
|
|
n++;
|
|
var note = spans[i].getAttribute("data-note");
|
|
if (!note) {
|
|
// Use [\s\S] in place of . so multi-line matches work.
|
|
// Because JavaScript has no s (dotall) regex flag.
|
|
note = spans[i].innerHTML.match(/\s*\[([\s\S]*)]\s*/)[1];
|
|
spans[i].innerHTML =
|
|
"[<a id='_footnoteref_" + n + "' href='#_footnote_" + n +
|
|
"' title='View footnote' class='footnote'>" + n + "</a>]";
|
|
spans[i].setAttribute("data-note", note);
|
|
}
|
|
noteholder.innerHTML +=
|
|
"<div class='footnote' id='_footnote_" + n + "'>" +
|
|
"<a href='#_footnoteref_" + n + "' title='Return to text'>" +
|
|
n + "</a>. " + note + "</div>";
|
|
var id =spans[i].getAttribute("id");
|
|
if (id != null) refs["#"+id] = n;
|
|
}
|
|
}
|
|
if (n == 0)
|
|
noteholder.parentNode.removeChild(noteholder);
|
|
else {
|
|
// Process footnoterefs.
|
|
for (i=0; i<spans.length; i++) {
|
|
if (spans[i].className == "footnoteref") {
|
|
var href = spans[i].getElementsByTagName("a")[0].getAttribute("href");
|
|
href = href.match(/#.*/)[0]; // Because IE return full URL.
|
|
n = refs[href];
|
|
spans[i].innerHTML =
|
|
"[<a href='#_footnote_" + n +
|
|
"' title='View footnote' class='footnote'>" + n + "</a>]";
|
|
}
|
|
}
|
|
}
|
|
},
|
|
|
|
install: function(toclevels) {
|
|
var timerId;
|
|
|
|
function reinstall() {
|
|
asciidoc.footnotes();
|
|
if (toclevels) {
|
|
asciidoc.toc(toclevels);
|
|
}
|
|
}
|
|
|
|
function reinstallAndRemoveTimer() {
|
|
clearInterval(timerId);
|
|
reinstall();
|
|
}
|
|
|
|
timerId = setInterval(reinstall, 500);
|
|
if (document.addEventListener)
|
|
document.addEventListener("DOMContentLoaded", reinstallAndRemoveTimer, false);
|
|
else
|
|
window.onload = reinstallAndRemoveTimer;
|
|
}
|
|
|
|
}
|
|
asciidoc.install();
|
|
/*]]>*/
|
|
</script>
|
|
</head>
|
|
<body class="article">
|
|
<div id="header">
|
|
</div>
|
|
<div id="content">
|
|
<div class="sect1">
|
|
<h2 id="_reftable">reftable</h2>
|
|
<div class="sectionbody">
|
|
<div class="sect2">
|
|
<h3 id="_overview">Overview</h3>
|
|
<div class="sect3">
|
|
<h4 id="_problem_statement">Problem statement</h4>
|
|
<div class="paragraph"><p>Some repositories contain a lot of references (e.g. android at 866k,
|
|
rails at 31k). The existing packed-refs format takes up a lot of space
|
|
(e.g. 62M), and does not scale with additional references. Lookup of a
|
|
single reference requires linearly scanning the file.</p></div>
|
|
<div class="paragraph"><p>Atomic pushes modifying multiple references require copying the entire
|
|
packed-refs file, which can be a considerable amount of data moved
|
|
(e.g. 62M in, 62M out) for even small transactions (2 refs modified).</p></div>
|
|
<div class="paragraph"><p>Repositories with many loose references occupy a large number of disk
|
|
blocks from the local file system, as each reference is its own file
|
|
storing 41 bytes (and another file for the corresponding reflog). This
|
|
negatively affects the number of inodes available when a large number of
|
|
repositories are stored on the same filesystem. Readers can be penalized
|
|
due to the larger number of syscalls required to traverse and read the
|
|
<code>$GIT_DIR/refs</code> directory.</p></div>
|
|
</div>
|
|
<div class="sect3">
|
|
<h4 id="_objectives">Objectives</h4>
|
|
<div class="ulist"><ul>
|
|
<li>
|
|
<p>
|
|
Near constant time lookup for any single reference, even when the
|
|
repository is cold and not in process or kernel cache.
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p>
|
|
Near constant time verification if an object name is referred to by at least
|
|
one reference (for allow-tip-sha1-in-want).
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p>
|
|
Efficient enumeration of an entire namespace, such as <code>refs/tags/</code>.
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p>
|
|
Support atomic push with <code>O</code>(<code>size_of_update</code>) operations.
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p>
|
|
Combine reflog storage with ref storage for small transactions.
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p>
|
|
Separate reflog storage for base refs and historical logs.
|
|
</p>
|
|
</li>
|
|
</ul></div>
|
|
</div>
|
|
<div class="sect3">
|
|
<h4 id="_description">Description</h4>
|
|
<div class="paragraph"><p>A reftable file is a portable binary file format customized for
|
|
reference storage. References are sorted, enabling linear scans, binary
|
|
search lookup, and range scans.</p></div>
|
|
<div class="paragraph"><p>Storage in the file is organized into variable sized blocks. Prefix
|
|
compression is used within a single block to reduce disk space. Block
|
|
size and alignment are tunable by the writer.</p></div>
|
|
</div>
|
|
<div class="sect3">
|
|
<h4 id="_performance">Performance</h4>
|
|
<div class="paragraph"><p>Space used, packed-refs vs. reftable:</p></div>
|
|
<div class="tableblock">
|
|
<table rules="all"
|
|
width="100%"
|
|
frame="border"
|
|
cellspacing="0" cellpadding="4">
|
|
<col width="16%" />
|
|
<col width="16%" />
|
|
<col width="16%" />
|
|
<col width="16%" />
|
|
<col width="16%" />
|
|
<col width="16%" />
|
|
<thead>
|
|
<tr>
|
|
<th align="left" valign="top">repository </th>
|
|
<th align="right" valign="top">packed-refs </th>
|
|
<th align="right" valign="top">reftable </th>
|
|
<th align="right" valign="top">% original </th>
|
|
<th align="right" valign="top">avg ref </th>
|
|
<th align="right" valign="top">avg obj</th>
|
|
</tr>
|
|
</thead>
|
|
<tbody>
|
|
<tr>
|
|
<td align="left" valign="top"><p class="table">android</p></td>
|
|
<td align="right" valign="top"><p class="table">62.2 M</p></td>
|
|
<td align="right" valign="top"><p class="table">36.1 M</p></td>
|
|
<td align="right" valign="top"><p class="table">58.0%</p></td>
|
|
<td align="right" valign="top"><p class="table">33 bytes</p></td>
|
|
<td align="right" valign="top"><p class="table">5 bytes</p></td>
|
|
</tr>
|
|
<tr>
|
|
<td align="left" valign="top"><p class="table">rails</p></td>
|
|
<td align="right" valign="top"><p class="table">1.8 M</p></td>
|
|
<td align="right" valign="top"><p class="table">1.1 M</p></td>
|
|
<td align="right" valign="top"><p class="table">57.7%</p></td>
|
|
<td align="right" valign="top"><p class="table">29 bytes</p></td>
|
|
<td align="right" valign="top"><p class="table">4 bytes</p></td>
|
|
</tr>
|
|
<tr>
|
|
<td align="left" valign="top"><p class="table">git</p></td>
|
|
<td align="right" valign="top"><p class="table">78.7 K</p></td>
|
|
<td align="right" valign="top"><p class="table">48.1 K</p></td>
|
|
<td align="right" valign="top"><p class="table">61.0%</p></td>
|
|
<td align="right" valign="top"><p class="table">50 bytes</p></td>
|
|
<td align="right" valign="top"><p class="table">4 bytes</p></td>
|
|
</tr>
|
|
<tr>
|
|
<td align="left" valign="top"><p class="table">git (heads)</p></td>
|
|
<td align="right" valign="top"><p class="table">332 b</p></td>
|
|
<td align="right" valign="top"><p class="table">269 b</p></td>
|
|
<td align="right" valign="top"><p class="table">81.0%</p></td>
|
|
<td align="right" valign="top"><p class="table">33 bytes</p></td>
|
|
<td align="right" valign="top"><p class="table">0 bytes</p></td>
|
|
</tr>
|
|
</tbody>
|
|
</table>
|
|
</div>
|
|
<div class="paragraph"><p>Scan (read 866k refs), by reference name lookup (single ref from 866k
|
|
refs), and by SHA-1 lookup (refs with that SHA-1, from 866k refs):</p></div>
|
|
<div class="tableblock">
|
|
<table rules="all"
|
|
width="100%"
|
|
frame="border"
|
|
cellspacing="0" cellpadding="4">
|
|
<col width="20%" />
|
|
<col width="20%" />
|
|
<col width="20%" />
|
|
<col width="20%" />
|
|
<col width="20%" />
|
|
<thead>
|
|
<tr>
|
|
<th align="left" valign="top">format </th>
|
|
<th align="right" valign="top">cache </th>
|
|
<th align="right" valign="top">scan </th>
|
|
<th align="right" valign="top">by name </th>
|
|
<th align="right" valign="top">by SHA-1</th>
|
|
</tr>
|
|
</thead>
|
|
<tbody>
|
|
<tr>
|
|
<td align="left" valign="top"><p class="table">packed-refs</p></td>
|
|
<td align="right" valign="top"><p class="table">cold</p></td>
|
|
<td align="right" valign="top"><p class="table">402 ms</p></td>
|
|
<td align="right" valign="top"><p class="table">409,660.1 usec</p></td>
|
|
<td align="right" valign="top"><p class="table">412,535.8 usec</p></td>
|
|
</tr>
|
|
<tr>
|
|
<td align="left" valign="top"><p class="table">packed-refs</p></td>
|
|
<td align="right" valign="top"><p class="table">hot</p></td>
|
|
<td align="right" valign="top"><p class="table"></p></td>
|
|
<td align="right" valign="top"><p class="table">6,844.6 usec</p></td>
|
|
<td align="right" valign="top"><p class="table">20,110.1 usec</p></td>
|
|
</tr>
|
|
<tr>
|
|
<td align="left" valign="top"><p class="table">reftable</p></td>
|
|
<td align="right" valign="top"><p class="table">cold</p></td>
|
|
<td align="right" valign="top"><p class="table">112 ms</p></td>
|
|
<td align="right" valign="top"><p class="table">33.9 usec</p></td>
|
|
<td align="right" valign="top"><p class="table">323.2 usec</p></td>
|
|
</tr>
|
|
<tr>
|
|
<td align="left" valign="top"><p class="table">reftable</p></td>
|
|
<td align="right" valign="top"><p class="table">hot</p></td>
|
|
<td align="right" valign="top"><p class="table"></p></td>
|
|
<td align="right" valign="top"><p class="table">20.2 usec</p></td>
|
|
<td align="right" valign="top"><p class="table">320.8 usec</p></td>
|
|
</tr>
|
|
</tbody>
|
|
</table>
|
|
</div>
|
|
<div class="paragraph"><p>Space used for 149,932 log entries for 43,061 refs, reflog vs. reftable:</p></div>
|
|
<div class="tableblock">
|
|
<table rules="all"
|
|
width="100%"
|
|
frame="border"
|
|
cellspacing="0" cellpadding="4">
|
|
<col width="33%" />
|
|
<col width="33%" />
|
|
<col width="33%" />
|
|
<thead>
|
|
<tr>
|
|
<th align="left" valign="top">format </th>
|
|
<th align="right" valign="top">size </th>
|
|
<th align="right" valign="top">avg entry</th>
|
|
</tr>
|
|
</thead>
|
|
<tbody>
|
|
<tr>
|
|
<td align="left" valign="top"><p class="table">$GIT_DIR/logs</p></td>
|
|
<td align="right" valign="top"><p class="table">173 M</p></td>
|
|
<td align="right" valign="top"><p class="table">1209 bytes</p></td>
|
|
</tr>
|
|
<tr>
|
|
<td align="left" valign="top"><p class="table">reftable</p></td>
|
|
<td align="right" valign="top"><p class="table">5 M</p></td>
|
|
<td align="right" valign="top"><p class="table">37 bytes</p></td>
|
|
</tr>
|
|
</tbody>
|
|
</table>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
<div class="sect2">
|
|
<h3 id="_details">Details</h3>
|
|
<div class="sect3">
|
|
<h4 id="_peeling">Peeling</h4>
|
|
<div class="paragraph"><p>References stored in a reftable are peeled, a record for an annotated
|
|
(or signed) tag records both the tag object, and the object it refers
|
|
to. This is analogous to storage in the packed-refs format.</p></div>
|
|
</div>
|
|
<div class="sect3">
|
|
<h4 id="_reference_name_encoding">Reference name encoding</h4>
|
|
<div class="paragraph"><p>Reference names are an uninterpreted sequence of bytes that must pass
|
|
<a href="../git-check-ref-format.html">git-check-ref-format(1)</a> as a valid reference name.</p></div>
|
|
</div>
|
|
<div class="sect3">
|
|
<h4 id="_key_unicity">Key unicity</h4>
|
|
<div class="paragraph"><p>Each entry must have a unique key; repeated keys are disallowed.</p></div>
|
|
</div>
|
|
<div class="sect3">
|
|
<h4 id="_network_byte_order">Network byte order</h4>
|
|
<div class="paragraph"><p>All multi-byte, fixed width fields are in network byte order.</p></div>
|
|
</div>
|
|
<div class="sect3">
|
|
<h4 id="_varint_encoding">Varint encoding</h4>
|
|
<div class="paragraph"><p>Varint encoding is identical to the ofs-delta encoding method used
|
|
within pack files.</p></div>
|
|
<div class="paragraph"><p>Decoder works as follows:</p></div>
|
|
<div class="literalblock">
|
|
<div class="content">
|
|
<pre><code>val = buf[ptr] & 0x7f
|
|
while (buf[ptr] & 0x80) {
|
|
ptr++
|
|
val = ((val + 1) << 7) | (buf[ptr] & 0x7f)
|
|
}</code></pre>
|
|
</div></div>
|
|
</div>
|
|
<div class="sect3">
|
|
<h4 id="_ordering">Ordering</h4>
|
|
<div class="paragraph"><p>Blocks are lexicographically ordered by their first reference.</p></div>
|
|
</div>
|
|
<div class="sect3">
|
|
<h4 id="_directory_file_conflicts">Directory/file conflicts</h4>
|
|
<div class="paragraph"><p>The reftable format accepts both <code>refs/heads/foo</code> and
|
|
<code>refs/heads/foo/bar</code> as distinct references.</p></div>
|
|
<div class="paragraph"><p>This property is useful for retaining log records in reftable, but may
|
|
confuse versions of Git using <code>$GIT_DIR/refs</code> directory tree to maintain
|
|
references. Users of reftable may choose to continue to reject <code>foo</code> and
|
|
<code>foo/bar</code> type conflicts to prevent problems for peers.</p></div>
|
|
</div>
|
|
</div>
|
|
<div class="sect2">
|
|
<h3 id="_file_format">File format</h3>
|
|
<div class="sect3">
|
|
<h4 id="_structure">Structure</h4>
|
|
<div class="paragraph"><p>A reftable file has the following high-level structure:</p></div>
|
|
<div class="literalblock">
|
|
<div class="content">
|
|
<pre><code>first_block {
|
|
header
|
|
first_ref_block
|
|
}
|
|
ref_block*
|
|
ref_index*
|
|
obj_block*
|
|
obj_index*
|
|
log_block*
|
|
log_index*
|
|
footer</code></pre>
|
|
</div></div>
|
|
<div class="paragraph"><p>A log-only file omits the <code>ref_block</code>, <code>ref_index</code>, <code>obj_block</code> and
|
|
<code>obj_index</code> sections, containing only the file header and log block:</p></div>
|
|
<div class="literalblock">
|
|
<div class="content">
|
|
<pre><code>first_block {
|
|
header
|
|
}
|
|
log_block*
|
|
log_index*
|
|
footer</code></pre>
|
|
</div></div>
|
|
<div class="paragraph"><p>In a log-only file, the first log block immediately follows the file
|
|
header, without padding to block alignment.</p></div>
|
|
</div>
|
|
<div class="sect3">
|
|
<h4 id="_block_size">Block size</h4>
|
|
<div class="paragraph"><p>The file’s block size is arbitrarily determined by the writer, and does
|
|
not have to be a power of 2. The block size must be larger than the
|
|
longest reference name or log entry used in the repository, as
|
|
references cannot span blocks.</p></div>
|
|
<div class="paragraph"><p>Powers of two that are friendly to the virtual memory system or
|
|
filesystem (such as 4k or 8k) are recommended. Larger sizes (64k) can
|
|
yield better compression, with a possible increased cost incurred by
|
|
readers during access.</p></div>
|
|
<div class="paragraph"><p>The largest block size is <code>16777215</code> bytes (15.99 MiB).</p></div>
|
|
</div>
|
|
<div class="sect3">
|
|
<h4 id="_block_alignment">Block alignment</h4>
|
|
<div class="paragraph"><p>Writers may choose to align blocks at multiples of the block size by
|
|
including <code>padding</code> filled with NUL bytes at the end of a block to round
|
|
out to the chosen alignment. When alignment is used, writers must
|
|
specify the alignment with the file header’s <code>block_size</code> field.</p></div>
|
|
<div class="paragraph"><p>Block alignment is not required by the file format. Unaligned files must
|
|
set <code>block_size</code> <code>=</code> <code>0</code> in the file header, and omit <code>padding</code>. Unaligned
|
|
files with more than one ref block must include the <a href="#Ref-index">ref
|
|
index</a> to support fast lookup. Readers must be able to read both aligned
|
|
and non-aligned files.</p></div>
|
|
<div class="paragraph"><p>Very small files (e.g. a single ref block) may omit <code>padding</code> and the ref
|
|
index to reduce total file size.</p></div>
|
|
</div>
|
|
<div class="sect3">
|
|
<h4 id="_header_version_1">Header (version 1)</h4>
|
|
<div class="paragraph"><p>A 24-byte header appears at the beginning of the file:</p></div>
|
|
<div class="literalblock">
|
|
<div class="content">
|
|
<pre><code>'REFT'
|
|
uint8( version_number = 1 )
|
|
uint24( block_size )
|
|
uint64( min_update_index )
|
|
uint64( max_update_index )</code></pre>
|
|
</div></div>
|
|
<div class="paragraph"><p>Aligned files must specify <code>block_size</code> to configure readers with the
|
|
expected block alignment. Unaligned files must set <code>block_size</code> <code>=</code> <code>0</code>.</p></div>
|
|
<div class="paragraph"><p>The <code>min_update_index</code> and <code>max_update_index</code> describe bounds for the
|
|
<code>update_index</code> field of all log records in this file. When reftables are
|
|
used in a stack for <a href="#Update-transactions">transactions</a>, these
|
|
fields can order the files such that the prior file’s
|
|
<code>max_update_index</code> <code>+</code> <code>1</code> is the next file’s <code>min_update_index</code>.</p></div>
|
|
</div>
|
|
<div class="sect3">
|
|
<h4 id="_header_version_2">Header (version 2)</h4>
|
|
<div class="paragraph"><p>A 28-byte header appears at the beginning of the file:</p></div>
|
|
<div class="literalblock">
|
|
<div class="content">
|
|
<pre><code>'REFT'
|
|
uint8( version_number = 2 )
|
|
uint24( block_size )
|
|
uint64( min_update_index )
|
|
uint64( max_update_index )
|
|
uint32( hash_id )</code></pre>
|
|
</div></div>
|
|
<div class="paragraph"><p>The header is identical to <code>version_number=1</code>, with the 4-byte hash ID
|
|
("sha1" for SHA1 and "s256" for SHA-256) appended to the header.</p></div>
|
|
<div class="paragraph"><p>For maximum backward compatibility, it is recommended to use version 1 when
|
|
writing SHA1 reftables.</p></div>
|
|
</div>
|
|
<div class="sect3">
|
|
<h4 id="_first_ref_block">First ref block</h4>
|
|
<div class="paragraph"><p>The first ref block shares the same block as the file header, and is 24
|
|
bytes smaller than all other blocks in the file. The first block
|
|
immediately begins after the file header, at position 24.</p></div>
|
|
<div class="paragraph"><p>If the first block is a log block (a log-only file), its block header
|
|
begins immediately at position 24.</p></div>
|
|
</div>
|
|
<div class="sect3">
|
|
<h4 id="_ref_block_format">Ref block format</h4>
|
|
<div class="paragraph"><p>A ref block is written as:</p></div>
|
|
<div class="literalblock">
|
|
<div class="content">
|
|
<pre><code>'r'
|
|
uint24( block_len )
|
|
ref_record+
|
|
uint24( restart_offset )+
|
|
uint16( restart_count )
|
|
|
|
padding?</code></pre>
|
|
</div></div>
|
|
<div class="paragraph"><p>Blocks begin with <code>block_type</code> <code>=</code> 'r' and a 3-byte <code>block_len</code> which
|
|
encodes the number of bytes in the block up to, but not including the
|
|
optional <code>padding</code>. This is always less than or equal to the file’s
|
|
block size. In the first ref block, <code>block_len</code> includes 24 bytes for
|
|
the file header.</p></div>
|
|
<div class="paragraph"><p>The 2-byte <code>restart_count</code> stores the number of entries in the
|
|
<code>restart_offset</code> list, which must not be empty. Readers can use
|
|
<code>restart_count</code> to binary search between restarts before starting a
|
|
linear scan.</p></div>
|
|
<div class="paragraph"><p>Exactly <code>restart_count</code> 3-byte <code>restart_offset</code> values precede the
|
|
<code>restart_count</code>. Offsets are relative to the start of the block and
|
|
refer to the first byte of any <code>ref_record</code> whose name has not been
|
|
prefix compressed. Entries in the <code>restart_offset</code> list must be sorted,
|
|
ascending. Readers can start linear scans from any of these records.</p></div>
|
|
<div class="paragraph"><p>A variable number of <code>ref_record</code> fill the middle of the block,
|
|
describing reference names and values. The format is described below.</p></div>
|
|
<div class="paragraph"><p>As the first ref block shares the first file block with the file header,
|
|
all <code>restart_offset</code> in the first block are relative to the start of the
|
|
file (position 0), and include the file header. This forces the first
|
|
<code>restart_offset</code> to be <code>28</code>.</p></div>
|
|
<div class="sect4">
|
|
<h5 id="_ref_record">ref record</h5>
|
|
<div class="paragraph"><p>A <code>ref_record</code> describes a single reference, storing both the name and
|
|
its value(s). Records are formatted as:</p></div>
|
|
<div class="literalblock">
|
|
<div class="content">
|
|
<pre><code>varint( prefix_length )
|
|
varint( (suffix_length << 3) | value_type )
|
|
suffix
|
|
varint( update_index_delta )
|
|
value?</code></pre>
|
|
</div></div>
|
|
<div class="paragraph"><p>The <code>prefix_length</code> field specifies how many leading bytes of the prior
|
|
reference record’s name should be copied to obtain this reference’s
|
|
name. This must be 0 for the first reference in any block, and also must
|
|
be 0 for any <code>ref_record</code> whose offset is listed in the <code>restart_offset</code>
|
|
table at the end of the block.</p></div>
|
|
<div class="paragraph"><p>Recovering a reference name from any <code>ref_record</code> is a simple concat:</p></div>
|
|
<div class="literalblock">
|
|
<div class="content">
|
|
<pre><code>this_name = prior_name[0..prefix_length] + suffix</code></pre>
|
|
</div></div>
|
|
<div class="paragraph"><p>The <code>suffix_length</code> value provides the number of bytes available in
|
|
<code>suffix</code> to copy from <code>suffix</code> to complete the reference name.</p></div>
|
|
<div class="paragraph"><p>The <code>update_index</code> that last modified the reference can be obtained by
|
|
adding <code>update_index_delta</code> to the <code>min_update_index</code> from the file
|
|
header: <code>min_update_index</code> <code>+</code> <code>update_index_delta</code>.</p></div>
|
|
<div class="paragraph"><p>The <code>value</code> follows. Its format is determined by <code>value_type</code>, one of
|
|
the following:</p></div>
|
|
<div class="ulist"><ul>
|
|
<li>
|
|
<p>
|
|
<code>0x0</code>: deletion; no value data (see transactions, below)
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p>
|
|
<code>0x1</code>: one object name; value of the ref
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p>
|
|
<code>0x2</code>: two object names; value of the ref, peeled target
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p>
|
|
<code>0x3</code>: symbolic reference: <code>varint</code>( <code>target_len</code> ) <code>target</code>
|
|
</p>
|
|
</li>
|
|
</ul></div>
|
|
<div class="paragraph"><p>Symbolic references use <code>0x3</code>, followed by the complete name of the
|
|
reference target. No compression is applied to the target name.</p></div>
|
|
<div class="paragraph"><p>Types <code>0x4</code><code>..</code><code>0x7</code> are reserved for future use.</p></div>
|
|
</div>
|
|
</div>
|
|
<div class="sect3">
|
|
<h4 id="_ref_index">Ref index</h4>
|
|
<div class="paragraph"><p>The ref index stores the name of the last reference from every ref block
|
|
in the file, enabling reduced disk seeks for lookups. Any reference can
|
|
be found by searching the index, identifying the containing block, and
|
|
searching within that block.</p></div>
|
|
<div class="paragraph"><p>The index may be organized into a multi-level index, where the 1st level
|
|
index block points to additional ref index blocks (2nd level), which may
|
|
in turn point to either additional index blocks (e.g. 3rd level) or ref
|
|
blocks (leaf level). Disk reads required to access a ref go up with
|
|
higher index levels. Multi-level indexes may be required to ensure no
|
|
single index block exceeds the file format’s max block size of
|
|
<code>16777215</code> bytes (15.99 MiB). To achieve constant O(1) disk seeks for
|
|
lookups the index must be a single level, which is permitted to exceed
|
|
the file’s configured block size, but not the format’s max block size of
|
|
15.99 MiB.</p></div>
|
|
<div class="paragraph"><p>If present, the ref index block(s) appears after the last ref block.</p></div>
|
|
<div class="paragraph"><p>If there are at least 4 ref blocks, a ref index block should be written
|
|
to improve lookup times. Cold reads using the index require 2 disk reads
|
|
(read index, read block), and binary searching < 4 blocks also requires
|
|
⇐ 2 reads. Omitting the index block from smaller files saves space.</p></div>
|
|
<div class="paragraph"><p>If the file is unaligned and contains more than one ref block, the ref
|
|
index must be written.</p></div>
|
|
<div class="paragraph"><p>Index block format:</p></div>
|
|
<div class="literalblock">
|
|
<div class="content">
|
|
<pre><code>'i'
|
|
uint24( block_len )
|
|
index_record+
|
|
uint24( restart_offset )+
|
|
uint16( restart_count )
|
|
|
|
padding?</code></pre>
|
|
</div></div>
|
|
<div class="paragraph"><p>The index blocks begin with <code>block_type</code> <code>=</code> 'i' and a 3-byte <code>block_len</code>
|
|
which encodes the number of bytes in the block, up to but not including
|
|
the optional <code>padding</code>.</p></div>
|
|
<div class="paragraph"><p>The <code>restart_offset</code> and <code>restart_count</code> fields are identical in format,
|
|
meaning and usage as in ref blocks.</p></div>
|
|
<div class="paragraph"><p>To reduce the number of reads required for random access in very large
|
|
files the index block may be larger than other blocks. However, readers
|
|
must hold the entire index in memory to benefit from this, so it’s a
|
|
time-space tradeoff in both file size and reader memory.</p></div>
|
|
<div class="paragraph"><p>Increasing the file’s block size decreases the index size. Alternatively
|
|
a multi-level index may be used, keeping index blocks within the file’s
|
|
block size, but increasing the number of blocks that need to be
|
|
accessed.</p></div>
|
|
<div class="sect4">
|
|
<h5 id="_index_record">index record</h5>
|
|
<div class="paragraph"><p>An index record describes the last entry in another block. Index records
|
|
are written as:</p></div>
|
|
<div class="literalblock">
|
|
<div class="content">
|
|
<pre><code>varint( prefix_length )
|
|
varint( (suffix_length << 3) | 0 )
|
|
suffix
|
|
varint( block_position )</code></pre>
|
|
</div></div>
|
|
<div class="paragraph"><p>Index records use prefix compression exactly like <code>ref_record</code>.</p></div>
|
|
<div class="paragraph"><p>Index records store <code>block_position</code> after the suffix, specifying the
|
|
absolute position in bytes (from the start of the file) of the block
|
|
that ends with this reference. Readers can seek to <code>block_position</code> to
|
|
begin reading the block header.</p></div>
|
|
<div class="paragraph"><p>Readers must examine the block header at <code>block_position</code> to determine
|
|
if the next block is another level index block, or the leaf-level ref
|
|
block.</p></div>
|
|
</div>
|
|
<div class="sect4">
|
|
<h5 id="_reading_the_index">Reading the index</h5>
|
|
<div class="paragraph"><p>Readers loading the ref index must first read the footer (below) to
|
|
obtain <code>ref_index_position</code>. If not present, the position will be 0. The
|
|
<code>ref_index_position</code> is for the 1st level root of the ref index.</p></div>
|
|
</div>
|
|
</div>
|
|
<div class="sect3">
|
|
<h4 id="_obj_block_format">Obj block format</h4>
|
|
<div class="paragraph"><p>Object blocks are optional. Writers may choose to omit object blocks,
|
|
especially if readers will not use the object name to ref mapping.</p></div>
|
|
<div class="paragraph"><p>Object blocks use unique, abbreviated 2-31 byte object name keys, mapping to
|
|
ref blocks containing references pointing to that object directly, or as
|
|
the peeled value of an annotated tag. Like ref blocks, object blocks use
|
|
the file’s standard block size. The abbreviation length is available in
|
|
the footer as <code>obj_id_len</code>.</p></div>
|
|
<div class="paragraph"><p>To save space in small files, object blocks may be omitted if the ref
|
|
index is not present, as brute force search will only need to read a few
|
|
ref blocks. When missing, readers should brute force a linear search of
|
|
all references to lookup by object name.</p></div>
|
|
<div class="paragraph"><p>An object block is written as:</p></div>
|
|
<div class="literalblock">
|
|
<div class="content">
|
|
<pre><code>'o'
|
|
uint24( block_len )
|
|
obj_record+
|
|
uint24( restart_offset )+
|
|
uint16( restart_count )
|
|
|
|
padding?</code></pre>
|
|
</div></div>
|
|
<div class="paragraph"><p>Fields are identical to ref block. Binary search using the restart table
|
|
works the same as in reference blocks.</p></div>
|
|
<div class="paragraph"><p>Because object names are abbreviated by writers to the shortest unique
|
|
abbreviation within the reftable, obj key lengths have a variable length. Their
|
|
length must be at least 2 bytes. Readers must compare only for common prefix
|
|
match within an obj block or obj index.</p></div>
|
|
<div class="sect4">
|
|
<h5 id="_obj_record">obj record</h5>
|
|
<div class="paragraph"><p>An <code>obj_record</code> describes a single object abbreviation, and the blocks
|
|
containing references using that unique abbreviation:</p></div>
|
|
<div class="literalblock">
|
|
<div class="content">
|
|
<pre><code>varint( prefix_length )
|
|
varint( (suffix_length << 3) | cnt_3 )
|
|
suffix
|
|
varint( cnt_large )?
|
|
varint( position_delta )*</code></pre>
|
|
</div></div>
|
|
<div class="paragraph"><p>Like in reference blocks, abbreviations are prefix compressed within an
|
|
obj block. On large reftables with many unique objects, higher block
|
|
sizes (64k), and higher restart interval (128), a <code>prefix_length</code> of 2
|
|
or 3 and <code>suffix_length</code> of 3 may be common in obj records (unique
|
|
abbreviation of 5-6 raw bytes, 10-12 hex digits).</p></div>
|
|
<div class="paragraph"><p>Each record contains <code>position_count</code> number of positions for matching
|
|
ref blocks. For 1-7 positions the count is stored in <code>cnt_3</code>. When
|
|
<code>cnt_3</code> <code>=</code> <code>0</code> the actual count follows in a varint, <code>cnt_large</code>.</p></div>
|
|
<div class="paragraph"><p>The use of <code>cnt_3</code> bets most objects are pointed to by only a single
|
|
reference, some may be pointed to by a couple of references, and very
|
|
few (if any) are pointed to by more than 7 references.</p></div>
|
|
<div class="paragraph"><p>A special case exists when <code>cnt_3</code> <code>=</code> <code>0</code> and <code>cnt_large</code> <code>=</code> <code>0</code>: there are no
|
|
<code>position_delta</code>, but at least one reference starts with this
|
|
abbreviation. A reader that needs exact reference names must scan all
|
|
references to find which specific references have the desired object.
|
|
Writers should use this format when the <code>position_delta</code> list would have
|
|
overflowed the file’s block size due to a high number of references
|
|
pointing to the same object.</p></div>
|
|
<div class="paragraph"><p>The first <code>position_delta</code> is the position from the start of the file.
|
|
Additional <code>position_delta</code> entries are sorted ascending and relative to
|
|
the prior entry, e.g. a reader would perform:</p></div>
|
|
<div class="literalblock">
|
|
<div class="content">
|
|
<pre><code>pos = position_delta[0]
|
|
prior = pos
|
|
for (j = 1; j < position_count; j++) {
|
|
pos = prior + position_delta[j]
|
|
prior = pos
|
|
}</code></pre>
|
|
</div></div>
|
|
<div class="paragraph"><p>With a position in hand, a reader must linearly scan the ref block,
|
|
starting from the first <code>ref_record</code>, testing each reference’s object names
|
|
(for <code>value_type</code> <code>=</code> <code>0x1</code> or <code>0x2</code>) for full equality. Faster searching by
|
|
object name within a single ref block is not supported by the reftable format.
|
|
Smaller block sizes reduce the number of candidates this step must
|
|
consider.</p></div>
|
|
</div>
|
|
</div>
|
|
<div class="sect3">
|
|
<h4 id="_obj_index">Obj index</h4>
|
|
<div class="paragraph"><p>The obj index stores the abbreviation from the last entry for every obj
|
|
block in the file, enabling reduced disk seeks for all lookups. It is
|
|
formatted exactly the same as the ref index, but refers to obj blocks.</p></div>
|
|
<div class="paragraph"><p>The obj index should be present if obj blocks are present, as obj blocks
|
|
should only be written in larger files.</p></div>
|
|
<div class="paragraph"><p>Readers loading the obj index must first read the footer (below) to
|
|
obtain <code>obj_index_position</code>. If not present, the position will be 0.</p></div>
|
|
</div>
|
|
<div class="sect3">
|
|
<h4 id="_log_block_format">Log block format</h4>
|
|
<div class="paragraph"><p>Unlike ref and obj blocks, log blocks are always unaligned.</p></div>
|
|
<div class="paragraph"><p>Log blocks are variable in size, and do not match the <code>block_size</code>
|
|
specified in the file header or footer. Writers should choose an
|
|
appropriate buffer size to prepare a log block for deflation, such as
|
|
<code>2</code> <code>*</code> <code>block_size</code>.</p></div>
|
|
<div class="paragraph"><p>A log block is written as:</p></div>
|
|
<div class="literalblock">
|
|
<div class="content">
|
|
<pre><code>'g'
|
|
uint24( block_len )
|
|
zlib_deflate {
|
|
log_record+
|
|
uint24( restart_offset )+
|
|
uint16( restart_count )
|
|
}</code></pre>
|
|
</div></div>
|
|
<div class="paragraph"><p>Log blocks look similar to ref blocks, except <code>block_type</code> <code>=</code> 'g'.</p></div>
|
|
<div class="paragraph"><p>The 4-byte block header is followed by the deflated block contents using
|
|
zlib deflate. The <code>block_len</code> in the header is the inflated size
|
|
(including 4-byte block header), and should be used by readers to
|
|
preallocate the inflation output buffer. A log block’s <code>block_len</code> may
|
|
exceed the file’s block size.</p></div>
|
|
<div class="paragraph"><p>Offsets within the log block (e.g. <code>restart_offset</code>) still include the
|
|
4-byte header. Readers may prefer prefixing the inflation output buffer
|
|
with the 4-byte header.</p></div>
|
|
<div class="paragraph"><p>Within the deflate container, a variable number of <code>log_record</code> describe
|
|
reference changes. The log record format is described below. See ref
|
|
block format (above) for a description of <code>restart_offset</code> and
|
|
<code>restart_count</code>.</p></div>
|
|
<div class="paragraph"><p>Because log blocks have no alignment or padding between blocks, readers
|
|
must keep track of the bytes consumed by the inflater to know where the
|
|
next log block begins.</p></div>
|
|
<div class="sect4">
|
|
<h5 id="_log_record">log record</h5>
|
|
<div class="paragraph"><p>Log record keys are structured as:</p></div>
|
|
<div class="literalblock">
|
|
<div class="content">
|
|
<pre><code>ref_name '\0' reverse_int64( update_index )</code></pre>
|
|
</div></div>
|
|
<div class="paragraph"><p>where <code>update_index</code> is the unique transaction identifier. The
|
|
<code>update_index</code> field must be unique within the scope of a <code>ref_name</code>.
|
|
See the update transactions section below for further details.</p></div>
|
|
<div class="paragraph"><p>The <code>reverse_int64</code> function inverses the value so lexicographical
|
|
ordering the network byte order encoding sorts the more recent records
|
|
with higher <code>update_index</code> values first:</p></div>
|
|
<div class="literalblock">
|
|
<div class="content">
|
|
<pre><code>reverse_int64(int64 t) {
|
|
return 0xffffffffffffffff - t;
|
|
}</code></pre>
|
|
</div></div>
|
|
<div class="paragraph"><p>Log records have a similar starting structure to ref and index records,
|
|
utilizing the same prefix compression scheme applied to the log record
|
|
key described above.</p></div>
|
|
<div class="literalblock">
|
|
<div class="content">
|
|
<pre><code> varint( prefix_length )
|
|
varint( (suffix_length << 3) | log_type )
|
|
suffix
|
|
log_data {
|
|
old_id
|
|
new_id
|
|
varint( name_length ) name
|
|
varint( email_length ) email
|
|
varint( time_seconds )
|
|
sint16( tz_offset )
|
|
varint( message_length ) message
|
|
}?</code></pre>
|
|
</div></div>
|
|
<div class="paragraph"><p>Log record entries use <code>log_type</code> to indicate what follows:</p></div>
|
|
<div class="ulist"><ul>
|
|
<li>
|
|
<p>
|
|
<code>0x0</code>: deletion; no log data.
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p>
|
|
<code>0x1</code>: standard git reflog data using <code>log_data</code> above.
|
|
</p>
|
|
</li>
|
|
</ul></div>
|
|
<div class="paragraph"><p>The <code>log_type</code> <code>=</code> <code>0x0</code> is mostly useful for <code>git</code> <code>stash</code> <code>drop</code>, removing an
|
|
entry from the reflog of <code>refs/stash</code> in a transaction file (below),
|
|
without needing to rewrite larger files. Readers reading a stack of
|
|
reflogs must treat this as a deletion.</p></div>
|
|
<div class="paragraph"><p>For <code>log_type</code> <code>=</code> <code>0x1</code>, the <code>log_data</code> section follows
|
|
<a href="../git-update-ref.html">git-update-ref(1)</a> logging and includes:</p></div>
|
|
<div class="ulist"><ul>
|
|
<li>
|
|
<p>
|
|
two object names (old id, new id)
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p>
|
|
varint string of committer’s name
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p>
|
|
varint string of committer’s email
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p>
|
|
varint time in seconds since epoch (Jan 1, 1970)
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p>
|
|
2-byte timezone offset in minutes (signed)
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p>
|
|
varint string of message
|
|
</p>
|
|
</li>
|
|
</ul></div>
|
|
<div class="paragraph"><p><code>tz_offset</code> is the absolute number of minutes from GMT the committer was
|
|
at the time of the update. For example <code>GMT-0800</code> is encoded in reftable
|
|
as <code>sint16</code>(<code>-480</code>) and <code>GMT+0230</code> is <code>sint16</code>(<code>150</code>).</p></div>
|
|
<div class="paragraph"><p>The committer email does not contain < or >, it’s the value normally
|
|
found between the <> in a git commit object header.</p></div>
|
|
<div class="paragraph"><p>The <code>message_length</code> may be 0, in which case there was no message
|
|
supplied for the update.</p></div>
|
|
<div class="paragraph"><p>Contrary to traditional reflog (which is a file), renames are encoded as
|
|
a combination of ref deletion and ref creation. A deletion is a log
|
|
record with a zero new_id, and a creation is a log record with a zero old_id.</p></div>
|
|
</div>
|
|
<div class="sect4">
|
|
<h5 id="_reading_the_log">Reading the log</h5>
|
|
<div class="paragraph"><p>Readers accessing the log must first read the footer (below) to
|
|
determine the <code>log_position</code>. The first block of the log begins at
|
|
<code>log_position</code> bytes since the start of the file. The <code>log_position</code> is
|
|
not block aligned.</p></div>
|
|
</div>
|
|
<div class="sect4">
|
|
<h5 id="_importing_logs">Importing logs</h5>
|
|
<div class="paragraph"><p>When importing from <code>$GIT_DIR/logs</code> writers should globally order all
|
|
log records roughly by timestamp while preserving file order, and assign
|
|
unique, increasing <code>update_index</code> values for each log line. Newer log
|
|
records get higher <code>update_index</code> values.</p></div>
|
|
<div class="paragraph"><p>Although an import may write only a single reftable file, the reftable
|
|
file must span many unique <code>update_index</code>, as each log line requires its
|
|
own <code>update_index</code> to preserve semantics.</p></div>
|
|
</div>
|
|
</div>
|
|
<div class="sect3">
|
|
<h4 id="_log_index">Log index</h4>
|
|
<div class="paragraph"><p>The log index stores the log key
|
|
(<code>refname</code> <code>\0</code> <code>reverse_int64</code>(<code>update_index</code>)) for the last log record of
|
|
every log block in the file, supporting bounded-time lookup.</p></div>
|
|
<div class="paragraph"><p>A log index block must be written if 2 or more log blocks are written to
|
|
the file. If present, the log index appears after the last log block.
|
|
There is no padding used to align the log index to block alignment.</p></div>
|
|
<div class="paragraph"><p>Log index format is identical to ref index, except the keys are 9 bytes
|
|
longer to include '\0' and the 8-byte <code>reverse_int64</code>(<code>update_index</code>).
|
|
Records use <code>block_position</code> to refer to the start of a log block.</p></div>
|
|
<div class="sect4">
|
|
<h5 id="_reading_the_index_2">Reading the index</h5>
|
|
<div class="paragraph"><p>Readers loading the log index must first read the footer (below) to
|
|
obtain <code>log_index_position</code>. If not present, the position will be 0.</p></div>
|
|
</div>
|
|
</div>
|
|
<div class="sect3">
|
|
<h4 id="_footer">Footer</h4>
|
|
<div class="paragraph"><p>After the last block of the file, a file footer is written. It begins
|
|
like the file header, but is extended with additional data.</p></div>
|
|
<div class="literalblock">
|
|
<div class="content">
|
|
<pre><code> HEADER
|
|
|
|
uint64( ref_index_position )
|
|
uint64( (obj_position << 5) | obj_id_len )
|
|
uint64( obj_index_position )
|
|
|
|
uint64( log_position )
|
|
uint64( log_index_position )
|
|
|
|
uint32( CRC-32 of above )</code></pre>
|
|
</div></div>
|
|
<div class="paragraph"><p>If a section is missing (e.g. ref index) the corresponding position
|
|
field (e.g. <code>ref_index_position</code>) will be 0.</p></div>
|
|
<div class="ulist"><ul>
|
|
<li>
|
|
<p>
|
|
<code>obj_position</code>: byte position for the first obj block.
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p>
|
|
<code>obj_id_len</code>: number of bytes used to abbreviate object names in
|
|
obj blocks.
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p>
|
|
<code>log_position</code>: byte position for the first log block.
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p>
|
|
<code>ref_index_position</code>: byte position for the start of the ref index.
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p>
|
|
<code>obj_index_position</code>: byte position for the start of the obj index.
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p>
|
|
<code>log_index_position</code>: byte position for the start of the log index.
|
|
</p>
|
|
</li>
|
|
</ul></div>
|
|
<div class="paragraph"><p>The size of the footer is 68 bytes for version 1, and 72 bytes for
|
|
version 2.</p></div>
|
|
<div class="sect4">
|
|
<h5 id="_reading_the_footer">Reading the footer</h5>
|
|
<div class="paragraph"><p>Readers must first read the file start to determine the version
|
|
number. Then they seek to <code>file_length</code> <code>-</code> <code>FOOTER_LENGTH</code> to access the
|
|
footer. A trusted external source (such as <code>stat</code>(<code>2</code>)) is necessary to
|
|
obtain <code>file_length</code>. When reading the footer, readers must verify:</p></div>
|
|
<div class="ulist"><ul>
|
|
<li>
|
|
<p>
|
|
4-byte magic is correct
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p>
|
|
1-byte version number is recognized
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p>
|
|
4-byte CRC-32 matches the other 64 bytes (including magic, and
|
|
version)
|
|
</p>
|
|
</li>
|
|
</ul></div>
|
|
<div class="paragraph"><p>Once verified, the other fields of the footer can be accessed.</p></div>
|
|
</div>
|
|
<div class="sect4">
|
|
<h5 id="_empty_tables">Empty tables</h5>
|
|
<div class="paragraph"><p>A reftable may be empty. In this case, the file starts with a header
|
|
and is immediately followed by a footer.</p></div>
|
|
</div>
|
|
</div>
|
|
<div class="sect3">
|
|
<h4 id="_binary_search">Binary search</h4>
|
|
<div class="paragraph"><p>Binary search within a block is supported by the <code>restart_offset</code> fields
|
|
at the end of the block. Readers can binary search through the restart
|
|
table to locate between which two restart points the sought reference or
|
|
key should appear.</p></div>
|
|
<div class="paragraph"><p>Each record identified by a <code>restart_offset</code> stores the complete key in
|
|
the <code>suffix</code> field of the record, making the compare operation during
|
|
binary search straightforward.</p></div>
|
|
<div class="paragraph"><p>Once a restart point lexicographically before the sought reference has
|
|
been identified, readers can linearly scan through the following record
|
|
entries to locate the sought record, terminating if the current record
|
|
sorts after (and therefore the sought key is not present).</p></div>
|
|
<div class="sect4">
|
|
<h5 id="_restart_point_selection">Restart point selection</h5>
|
|
<div class="paragraph"><p>Writers determine the restart points at file creation. The process is
|
|
arbitrary, but every 16 or 64 records is recommended. Every 16 may be
|
|
more suitable for smaller block sizes (4k or 8k), every 64 for larger
|
|
block sizes (64k).</p></div>
|
|
<div class="paragraph"><p>More frequent restart points reduces prefix compression and increases
|
|
space consumed by the restart table, both of which increase file size.</p></div>
|
|
<div class="paragraph"><p>Less frequent restart points makes prefix compression more effective,
|
|
decreasing overall file size, with increased penalties for readers
|
|
walking through more records after the binary search step.</p></div>
|
|
<div class="paragraph"><p>A maximum of <code>65535</code> restart points per block is supported.</p></div>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
<div class="sect2">
|
|
<h3 id="_considerations">Considerations</h3>
|
|
<div class="sect3">
|
|
<h4 id="_lightweight_refs_dominate">Lightweight refs dominate</h4>
|
|
<div class="paragraph"><p>The reftable format assumes the vast majority of references are single
|
|
object names valued with common prefixes, such as Gerrit Code Review’s
|
|
<code>refs/changes/</code> namespace, GitHub’s <code>refs/pulls/</code> namespace, or many
|
|
lightweight tags in the <code>refs/tags/</code> namespace.</p></div>
|
|
<div class="paragraph"><p>Annotated tags storing the peeled object cost an additional object name per
|
|
reference.</p></div>
|
|
</div>
|
|
<div class="sect3">
|
|
<h4 id="_low_overhead">Low overhead</h4>
|
|
<div class="paragraph"><p>A reftable with very few references (e.g. git.git with 5 heads) is 269
|
|
bytes for reftable, vs. 332 bytes for packed-refs. This supports
|
|
reftable scaling down for transaction logs (below).</p></div>
|
|
</div>
|
|
<div class="sect3">
|
|
<h4 id="_block_size_2">Block size</h4>
|
|
<div class="paragraph"><p>For a Gerrit Code Review type repository with many change refs, larger
|
|
block sizes (64 KiB) and less frequent restart points (every 64) yield
|
|
better compression due to more references within the block compressing
|
|
against the prior reference.</p></div>
|
|
<div class="paragraph"><p>Larger block sizes reduce the index size, as the reftable will require
|
|
fewer blocks to store the same number of references.</p></div>
|
|
</div>
|
|
<div class="sect3">
|
|
<h4 id="_minimal_disk_seeks">Minimal disk seeks</h4>
|
|
<div class="paragraph"><p>Assuming the index block has been loaded into memory, binary searching
|
|
for any single reference requires exactly 1 disk seek to load the
|
|
containing block.</p></div>
|
|
</div>
|
|
<div class="sect3">
|
|
<h4 id="_scans_and_lookups_dominate">Scans and lookups dominate</h4>
|
|
<div class="paragraph"><p>Scanning all references and lookup by name (or namespace such as
|
|
<code>refs/heads/</code>) are the most common activities performed on repositories.
|
|
Object names are stored directly with references to optimize this use case.</p></div>
|
|
</div>
|
|
<div class="sect3">
|
|
<h4 id="_logs_are_infrequently_read">Logs are infrequently read</h4>
|
|
<div class="paragraph"><p>Logs are infrequently accessed, but can be large. Deflating log blocks
|
|
saves disk space, with some increased penalty at read time.</p></div>
|
|
<div class="paragraph"><p>Logs are stored in an isolated section from refs, reducing the burden on
|
|
reference readers that want to ignore logs. Further, historical logs can
|
|
be isolated into log-only files.</p></div>
|
|
</div>
|
|
<div class="sect3">
|
|
<h4 id="_logs_are_read_backwards">Logs are read backwards</h4>
|
|
<div class="paragraph"><p>Logs are frequently accessed backwards (most recent N records for master
|
|
to answer <code>master@</code>{4}), so log records are grouped by reference, and
|
|
sorted descending by update index.</p></div>
|
|
</div>
|
|
</div>
|
|
<div class="sect2">
|
|
<h3 id="_repository_format">Repository format</h3>
|
|
<div class="sect3">
|
|
<h4 id="_version_1">Version 1</h4>
|
|
<div class="paragraph"><p>A repository must set its <code>$GIT_DIR/config</code> to configure reftable:</p></div>
|
|
<div class="literalblock">
|
|
<div class="content">
|
|
<pre><code>[core]
|
|
repositoryformatversion = 1
|
|
[extensions]
|
|
refStorage = reftable</code></pre>
|
|
</div></div>
|
|
</div>
|
|
<div class="sect3">
|
|
<h4 id="_layout">Layout</h4>
|
|
<div class="paragraph"><p>A collection of reftable files are stored in the <code>$GIT_DIR/reftable/</code> directory.
|
|
Their names should have a random element, such that each filename is globally
|
|
unique; this helps avoid spurious failures on Windows, where open files cannot
|
|
be removed or overwritten. It suggested to use
|
|
<code>$</code>{min_update_index}-${max_update_index}-${random}.ref as a naming convention.</p></div>
|
|
<div class="paragraph"><p>Log-only files use the <code>.log</code> extension, while ref-only and mixed ref
|
|
and log files use <code>.ref</code>. extension.</p></div>
|
|
<div class="paragraph"><p>The stack ordering file is <code>$GIT_DIR/reftable/tables.list</code> and lists the
|
|
current files, one per line, in order, from oldest (base) to newest
|
|
(most recent):</p></div>
|
|
<div class="literalblock">
|
|
<div class="content">
|
|
<pre><code>$ cat .git/reftable/tables.list
|
|
00000001-00000001-RANDOM1.log
|
|
00000002-00000002-RANDOM2.ref
|
|
00000003-00000003-RANDOM3.ref</code></pre>
|
|
</div></div>
|
|
<div class="paragraph"><p>Readers must read <code>$GIT_DIR/reftable/tables.list</code> to determine which
|
|
files are relevant right now, and search through the stack in reverse
|
|
order (last reftable is examined first).</p></div>
|
|
<div class="paragraph"><p>Reftable files not listed in <code>tables.list</code> may be new (and about to be
|
|
added to the stack by the active writer), or ancient and ready to be
|
|
pruned.</p></div>
|
|
</div>
|
|
<div class="sect3">
|
|
<h4 id="_backward_compatibility">Backward compatibility</h4>
|
|
<div class="paragraph"><p>Older clients should continue to recognize the directory as a git
|
|
repository so they don’t look for an enclosing repository in parent
|
|
directories. To this end, a reftable-enabled repository must contain the
|
|
following dummy files</p></div>
|
|
<div class="ulist"><ul>
|
|
<li>
|
|
<p>
|
|
<code>.git/HEAD</code>, a regular file containing <code>ref:</code> <code>refs/heads/.invalid</code>.
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p>
|
|
<code>.git/refs/</code>, a directory
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p>
|
|
<code>.git/refs/heads</code>, a regular file
|
|
</p>
|
|
</li>
|
|
</ul></div>
|
|
</div>
|
|
<div class="sect3">
|
|
<h4 id="_readers">Readers</h4>
|
|
<div class="paragraph"><p>Readers can obtain a consistent snapshot of the reference space by
|
|
following:</p></div>
|
|
<div class="olist arabic"><ol class="arabic">
|
|
<li>
|
|
<p>
|
|
Open and read the <code>tables.list</code> file.
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p>
|
|
Open each of the reftable files that it mentions.
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p>
|
|
If any of the files is missing, goto 1.
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p>
|
|
Read from the now-open files as long as necessary.
|
|
</p>
|
|
</li>
|
|
</ol></div>
|
|
</div>
|
|
<div class="sect3">
|
|
<h4 id="_update_transactions">Update transactions</h4>
|
|
<div class="paragraph"><p>Although reftables are immutable, mutations are supported by writing a
|
|
new reftable and atomically appending it to the stack:</p></div>
|
|
<div class="olist arabic"><ol class="arabic">
|
|
<li>
|
|
<p>
|
|
Acquire <code>tables.list.lock</code>.
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p>
|
|
Read <code>tables.list</code> to determine current reftables.
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p>
|
|
Select <code>update_index</code> to be most recent file’s
|
|
<code>max_update_index</code> <code>+</code> <code>1</code>.
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p>
|
|
Prepare temp reftable <code>tmp_XXXXXX</code>, including log entries.
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p>
|
|
Rename <code>tmp_XXXXXX</code> to <code>$</code>{update_index}-${update_index}-${random}.ref.
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p>
|
|
Copy <code>tables.list</code> to <code>tables.list.lock</code>, appending file from (5).
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p>
|
|
Rename <code>tables.list.lock</code> to <code>tables.list</code>.
|
|
</p>
|
|
</li>
|
|
</ol></div>
|
|
<div class="paragraph"><p>During step 4 the new file’s <code>min_update_index</code> and <code>max_update_index</code>
|
|
are both set to the <code>update_index</code> selected by step 3. All log records
|
|
for the transaction use the same <code>update_index</code> in their keys. This
|
|
enables later correlation of which references were updated by the same
|
|
transaction.</p></div>
|
|
<div class="paragraph"><p>Because a single <code>tables.list.lock</code> file is used to manage locking, the
|
|
repository is single-threaded for writers. Writers may have to busy-spin
|
|
(with backoff) around creating <code>tables.list.lock</code>, for up to an
|
|
acceptable wait period, aborting if the repository is too busy to
|
|
mutate. Application servers wrapped around repositories (e.g. Gerrit
|
|
Code Review) can layer their own lock/wait queue to improve fairness to
|
|
writers.</p></div>
|
|
</div>
|
|
<div class="sect3">
|
|
<h4 id="_reference_deletions">Reference deletions</h4>
|
|
<div class="paragraph"><p>Deletion of any reference can be explicitly stored by setting the <code>type</code>
|
|
to <code>0x0</code> and omitting the <code>value</code> field of the <code>ref_record</code>. This serves
|
|
as a tombstone, overriding any assertions about the existence of the
|
|
reference from earlier files in the stack.</p></div>
|
|
</div>
|
|
<div class="sect3">
|
|
<h4 id="_compaction">Compaction</h4>
|
|
<div class="paragraph"><p>A partial stack of reftables can be compacted by merging references
|
|
using a straightforward merge join across reftables, selecting the most
|
|
recent value for output, and omitting deleted references that do not
|
|
appear in remaining, lower reftables.</p></div>
|
|
<div class="paragraph"><p>A compacted reftable should set its ‘min_update_index` to the smallest
|
|
of the input files’ <code>min_update_index</code>, and its <code>max_update_index</code>
|
|
likewise to the largest input <code>max_update_index</code>.</p></div>
|
|
<div class="paragraph"><p>For sake of illustration, assume the stack currently consists of
|
|
reftable files (from oldest to newest): A, B, C, and D. The compactor is
|
|
going to compact B and C, leaving A and D alone.</p></div>
|
|
<div class="olist arabic"><ol class="arabic">
|
|
<li>
|
|
<p>
|
|
Obtain lock <code>tables.list.lock</code> and read the <code>tables.list</code> file.
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p>
|
|
Obtain locks <code>B.lock</code> and <code>C.lock</code>. Ownership of these locks
|
|
prevents other processes from trying to compact these files.
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p>
|
|
Release <code>tables.list.lock</code>.
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p>
|
|
Compact <code>B</code> and <code>C</code> into a temp file
|
|
<code>$</code>{min_update_index}-${max_update_index}_XXXXXX.
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p>
|
|
Reacquire lock <code>tables.list.lock</code>.
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p>
|
|
Verify that <code>B</code> and <code>C</code> are still in the stack, in that order. This
|
|
should always be the case, assuming that other processes are adhering to
|
|
the locking protocol.
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p>
|
|
Rename <code>$</code>{min_update_index}-${max_update_index}_XXXXXX to
|
|
<code>$</code>{min_update_index}-${max_update_index}-${random}.ref.
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p>
|
|
Write the new stack to <code>tables.list.lock</code>, replacing <code>B</code> and <code>C</code>
|
|
with the file from (4).
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p>
|
|
Rename <code>tables.list.lock</code> to <code>tables.list</code>.
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p>
|
|
Delete <code>B</code> and <code>C</code>, perhaps after a short sleep to avoid forcing
|
|
readers to backtrack.
|
|
</p>
|
|
</li>
|
|
</ol></div>
|
|
<div class="paragraph"><p>This strategy permits compactions to proceed independently of updates.</p></div>
|
|
<div class="paragraph"><p>Each reftable (compacted or not) is uniquely identified by its name, so
|
|
open reftables can be cached by their name.</p></div>
|
|
</div>
|
|
<div class="sect3">
|
|
<h4 id="_windows">Windows</h4>
|
|
<div class="paragraph"><p>On windows, and other systems that do not allow deleting or renaming to open
|
|
files, compaction may succeed, but other readers may prevent obsolete tables
|
|
from being deleted.</p></div>
|
|
<div class="paragraph"><p>On these platforms, the following strategy can be followed: on closing a
|
|
reftable stack, reload <code>tables.list</code>, and delete any tables no longer mentioned
|
|
in <code>tables.list</code>.</p></div>
|
|
<div class="paragraph"><p>Irregular program exit may still leave about unused files. In this case, a
|
|
cleanup operation should proceed as follows:</p></div>
|
|
<div class="ulist"><ul>
|
|
<li>
|
|
<p>
|
|
take a lock <code>tables.list.lock</code> to prevent concurrent modifications
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p>
|
|
refresh the reftable stack, by reading <code>tables.list</code>
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p>
|
|
for each <code>*.ref</code> file, remove it if
|
|
</p>
|
|
<div class="ulist"><ul>
|
|
<li>
|
|
<p>
|
|
it is not mentioned in <code>tables.list</code>, and
|
|
</p>
|
|
</li>
|
|
<li>
|
|
<p>
|
|
its max update_index is not beyond the max update_index of the stack
|
|
</p>
|
|
</li>
|
|
</ul></div>
|
|
</li>
|
|
</ul></div>
|
|
</div>
|
|
</div>
|
|
<div class="sect2">
|
|
<h3 id="_alternatives_considered">Alternatives considered</h3>
|
|
<div class="sect3">
|
|
<h4 id="_bzip_packed_refs">bzip packed-refs</h4>
|
|
<div class="paragraph"><p><code>bzip2</code> can significantly shrink a large packed-refs file (e.g. 62 MiB
|
|
compresses to 23 MiB, 37%). However the bzip format does not support
|
|
random access to a single reference. Readers must inflate and discard
|
|
while performing a linear scan.</p></div>
|
|
<div class="paragraph"><p>Breaking packed-refs into chunks (individually compressing each chunk)
|
|
would reduce the amount of data a reader must inflate, but still leaves
|
|
the problem of indexing chunks to support readers efficiently locating
|
|
the correct chunk.</p></div>
|
|
<div class="paragraph"><p>Given the compression achieved by reftable’s encoding, it does not seem
|
|
necessary to add the complexity of bzip/gzip/zlib.</p></div>
|
|
</div>
|
|
<div class="sect3">
|
|
<h4 id="_michael_haggerty_8217_s_alternate_format">Michael Haggerty’s alternate format</h4>
|
|
<div class="paragraph"><p>Michael Haggerty proposed
|
|
<a href="https://lore.kernel.org/git/CAMy9T_HCnyc1g8XWOOWhe7nN0aEFyyBskV2aOMb_fe%2BwGvEJ7A%40mail.gmail.com/">an
|
|
alternate</a> format to reftable on the Git mailing list. This format uses
|
|
smaller chunks, without the restart table, and avoids block alignment
|
|
with padding. Reflog entries immediately follow each ref, and are thus
|
|
interleaved between refs.</p></div>
|
|
<div class="paragraph"><p>Performance testing indicates reftable is faster for lookups (51%
|
|
faster, 11.2 usec vs. 5.4 usec), although reftable produces a slightly
|
|
larger file (+ ~3.2%, 28.3M vs 29.2M):</p></div>
|
|
<div class="tableblock">
|
|
<table rules="all"
|
|
width="100%"
|
|
frame="border"
|
|
cellspacing="0" cellpadding="4">
|
|
<col width="25%" />
|
|
<col width="25%" />
|
|
<col width="25%" />
|
|
<col width="25%" />
|
|
<thead>
|
|
<tr>
|
|
<th align="right" valign="top">format </th>
|
|
<th align="right" valign="top">size </th>
|
|
<th align="right" valign="top">seek cold </th>
|
|
<th align="right" valign="top">seek hot</th>
|
|
</tr>
|
|
</thead>
|
|
<tbody>
|
|
<tr>
|
|
<td align="right" valign="top"><p class="table">mh-alt</p></td>
|
|
<td align="right" valign="top"><p class="table">28.3 M</p></td>
|
|
<td align="right" valign="top"><p class="table">23.4 usec</p></td>
|
|
<td align="right" valign="top"><p class="table">11.2 usec</p></td>
|
|
</tr>
|
|
<tr>
|
|
<td align="right" valign="top"><p class="table">reftable</p></td>
|
|
<td align="right" valign="top"><p class="table">29.2 M</p></td>
|
|
<td align="right" valign="top"><p class="table">19.9 usec</p></td>
|
|
<td align="right" valign="top"><p class="table">5.4 usec</p></td>
|
|
</tr>
|
|
</tbody>
|
|
</table>
|
|
</div>
|
|
</div>
|
|
<div class="sect3">
|
|
<h4 id="_jgit_ketch_reftree">JGit Ketch RefTree</h4>
|
|
<div class="paragraph"><p><a href="https://dev.eclipse.org/mhonarc/lists/jgit-dev/msg03073.html">JGit Ketch</a>
|
|
proposed
|
|
<a href="https://lore.kernel.org/git/CAJo%3DhJvnAPNAdDcAAwAvU9C4RVeQdoS3Ev9WTguHx4fD0V_nOg%40mail.gmail.com/">RefTree</a>,
|
|
an encoding of references inside Git tree objects stored as part of the
|
|
repository’s object database.</p></div>
|
|
<div class="paragraph"><p>The RefTree format adds additional load on the object database storage
|
|
layer (more loose objects, more objects in packs), and relies heavily on
|
|
the packer’s delta compression to save space. Namespaces which are flat
|
|
(e.g. thousands of tags in refs/tags) initially create very large loose
|
|
objects, and so RefTree does not address the problem of copying many
|
|
references to modify a handful.</p></div>
|
|
<div class="paragraph"><p>Flat namespaces are not efficiently searchable in RefTree, as tree
|
|
objects in canonical formatting cannot be binary searched. This fails
|
|
the need to handle a large number of references in a single namespace,
|
|
such as GitHub’s <code>refs/pulls</code>, or a project with many tags.</p></div>
|
|
</div>
|
|
<div class="sect3">
|
|
<h4 id="_lmdb">LMDB</h4>
|
|
<div class="paragraph"><p>David Turner proposed
|
|
<a href="https://lore.kernel.org/git/1455772670-21142-26-git-send-email-dturner@twopensource.com/">using
|
|
LMDB</a>, as LMDB is lightweight (64k of runtime code) and GPL-compatible
|
|
license.</p></div>
|
|
<div class="paragraph"><p>A downside of LMDB is its reliance on a single C implementation. This
|
|
makes embedding inside JGit (a popular reimplementation of Git)
|
|
difficult, and hoisting onto virtual storage (for JGit DFS) virtually
|
|
impossible.</p></div>
|
|
<div class="paragraph"><p>A common format that can be supported by all major Git implementations
|
|
(git-core, JGit, libgit2) is strongly preferred.</p></div>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
<div id="footnotes"><hr /></div>
|
|
<div id="footer">
|
|
<div id="footer-text">
|
|
Last updated
|
|
2025-08-18 02:18:23 CEST
|
|
</div>
|
|
</div>
|
|
</body>
|
|
</html>
|