Skip to content

Commit

Permalink
Support new v1 directory listings #708 (#709)
Browse files Browse the repository at this point in the history
  • Loading branch information
Jaifroid authored Apr 7, 2021
1 parent 5f16fe3 commit 2d792b9
Show file tree
Hide file tree
Showing 4 changed files with 176 additions and 57 deletions.
16 changes: 8 additions & 8 deletions tests/tests.js
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,7 @@ define(['jquery', 'zimArchive', 'zimDirEntry', 'util', 'uiUtil', 'utf8'],
QUnit.test("article '(The Night Time Is) The Right Time' correctly redirects to 'Night Time Is the Right Time'", function(assert) {
var done = assert.async();
assert.expect(6);
localZimArchive.getDirEntryByTitle("A/(The_Night_Time_Is)_The_Right_Time.html").then(function(dirEntry) {
localZimArchive.getDirEntryByPath("A/(The_Night_Time_Is)_The_Right_Time.html").then(function(dirEntry) {
assert.ok(dirEntry !== null, "DirEntry found");
if (dirEntry !== null) {
assert.ok(dirEntry.isRedirect(), "DirEntry is a redirect.");
Expand All @@ -223,7 +223,7 @@ define(['jquery', 'zimArchive', 'zimDirEntry', 'util', 'uiUtil', 'utf8'],
QUnit.test("article 'Raelettes' correctly redirects to 'The Raelettes'", function(assert) {
var done = assert.async();
assert.expect(6);
localZimArchive.getDirEntryByTitle("A/Raelettes.html").then(function(dirEntry) {
localZimArchive.getDirEntryByPath("A/Raelettes.html").then(function(dirEntry) {
assert.ok(dirEntry !== null, "DirEntry found");
if (dirEntry !== null) {
assert.ok(dirEntry.isRedirect(), "DirEntry is a redirect.");
Expand All @@ -242,7 +242,7 @@ define(['jquery', 'zimArchive', 'zimDirEntry', 'util', 'uiUtil', 'utf8'],
QUnit.test("article 'Bein Green' correctly redirects to 'Bein' Green", function(assert) {
var done = assert.async();
assert.expect(6);
localZimArchive.getDirEntryByTitle("A/Bein_Green.html").then(function(dirEntry) {
localZimArchive.getDirEntryByPath("A/Bein_Green.html").then(function(dirEntry) {
assert.ok(dirEntry !== null, "DirEntry found");
if (dirEntry !== null) {
assert.ok(dirEntry.isRedirect(), "DirEntry is a redirect.");
Expand All @@ -261,7 +261,7 @@ define(['jquery', 'zimArchive', 'zimDirEntry', 'util', 'uiUtil', 'utf8'],
QUnit.test("article 'America, the Beautiful' correctly redirects to 'America the Beautiful'", function(assert) {
var done = assert.async();
assert.expect(6);
localZimArchive.getDirEntryByTitle("A/America,_the_Beautiful.html").then(function(dirEntry) {
localZimArchive.getDirEntryByPath("A/America,_the_Beautiful.html").then(function(dirEntry) {
assert.ok(dirEntry !== null, "DirEntry found");
if (dirEntry !== null) {
assert.ok(dirEntry.isRedirect(), "DirEntry is a redirect.");
Expand All @@ -280,7 +280,7 @@ define(['jquery', 'zimArchive', 'zimDirEntry', 'util', 'uiUtil', 'utf8'],
QUnit.test("Image 'm/RayCharles_AManAndHisSoul.jpg' can be loaded", function(assert) {
var done = assert.async();
assert.expect(5);
localZimArchive.getDirEntryByTitle("I/m/RayCharles_AManAndHisSoul.jpg").then(function(dirEntry) {
localZimArchive.getDirEntryByPath("I/m/RayCharles_AManAndHisSoul.jpg").then(function(dirEntry) {
assert.ok(dirEntry !== null, "DirEntry found");
if (dirEntry !== null) {
assert.equal(dirEntry.namespace +"/"+ dirEntry.url, "I/m/RayCharles_AManAndHisSoul.jpg", "URL is correct.");
Expand All @@ -301,7 +301,7 @@ define(['jquery', 'zimArchive', 'zimDirEntry', 'util', 'uiUtil', 'utf8'],
var done = assert.async();

assert.expect(5);
localZimArchive.getDirEntryByTitle("-/s/style.css").then(function(dirEntry) {
localZimArchive.getDirEntryByPath("-/s/style.css").then(function(dirEntry) {
assert.ok(dirEntry !== null, "DirEntry found");
if (dirEntry !== null) {
assert.equal(dirEntry.namespace +"/"+ dirEntry.url, "-/s/style.css", "URL is correct.");
Expand All @@ -321,7 +321,7 @@ define(['jquery', 'zimArchive', 'zimDirEntry', 'util', 'uiUtil', 'utf8'],
QUnit.test("Javascript '-/j/local.js' can be loaded", function(assert) {
var done = assert.async();
assert.expect(5);
localZimArchive.getDirEntryByTitle("-/j/local.js").then(function(dirEntry) {
localZimArchive.getDirEntryByPath("-/j/local.js").then(function(dirEntry) {
assert.ok(dirEntry !== null, "DirEntry found");
if (dirEntry !== null) {
assert.equal(dirEntry.namespace +"/"+ dirEntry.url, "-/j/local.js", "URL is correct.");
Expand All @@ -342,7 +342,7 @@ define(['jquery', 'zimArchive', 'zimDirEntry', 'util', 'uiUtil', 'utf8'],
QUnit.test("Split article 'A/Ray_Charles.html' can be loaded", function(assert) {
var done = assert.async();
assert.expect(7);
localZimArchive.getDirEntryByTitle("A/Ray_Charles.html").then(function(dirEntry) {
localZimArchive.getDirEntryByPath("A/Ray_Charles.html").then(function(dirEntry) {
assert.ok(dirEntry !== null, "Title found");
if (dirEntry !== null) {
assert.equal(dirEntry.namespace +"/"+ dirEntry.url, "A/Ray_Charles.html", "URL is correct.");
Expand Down
42 changes: 21 additions & 21 deletions www/js/app.js
Original file line number Diff line number Diff line change
Expand Up @@ -1260,7 +1260,7 @@ define(['jquery', 'zimArchiveLoader', 'uiUtil', 'settingsStore','abstractFilesys
});
}
};
selectedArchive.getDirEntryByTitle(title).then(readFile).catch(function () {
selectedArchive.getDirEntryByPath(title).then(readFile).catch(function () {
messagePort.postMessage({ 'action': 'giveContent', 'title': title, 'content': new Uint8Array() });
});
} else {
Expand Down Expand Up @@ -1462,8 +1462,8 @@ define(['jquery', 'zimArchiveLoader', 'uiUtil', 'settingsStore','abstractFilesys
// Extract the image at the top of the images array and remove it from the array
var image = images.shift();
var imageUrl = image.getAttribute('data-kiwixurl');
var title = decodeURIComponent(imageUrl);
selectedArchive.getDirEntryByTitle(title).then(function (dirEntry) {
var url = decodeURIComponent(imageUrl);
selectedArchive.getDirEntryByPath(url).then(function (dirEntry) {
selectedArchive.readBinaryFile(dirEntry, function (fileDirEntry, content) {
var mimetype = dirEntry.getMimetype();
uiUtil.feedNodeWithBlob(image, 'src', content, mimetype, function() {
Expand All @@ -1472,7 +1472,7 @@ define(['jquery', 'zimArchiveLoader', 'uiUtil', 'settingsStore','abstractFilesys
});
});
}).catch(function (e) {
console.error("could not find DirEntry for image:" + title, e);
console.error("could not find DirEntry for image:" + url, e);
images.busy = false;
extractImage();
});
Expand Down Expand Up @@ -1508,14 +1508,14 @@ define(['jquery', 'zimArchiveLoader', 'uiUtil', 'settingsStore','abstractFilesys
cssCount++;
var link = $(this);
var linkUrl = link.attr("data-kiwixurl");
var title = uiUtil.removeUrlParameters(decodeURIComponent(linkUrl));
if (cssCache.has(title)) {
var cssContent = cssCache.get(title);
var url = uiUtil.removeUrlParameters(decodeURIComponent(linkUrl));
if (cssCache.has(url)) {
var cssContent = cssCache.get(url);
uiUtil.replaceCSSLinkWithInlineCSS(link, cssContent);
cssFulfilled++;
} else {
if (params.useCache) $('#cachingAssets').show();
selectedArchive.getDirEntryByTitle(title)
selectedArchive.getDirEntryByPath(url)
.then(function (dirEntry) {
return selectedArchive.readUtf8File(dirEntry,
function (fileDirEntry, content) {
Expand All @@ -1527,7 +1527,7 @@ define(['jquery', 'zimArchiveLoader', 'uiUtil', 'settingsStore','abstractFilesys
}
);
}).catch(function (e) {
console.error("could not find DirEntry for CSS : " + title, e);
console.error("could not find DirEntry for CSS : " + url, e);
cssCount--;
renderIfCSSFulfilled();
});
Expand Down Expand Up @@ -1563,7 +1563,7 @@ define(['jquery', 'zimArchiveLoader', 'uiUtil', 'settingsStore','abstractFilesys
// var scriptUrl = script.attr("data-kiwixurl");
// // TODO check that the type of the script is text/javascript or application/javascript
// var title = uiUtil.removeUrlParameters(decodeURIComponent(scriptUrl));
// selectedArchive.getDirEntryByTitle(title).then(function(dirEntry) {
// selectedArchive.getDirEntryByPath(title).then(function(dirEntry) {
// if (dirEntry === null) {
// console.log("Error: js file not found: " + title);
// } else {
Expand Down Expand Up @@ -1591,7 +1591,7 @@ define(['jquery', 'zimArchiveLoader', 'uiUtil', 'settingsStore','abstractFilesys
return;
}
var mediaElement = /audio|video/i.test(mediaSource.tagName) ? mediaSource : mediaSource.parentElement;
selectedArchive.getDirEntryByTitle(source).then(function(dirEntry) {
selectedArchive.getDirEntryByPath(source).then(function(dirEntry) {
return selectedArchive.readBinaryFile(dirEntry, function (fileDirEntry, mediaArray) {
var mimeType = mediaSource.type ? mediaSource.type : dirEntry.getMimetype();
var blob = new Blob([mediaArray], { type: mimeType });
Expand Down Expand Up @@ -1649,30 +1649,30 @@ define(['jquery', 'zimArchiveLoader', 'uiUtil', 'settingsStore','abstractFilesys


/**
* Extracts the content of the given article title, or a downloadable file, from the ZIM
* Extracts the content of the given article pathname, or a downloadable file, from the ZIM
*
* @param {String} title The path and filename to the article or file to be extracted
* @param {String} path The pathname (namespace + filename) to the article or file to be extracted
* @param {Boolean|String} download A Boolean value that will trigger download of title, or the filename that should
* be used to save the file in local FS (in HTML5 spec, a string value for the download attribute is optional)
* @param {String} contentType The mimetype of the downloadable file, if known
*/
function goToArticle(title, download, contentType) {
function goToArticle(path, download, contentType) {
$("#searchingArticles").show();
selectedArchive.getDirEntryByTitle(title).then(function(dirEntry) {
selectedArchive.getDirEntryByPath(path).then(function(dirEntry) {
if (dirEntry === null || dirEntry === undefined) {
$("#searchingArticles").hide();
alert("Article with title " + title + " not found in the archive");
alert("Article with url " + path + " not found in the archive");
} else if (download) {
selectedArchive.readBinaryFile(dirEntry, function (fileDirEntry, content) {
var mimetype = contentType || fileDirEntry.getMimetype();
uiUtil.displayFileDownloadAlert(title, download, mimetype, content);
uiUtil.displayFileDownloadAlert(path, download, mimetype, content);
});
} else {
params.isLandingPage = false;
$('#activeContent').hide();
readArticle(dirEntry);
}
}).catch(function(e) { alert("Error reading article with title " + title + " : " + e); });
}).catch(function(e) { alert("Error reading article with url " + path + " : " + e); });
}

function goToRandomArticle() {
Expand All @@ -1683,9 +1683,9 @@ define(['jquery', 'zimArchiveLoader', 'uiUtil', 'settingsStore','abstractFilesys
alert("Error finding random article.");
} else {
// We fall back to the old A namespace to support old ZIM files without a text/html MIME type for articles
// DEV: This will need to be changed if we search titlePtrList version 1
// in a future PR, as that list contains only articles
if (dirEntry.getMimetype() === 'text/html' || dirEntry.namespace === 'A') {
// DEV: If articlePtrPos is defined in zimFile, then we are using a v1 article-only title listing. By definition,
// all dirEntries in an article-only listing must be articles.
if (selectedArchive._file.articlePtrPos || dirEntry.getMimetype() === 'text/html' || dirEntry.namespace === 'A') {
params.isLandingPage = false;
$('#activeContent').hide();
$('#searchingArticles').show();
Expand Down
60 changes: 43 additions & 17 deletions www/js/lib/zimArchive.js
Original file line number Diff line number Diff line change
Expand Up @@ -46,17 +46,39 @@ define(['zimfile', 'zimDirEntry', 'util', 'utf8'],
* Creates a ZIM archive object to access the ZIM file at the given path in the given storage.
* This constructor can also be used with a single File parameter.
*
* @param {StorageFirefoxOS|Array.<Blob>} storage Storage (in this case, the path must be given) or Array of Files (path parameter must be omitted)
* @param {String} path
* @param {callbackZIMArchive} callbackReady
* @param {StorageFirefoxOS|Array<Blob>} storage Storage (in this case, the path must be given) or Array of Files (path parameter must be omitted)
* @param {String} path The Storage path for an OS that requires this to be specified
* @param {callbackZIMArchive} callbackReady The function to call when the archive is ready to use
*/
function ZIMArchive(storage, path, callbackReady) {
var that = this;
that._file = null;
that._language = ""; //@TODO
var createZimfile = function(fileArray) {
zimfile.fromFileArray(fileArray).then(function(file) {
var createZimfile = function (fileArray) {
zimfile.fromFileArray(fileArray).then(function (file) {
that._file = file;
// File has been created, but we need to add any Listings which extend the archive metadata
that._file.setListings([
// Provide here any Listings for which we need to extract metadata as key:value objects to be added to the file
// 'ptrName' and 'countName' contain the key names to be set in the archive file object
{
// This defines the standard v0 (legacy) title index that contains listings for every entry in the ZIM (not just articles)
// It represents the same index that is referenced in the ZIM archive header
path: 'X/listing/titleOrdered/v0',
ptrName: 'titlePtrPos',
countName: 'entryCount'
},
{
// This defines a new version 1 index that is present in no-namespace ZIMs, and contains a title-ordered list of articles
path: 'X/listing/titleOrdered/v1',
ptrName: 'articlePtrPos',
countName: 'articleCount'
}
]);
// DEV: Currently, extended listings are only used for title (=article) listings when the user searches
// for an article or uses the Random button, by which time the listings will have been extracted.
// If, in the future, listings are used in a more time-critical manner, consider forcing a wait before
// declaring the archive to be ready, by chaining the following callback in a .then() function of setListings.
callbackReady(that);
});
};
Expand Down Expand Up @@ -245,7 +267,9 @@ define(['zimfile', 'zimDirEntry', 'util', 'utf8'],
ZIMArchive.prototype.findDirEntriesWithPrefixCaseSensitive = function(prefix, resultSize, search, callback) {
var that = this;
var cns = this.getContentNamespace();
util.binarySearch(0, this._file.articleCount, function(i) {
// Search v1 article listing if available, otherwise fallback to v0
var articleCount = this._file.articleCount || this._file.entryCount;
util.binarySearch(0, articleCount, function(i) {
return that._file.dirEntryByTitleIndex(i).then(function(dirEntry) {
if (search.status === 'cancelled') return 0;
var ns = dirEntry.namespace;
Expand All @@ -257,7 +281,7 @@ define(['zimfile', 'zimDirEntry', 'util', 'utf8'],
}, true).then(function(firstIndex) {
var dirEntries = [];
var addDirEntries = function(index) {
if (search.status === 'cancelled' || index >= firstIndex + resultSize || index >= that._file.articleCount) {
if (search.status === 'cancelled' || index >= firstIndex + resultSize || index >= articleCount) {
return dirEntries;
}
return that._file.dirEntryByTitleIndex(index).then(function(dirEntry) {
Expand Down Expand Up @@ -322,18 +346,18 @@ define(['zimfile', 'zimDirEntry', 'util', 'utf8'],
};

/**
* Searches a DirEntry (article / page) by its title.
* @param {String} title
* @return {Promise} resolving to the DirEntry object or null if not found.
* Searches the URL pointer list of Directory Entries by pathname
* @param {String} path The pathname of the DirEntry that is required (namespace + filename)
* @return {Promise<DirEntry>} A Promise that resolves to a Directory Entry, or null if not found.
*/
ZIMArchive.prototype.getDirEntryByTitle = function(title) {
ZIMArchive.prototype.getDirEntryByPath = function(path) {
var that = this;
return util.binarySearch(0, this._file.articleCount, function(i) {
return util.binarySearch(0, this._file.entryCount, function(i) {
return that._file.dirEntryByUrlIndex(i).then(function(dirEntry) {
var url = dirEntry.namespace + "/" + dirEntry.url;
if (title < url)
if (path < url)
return -1;
else if (title > url)
else if (path > url)
return 1;
else
return 0;
Expand All @@ -351,8 +375,10 @@ define(['zimfile', 'zimDirEntry', 'util', 'utf8'],
* @param {callbackDirEntry} callback
*/
ZIMArchive.prototype.getRandomDirEntry = function(callback) {
var index = Math.floor(Math.random() * this._file.articleCount);
this._file.dirEntryByUrlIndex(index).then(callback);
// Prefer an article-only (v1) title pointer list, if available
var articleCount = this._file.articleCount || this._file.entryCount;
var index = Math.floor(Math.random() * articleCount);
this._file.dirEntryByTitleIndex(index).then(callback);
};

/**
Expand All @@ -362,7 +388,7 @@ define(['zimfile', 'zimDirEntry', 'util', 'utf8'],
*/
ZIMArchive.prototype.getMetadata = function (key, callback) {
var that = this;
this.getDirEntryByTitle("M/" + key).then(function (dirEntry) {
this.getDirEntryByPath("M/" + key).then(function (dirEntry) {
if (dirEntry === null || dirEntry === undefined) {
console.warn("Title M/" + key + " not found in the archive");
callback();
Expand Down
Loading

0 comments on commit 2d792b9

Please sign in to comment.