diff --git a/source/org/openzim/ArticleEntry.java b/source/org/openzim/ArticleEntry.java
new file mode 100644
index 0000000000..7eeae2e069
--- /dev/null
+++ b/source/org/openzim/ArticleEntry.java
@@ -0,0 +1,68 @@
+/*
+ * Copyright (C) 2011 Arunesh Mathur
+ *
+ * This file is a part of zimreader-java.
+ *
+ * zimreader-java is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License version 3.0 as
+ * published by the Free Software Foundation.
+ *
+ * zimreader-java is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with zimreader-java. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+package org.openzim;
+
+/**
+ * Directory entry that refers to stored content through a cluster number and a
+ * blob number, on top of the fields inherited from {@link DirectoryEntry}.
+ */
+public class ArticleEntry extends DirectoryEntry {
+
+    /** Cluster number supplied at construction time. */
+    int clusterNumber;
+
+    /** Blob number supplied at construction time. */
+    int blobnumber;
+
+    /**
+     * Creates an article entry.
+     *
+     * @param mimeType      passed through to {@link DirectoryEntry}
+     * @param namespace     passed through to {@link DirectoryEntry}
+     * @param revision      passed through to {@link DirectoryEntry}
+     * @param clusterNumber value later returned by {@link #getClusterNumber()}
+     * @param blobNumber    value later returned by {@link #getBlobnumber()}
+     * @param url           passed through to {@link DirectoryEntry}
+     * @param title         passed through to {@link DirectoryEntry}
+     * @param urlListindex  passed through to {@link DirectoryEntry}
+     */
+    public ArticleEntry(
+            final int mimeType,
+            final char namespace,
+            final int revision,
+            final int clusterNumber,
+            final int blobNumber,
+            final String url,
+            final String title,
+            final int urlListindex) {
+        super(mimeType, namespace, revision, url, title, urlListindex);
+        this.blobnumber = blobNumber;
+        this.clusterNumber = clusterNumber;
+    }
+
+    /** @return the cluster number given to the constructor */
+    public int getClusterNumber() {
+        return clusterNumber;
+    }
+
+    /** @return the blob number given to the constructor */
+    public int getBlobnumber() {
+        return blobnumber;
+    }
+}
diff --git a/source/org/openzim/DirectoryEntry.java b/source/org/openzim/DirectoryEntry.java
new file mode 100644
index 0000000000..92c52de415
--- /dev/null
+++ b/source/org/openzim/DirectoryEntry.java
@@ -0,0 +1,93 @@
+/*
+ * Copyright (C) 2011 Arunesh Mathur
+ *
+ * This file is a part of zimreader-java.
+ *
+ * zimreader-java is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License version 3.0 as
+ * published by the Free Software Foundation.
+ *
+ * zimreader-java is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with zimreader-java. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+package org.openzim;
+
+/**
+ * Base class of the directory entries: a plain holder for the six values handed
+ * to its constructor, each exposed through a getter.
+ */
+public abstract class DirectoryEntry {
+
+    int mimeType;
+
+    char namespace;
+
+    int revision;
+
+    String url;
+
+    String title;
+
+    int urlListindex;
+
+    /**
+     * Stores the supplied values as they are, without validation.
+     *
+     * @param mimeType  value returned by {@link #getMimeType()}
+     * @param namespace value returned by {@link #getNamespace()}
+     * @param revision  value returned by {@link #getRevision()}
+     * @param url       value returned by {@link #getUrl()}
+     * @param title     value returned by {@link #getTitle()}
+     * @param index     value returned by {@link #getUrlListindex()}
+     */
+    public DirectoryEntry(
+            final int mimeType,
+            final char namespace,
+            final int revision,
+            final String url,
+            final String title,
+            final int index) {
+        this.urlListindex = index;
+        this.title = title;
+        this.url = url;
+        this.revision = revision;
+        this.namespace = namespace;
+        this.mimeType = mimeType;
+    }
+
+    /** @return the MIME type value given to the constructor */
+    public int getMimeType() {
+        return mimeType;
+    }
+
+    /** @return the namespace character given to the constructor */
+    public char getNamespace() {
+        return namespace;
+    }
+
+    /** @return the revision given to the constructor */
+    public int getRevision() {
+        return revision;
+    }
+
+    /** @return the URL given to the constructor */
+    public String getUrl() {
+        return url;
+    }
+
+    /** @return the title given to the constructor */
+    public String getTitle() {
+        return title;
+    }
+
+    /** @return the index given to the constructor */
+    public int getUrlListindex() {
+        return urlListindex;
+    }
+}
diff --git a/source/org/openzim/RandomAcessFileZIMInputStream.java b/source/org/openzim/RandomAcessFileZIMInputStream.java
new file mode 100644
index 0000000000..006dd4498e
--- /dev/null
+++ b/source/org/openzim/RandomAcessFileZIMInputStream.java
@@ -0,0 +1,209 @@
+/*
+ * Copyright (C) 2011 Arunesh Mathur
+ *
+ * This file is a part of zimreader-java.
+ *
+ * zimreader-java is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License version 3.0 as
+ * published by the Free Software Foundation.
+ *
+ * zimreader-java is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with zimreader-java. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+package org.openzim;
+
+import java.io.ByteArrayOutputStream;
+import java.io.EOFException;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.RandomAccessFile;
+import java.nio.charset.StandardCharsets;
+
+/**
+ * Presents a {@link RandomAccessFile} as an {@link InputStream} and adds the
+ * seeking and little-endian helpers used while parsing a ZIM file.
+ *
+ * <p>The fixed-width readers either fill the requested number of bytes or fail
+ * with an {@link EOFException}; a partially filled buffer is never decoded.
+ *
+ * @author Arunesh Mathur
+ */
+public class RandomAcessFileZIMInputStream extends InputStream {
+
+    private final RandomAccessFile mRAFReader;
+
+    /** File offset remembered by {@link #mark()}, or -1 when nothing is marked. */
+    private long mMarked = -1;
+
+    /**
+     * @param reader the file to read from; it is closed by {@link #close()}
+     */
+    public RandomAcessFileZIMInputStream(final RandomAccessFile reader) {
+        this.mRAFReader = reader;
+    }
+
+    /**
+     * Reads two bytes into {@code buffer} and decodes them with
+     * {@link Utilities#toTwoLittleEndianInteger(byte[])}.
+     *
+     * @param buffer scratch space of at least two bytes
+     * @return the decoded value
+     * @throws IllegalArgumentException if {@code buffer} is too small
+     * @throws IOException if fewer than two bytes are left or reading fails
+     */
+    // TODO: Remove the parameter buffer
+    public int readTwoLittleEndianBytesValue(final byte[] buffer) throws IOException {
+        fill(buffer, 2);
+        return Utilities.toTwoLittleEndianInteger(buffer);
+    }
+
+    /**
+     * Reads four bytes into {@code buffer} and decodes them with
+     * {@link Utilities#toFourLittleEndianInteger(byte[])}.
+     *
+     * @param buffer scratch space of at least four bytes
+     * @return the decoded value
+     * @throws IllegalArgumentException if {@code buffer} is too small
+     * @throws IOException if fewer than four bytes are left or reading fails
+     */
+    // TODO: Remove the parameter buffer
+    public int readFourLittleEndianBytesValue(final byte[] buffer) throws IOException {
+        fill(buffer, 4);
+        return Utilities.toFourLittleEndianInteger(buffer);
+    }
+
+    /**
+     * Reads eight bytes into {@code buffer} and decodes them with
+     * {@link Utilities#toEightLittleEndianInteger(byte[])}.
+     *
+     * @param buffer scratch space of at least eight bytes
+     * @return the decoded value
+     * @throws IllegalArgumentException if {@code buffer} is too small
+     * @throws IOException if fewer than eight bytes are left or reading fails
+     */
+    // TODO: Remove the parameter buffer
+    public int readEightLittleEndianBytesValue(final byte[] buffer) throws IOException {
+        fill(buffer, 8);
+        return Utilities.toEightLittleEndianInteger(buffer);
+    }
+
+    /**
+     * Reads sixteen bytes into {@code buffer} and decodes them with
+     * {@link Utilities#toSixteenLittleEndianInteger(byte[])}.
+     *
+     * @param buffer scratch space of at least sixteen bytes
+     * @return the decoded value
+     * @throws IllegalArgumentException if {@code buffer} is too small
+     * @throws IOException if fewer than sixteen bytes are left or reading fails
+     */
+    // TODO: Remove the parameter buffer
+    public int readSixteenLittleEndianBytesValue(final byte[] buffer) throws IOException {
+        fill(buffer, 16);
+        return Utilities.toSixteenLittleEndianInteger(buffer);
+    }
+
+    /**
+     * Fills the first {@code count} bytes of {@code buffer} from the file.
+     * {@code readFully} is used because a plain {@code read} may return before
+     * the requested number of bytes has arrived.
+     */
+    private void fill(final byte[] buffer, final int count) throws IOException {
+        if (buffer.length < count) {
+            throw new IllegalArgumentException("buffer too small");
+        }
+        this.mRAFReader.readFully(buffer, 0, count);
+    }
+
+    /**
+     * Reads bytes from the current position up to, but not including, the next
+     * zero byte and decodes them as UTF-8. The terminator itself is consumed.
+     *
+     * @return the decoded string, possibly empty
+     * @throws EOFException if the file ends before a zero byte is found
+     * @throws IOException if reading fails
+     */
+    public String readString() throws IOException {
+        final ByteArrayOutputStream bytes = new ByteArrayOutputStream();
+        int b = this.mRAFReader.read();
+        while (b != '\0') {
+            if (b < 0) {
+                // Without this check the loop would never end at end of file.
+                throw new EOFException("end of file reached inside a string");
+            }
+            bytes.write(b);
+            b = this.mRAFReader.read();
+        }
+        return new String(bytes.toByteArray(), StandardCharsets.UTF_8);
+    }
+
+    @Override
+    public int read() throws IOException {
+        return this.mRAFReader.read();
+    }
+
+    /**
+     * Bulk read that goes to the file directly instead of through the
+     * byte-at-a-time default of {@link InputStream}. Like that default it keeps
+     * reading until {@code len} bytes are stored or the end of the file is hit.
+     */
+    @Override
+    public int read(final byte[] b, final int off, final int len) throws IOException {
+        if (off < 0 || len < 0 || len > b.length - off) {
+            throw new IndexOutOfBoundsException();
+        }
+        int total = 0;
+        while (total < len) {
+            final int n = this.mRAFReader.read(b, off + total, len - total);
+            if (n < 0) {
+                break;
+            }
+            total += n;
+        }
+        return (total == 0 && len > 0) ? -1 : total;
+    }
+
+    /** @return the wrapped file */
+    public RandomAccessFile getRandomAccessFile() {
+        return this.mRAFReader;
+    }
+
+    /** Moves the file pointer to the absolute offset {@code pos}. */
+    public void seek(final long pos) throws IOException {
+        this.mRAFReader.seek(pos);
+    }
+
+    /** @return the current absolute offset in the file */
+    public long getFilePointer() throws IOException {
+        return this.mRAFReader.getFilePointer();
+    }
+
+    /** Remembers the current file offset for a later {@link #reset()}. */
+    public void mark() throws IOException {
+        this.mMarked = this.mRAFReader.getFilePointer();
+    }
+
+    /**
+     * Returns to the offset remembered by {@link #mark()} and forgets it. Does
+     * nothing when no offset is marked.
+     */
+    @Override
+    public void reset() throws IOException {
+        if (this.mMarked == -1) {
+            return;
+        }
+        this.mRAFReader.seek(this.mMarked);
+        this.mMarked = -1;
+    }
+
+    /** Closes the wrapped file so that its descriptor is released. */
+    @Override
+    public void close() throws IOException {
+        this.mRAFReader.close();
+    }
+}
diff --git a/source/org/openzim/RedirectEntry.java b/source/org/openzim/RedirectEntry.java
new file mode 100644
index 0000000000..fdbe3fba1d
--- /dev/null
+++ b/source/org/openzim/RedirectEntry.java
@@ -0,0 +1,57 @@
+/*
+ * Copyright (C) 2011 Arunesh Mathur
+ *
+ * This file is a part of zimreader-java.
+ *
+ * zimreader-java is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License version 3.0 as
+ * published by the Free Software Foundation.
+ *
+ * zimreader-java is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with zimreader-java. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+package org.openzim;
+
+/**
+ * Directory entry that carries a redirect index in addition to the fields
+ * inherited from {@link DirectoryEntry}.
+ */
+public class RedirectEntry extends DirectoryEntry {
+
+    /** Redirect index supplied at construction time. */
+    int redirectIndex;
+
+    /**
+     * Creates a redirect entry.
+     *
+     * @param mimeType      passed through to {@link DirectoryEntry}
+     * @param namespace     passed through to {@link DirectoryEntry}
+     * @param revision      passed through to {@link DirectoryEntry}
+     * @param redirectIndex value later returned by {@link #getRedirectIndex()}
+     * @param url           passed through to {@link DirectoryEntry}
+     * @param title         passed through to {@link DirectoryEntry}
+     * @param urlListindex  passed through to {@link DirectoryEntry}
+     */
+    public RedirectEntry(
+            final int mimeType,
+            final char namespace,
+            final int revision,
+            final int redirectIndex,
+            final String url,
+            final String title,
+            final int urlListindex) {
+        super(mimeType, namespace, revision, url, title, urlListindex);
+        this.redirectIndex = redirectIndex;
+    }
+
+    /** @return the redirect index given to the constructor */
+    public int getRedirectIndex() {
+        return redirectIndex;
+    }
+}
diff --git a/source/org/openzim/Utilities.java b/source/org/openzim/Utilities.java
new file mode 100644
index 0000000000..0de337c9cf
--- /dev/null
+++ b/source/org/openzim/Utilities.java
@@ -0,0 +1,146 @@
+/*
+ * Copyright (C) 2011 Arunesh Mathur
+ *
+ * This file is a part of zimreader-java.
+ *
+ * zimreader-java is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License version 3.0 as
+ * published by the Free Software Foundation.
+ *
+ * zimreader-java is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with zimreader-java. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+package org.openzim;
+
+import java.io.EOFException;
+import java.io.IOException;
+import java.io.InputStream;
+
+/**
+ * Static helpers for decoding little-endian integers from byte arrays and for
+ * skipping bytes of an {@link InputStream}.
+ */
+public class Utilities {
+
+    // TODO: Write a binary search algorithm
+    /** Placeholder that always returns -1. */
+    public static int binarySearch() {
+        return -1;
+    }
+
+    /**
+     * Decodes the first two bytes of {@code buffer} as an unsigned little-endian
+     * value.
+     *
+     * @param buffer at least two bytes
+     * @return a value between 0 and 65535
+     * @throws IllegalArgumentException if {@code buffer} is too short
+     */
+    public static int toTwoLittleEndianInteger(final byte[] buffer) throws IOException {
+        requireLength(buffer, 2);
+        return (buffer[0] & 0xFF) | ((buffer[1] & 0xFF) << 8);
+    }
+
+    /**
+     * Decodes the first four bytes of {@code buffer} as a little-endian
+     * {@code int}; values with the top bit set come out negative.
+     *
+     * @param buffer at least four bytes
+     * @return the decoded value
+     * @throws IllegalArgumentException if {@code buffer} is too short
+     */
+    public static int toFourLittleEndianInteger(final byte[] buffer) throws IOException {
+        requireLength(buffer, 4);
+        return littleEndianWord(buffer, 0);
+    }
+
+    /**
+     * Decodes the first eight bytes of {@code buffer} as a little-endian value
+     * and returns it as an {@code int}.
+     *
+     * <p>The value is assembled in a {@code long}: Java only uses the low five
+     * bits of an {@code int} shift distance, so shifting an {@code int} by 32 or
+     * more would fold the upper four bytes onto the lower four.
+     *
+     * @param buffer at least eight bytes
+     * @return the decoded value
+     * @throws IllegalArgumentException if {@code buffer} is too short
+     * @throws IOException if the value is outside 0..{@link Integer#MAX_VALUE}
+     *         and therefore cannot be returned without corrupting it
+     */
+    public static int toEightLittleEndianInteger(final byte[] buffer) throws IOException {
+        requireLength(buffer, 8);
+        final long low = littleEndianWord(buffer, 0) & 0xFFFFFFFFL;
+        final long high = littleEndianWord(buffer, 4) & 0xFFFFFFFFL;
+        final long value = (high << 32) | low;
+        if (value < 0 || value > Integer.MAX_VALUE) {
+            throw new IOException("8-byte value 0x" + Long.toHexString(value)
+                    + " does not fit into an int");
+        }
+        return (int) value;
+    }
+
+    /**
+     * Reduces the first sixteen bytes of {@code buffer} to an {@code int} by
+     * OR-ing the four little-endian 32-bit words they contain.
+     *
+     * <p>NOTE: this is lossy. A 128-bit value cannot be returned as an
+     * {@code int}; the OR of the four words is the value this method has always
+     * produced (int shift distances wrap at 32) and is kept so that existing
+     * callers keep seeing the same numbers.
+     *
+     * @param buffer at least sixteen bytes
+     * @return the OR of the four 32-bit words
+     * @throws IllegalArgumentException if {@code buffer} is too short
+     */
+    public static int toSixteenLittleEndianInteger(final byte[] buffer) throws IOException {
+        requireLength(buffer, 16);
+        return littleEndianWord(buffer, 0) | littleEndianWord(buffer, 4)
+                | littleEndianWord(buffer, 8) | littleEndianWord(buffer, 12);
+    }
+
+    /**
+     * Skips exactly {@code bytes} bytes of {@code stream}.
+     *
+     * @param stream the stream to advance
+     * @param bytes  number of bytes to skip; values below one skip nothing
+     * @throws EOFException if the stream ends first
+     * @throws IOException if the stream fails
+     */
+    public static void skipFully(final InputStream stream, final long bytes) throws IOException {
+        long remaining = bytes;
+        while (remaining > 0) {
+            final long skipped = stream.skip(remaining);
+            if (skipped > 0) {
+                remaining -= skipped;
+            } else if (stream.read() >= 0) {
+                // skip() may return 0 without being at the end; a one-byte read
+                // tells the two situations apart and still makes progress.
+                remaining--;
+            } else {
+                throw new EOFException(remaining + " more bytes to skip at end of stream");
+            }
+        }
+    }
+
+    /** Rejects buffers that hold fewer than {@code needed} bytes. */
+    private static void requireLength(final byte[] buffer, final int needed) {
+        if (buffer.length < needed) {
+            throw new IllegalArgumentException("buffer too small");
+        }
+    }
+
+    /** Decodes four bytes starting at {@code offset} as a little-endian int. */
+    private static int littleEndianWord(final byte[] buffer, final int offset) {
+        return (buffer[offset] & 0xFF)
+                | ((buffer[offset + 1] & 0xFF) << 8)
+                | ((buffer[offset + 2] & 0xFF) << 16)
+                | ((buffer[offset + 3] & 0xFF) << 24);
+    }
+}
diff --git a/source/org/openzim/ZIMFile.java b/source/org/openzim/ZIMFile.java
new file mode 100644
index 0000000000..c86119be1d
--- /dev/null
+++ b/source/org/openzim/ZIMFile.java
@@ -0,0 +1,202 @@
+/*
+ * Copyright (C) 2011 Arunesh Mathur
+ *
+ * This file is a part of zimreader-java.
+ *
+ * zimreader-java is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License version 3.0 as
+ * published by the Free Software Foundation.
+ *
+ * zimreader-java is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with zimreader-java. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+package org.openzim;
+
+import java.io.EOFException;
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.RandomAccessFile;
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * A ZIM file implementation that stores the Header and the MIMETypeList.
+ *
+ * <p>The header is parsed once, in the constructor. Problems found while
+ * parsing are only printed; they are not reported to the caller.
+ *
+ * @author Arunesh Mathur
+ */
+public class ZIMFile extends File {
+
+    private static final long serialVersionUID = 1L;
+
+    private Header mHeader;
+
+    private List<String> mMIMETypeList; // Can be removed if not needed
+
+    /**
+     * Opens the file at {@code path} and reads its header and MIME type list.
+     * If the file does not exist the stack trace is printed and the header stays
+     * unset, so the getters of this class will fail with a
+     * {@link NullPointerException}.
+     *
+     * @param path location of the ZIM file
+     */
+    public ZIMFile(final String path) {
+        super(path);
+
+        try {
+            readHeader();
+        } catch (final FileNotFoundException e) {
+            e.printStackTrace();
+        }
+    }
+
+    /**
+     * Reads the fixed-size header fields and then the MIME type list.
+     *
+     * @throws FileNotFoundException if the file is missing or cannot be opened
+     */
+    private void readHeader() throws FileNotFoundException {
+
+        // Check whether the file exists
+        if (!this.exists()) {
+            throw new FileNotFoundException(
+                    "The file that you specified was not found.");
+        }
+
+        // The byte[] that will help us in reading bytes out of the file
+        final byte[] buffer = new byte[16];
+
+        // try-with-resources: the file is only needed while parsing and must not
+        // stay open afterwards.
+        try (RandomAccessFile file = new RandomAccessFile(this, "r")) {
+            final RandomAcessFileZIMInputStream reader = new RandomAcessFileZIMInputStream(file);
+
+            // The ZIM file header
+            final Header header = new Header();
+            this.mHeader = header;
+
+            // The fields are stored back to back, so the order of the reads matters.
+            header.magicNumber = reader.readFourLittleEndianBytesValue(buffer);
+            header.version = reader.readFourLittleEndianBytesValue(buffer);
+            header.uuid = reader.readSixteenLittleEndianBytesValue(buffer);
+            header.articleCount = reader.readFourLittleEndianBytesValue(buffer);
+            header.clusterCount = reader.readFourLittleEndianBytesValue(buffer);
+            header.urlPtrPos = reader.readEightLittleEndianBytesValue(buffer);
+            header.titlePtrPos = reader.readEightLittleEndianBytesValue(buffer);
+            header.clusterPtrPos = reader.readEightLittleEndianBytesValue(buffer);
+            header.mimeListPos = reader.readEightLittleEndianBytesValue(buffer);
+            header.mainPage = reader.readFourLittleEndianBytesValue(buffer);
+            header.layoutPage = reader.readFourLittleEndianBytesValue(buffer);
+
+            // The MIME type list starts at mimeListPos. Per the openZIM format
+            // description that need not be the byte after layoutPage, because
+            // further header fields (a checksum position) may sit in between.
+            reader.seek(header.mimeListPos);
+
+            // Zero-terminated strings; an empty string ends the list.
+            this.mMIMETypeList = new ArrayList<>();
+            final StringBuilder mimeBuffer = new StringBuilder();
+            while (true) {
+                final int b = reader.read();
+                if (b < 0) {
+                    // Without this check a truncated file would loop forever.
+                    throw new EOFException("end of file inside the MIME type list");
+                }
+                if (b != '\0') {
+                    mimeBuffer.append((char) b);
+                    continue;
+                }
+                if (mimeBuffer.length() == 0) {
+                    break;
+                }
+                this.mMIMETypeList.add(mimeBuffer.toString());
+                mimeBuffer.setLength(0);
+            }
+
+        } catch (final FileNotFoundException e) {
+            // Opening the file failed: report it like a missing file.
+            throw e;
+        } catch (final Exception e) {
+            e.printStackTrace();
+        }
+    }
+
+    /** @return the version field of the header */
+    public int getVersion() {
+        return this.mHeader.version;
+    }
+
+    /** @return the uuid field of the header, as reduced to an int when read */
+    public int getUuid() {
+        return this.mHeader.uuid;
+    }
+
+    /** @return the articleCount field of the header */
+    public int getArticleCount() {
+        return this.mHeader.articleCount;
+    }
+
+    /** @return the clusterCount field of the header */
+    public int getClusterCount() {
+        return this.mHeader.clusterCount;
+    }
+
+    /** @return the urlPtrPos field of the header */
+    public int getUrlPtrPos() {
+        return this.mHeader.urlPtrPos;
+    }
+
+    /** @return the titlePtrPos field of the header */
+    public int getTitlePtrPos() {
+        return this.mHeader.titlePtrPos;
+    }
+
+    /** @return the clusterPtrPos field of the header */
+    public int getClusterPtrPos() {
+        return this.mHeader.clusterPtrPos;
+    }
+
+    /** @return the entry of the MIME type list at index {@code mimeNumber} */
+    public String getMIMEType(final int mimeNumber) {
+        return this.mMIMETypeList.get(mimeNumber);
+    }
+
+    /** @return the mimeListPos field of the header */
+    public int getHeaderSize() {
+        return this.mHeader.mimeListPos;
+    }
+
+    /** @return the mainPage field of the header */
+    public int getMainPage() {
+        return this.mHeader.mainPage;
+    }
+
+    /** @return the layoutPage field of the header */
+    public int getLayoutPage() {
+        return this.mHeader.layoutPage;
+    }
+
+    /** Raw header values, declared in the order {@code readHeader()} reads them. */
+    public class Header {
+        int magicNumber;
+        int version;
+        int uuid;
+        int articleCount;
+        int clusterCount;
+        int urlPtrPos;
+        int titlePtrPos;
+        int clusterPtrPos;
+        int mimeListPos;
+        int mainPage;
+        int layoutPage;
+    }
+}
diff --git a/source/org/openzim/ZIMReader.java b/source/org/openzim/ZIMReader.java
new file mode 100644
index 0000000000..affd6ea6d8
--- /dev/null
+++ b/source/org/openzim/ZIMReader.java
@@ -0,0 +1,370 @@
+/*
+ * Copyright (C) 2011 Arunesh Mathur
+ *
+ * This file is a part of zimreader-java.
+ *
+ * zimreader-java is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License version 3.0 as
+ * published by the Free Software Foundation.
+ *
+ * zimreader-java is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with zimreader-java. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+package org.openzim;
+
+import java.io.ByteArrayOutputStream;
+import java.io.EOFException;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.RandomAccessFile;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.tukaani.xz.SingleXZInputStream;
+
+/**
+ * A ZIMReader that reads data from the ZIMFile.
+ *
+ * <p>Every method repositions the one underlying reader and nothing here
+ * synchronizes access to it.
+ *
+ * @author Arunesh Mathur
+ */
+public class ZIMReader {
+
+    /** MIME type value that marks a directory entry as a redirect. */
+    private static final int REDIRECT_MIME_TYPE = 65535;
+
+    private final ZIMFile mFile;
+    private RandomAcessFileZIMInputStream mReader;
+
+    /**
+     * Opens {@code file} for reading. If it cannot be opened the stack trace is
+     * printed and the reader stays unset, so later calls will fail with a
+     * {@link NullPointerException}.
+     *
+     * @param file the ZIM file to read from
+     */
+    public ZIMReader(final ZIMFile file) {
+        this.mFile = file;
+        try {
+            this.mReader = new RandomAcessFileZIMInputStream(new RandomAccessFile(
+                    this.mFile, "r"));
+        } catch (final FileNotFoundException e) {
+            e.printStackTrace();
+        }
+    }
+
+    /**
+     * Lists the URL of every directory entry in the order of the URL pointer
+     * list.
+     *
+     * @return one URL per entry
+     * @throws IOException if the file cannot be read
+     */
+    public List<String> getURLListByURL() throws IOException {
+        final byte[] buffer = new byte[8];
+        final int articleCount = this.mFile.getArticleCount();
+        final List<String> urls = new ArrayList<>();
+
+        // Move to the spot where URL's are listed
+        this.mReader.seek(this.mFile.getUrlPtrPos());
+
+        for (int i = 0; i < articleCount; i++) {
+            // The position of the directory entry of URL i
+            final int entryPos = this.mReader.readEightLittleEndianBytesValue(buffer);
+
+            // Remember where the pointer list continues, visit the entry, return
+            this.mReader.mark();
+            urls.add(readUrlAt(entryPos, buffer));
+            this.mReader.reset();
+        }
+
+        return urls;
+    }
+
+    /**
+     * Lists the URL of every directory entry in the order of the title pointer
+     * list.
+     *
+     * @return one URL per entry
+     * @throws IOException if the file cannot be read
+     */
+    public List<String> getURLListByTitle() throws IOException {
+        final byte[] buffer = new byte[8];
+        final int articleCount = this.mFile.getArticleCount();
+        final long urlPtrPos = this.mFile.getUrlPtrPos();
+        final List<String> urls = new ArrayList<>();
+
+        // Move to the spot where the title pointers are listed
+        this.mReader.seek(this.mFile.getTitlePtrPos());
+
+        for (int i = 0; i < articleCount; i++) {
+            // Each title pointer is an index into the URL pointer list
+            final int articleNumber = this.mReader.readFourLittleEndianBytesValue(buffer);
+
+            // Remember where the title list continues
+            this.mReader.mark();
+
+            // 8L keeps the offset arithmetic in long
+            this.mReader.seek(urlPtrPos + 8L * articleNumber);
+            final int entryPos = this.mReader.readEightLittleEndianBytesValue(buffer);
+            urls.add(readUrlAt(entryPos, buffer));
+
+            // Return to the marked position
+            this.mReader.reset();
+        }
+
+        return urls;
+    }
+
+    /**
+     * Reads the URL of the directory entry that starts at {@code entryPos}. The
+     * URL follows 12 bytes of fixed fields in a redirect entry and 16 bytes in
+     * any other entry.
+     */
+    private String readUrlAt(final int entryPos, final byte[] buffer) throws IOException {
+        this.mReader.seek(entryPos);
+        final int mimeType = this.mReader.readTwoLittleEndianBytesValue(buffer);
+        final long urlOffset = (mimeType == REDIRECT_MIME_TYPE) ? 12L : 16L;
+        this.mReader.seek(entryPos + urlOffset);
+        return this.mReader.readString();
+    }
+
+    /**
+     * Looks up the directory entry with the given namespace and URL by binary
+     * search.
+     *
+     * <p>The search runs over the URL pointer list because the keys compared are
+     * namespace and URL; per the openZIM format description that is the list
+     * ordered by them, whereas the title pointer list is ordered by title.
+     *
+     * @param articleName URL of the entry, without namespace
+     * @param namespace   namespace character of the entry
+     * @return the entry, or {@code null} if there is none
+     * @throws IOException if the file cannot be read
+     */
+    // Gives the minimum required information needed for the given articleName
+    public DirectoryEntry getDirectoryInfo(String articleName, final char namespace)
+            throws IOException {
+
+        final String key = namespace + "/" + articleName;
+        final byte[] buffer = new byte[8];
+
+        // Inclusive bounds over entry indices; the last valid index is count - 1.
+        int low = 0;
+        int high = this.mFile.getArticleCount() - 1;
+
+        while (low <= high) {
+            final int mid = (low + high) >>> 1;
+            final DirectoryEntry entry = readDirectoryEntry(mid, buffer);
+            final int order = key.compareTo(entry.getNamespace() + "/" + entry.getUrl());
+            if (order < 0) {
+                high = mid - 1;
+            } else if (order > 0) {
+                low = mid + 1;
+            } else {
+                return entry;
+            }
+        }
+
+        return null;
+    }
+
+    /**
+     * Returns the content of the article with the given URL and namespace.
+     *
+     * @param articleName URL of the article, without namespace
+     * @param namespace   namespace character of the article
+     * @return the article bytes, or {@code null} if no entry is found, the entry
+     *         is not an {@link ArticleEntry} (redirects are not followed), or
+     *         the cluster uses a compression type other than 0, 1 or 4
+     * @throws IOException if the file cannot be read or ends unexpectedly
+     */
+    public ByteArrayOutputStream getArticleData(final String articleName, final char namespace) throws IOException {
+
+        // search in the cache first, if not found, then call getDirectoryInfo(articleName)
+
+        final DirectoryEntry mainEntry = getDirectoryInfo(articleName, namespace);
+        if (!(mainEntry instanceof ArticleEntry)) {
+            return null;
+        }
+        final ArticleEntry article = (ArticleEntry) mainEntry;
+        final byte[] buffer = new byte[8];
+
+        // Move to the cluster entry in the clusterPtrPos
+        this.mReader.seek(this.mFile.getClusterPtrPos() + 8L * article.getClusterNumber());
+
+        // Read the location of the cluster and move there
+        final int clusterPos = this.mReader.readEightLittleEndianBytesValue(buffer);
+        this.mReader.seek(clusterPos);
+
+        // Read the first byte, for compression information
+        final int compressionType = this.mReader.read();
+
+        final InputStream clusterData;
+        switch (compressionType) {
+            // Uncompressed data is read straight from the file
+            case 0:
+            case 1:
+                clusterData = this.mReader;
+                break;
+
+            // LZMA2 compressed data
+            case 4:
+                // The second argument is the decoder's memory limit (in KiB
+                // according to the XZ for Java documentation). The stream is
+                // deliberately left open: closing it would presumably also close
+                // mReader, which later calls still need.
+                clusterData = new SingleXZInputStream(this.mReader, 4194304);
+                break;
+
+            default:
+                return null;
+        }
+
+        return readBlob(clusterData, article.getBlobnumber());
+    }
+
+    /**
+     * Extracts blob {@code blobNumber} from cluster data positioned at its
+     * offset table. The table holds four-byte little-endian offsets, relative to
+     * the start of the table; the first offset therefore also gives the size of
+     * the table.
+     *
+     * @throws EOFException if the data ends before the blob is complete
+     */
+    private static ByteArrayOutputStream readBlob(final InputStream in, final int blobNumber)
+            throws IOException {
+
+        final byte[] word = new byte[4];
+
+        // The first four bytes are the offset of the zeroth blob
+        readFully(in, word);
+        final int firstOffset = Utilities.toFourLittleEndianInteger(word);
+
+        // The number of blobs
+        final int numberOfBlobs = firstOffset / 4;
+
+        // The blobNumber has to be lesser than the numberOfBlobs
+        assert blobNumber < numberOfBlobs;
+
+        // Start offset of the requested blob
+        final int offset1;
+        if (blobNumber == 0) {
+            // The first offset is what we read earlier
+            offset1 = firstOffset;
+        } else {
+            Utilities.skipFully(in, 4L * (blobNumber - 1));
+            readFully(in, word);
+            offset1 = Utilities.toFourLittleEndianInteger(word);
+        }
+
+        // The next table entry marks the end of the blob
+        readFully(in, word);
+        final int offset2 = Utilities.toFourLittleEndianInteger(word);
+
+        // 4 * (blobNumber + 2) bytes of the table have been consumed so far
+        Utilities.skipFully(in, offset1 - 4L * (blobNumber + 2));
+
+        final byte[] blob = new byte[offset2 - offset1];
+        readFully(in, blob);
+
+        final ByteArrayOutputStream baos = new ByteArrayOutputStream(blob.length);
+        baos.write(blob, 0, blob.length);
+        return baos;
+    }
+
+    /**
+     * Fills {@code target} completely. A single {@code read} call may deliver
+     * fewer bytes than asked for, in particular on a decompressing stream.
+     */
+    private static void readFully(final InputStream in, final byte[] target) throws IOException {
+        int filled = 0;
+        while (filled < target.length) {
+            final int n = in.read(target, filled, target.length - filled);
+            if (n < 0) {
+                throw new EOFException("unexpected end of cluster data");
+            }
+            filled += n;
+        }
+    }
+
+    /**
+     * Reads the directory entry referenced by the title pointer stored at the
+     * absolute file offset {@code position}.
+     *
+     * @param position file offset of a four-byte title pointer
+     * @return the entry; its URL list index is the value of that pointer
+     * @throws IOException if the file cannot be read
+     */
+    public DirectoryEntry getDirectoryInfoAtTitlePosition(final int position)
+            throws IOException {
+
+        final byte[] buffer = new byte[8];
+
+        // At the appropriate position in the titlePtrPos
+        this.mReader.seek(position);
+
+        // The title pointer is the index of the entry in the URL pointer list
+        final int urlListIndex = this.mReader.readFourLittleEndianBytesValue(buffer);
+
+        return readDirectoryEntry(urlListIndex, buffer);
+    }
+
+    /**
+     * Reads the directory entry with index {@code urlListIndex} in the URL
+     * pointer list.
+     */
+    private DirectoryEntry readDirectoryEntry(final int urlListIndex, final byte[] buffer)
+            throws IOException {
+
+        // Look up where the entry starts and go there
+        this.mReader.seek(this.mFile.getUrlPtrPos() + 8L * urlListIndex);
+        final int entryPos = this.mReader.readEightLittleEndianBytesValue(buffer);
+        this.mReader.seek(entryPos);
+
+        final int type = this.mReader.readTwoLittleEndianBytesValue(buffer);
+
+        // Ignore the parameter length
+        this.mReader.read();
+
+        final char namespace = (char) this.mReader.read();
+        final int revision = this.mReader.readFourLittleEndianBytesValue(buffer);
+
+        if (type == REDIRECT_MIME_TYPE) {
+            final int redirectIndex = this.mReader.readFourLittleEndianBytesValue(buffer);
+            final String url = this.mReader.readString();
+            final String title = titleOrUrl(this.mReader.readString(), url);
+            return new RedirectEntry(type, namespace, revision, redirectIndex,
+                    url, title, urlListIndex);
+        }
+
+        final int clusterNumber = this.mReader.readFourLittleEndianBytesValue(buffer);
+        final int blobNumber = this.mReader.readFourLittleEndianBytesValue(buffer);
+        final String url = this.mReader.readString();
+        final String title = titleOrUrl(this.mReader.readString(), url);
+
+        // Parameter data ignored
+
+        return new ArticleEntry(type, namespace, revision, clusterNumber,
+                blobNumber, url, title, urlListIndex);
+    }
+
+    /** An empty title means that the URL doubles as the title. */
+    private static String titleOrUrl(final String title, final String url) {
+        return title.isEmpty() ? url : title;
+    }
+
+    /** @return the file this reader was created for */
+    public ZIMFile getZIMFile() {
+        return this.mFile;
+    }
+}
diff --git a/source/org/openzim/ZIMTest.java b/source/org/openzim/ZIMTest.java
new file mode 100644
index 0000000000..6d8ed64fb9
--- /dev/null
+++ b/source/org/openzim/ZIMTest.java
@@ -0,0 +1,61 @@
+/*
+ * Copyright (C) 2011 Arunesh Mathur
+ *
+ * This file is a part of zimreader-java.
+ *
+ * zimreader-java is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License version 3.0 as
+ * published by the Free Software Foundation.
+ *
+ * zimreader-java is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with zimreader-java. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+package org.openzim;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+
+/**
+ * Command line check: prints one article of a ZIM file to standard output.
+ */
+public class ZIMTest {
+
+    /**
+     * @param args {@code args[0]} is the location of the ZIM file and
+     *             {@code args[1]} the name of the article to fetch from
+     *             namespace {@code 'A'}
+     */
+    public static void main(final String[] args) {
+        if (args.length != 2) {
+            System.out.println("Usage: java ZIMTest <zim-file> <article-name>");
+            System.exit(0);
+        }
+
+        // args[0] is the Zim File's location
+        final ZIMFile file = new ZIMFile(args[0]);
+
+        // Associate the Zim File with a Reader
+        final ZIMReader zReader = new ZIMReader(file);
+
+        try {
+            // args[1] is the name of the articles that is
+            // to be fetched
+            final ByteArrayOutputStream data = zReader.getArticleData(args[1], 'A');
+
+            // getArticleData returns null when it has nothing to hand back
+            if (data == null) {
+                System.err.println("No article data found for: " + args[1]);
+                return;
+            }
+            System.out.println(data.toString("utf-8"));
+        } catch (final IOException e) {
+            e.printStackTrace();
+        }
+    }
+}