-
-
Notifications
You must be signed in to change notification settings - Fork 429
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Integrated the source code of an openzim file format reader. These are
the raw format reader files with no integration in YaCy yet, which will maybe follow as a next step. The zim file format is documented in https://openzim.org and the reader code was taken from the archived, non-maintained repository at https://github.com/openzim/zimreader-java
- Loading branch information
Showing
8 changed files
with
1,021 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
/* | ||
* Copyright (C) 2011 Arunesh Mathur | ||
* | ||
* This file is a part of zimreader-java. | ||
* | ||
* zimreader-java is free software: you can redistribute it and/or modify | ||
* it under the terms of the GNU Lesser General Public License version 3.0 as | ||
* published by the Free Software Foundation. | ||
* | ||
* zimreader-java is distributed in the hope that it will be useful, | ||
* but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
* GNU General Public License for more details. | ||
* | ||
* You should have received a copy of the GNU Lesser General Public License | ||
* along with zimreader-java. If not, see <http://www.gnu.org/licenses/>. | ||
*/ | ||
|
||
|
||
package org.openzim; | ||
|
||
public class ArticleEntry extends DirectoryEntry { | ||
|
||
int clusterNumber; | ||
|
||
int blobnumber; | ||
|
||
public ArticleEntry(final int mimeType, final char namespace, final int revision, | ||
final int clusterNumber, final int blobNumber, final String url, final String title, | ||
final int urlListindex) { | ||
|
||
super(mimeType, namespace, revision, url, title, urlListindex); | ||
|
||
this.clusterNumber = clusterNumber; | ||
this.blobnumber = blobNumber; | ||
} | ||
|
||
public int getClusterNumber() { | ||
return this.clusterNumber; | ||
} | ||
|
||
public int getBlobnumber() { | ||
return this.blobnumber; | ||
} | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,69 @@ | ||
/* | ||
* Copyright (C) 2011 Arunesh Mathur | ||
* | ||
* This file is a part of zimreader-java. | ||
* | ||
* zimreader-java is free software: you can redistribute it and/or modify | ||
* it under the terms of the GNU Lesser General Public License version 3.0 as | ||
* published by the Free Software Foundation. | ||
* | ||
* zimreader-java is distributed in the hope that it will be useful, | ||
* but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
* GNU General Public License for more details. | ||
* | ||
* You should have received a copy of the GNU Lesser General Public License | ||
* along with zimreader-java. If not, see <http://www.gnu.org/licenses/>. | ||
*/ | ||
|
||
package org.openzim; | ||
|
||
/**
 * Base class for directory entries. It stores the six values handed to its
 * constructor and exposes each of them through a getter.
 */
public abstract class DirectoryEntry {

    /** MIME type value given to the constructor. */
    int mimeType;

    /** Namespace character given to the constructor. */
    char namespace;

    /** Revision value given to the constructor. */
    int revision;

    /** URL string given to the constructor. */
    String url;

    /** Title string given to the constructor. */
    String title;

    /** Index value given to the constructor (its {@code index} argument). */
    int urlListindex;

    /**
     * Stores all arguments unchanged; no validation is performed.
     *
     * @param mimeType  value returned by {@link #getMimeType()}
     * @param namespace value returned by {@link #getNamespace()}
     * @param revision  value returned by {@link #getRevision()}
     * @param url       value returned by {@link #getUrl()}
     * @param title     value returned by {@link #getTitle()}
     * @param index     value returned by {@link #getUrlListindex()}
     */
    public DirectoryEntry(
            final int mimeType,
            final char namespace,
            final int revision,
            final String url,
            final String title,
            final int index) {
        this.urlListindex = index;
        this.title = title;
        this.url = url;
        this.revision = revision;
        this.namespace = namespace;
        this.mimeType = mimeType;
    }

    /** @return the MIME type value */
    public int getMimeType() {
        return mimeType;
    }

    /** @return the namespace character */
    public char getNamespace() {
        return namespace;
    }

    /** @return the revision value */
    public int getRevision() {
        return revision;
    }

    /** @return the URL string */
    public String getUrl() {
        return url;
    }

    /** @return the title string */
    public String getTitle() {
        return title;
    }

    /** @return the index value passed to the constructor */
    public int getUrlListindex() {
        return urlListindex;
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,135 @@ | ||
/* | ||
* Copyright (C) 2011 Arunesh Mathur | ||
* | ||
* This file is a part of zimreader-java. | ||
* | ||
* zimreader-java is free software: you can redistribute it and/or modify | ||
* it under the terms of the GNU Lesser General Public License version 3.0 as | ||
* published by the Free Software Foundation. | ||
* | ||
* zimreader-java is distributed in the hope that it will be useful, | ||
* but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
* GNU General Public License for more details. | ||
* | ||
* You should have received a copy of the GNU Lesser General Public License | ||
* along with zimreader-java. If not, see <http://www.gnu.org/licenses/>. | ||
*/ | ||
|
||
package org.openzim; | ||
|
||
import java.io.IOException; | ||
import java.io.InputStream; | ||
import java.io.RandomAccessFile; | ||
|
||
/** | ||
* This is an implementation of RandomAccessFile to ensure that it is an | ||
* InputStream as well, specifically designed for reading a ZIM file. Ad-Hoc | ||
* implementation, can be improved. | ||
* | ||
* @author Arunesh Mathur <aruneshmathur1990 at gmail.com> | ||
*/ | ||
|
||
public class RandomAcessFileZIMInputStream extends InputStream { | ||
|
||
private final RandomAccessFile mRAFReader; | ||
|
||
private long mMarked = -1; | ||
|
||
public RandomAcessFileZIMInputStream(final RandomAccessFile reader) { | ||
this.mRAFReader = reader; | ||
} | ||
|
||
// TODO: Remove the parameter buffer | ||
public int readTwoLittleEndianBytesValue(final byte[] buffer) throws IOException { | ||
if (buffer.length < 2) { | ||
throw new OutOfMemoryError("buffer too small"); | ||
} else { | ||
this.mRAFReader.read(buffer, 0, 2); | ||
return Utilities.toTwoLittleEndianInteger(buffer); | ||
} | ||
} | ||
|
||
// TODO: Remove the parameter buffer | ||
public int readFourLittleEndianBytesValue(final byte[] buffer) throws IOException { | ||
if (buffer.length < 4) { | ||
throw new OutOfMemoryError("buffer too small"); | ||
} else { | ||
this.mRAFReader.read(buffer, 0, 4); | ||
return Utilities.toFourLittleEndianInteger(buffer); | ||
} | ||
} | ||
|
||
// TODO: Remove the parameter buffer | ||
public int readEightLittleEndianBytesValue(final byte[] buffer) | ||
throws IOException { | ||
if (buffer.length < 8) { | ||
throw new OutOfMemoryError("buffer too small"); | ||
} else { | ||
this.mRAFReader.read(buffer, 0, 8); | ||
return Utilities.toEightLittleEndianInteger(buffer); | ||
} | ||
} | ||
|
||
// TODO: Remove the parameter buffer | ||
public int readSixteenLittleEndianBytesValue(final byte[] buffer) | ||
throws IOException { | ||
if (buffer.length < 16) { | ||
throw new OutOfMemoryError("buffer too small"); | ||
} else { | ||
this.mRAFReader.read(buffer, 0, 16); | ||
return Utilities.toSixteenLittleEndianInteger(buffer); | ||
} | ||
} | ||
|
||
// Reads characters from the current position into a String and stops when a | ||
// '\0' is encountered | ||
public String readString() throws IOException { | ||
final StringBuffer sb = new StringBuffer(); | ||
/* | ||
* int i; byte[] buffer = new byte[100]; while (true) { | ||
* mRAFReader.read(buffer); for (i = 0; i < buffer.length; i++) { if | ||
* (buffer[i] == '\0') { break; } sb.append((char) buffer[i]); } if (i | ||
* != buffer.length) break; } return sb.toString(); | ||
*/ | ||
int b; | ||
b = this.mRAFReader.read(); | ||
while (b != '\0') { | ||
sb.append((char) b); | ||
b = this.mRAFReader.read(); | ||
} | ||
return sb.toString(); | ||
|
||
} | ||
|
||
@Override | ||
public int read() throws IOException { | ||
return this.mRAFReader.read(); | ||
} | ||
|
||
public RandomAccessFile getRandomAccessFile() { | ||
return this.mRAFReader; | ||
} | ||
|
||
public void seek(final long pos) throws IOException { | ||
this.mRAFReader.seek(pos); | ||
} | ||
|
||
public long getFilePointer() throws IOException { | ||
return this.mRAFReader.getFilePointer(); | ||
} | ||
|
||
public void mark() throws IOException { | ||
this.mMarked = this.mRAFReader.getFilePointer(); | ||
} | ||
|
||
@Override | ||
public void reset() throws IOException { | ||
if (this.mMarked == -1) { | ||
return; | ||
} else { | ||
this.mRAFReader.seek(this.mMarked); | ||
this.mMarked = -1; | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
/* | ||
* Copyright (C) 2011 Arunesh Mathur | ||
* | ||
* This file is a part of zimreader-java. | ||
* | ||
* zimreader-java is free software: you can redistribute it and/or modify | ||
* it under the terms of the GNU Lesser General Public License version 3.0 as | ||
* published by the Free Software Foundation. | ||
* | ||
* zimreader-java is distributed in the hope that it will be useful, | ||
* but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
* GNU General Public License for more details. | ||
* | ||
* You should have received a copy of the GNU Lesser General Public License | ||
* along with zimreader-java. If not, see <http://www.gnu.org/licenses/>. | ||
*/ | ||
|
||
package org.openzim; | ||
|
||
public class RedirectEntry extends DirectoryEntry { | ||
|
||
int redirectIndex; | ||
|
||
public RedirectEntry(final int mimeType, final char namespace, final int revision, | ||
final int redirectIndex, final String url, final String title, final int urlListindex) { | ||
|
||
super(mimeType, namespace, revision, url, title, urlListindex); | ||
|
||
this.redirectIndex = redirectIndex; | ||
} | ||
|
||
public int getRedirectIndex() { | ||
return this.redirectIndex; | ||
} | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,84 @@ | ||
/* | ||
* Copyright (C) 2011 Arunesh Mathur | ||
* | ||
* This file is a part of zimreader-java. | ||
* | ||
* zimreader-java is free software: you can redistribute it and/or modify | ||
* it under the terms of the GNU Lesser General Public License version 3.0 as | ||
* published by the Free Software Foundation. | ||
* | ||
* zimreader-java is distributed in the hope that it will be useful, | ||
* but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
* GNU General Public License for more details. | ||
* | ||
* You should have received a copy of the GNU Lesser General Public License | ||
* along with zimreader-java. If not, see <http://www.gnu.org/licenses/>. | ||
*/ | ||
|
||
|
||
package org.openzim; | ||
|
||
import java.io.IOException; | ||
import java.io.InputStream; | ||
|
||
/**
 * Static helpers for decoding little-endian numbers from byte arrays and for
 * skipping bytes in an {@link InputStream}.
 */
public class Utilities {

    /**
     * Placeholder, not implemented.
     *
     * @return always -1
     */
    // TODO: Write a binary search algorithm
    public static int binarySearch() {
        return -1;
    }

    /**
     * Decodes the first 2 bytes of {@code buffer} as an unsigned little-endian number.
     *
     * @param buffer at least 2 bytes
     * @return a value in 0..65535
     * @throws OutOfMemoryError if the buffer is shorter than 2 bytes
     * @throws IOException never thrown by this implementation; declared for caller compatibility
     */
    public static int toTwoLittleEndianInteger(final byte[] buffer) throws IOException {
        requireLength(buffer, 2);
        return (int) littleEndian(buffer, 2);
    }

    /**
     * Decodes the first 4 bytes of {@code buffer} as a little-endian 32-bit number.
     *
     * @param buffer at least 4 bytes
     * @return the 32-bit value (negative when the highest bit is set)
     * @throws OutOfMemoryError if the buffer is shorter than 4 bytes
     * @throws IOException never thrown by this implementation; declared for caller compatibility
     */
    public static int toFourLittleEndianInteger(final byte[] buffer) throws IOException {
        requireLength(buffer, 4);
        return (int) littleEndian(buffer, 4);
    }

    /**
     * Decodes the first 8 bytes of {@code buffer} as a little-endian number and
     * narrows it to {@code int}. Only the low 32 bits fit into the result; use
     * {@link #toEightLittleEndianLong(byte[])} when the full value is needed.
     *
     * @param buffer at least 8 bytes
     * @return the low 32 bits of the 64-bit value
     * @throws OutOfMemoryError if the buffer is shorter than 8 bytes
     * @throws IOException never thrown by this implementation; declared for caller compatibility
     */
    public static int toEightLittleEndianInteger(final byte[] buffer) throws IOException {
        return (int) toEightLittleEndianLong(buffer);
    }

    /**
     * Decodes the first 8 bytes of {@code buffer} as a little-endian 64-bit number.
     *
     * @param buffer at least 8 bytes
     * @return the full 64-bit value
     * @throws OutOfMemoryError if the buffer is shorter than 8 bytes
     */
    public static long toEightLittleEndianLong(final byte[] buffer) {
        requireLength(buffer, 8);
        return littleEndian(buffer, 8);
    }

    /**
     * Decodes a 16-byte little-endian number and narrows it to {@code int}.
     * A 128-bit value cannot be represented; only the low 32 bits, i.e. the
     * first four bytes, contribute to the result.
     *
     * @param buffer at least 16 bytes
     * @return the low 32 bits of the 128-bit value
     * @throws OutOfMemoryError if the buffer is shorter than 16 bytes
     * @throws IOException never thrown by this implementation; declared for caller compatibility
     */
    public static int toSixteenLittleEndianInteger(final byte[] buffer) throws IOException {
        requireLength(buffer, 16);
        return (int) littleEndian(buffer, 4);
    }

    /**
     * Skips exactly {@code bytes} bytes of {@code stream}. Does nothing when
     * {@code bytes} is zero or negative.
     *
     * @param stream the stream to advance
     * @param bytes  number of bytes to skip
     * @throws java.io.EOFException if the stream ends before all bytes were skipped
     * @throws IOException          if the stream fails
     */
    public static void skipFully(final InputStream stream, final long bytes) throws IOException {
        long remaining = bytes;
        while (remaining > 0) {
            final long skipped = stream.skip(remaining);
            if (skipped > 0) {
                remaining -= skipped;
            } else if (stream.read() >= 0) {
                // skip() may return 0 without being at the end; a successful
                // single-byte read makes progress and proves there is more data.
                remaining--;
            } else {
                throw new java.io.EOFException(
                        "stream ended with " + remaining + " of " + bytes + " bytes left to skip");
            }
        }
    }

    /** Rejects buffers that are shorter than {@code count} bytes. */
    private static void requireLength(final byte[] buffer, final int count) {
        if (buffer.length < count) {
            // Error type kept as-is so that existing callers observe the same failure.
            throw new OutOfMemoryError("buffer too small");
        }
    }

    /**
     * Combines the first {@code count} (at most 8) bytes of {@code buffer},
     * least significant byte first. Accumulating in a {@code long} matters:
     * shifting an {@code int} by 32 or more only uses the low five bits of the
     * shift distance and would fold high bytes onto the low ones.
     */
    private static long littleEndian(final byte[] buffer, final int count) {
        long value = 0L;
        for (int i = count - 1; i >= 0; i--) {
            value = (value << 8) | (buffer[i] & 0xFFL);
        }
        return value;
    }
}
Oops, something went wrong.