forked from olivernn/lunr.js
-
Notifications
You must be signed in to change notification settings - Fork 10
/
index.html
255 lines (212 loc) · 9.94 KB
/
index.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
<!DOCTYPE html>
<head>
<title>lunr.js - A bit like Solr, but much smaller and not as bright</title>
<link rel="stylesheet" href="/styles.css" type="text/css">
<script type="text/javascript">
var _gaq = _gaq || [];
_gaq.push(['_setAccount', 'UA-25695442-4']);
_gaq.push(['_trackPageview']);
(function() {
var ga = document.createElement('script'); ga.type = 'text/javascript'; ga.async = true;
ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js';
var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s);
})();
</script>
</head>
<body>
<div class='wrap'>
<header>
<h1>lunr<span>.js</span></h1>
<h2>Simple full-text search in your browser</h2>
</header>
<nav>
<ul>
<li><a href="/docs">Docs</a></li>
<li><a href="/example">Examples</a></li>
<li><a href="http://github.com/olivernn/lunr.js">Code</a></li>
</ul>
</nav>
<section class="columns">
<article>
<header>
<h3>Get Started</h3>
</header>
<p>Set up an index for your notes:</p>
<pre>
var index = lunr(function () {
this.field('title', {boost: 10})
this.field('body')
this.ref('id')
})</pre>
<p>Add documents to your index</p>
<pre>
index.add({
id: 1,
title: 'Foo',
body: 'Foo foo foo!'
})
index.add({
id: 2,
title: 'Bar',
body: 'Bar bar bar!'
})</pre>
<p>Search your documents</p>
<pre>
index.search('foo')</pre>
</article>
<article>
<header>
<h3>About</h3>
</header>
<p>lunr.js is a simple full text search engine for your client side applications. It is designed to be small, yet full featured, enabling you to provide a great search experience without the need for external, server side, search services.</p>
<p>lunr.js has no external dependencies, although it does require a modern browser with ES5 support. In older browsers you can use an ES5 shim, such as <a href="http://augmentjs.com">augment.js</a>, to provide any missing JavaScript functionality.</p>
</article>
<article class="download">
<header>
<h3>Download</h3>
</header>
<ul>
<li><a href="https://raw.github.com/olivernn/lunr.js/master/lunr.js">lunr.js</a> - uncompressed</li>
<li><a href="https://raw.github.com/olivernn/lunr.js/master/lunr.min.js">lunr.min.js</a> - minified</li>
</ul>
</article>
</section>
<section>
<article>
<header>
<h3>Pipeline</h3>
</header>
<p>Every document and search query that enters lunr is passed through a text <a href="/docs#Pipeline">processing pipeline</a>. The pipeline is simply a stack of functions that perform some processing on the text. Pipeline functions act on the text one token at a time, and what they return is passed to the next function in the pipeline.</p>
<p>By default lunr adds a <a href="/docs#stopWordFilter">stop word filter</a> and <a href="/docs#stemmer">stemmer</a> to the pipeline. You can also add your own processors or remove the default ones depending on your requirements. The stemmer currently used is an English language stemmer, which could be replaced with a non-English language stemmer if required, or a <a href="http://en.wikipedia.org/wiki/Metaphone">Metaphoning</a> processor could be added.</p>
<pre>
var index = lunr(function () {
this.pipeline.add(function (token, tokenIndex, tokens) {
// text processing in here
})
this.pipeline.after(lunr.stopWordFilter, function (token, tokenIndex, tokens) {
// text processing in here
})
})
</pre>
<p>Functions in the pipeline are called with three arguments: the current token being processed; the index of that token in the array of tokens, and the whole list of tokens part of the document being processed. This enables simple unigram processing of tokens as well as more sophisticated n-gram processing.</p>
<p>The function should return the processed version of the text, which will in turn be passed to the next function in the pipeline. Returning <code>undefined</code> will prevent any further processing of the token, and that token will not make it to the index.</p>
</article>
</section>
<section class="columns">
<article>
<header>
<h3>Tokenization</h3>
</header>
<p>Tokenization is how lunr converts documents and searches into individual tokens, ready to be run through the text processing pipeline and entered or looked up in the index.</p>
<p>The default tokenizer included with lunr is designed to handle general english text well, although application, or language specific tokenizers can be used instead.</p>
</article>
<article>
<header>
<h3>Stemming</h3>
</header>
<p>Stemming increases the recall of the search index by reducing related words down to their stem, so that non-exact search terms still match relevant documents. For example 'search', 'searching' and 'searched' all get reduced to the stem 'search'.</p>
<p>lunr automatically includes a stemmer based on <a href="http://tartarus.org/martin/PorterStemmer/">Martin Porter's</a> algorithms.</p>
</article>
<article>
<header>
<h3>Stop words</h3>
</header>
<p>Stop words are words that are very common and are not useful in differentiating between documents. These are automatically removed by lunr. This helps to reduce the size of the index and improve search speed and accuracy.</p>
<p>The default stop word filter contains a large list of very common words in English. For best results a corpus specific stop word filter can also be added to the pipeline. The search algorithm already penalises more common words, but preventing them from entering the index at all can be very beneficial for both space and speed performance.</p>
</article>
</section>
<footer>
<ul>
<li>Code by <a href="http://twitter.com/olivernn">Oliver Nightingale</a></li>
<li><a href="http://github.com/olivernn/lunr.js">Code</a></li>
<li><a href="/docs">Documentation</a></li>
<li><a href="http://github.com/olivernn/lunr.js/issues">Issues</a></li>
</ul>
</footer>
</div>
<script>
(function (hijs) {
//
// hijs - JavaScript Syntax Highlighter
//
// Copyright (c) 2010 Alexis Sellier
//
// All elements which match this will be syntax highlighted.
var selector = hijs || 'pre';
var keywords = ('var function if else for while break switch case do new null in with void '
+'continue delete return this true false throw catch typeof with instanceof').split(' '),
special = ('eval window document undefined NaN Infinity parseInt parseFloat '
+'encodeURI decodeURI encodeURIComponent decodeURIComponent').split(' ');
// Syntax definition
// The key becomes the class name of the <span>
// around the matched block of code.
var syntax = [
['comment', /(\/\*(?:[^*\n]|\*+[^\/*])*\*+\/)/g],
['comment', /(\/\/[^\n]*)/g],
['string' , /("(?:(?!")[^\\\n]|\\.)*"|'(?:(?!')[^\\\n]|\\.)*')/g],
['regexp' , /(\/.+\/[mgi]*)(?!\s*\w)/g],
['class' , /\b([A-Z][a-zA-Z]+)\b/g],
['number' , /\b([0-9]+(?:\.[0-9]+)?)\b/g],
['keyword', new(RegExp)('\\b(' + keywords.join('|') + ')\\b', 'g')],
['special', new(RegExp)('\\b(' + special.join('|') + ')\\b', 'g')]
];
var nodes, table = {};
if (/^[a-z]+$/.test(selector)) {
nodes = document.getElementsByTagName(selector);
} else if (/^\.[\w-]+$/.test(selector)) {
nodes = document.getElementsByClassName(selector.slice(1));
} else if (document.querySelectorAll) {
nodes = document.querySelectorAll(selector);
} else {
nodes = [];
}
for (var i = 0, children; i < nodes.length; i++) {
children = nodes[i].childNodes;
for (var j = 0, str; j < children.length; j++) {
code = children[j];
if (code.length >= 0) { // It's a text node
// Don't highlight command-line snippets
if (! /^\$/.test(code.nodeValue.trim())) {
syntax.forEach(function (s) {
var k = s[0], v = s[1];
code.nodeValue = code.nodeValue.replace(v, function (_, m) {
return '\u00ab' + encode(k) + '\u00b7'
+ encode(m) +
'\u00b7' + encode(k) + '\u00bb';
});
});
}
}
}
}
for (var i = 0; i < nodes.length; i++) {
nodes[i].innerHTML =
nodes[i].innerHTML.replace(/\u00ab(.+?)\u00b7(.+?)\u00b7\1\u00bb/g, function (_, name, value) {
value = value.replace(/\u00ab[^\u00b7]+\u00b7/g, '').replace(/\u00b7[^\u00bb]+\u00bb/g, '');
return '<span class="' + decode(name) + '">' + escape(decode(value)) + '</span>';
});
}
function escape(str) {
return str.replace(/</g, '<').replace(/>/g, '>');
}
// Encode ASCII characters to, and from Braille
function encode (str, encoded) {
table[encoded = str.split('').map(function (s) {
if (s.charCodeAt(0) > 127) { return s }
return String.fromCharCode(s.charCodeAt(0) + 0x2800);
}).join('')] = str;
return encoded;
}
function decode (str) {
if (str in table) {
return table[str];
} else {
return str.trim().split('').map(function (s) {
if (s.charCodeAt(0) - 0x2800 > 127) { return s }
return String.fromCharCode(s.charCodeAt(0) - 0x2800);
}).join('');
}
}
})(window.hijs);
</script>
</body>