Le blog de Victor Héry https://blog.victor-hery.com
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

3569 lines
105 KiB

  1. /**
  2. * lunr - http://lunrjs.com - A bit like Solr, but much smaller and not as bright - 2.3.8
  3. * Copyright (C) 2019 Oliver Nightingale
  4. * @license MIT
  5. */
  6. (function() {
  7. /**
  8. * A convenience function for configuring and constructing
  9. * a new lunr Index.
  10. *
  11. * A lunr.Builder instance is created and the pipeline setup
  12. * with a trimmer, stop word filter and stemmer.
  13. *
  14. * This builder object is yielded to the configuration function
  15. * that is passed as a parameter, allowing the list of fields
  16. * and other builder parameters to be customised.
  17. *
  18. * All documents _must_ be added within the passed config function.
  19. *
  20. * @example
  21. * var idx = lunr(function () {
  22. * this.field('title')
  23. * this.field('body')
  24. * this.ref('id')
  25. *
  26. * documents.forEach(function (doc) {
  27. * this.add(doc)
  28. * }, this)
  29. * })
  30. *
  31. * @see {@link lunr.Builder}
  32. * @see {@link lunr.Pipeline}
  33. * @see {@link lunr.trimmer}
  34. * @see {@link lunr.stopWordFilter}
  35. * @see {@link lunr.stemmer}
  36. * @namespace {function} lunr
  37. */
  38. var lunr = function(config) {
  39. var builder = new lunr.Builder();
  40. builder.pipeline.add(lunr.trimmer, lunr.stopWordFilter, lunr.stemmer);
  41. builder.searchPipeline.add(lunr.stemmer);
  42. config.call(builder, builder);
  43. return builder.build();
  44. };
  45. lunr.version = "2.3.8";
  46. /*!
  47. * lunr.utils
  48. * Copyright (C) 2019 Oliver Nightingale
  49. */
  50. /**
  51. * A namespace containing utils for the rest of the lunr library
  52. * @namespace lunr.utils
  53. */
  54. lunr.utils = {};
  55. /**
  56. * Print a warning message to the console.
  57. *
  58. * @param {String} message The message to be printed.
  59. * @memberOf lunr.utils
  60. * @function
  61. */
  62. lunr.utils.warn = (function(global) {
  63. /* eslint-disable no-console */
  64. return function(message) {
  65. if (global.console && console.warn) {
  66. console.warn(message);
  67. }
  68. };
  69. /* eslint-enable no-console */
  70. })(this);
  71. /**
  72. * Convert an object to a string.
  73. *
  74. * In the case of `null` and `undefined` the function returns
  75. * the empty string, in all other cases the result of calling
  76. * `toString` on the passed object is returned.
  77. *
  78. * @param {Any} obj The object to convert to a string.
  79. * @return {String} string representation of the passed object.
  80. * @memberOf lunr.utils
  81. */
  82. lunr.utils.asString = function(obj) {
  83. if (obj === void 0 || obj === null) {
  84. return "";
  85. } else {
  86. return obj.toString();
  87. }
  88. };
  89. /**
  90. * Clones an object.
  91. *
  92. * Will create a copy of an existing object such that any mutations
  93. * on the copy cannot affect the original.
  94. *
  95. * Only shallow objects are supported, passing a nested object to this
  96. * function will cause a TypeError.
  97. *
  98. * Objects with primitives, and arrays of primitives are supported.
  99. *
  100. * @param {Object} obj The object to clone.
  101. * @return {Object} a clone of the passed object.
  102. * @throws {TypeError} when a nested object is passed.
  103. * @memberOf Utils
  104. */
  105. lunr.utils.clone = function(obj) {
  106. if (obj === null || obj === undefined) {
  107. return obj;
  108. }
  109. var clone = Object.create(null),
  110. keys = Object.keys(obj);
  111. for (var i = 0; i < keys.length; i++) {
  112. var key = keys[i],
  113. val = obj[key];
  114. if (Array.isArray(val)) {
  115. clone[key] = val.slice();
  116. continue;
  117. }
  118. if (
  119. typeof val === "string" ||
  120. typeof val === "number" ||
  121. typeof val === "boolean"
  122. ) {
  123. clone[key] = val;
  124. continue;
  125. }
  126. throw new TypeError(
  127. "clone is not deep and does not support nested objects"
  128. );
  129. }
  130. return clone;
  131. };
  132. lunr.FieldRef = function(docRef, fieldName, stringValue) {
  133. this.docRef = docRef;
  134. this.fieldName = fieldName;
  135. this._stringValue = stringValue;
  136. };
  137. lunr.FieldRef.joiner = "/";
  138. lunr.FieldRef.fromString = function(s) {
  139. var n = s.indexOf(lunr.FieldRef.joiner);
  140. if (n === -1) {
  141. throw "malformed field ref string";
  142. }
  143. var fieldRef = s.slice(0, n),
  144. docRef = s.slice(n + 1);
  145. return new lunr.FieldRef(docRef, fieldRef, s);
  146. };
  147. lunr.FieldRef.prototype.toString = function() {
  148. if (this._stringValue == undefined) {
  149. this._stringValue = this.fieldName + lunr.FieldRef.joiner + this.docRef;
  150. }
  151. return this._stringValue;
  152. };
  153. /*!
  154. * lunr.Set
  155. * Copyright (C) 2019 Oliver Nightingale
  156. */
  157. /**
  158. * A lunr set.
  159. *
  160. * @constructor
  161. */
  162. lunr.Set = function(elements) {
  163. this.elements = Object.create(null);
  164. if (elements) {
  165. this.length = elements.length;
  166. for (var i = 0; i < this.length; i++) {
  167. this.elements[elements[i]] = true;
  168. }
  169. } else {
  170. this.length = 0;
  171. }
  172. };
  173. /**
  174. * A complete set that contains all elements.
  175. *
  176. * @static
  177. * @readonly
  178. * @type {lunr.Set}
  179. */
  180. lunr.Set.complete = {
  181. intersect: function(other) {
  182. return other;
  183. },
  184. union: function(other) {
  185. return other;
  186. },
  187. contains: function() {
  188. return true;
  189. }
  190. };
  191. /**
  192. * An empty set that contains no elements.
  193. *
  194. * @static
  195. * @readonly
  196. * @type {lunr.Set}
  197. */
  198. lunr.Set.empty = {
  199. intersect: function() {
  200. return this;
  201. },
  202. union: function(other) {
  203. return other;
  204. },
  205. contains: function() {
  206. return false;
  207. }
  208. };
  209. /**
  210. * Returns true if this set contains the specified object.
  211. *
  212. * @param {object} object - Object whose presence in this set is to be tested.
  213. * @returns {boolean} - True if this set contains the specified object.
  214. */
  215. lunr.Set.prototype.contains = function(object) {
  216. return !!this.elements[object];
  217. };
  218. /**
  219. * Returns a new set containing only the elements that are present in both
  220. * this set and the specified set.
  221. *
  222. * @param {lunr.Set} other - set to intersect with this set.
  223. * @returns {lunr.Set} a new set that is the intersection of this and the specified set.
  224. */
  225. lunr.Set.prototype.intersect = function(other) {
  226. var a,
  227. b,
  228. elements,
  229. intersection = [];
  230. if (other === lunr.Set.complete) {
  231. return this;
  232. }
  233. if (other === lunr.Set.empty) {
  234. return other;
  235. }
  236. if (this.length < other.length) {
  237. a = this;
  238. b = other;
  239. } else {
  240. a = other;
  241. b = this;
  242. }
  243. elements = Object.keys(a.elements);
  244. for (var i = 0; i < elements.length; i++) {
  245. var element = elements[i];
  246. if (element in b.elements) {
  247. intersection.push(element);
  248. }
  249. }
  250. return new lunr.Set(intersection);
  251. };
  252. /**
  253. * Returns a new set combining the elements of this and the specified set.
  254. *
  255. * @param {lunr.Set} other - set to union with this set.
  256. * @return {lunr.Set} a new set that is the union of this and the specified set.
  257. */
  258. lunr.Set.prototype.union = function(other) {
  259. if (other === lunr.Set.complete) {
  260. return lunr.Set.complete;
  261. }
  262. if (other === lunr.Set.empty) {
  263. return this;
  264. }
  265. return new lunr.Set(
  266. Object.keys(this.elements).concat(Object.keys(other.elements))
  267. );
  268. };
  269. /**
  270. * A function to calculate the inverse document frequency for
  271. * a posting. This is shared between the builder and the index
  272. *
  273. * @private
  274. * @param {object} posting - The posting for a given term
  275. * @param {number} documentCount - The total number of documents.
  276. */
  277. lunr.idf = function(posting, documentCount) {
  278. var documentsWithTerm = 0;
  279. for (var fieldName in posting) {
  280. if (fieldName == "_index") continue; // Ignore the term index, its not a field
  281. documentsWithTerm += Object.keys(posting[fieldName]).length;
  282. }
  283. var x =
  284. (documentCount - documentsWithTerm + 0.5) / (documentsWithTerm + 0.5);
  285. return Math.log(1 + Math.abs(x));
  286. };
  287. /**
  288. * A token wraps a string representation of a token
  289. * as it is passed through the text processing pipeline.
  290. *
  291. * @constructor
  292. * @param {string} [str=''] - The string token being wrapped.
  293. * @param {object} [metadata={}] - Metadata associated with this token.
  294. */
  295. lunr.Token = function(str, metadata) {
  296. this.str = str || "";
  297. this.metadata = metadata || {};
  298. };
  299. /**
  300. * Returns the token string that is being wrapped by this object.
  301. *
  302. * @returns {string}
  303. */
  304. lunr.Token.prototype.toString = function() {
  305. return this.str;
  306. };
  307. /**
  308. * A token update function is used when updating or optionally
  309. * when cloning a token.
  310. *
  311. * @callback lunr.Token~updateFunction
  312. * @param {string} str - The string representation of the token.
  313. * @param {Object} metadata - All metadata associated with this token.
  314. */
  315. /**
  316. * Applies the given function to the wrapped string token.
  317. *
  318. * @example
  319. * token.update(function (str, metadata) {
  320. * return str.toUpperCase()
  321. * })
  322. *
  323. * @param {lunr.Token~updateFunction} fn - A function to apply to the token string.
  324. * @returns {lunr.Token}
  325. */
  326. lunr.Token.prototype.update = function(fn) {
  327. this.str = fn(this.str, this.metadata);
  328. return this;
  329. };
  330. /**
  331. * Creates a clone of this token. Optionally a function can be
  332. * applied to the cloned token.
  333. *
  334. * @param {lunr.Token~updateFunction} [fn] - An optional function to apply to the cloned token.
  335. * @returns {lunr.Token}
  336. */
  337. lunr.Token.prototype.clone = function(fn) {
  338. fn =
  339. fn ||
  340. function(s) {
  341. return s;
  342. };
  343. return new lunr.Token(fn(this.str, this.metadata), this.metadata);
  344. };
  345. /*!
  346. * lunr.tokenizer
  347. * Copyright (C) 2019 Oliver Nightingale
  348. */
  349. /**
  350. * A function for splitting a string into tokens ready to be inserted into
  351. * the search index. Uses `lunr.tokenizer.separator` to split strings, change
  352. * the value of this property to change how strings are split into tokens.
  353. *
  354. * This tokenizer will convert its parameter to a string by calling `toString` and
  355. * then will split this string on the character in `lunr.tokenizer.separator`.
  356. * Arrays will have their elements converted to strings and wrapped in a lunr.Token.
  357. *
  358. * Optional metadata can be passed to the tokenizer, this metadata will be cloned and
  359. * added as metadata to every token that is created from the object to be tokenized.
  360. *
  361. * @static
  362. * @param {?(string|object|object[])} obj - The object to convert into tokens
  363. * @param {?object} metadata - Optional metadata to associate with every token
  364. * @returns {lunr.Token[]}
  365. * @see {@link lunr.Pipeline}
  366. */
  367. lunr.tokenizer = function(obj, metadata) {
  368. if (obj == null || obj == undefined) {
  369. return [];
  370. }
  371. if (Array.isArray(obj)) {
  372. return obj.map(function(t) {
  373. return new lunr.Token(
  374. lunr.utils.asString(t).toLowerCase(),
  375. lunr.utils.clone(metadata)
  376. );
  377. });
  378. }
  379. var str = obj.toString().toLowerCase(),
  380. len = str.length,
  381. tokens = [];
  382. for (var sliceEnd = 0, sliceStart = 0; sliceEnd <= len; sliceEnd++) {
  383. var char = str.charAt(sliceEnd),
  384. sliceLength = sliceEnd - sliceStart;
  385. if (char.match(lunr.tokenizer.separator) || sliceEnd == len) {
  386. if (sliceLength > 0) {
  387. var tokenMetadata = lunr.utils.clone(metadata) || {};
  388. tokenMetadata["position"] = [sliceStart, sliceLength];
  389. tokenMetadata["index"] = tokens.length;
  390. tokens.push(
  391. new lunr.Token(str.slice(sliceStart, sliceEnd), tokenMetadata)
  392. );
  393. }
  394. sliceStart = sliceEnd + 1;
  395. }
  396. }
  397. return tokens;
  398. };
  399. /**
  400. * The separator used to split a string into tokens. Override this property to change the behaviour of
  401. * `lunr.tokenizer` behaviour when tokenizing strings. By default this splits on whitespace and hyphens.
  402. *
  403. * @static
  404. * @see lunr.tokenizer
  405. */
  406. lunr.tokenizer.separator = /[\s\-]+/;
  407. /*!
  408. * lunr.Pipeline
  409. * Copyright (C) 2019 Oliver Nightingale
  410. */
  411. /**
  412. * lunr.Pipelines maintain an ordered list of functions to be applied to all
  413. * tokens in documents entering the search index and queries being ran against
  414. * the index.
  415. *
  416. * An instance of lunr.Index created with the lunr shortcut will contain a
  417. * pipeline with a stop word filter and an English language stemmer. Extra
  418. * functions can be added before or after either of these functions or these
  419. * default functions can be removed.
  420. *
  421. * When run the pipeline will call each function in turn, passing a token, the
  422. * index of that token in the original list of all tokens and finally a list of
  423. * all the original tokens.
  424. *
  425. * The output of functions in the pipeline will be passed to the next function
  426. * in the pipeline. To exclude a token from entering the index the function
  427. * should return undefined, the rest of the pipeline will not be called with
  428. * this token.
  429. *
  430. * For serialisation of pipelines to work, all functions used in an instance of
  431. * a pipeline should be registered with lunr.Pipeline. Registered functions can
  432. * then be loaded. If trying to load a serialised pipeline that uses functions
  433. * that are not registered an error will be thrown.
  434. *
  435. * If not planning on serialising the pipeline then registering pipeline functions
  436. * is not necessary.
  437. *
  438. * @constructor
  439. */
  440. lunr.Pipeline = function() {
  441. this._stack = [];
  442. };
  443. lunr.Pipeline.registeredFunctions = Object.create(null);
  444. /**
  445. * A pipeline function maps lunr.Token to lunr.Token. A lunr.Token contains the token
  446. * string as well as all known metadata. A pipeline function can mutate the token string
  447. * or mutate (or add) metadata for a given token.
  448. *
  449. * A pipeline function can indicate that the passed token should be discarded by returning
  450. * null, undefined or an empty string. This token will not be passed to any downstream pipeline
  451. * functions and will not be added to the index.
  452. *
  453. * Multiple tokens can be returned by returning an array of tokens. Each token will be passed
  454. * to any downstream pipeline functions and all will returned tokens will be added to the index.
  455. *
  456. * Any number of pipeline functions may be chained together using a lunr.Pipeline.
  457. *
  458. * @interface lunr.PipelineFunction
  459. * @param {lunr.Token} token - A token from the document being processed.
  460. * @param {number} i - The index of this token in the complete list of tokens for this document/field.
  461. * @param {lunr.Token[]} tokens - All tokens for this document/field.
  462. * @returns {(?lunr.Token|lunr.Token[])}
  463. */
  464. /**
  465. * Register a function with the pipeline.
  466. *
  467. * Functions that are used in the pipeline should be registered if the pipeline
  468. * needs to be serialised, or a serialised pipeline needs to be loaded.
  469. *
  470. * Registering a function does not add it to a pipeline, functions must still be
  471. * added to instances of the pipeline for them to be used when running a pipeline.
  472. *
  473. * @param {lunr.PipelineFunction} fn - The function to check for.
  474. * @param {String} label - The label to register this function with
  475. */
  476. lunr.Pipeline.registerFunction = function(fn, label) {
  477. if (label in this.registeredFunctions) {
  478. lunr.utils.warn("Overwriting existing registered function: " + label);
  479. }
  480. fn.label = label;
  481. lunr.Pipeline.registeredFunctions[fn.label] = fn;
  482. };
  483. /**
  484. * Warns if the function is not registered as a Pipeline function.
  485. *
  486. * @param {lunr.PipelineFunction} fn - The function to check for.
  487. * @private
  488. */
  489. lunr.Pipeline.warnIfFunctionNotRegistered = function(fn) {
  490. var isRegistered = fn.label && fn.label in this.registeredFunctions;
  491. if (!isRegistered) {
  492. lunr.utils.warn(
  493. "Function is not registered with pipeline. This may cause problems when serialising the index.\n",
  494. fn
  495. );
  496. }
  497. };
  498. /**
  499. * Loads a previously serialised pipeline.
  500. *
  501. * All functions to be loaded must already be registered with lunr.Pipeline.
  502. * If any function from the serialised data has not been registered then an
  503. * error will be thrown.
  504. *
  505. * @param {Object} serialised - The serialised pipeline to load.
  506. * @returns {lunr.Pipeline}
  507. */
  508. lunr.Pipeline.load = function(serialised) {
  509. var pipeline = new lunr.Pipeline();
  510. serialised.forEach(function(fnName) {
  511. var fn = lunr.Pipeline.registeredFunctions[fnName];
  512. if (fn) {
  513. pipeline.add(fn);
  514. } else {
  515. throw new Error("Cannot load unregistered function: " + fnName);
  516. }
  517. });
  518. return pipeline;
  519. };
  520. /**
  521. * Adds new functions to the end of the pipeline.
  522. *
  523. * Logs a warning if the function has not been registered.
  524. *
  525. * @param {lunr.PipelineFunction[]} functions - Any number of functions to add to the pipeline.
  526. */
  527. lunr.Pipeline.prototype.add = function() {
  528. var fns = Array.prototype.slice.call(arguments);
  529. fns.forEach(function(fn) {
  530. lunr.Pipeline.warnIfFunctionNotRegistered(fn);
  531. this._stack.push(fn);
  532. }, this);
  533. };
  534. /**
  535. * Adds a single function after a function that already exists in the
  536. * pipeline.
  537. *
  538. * Logs a warning if the function has not been registered.
  539. *
  540. * @param {lunr.PipelineFunction} existingFn - A function that already exists in the pipeline.
  541. * @param {lunr.PipelineFunction} newFn - The new function to add to the pipeline.
  542. */
  543. lunr.Pipeline.prototype.after = function(existingFn, newFn) {
  544. lunr.Pipeline.warnIfFunctionNotRegistered(newFn);
  545. var pos = this._stack.indexOf(existingFn);
  546. if (pos == -1) {
  547. throw new Error("Cannot find existingFn");
  548. }
  549. pos = pos + 1;
  550. this._stack.splice(pos, 0, newFn);
  551. };
  552. /**
  553. * Adds a single function before a function that already exists in the
  554. * pipeline.
  555. *
  556. * Logs a warning if the function has not been registered.
  557. *
  558. * @param {lunr.PipelineFunction} existingFn - A function that already exists in the pipeline.
  559. * @param {lunr.PipelineFunction} newFn - The new function to add to the pipeline.
  560. */
  561. lunr.Pipeline.prototype.before = function(existingFn, newFn) {
  562. lunr.Pipeline.warnIfFunctionNotRegistered(newFn);
  563. var pos = this._stack.indexOf(existingFn);
  564. if (pos == -1) {
  565. throw new Error("Cannot find existingFn");
  566. }
  567. this._stack.splice(pos, 0, newFn);
  568. };
  569. /**
  570. * Removes a function from the pipeline.
  571. *
  572. * @param {lunr.PipelineFunction} fn The function to remove from the pipeline.
  573. */
  574. lunr.Pipeline.prototype.remove = function(fn) {
  575. var pos = this._stack.indexOf(fn);
  576. if (pos == -1) {
  577. return;
  578. }
  579. this._stack.splice(pos, 1);
  580. };
  581. /**
  582. * Runs the current list of functions that make up the pipeline against the
  583. * passed tokens.
  584. *
  585. * @param {Array} tokens The tokens to run through the pipeline.
  586. * @returns {Array}
  587. */
  588. lunr.Pipeline.prototype.run = function(tokens) {
  589. var stackLength = this._stack.length;
  590. for (var i = 0; i < stackLength; i++) {
  591. var fn = this._stack[i];
  592. var memo = [];
  593. for (var j = 0; j < tokens.length; j++) {
  594. var result = fn(tokens[j], j, tokens);
  595. if (result === null || result === void 0 || result === "") continue;
  596. if (Array.isArray(result)) {
  597. for (var k = 0; k < result.length; k++) {
  598. memo.push(result[k]);
  599. }
  600. } else {
  601. memo.push(result);
  602. }
  603. }
  604. tokens = memo;
  605. }
  606. return tokens;
  607. };
  608. /**
  609. * Convenience method for passing a string through a pipeline and getting
  610. * strings out. This method takes care of wrapping the passed string in a
  611. * token and mapping the resulting tokens back to strings.
  612. *
  613. * @param {string} str - The string to pass through the pipeline.
  614. * @param {?object} metadata - Optional metadata to associate with the token
  615. * passed to the pipeline.
  616. * @returns {string[]}
  617. */
  618. lunr.Pipeline.prototype.runString = function(str, metadata) {
  619. var token = new lunr.Token(str, metadata);
  620. return this.run([token]).map(function(t) {
  621. return t.toString();
  622. });
  623. };
  624. /**
  625. * Resets the pipeline by removing any existing processors.
  626. *
  627. */
  628. lunr.Pipeline.prototype.reset = function() {
  629. this._stack = [];
  630. };
  631. /**
  632. * Returns a representation of the pipeline ready for serialisation.
  633. *
  634. * Logs a warning if the function has not been registered.
  635. *
  636. * @returns {Array}
  637. */
  638. lunr.Pipeline.prototype.toJSON = function() {
  639. return this._stack.map(function(fn) {
  640. lunr.Pipeline.warnIfFunctionNotRegistered(fn);
  641. return fn.label;
  642. });
  643. };
  644. /*!
  645. * lunr.Vector
  646. * Copyright (C) 2019 Oliver Nightingale
  647. */
  648. /**
  649. * A vector is used to construct the vector space of documents and queries. These
  650. * vectors support operations to determine the similarity between two documents or
  651. * a document and a query.
  652. *
  653. * Normally no parameters are required for initializing a vector, but in the case of
  654. * loading a previously dumped vector the raw elements can be provided to the constructor.
  655. *
  656. * For performance reasons vectors are implemented with a flat array, where an elements
  657. * index is immediately followed by its value. E.g. [index, value, index, value]. This
  658. * allows the underlying array to be as sparse as possible and still offer decent
  659. * performance when being used for vector calculations.
  660. *
  661. * @constructor
  662. * @param {Number[]} [elements] - The flat list of element index and element value pairs.
  663. */
  664. lunr.Vector = function(elements) {
  665. this._magnitude = 0;
  666. this.elements = elements || [];
  667. };
  668. /**
  669. * Calculates the position within the vector to insert a given index.
  670. *
  671. * This is used internally by insert and upsert. If there are duplicate indexes then
  672. * the position is returned as if the value for that index were to be updated, but it
  673. * is the callers responsibility to check whether there is a duplicate at that index
  674. *
  675. * @param {Number} insertIdx - The index at which the element should be inserted.
  676. * @returns {Number}
  677. */
  678. lunr.Vector.prototype.positionForIndex = function(index) {
  679. // For an empty vector the tuple can be inserted at the beginning
  680. if (this.elements.length == 0) {
  681. return 0;
  682. }
  683. var start = 0,
  684. end = this.elements.length / 2,
  685. sliceLength = end - start,
  686. pivotPoint = Math.floor(sliceLength / 2),
  687. pivotIndex = this.elements[pivotPoint * 2];
  688. while (sliceLength > 1) {
  689. if (pivotIndex < index) {
  690. start = pivotPoint;
  691. }
  692. if (pivotIndex > index) {
  693. end = pivotPoint;
  694. }
  695. if (pivotIndex == index) {
  696. break;
  697. }
  698. sliceLength = end - start;
  699. pivotPoint = start + Math.floor(sliceLength / 2);
  700. pivotIndex = this.elements[pivotPoint * 2];
  701. }
  702. if (pivotIndex == index) {
  703. return pivotPoint * 2;
  704. }
  705. if (pivotIndex > index) {
  706. return pivotPoint * 2;
  707. }
  708. if (pivotIndex < index) {
  709. return (pivotPoint + 1) * 2;
  710. }
  711. };
  712. /**
  713. * Inserts an element at an index within the vector.
  714. *
  715. * Does not allow duplicates, will throw an error if there is already an entry
  716. * for this index.
  717. *
  718. * @param {Number} insertIdx - The index at which the element should be inserted.
  719. * @param {Number} val - The value to be inserted into the vector.
  720. */
  721. lunr.Vector.prototype.insert = function(insertIdx, val) {
  722. this.upsert(insertIdx, val, function() {
  723. throw "duplicate index";
  724. });
  725. };
  726. /**
  727. * Inserts or updates an existing index within the vector.
  728. *
  729. * @param {Number} insertIdx - The index at which the element should be inserted.
  730. * @param {Number} val - The value to be inserted into the vector.
  731. * @param {function} fn - A function that is called for updates, the existing value and the
  732. * requested value are passed as arguments
  733. */
  734. lunr.Vector.prototype.upsert = function(insertIdx, val, fn) {
  735. this._magnitude = 0;
  736. var position = this.positionForIndex(insertIdx);
  737. if (this.elements[position] == insertIdx) {
  738. this.elements[position + 1] = fn(this.elements[position + 1], val);
  739. } else {
  740. this.elements.splice(position, 0, insertIdx, val);
  741. }
  742. };
  743. /**
  744. * Calculates the magnitude of this vector.
  745. *
  746. * @returns {Number}
  747. */
  748. lunr.Vector.prototype.magnitude = function() {
  749. if (this._magnitude) return this._magnitude;
  750. var sumOfSquares = 0,
  751. elementsLength = this.elements.length;
  752. for (var i = 1; i < elementsLength; i += 2) {
  753. var val = this.elements[i];
  754. sumOfSquares += val * val;
  755. }
  756. return (this._magnitude = Math.sqrt(sumOfSquares));
  757. };
  758. /**
  759. * Calculates the dot product of this vector and another vector.
  760. *
  761. * @param {lunr.Vector} otherVector - The vector to compute the dot product with.
  762. * @returns {Number}
  763. */
  764. lunr.Vector.prototype.dot = function(otherVector) {
  765. var dotProduct = 0,
  766. a = this.elements,
  767. b = otherVector.elements,
  768. aLen = a.length,
  769. bLen = b.length,
  770. aVal = 0,
  771. bVal = 0,
  772. i = 0,
  773. j = 0;
  774. while (i < aLen && j < bLen) {
  775. (aVal = a[i]), (bVal = b[j]);
  776. if (aVal < bVal) {
  777. i += 2;
  778. } else if (aVal > bVal) {
  779. j += 2;
  780. } else if (aVal == bVal) {
  781. dotProduct += a[i + 1] * b[j + 1];
  782. i += 2;
  783. j += 2;
  784. }
  785. }
  786. return dotProduct;
  787. };
  788. /**
  789. * Calculates the similarity between this vector and another vector.
  790. *
  791. * @param {lunr.Vector} otherVector - The other vector to calculate the
  792. * similarity with.
  793. * @returns {Number}
  794. */
  795. lunr.Vector.prototype.similarity = function(otherVector) {
  796. return this.dot(otherVector) / this.magnitude() || 0;
  797. };
  798. /**
  799. * Converts the vector to an array of the elements within the vector.
  800. *
  801. * @returns {Number[]}
  802. */
  803. lunr.Vector.prototype.toArray = function() {
  804. var output = new Array(this.elements.length / 2);
  805. for (var i = 1, j = 0; i < this.elements.length; i += 2, j++) {
  806. output[j] = this.elements[i];
  807. }
  808. return output;
  809. };
  810. /**
  811. * A JSON serializable representation of the vector.
  812. *
  813. * @returns {Number[]}
  814. */
  815. lunr.Vector.prototype.toJSON = function() {
  816. return this.elements;
  817. };
  818. /* eslint-disable */
  819. /*!
  820. * lunr.stemmer
  821. * Copyright (C) 2019 Oliver Nightingale
  822. * Includes code from - http://tartarus.org/~martin/PorterStemmer/js.txt
  823. */
  824. /**
  825. * lunr.stemmer is an english language stemmer, this is a JavaScript
  826. * implementation of the PorterStemmer taken from http://tartarus.org/~martin
  827. *
  828. * @static
  829. * @implements {lunr.PipelineFunction}
  830. * @param {lunr.Token} token - The string to stem
  831. * @returns {lunr.Token}
  832. * @see {@link lunr.Pipeline}
  833. * @function
  834. */
  835. lunr.stemmer = (function() {
  836. var step2list = {
  837. ational: "ate",
  838. tional: "tion",
  839. enci: "ence",
  840. anci: "ance",
  841. izer: "ize",
  842. bli: "ble",
  843. alli: "al",
  844. entli: "ent",
  845. eli: "e",
  846. ousli: "ous",
  847. ization: "ize",
  848. ation: "ate",
  849. ator: "ate",
  850. alism: "al",
  851. iveness: "ive",
  852. fulness: "ful",
  853. ousness: "ous",
  854. aliti: "al",
  855. iviti: "ive",
  856. biliti: "ble",
  857. logi: "log"
  858. },
  859. step3list = {
  860. icate: "ic",
  861. ative: "",
  862. alize: "al",
  863. iciti: "ic",
  864. ical: "ic",
  865. ful: "",
  866. ness: ""
  867. },
  868. c = "[^aeiou]", // consonant
  869. v = "[aeiouy]", // vowel
  870. C = c + "[^aeiouy]*", // consonant sequence
  871. V = v + "[aeiou]*", // vowel sequence
  872. mgr0 = "^(" + C + ")?" + V + C, // [C]VC... is m>0
  873. meq1 = "^(" + C + ")?" + V + C + "(" + V + ")?$", // [C]VC[V] is m=1
  874. mgr1 = "^(" + C + ")?" + V + C + V + C, // [C]VCVC... is m>1
  875. s_v = "^(" + C + ")?" + v; // vowel in stem
  876. var re_mgr0 = new RegExp(mgr0);
  877. var re_mgr1 = new RegExp(mgr1);
  878. var re_meq1 = new RegExp(meq1);
  879. var re_s_v = new RegExp(s_v);
  880. var re_1a = /^(.+?)(ss|i)es$/;
  881. var re2_1a = /^(.+?)([^s])s$/;
  882. var re_1b = /^(.+?)eed$/;
  883. var re2_1b = /^(.+?)(ed|ing)$/;
  884. var re_1b_2 = /.$/;
  885. var re2_1b_2 = /(at|bl|iz)$/;
  886. var re3_1b_2 = new RegExp("([^aeiouylsz])\\1$");
  887. var re4_1b_2 = new RegExp("^" + C + v + "[^aeiouwxy]$");
  888. var re_1c = /^(.+?[^aeiou])y$/;
  889. var re_2 = /^(.+?)(ational|tional|enci|anci|izer|bli|alli|entli|eli|ousli|ization|ation|ator|alism|iveness|fulness|ousness|aliti|iviti|biliti|logi)$/;
  890. var re_3 = /^(.+?)(icate|ative|alize|iciti|ical|ful|ness)$/;
  891. var re_4 = /^(.+?)(al|ance|ence|er|ic|able|ible|ant|ement|ment|ent|ou|ism|ate|iti|ous|ive|ize)$/;
  892. var re2_4 = /^(.+?)(s|t)(ion)$/;
  893. var re_5 = /^(.+?)e$/;
  894. var re_5_1 = /ll$/;
  895. var re3_5 = new RegExp("^" + C + v + "[^aeiouwxy]$");
  /**
   * Reduces a single word to its stem by applying the stemming steps
   * (1a, 1b, 1c, 2, 3, 4 and 5) in order.
   *
   * Words shorter than three characters are returned unchanged.
   *
   * @param {string} w - The word to stem.
   * @returns {string} The stemmed word.
   */
  var porterStemmer = function porterStemmer(w) {
    if (w.length < 3) {
      return w;
    }

    var startsWithY = w.substr(0, 1) == "y",
      match,
      stem;

    // An initial "y" is upper-cased for the duration of the algorithm so
    // that the lower-case character classes do not treat it as a vowel.
    if (startsWithY) {
      w = "Y" + w.substr(1);
    }

    // Step 1a
    if (re_1a.test(w)) {
      w = w.replace(re_1a, "$1$2");
    } else if (re2_1a.test(w)) {
      w = w.replace(re2_1a, "$1$2");
    }

    // Step 1b
    match = re_1b.exec(w);
    if (match) {
      // "...eed": drop the final character when the stem has m > 0
      if (re_mgr0.test(match[1])) {
        w = w.replace(re_1b_2, "");
      }
    } else {
      match = re2_1b.exec(w);
      // "...ed" / "...ing": only removed when the stem contains a vowel
      if (match && re_s_v.test(match[1])) {
        w = match[1];
        if (re2_1b_2.test(w)) {
          w = w + "e";
        } else if (re3_1b_2.test(w)) {
          w = w.replace(re_1b_2, "");
        } else if (re4_1b_2.test(w)) {
          w = w + "e";
        }
      }
    }

    // Step 1c - replace suffix y or Y by i if preceded by a non-vowel which
    // is not the first letter of the word (so cry -> cri, by -> by, say -> say)
    match = re_1c.exec(w);
    if (match) {
      w = match[1] + "i";
    }

    // Step 2
    match = re_2.exec(w);
    if (match && re_mgr0.test(match[1])) {
      w = match[1] + step2list[match[2]];
    }

    // Step 3
    match = re_3.exec(w);
    if (match && re_mgr0.test(match[1])) {
      w = match[1] + step3list[match[2]];
    }

    // Step 4
    match = re_4.exec(w);
    if (match) {
      if (re_mgr1.test(match[1])) {
        w = match[1];
      }
    } else {
      match = re2_4.exec(w);
      if (match) {
        // "...sion" / "...tion": the s or t stays part of the stem
        stem = match[1] + match[2];
        if (re_mgr1.test(stem)) {
          w = stem;
        }
      }
    }

    // Step 5
    match = re_5.exec(w);
    if (match) {
      stem = match[1];
      if (re_mgr1.test(stem) || (re_meq1.test(stem) && !re3_5.test(stem))) {
        w = stem;
      }
    }

    if (re_5_1.test(w) && re_mgr1.test(w)) {
      w = w.replace(re_1b_2, "");
    }

    // and turn initial Y back to y
    if (startsWithY) {
      w = "y" + w.substr(1);
    }

    return w;
  };
  1013. return function(token) {
  1014. return token.update(porterStemmer);
  1015. };
  1016. })();
  1017. lunr.Pipeline.registerFunction(lunr.stemmer, "stemmer");
  1018. /*!
  1019. * lunr.stopWordFilter
  1020. * Copyright (C) 2019 Oliver Nightingale
  1021. */
  /**
   * lunr.generateStopWordFilter builds a stopWordFilter function from the provided
   * list of stop words.
   *
   * The built in lunr.stopWordFilter is built using this generator and can be used
   * to generate custom stopWordFilters for applications or non English languages.
   *
   * The returned filter passes a token through unchanged unless the token's
   * string form is one of the stop words, in which case (or when the token is
   * falsy) it returns undefined.
   *
   * @function
   * @param {Array} stopWords The list of stop words the generated filter should remove
   * @returns {lunr.PipelineFunction}
   * @see lunr.Pipeline
   * @see lunr.stopWordFilter
   */
  lunr.generateStopWordFilter = function(stopWords) {
    // The lookup table has no prototype so that every stop word, including
    // names such as "__proto__", is stored as an ordinary own property.
    var words = stopWords.reduce(function(memo, stopWord) {
      memo[stopWord] = stopWord;
      return memo;
    }, Object.create(null));

    return function(token) {
      if (!token) {
        return;
      }

      var str = token.toString();

      // Comparing against the stored word (rather than testing for key
      // presence) means only entries stored as that exact string match.
      if (words[str] !== str) {
        return token;
      }
    };
  };
  /**
   * lunr.stopWordFilter is the English language stop word filter: tokens whose
   * text appears in the list below are not passed through.
   *
   * It is intended for use in a Pipeline. A token that does not pass the
   * filter results in undefined being returned.
   *
   * @function
   * @implements {lunr.PipelineFunction}
   * @param {lunr.Token} token - A token to check for being a stop word.
   * @returns {lunr.Token}
   * @see {@link lunr.Pipeline}
   */
  lunr.stopWordFilter = lunr.generateStopWordFilter([
    "a", "able", "about", "across", "after", "all", "almost", "also",
    "am", "among", "an", "and", "any", "are", "as", "at",
    "be", "because", "been", "but", "by",
    "can", "cannot", "could",
    "dear", "did", "do", "does",
    "either", "else", "ever", "every",
    "for", "from",
    "get", "got",
    "had", "has", "have", "he", "her", "hers", "him", "his", "how", "however",
    "i", "if", "in", "into", "is", "it", "its",
    "just",
    "least", "let", "like", "likely",
    "may", "me", "might", "most", "must", "my",
    "neither", "no", "nor", "not",
    "of", "off", "often", "on", "only", "or", "other", "our", "own",
    "rather",
    "said", "say", "says", "she", "should", "since", "so", "some",
    "than", "that", "the", "their", "them", "then", "there", "these",
    "they", "this", "tis", "to", "too", "twas",
    "us",
    "wants", "was", "we", "were", "what", "when", "where", "which",
    "while", "who", "whom", "why", "will", "with", "would",
    "yet", "you", "your"
  ]);
  1178. lunr.Pipeline.registerFunction(lunr.stopWordFilter, "stopWordFilter");
  1179. /*!
  1180. * lunr.trimmer
  1181. * Copyright (C) 2019 Oliver Nightingale
  1182. */
  /**
   * lunr.trimmer is a pipeline function that strips runs of non word
   * characters (anything matched by \W) from the start and from the end of a
   * token before it enters the index.
   *
   * Because \W treats only [A-Za-z0-9_] as word characters, this
   * implementation may not work correctly for non latin characters and should
   * either be removed or adapted for use with languages with non-latin
   * characters.
   *
   * @static
   * @implements {lunr.PipelineFunction}
   * @param {lunr.Token} token The token to pass through the filter
   * @returns {lunr.Token}
   * @see lunr.Pipeline
   */
  lunr.trimmer = function(token) {
    var stripNonWordEdges = function(text) {
      var withoutLeading = text.replace(/^\W+/, "");
      return withoutLeading.replace(/\W+$/, "");
    };

    return token.update(stripNonWordEdges);
  };
  1203. lunr.Pipeline.registerFunction(lunr.trimmer, "trimmer");
  1204. /*!
  1205. * lunr.TokenSet
  1206. * Copyright (C) 2019 Oliver Nightingale
  1207. */
  1208. /**
  1209. * A token set is used to store the unique list of all tokens
  1210. * within an index. Token sets are also used to represent an
  1211. * incoming query to the index, this query token set and index
  1212. * token set are then intersected to find which tokens to look
  1213. * up in the inverted index.
  1214. *
  1215. * A token set can hold multiple tokens, as in the case of the
  1216. * index token set, or it can hold a single token as in the
  1217. * case of a simple query token set.
  1218. *
  1219. * Additionally token sets are used to perform wildcard matching.
  1220. * Leading, contained and trailing wildcards are supported, and
  1221. * from this edit distance matching can also be provided.
  1222. *
  1223. * Token sets are implemented as a minimal finite state automata,
  1224. * where both common prefixes and suffixes are shared between tokens.
  1225. * This helps to reduce the space used for storing the token set.
  1226. *
  1227. * @constructor
  1228. */
  1229. lunr.TokenSet = function() {
  1230. this.final = false;
  1231. this.edges = {};
  1232. this.id = lunr.TokenSet._nextId;
  1233. lunr.TokenSet._nextId += 1;
  1234. };
  1235. /**
  1236. * Keeps track of the next, auto increment, identifier to assign
  1237. * to a new tokenSet.
  1238. *
  1239. * TokenSets require a unique identifier to be correctly minimised.
  1240. *
  1241. * @private
  1242. */
  1243. lunr.TokenSet._nextId = 1;
  1244. /**
  1245. * Creates a TokenSet instance from the given sorted array of words.
  1246. *
  1247. * @param {String[]} arr - A sorted array of strings to create the set from.
  1248. * @returns {lunr.TokenSet}
  1249. * @throws Will throw an error if the input array is not sorted.
  1250. */
  1251. lunr.TokenSet.fromArray = function(arr) {
  1252. var builder = new lunr.TokenSet.Builder();
  1253. for (var i = 0, len = arr.length; i < len; i++) {
  1254. builder.insert(arr[i]);
  1255. }
  1256. builder.finish();
  1257. return builder.root;
  1258. };
  1259. /**
  1260. * Creates a token set from a query clause.
  1261. *
  1262. * @private
  1263. * @param {Object} clause - A single clause from lunr.Query.
  1264. * @param {string} clause.term - The query clause term.
  1265. * @param {number} [clause.editDistance] - The optional edit distance for the term.
  1266. * @returns {lunr.TokenSet}
  1267. */
  1268. lunr.TokenSet.fromClause = function(clause) {
  1269. if ("editDistance" in clause) {
  1270. return lunr.TokenSet.fromFuzzyString(clause.term, clause.editDistance);
  1271. } else {
  1272. return lunr.TokenSet.fromString(clause.term);
  1273. }
  1274. };
  1275. /**
  1276. * Creates a token set representing a single string with a specified
  1277. * edit distance.
  1278. *
  1279. * Insertions, deletions, substitutions and transpositions are each
  1280. * treated as an edit distance of 1.
  1281. *
  1282. * Increasing the allowed edit distance will have a dramatic impact
  1283. * on the performance of both creating and intersecting these TokenSets.
  1284. * It is advised to keep the edit distance less than 3.
  1285. *
  1286. * @param {string} str - The string to create the token set from.
  1287. * @param {number} editDistance - The allowed edit distance to match.
  1288. * @returns {lunr.Vector}
  1289. */
  1290. lunr.TokenSet.fromFuzzyString = function(str, editDistance) {
  1291. var root = new lunr.TokenSet();
  1292. var stack = [
  1293. {
  1294. node: root,
  1295. editsRemaining: editDistance,
  1296. str: str
  1297. }
  1298. ];
  1299. while (stack.length) {
  1300. var frame = stack.pop();
  1301. // no edit
  1302. if (frame.str.length > 0) {
  1303. var char = frame.str.charAt(0),
  1304. noEditNode;
  1305. if (char in frame.node.edges) {
  1306. noEditNode = frame.node.edges[char];
  1307. } else {
  1308. noEditNode = new lunr.TokenSet();
  1309. frame.node.edges[char] = noEditNode;
  1310. }
  1311. if (frame.str.length == 1) {
  1312. noEditNode.final = true;
  1313. }
  1314. stack.push({
  1315. node: noEditNode,
  1316. editsRemaining: frame.editsRemaining,
  1317. str: frame.str.slice(1)
  1318. });
  1319. }
  1320. if (frame.editsRemaining == 0) {
  1321. continue;
  1322. }
  1323. // insertion
  1324. if ("*" in frame.node.edges) {
  1325. var insertionNode = frame.node.edges["*"];
  1326. } else {
  1327. var insertionNode = new lunr.TokenSet();
  1328. frame.node.edges["*"] = insertionNode;
  1329. }
  1330. if (frame.str.length == 0) {
  1331. insertionNode.final = true;
  1332. }
  1333. stack.push({
  1334. node: insertionNode,
  1335. editsRemaining: frame.editsRemaining - 1,
  1336. str: frame.str
  1337. });
  1338. // deletion
  1339. // can only do a deletion if we have enough edits remaining
  1340. // and if there are characters left to delete in the string
  1341. if (frame.str.length > 1) {
  1342. stack.push({
  1343. node: frame.node,
  1344. editsRemaining: frame.editsRemaining - 1,
  1345. str: frame.str.slice(1)
  1346. });
  1347. }
  1348. // deletion
  1349. // just removing the last character from the str
  1350. if (frame.str.length == 1) {
  1351. frame.node.final = true;
  1352. }
  1353. // substitution
  1354. // can only do a substitution if we have enough edits remaining
  1355. // and if there are characters left to substitute
  1356. if (frame.str.length >= 1) {
  1357. if ("*" in frame.node.edges) {
  1358. var substitutionNode = frame.node.edges["*"];
  1359. } else {
  1360. var substitutionNode = new lunr.TokenSet();
  1361. frame.node.edges["*"] = substitutionNode;
  1362. }
  1363. if (frame.str.length == 1) {
  1364. substitutionNode.final = true;
  1365. }
  1366. stack.push({
  1367. node: substitutionNode,
  1368. editsRemaining: frame.editsRemaining - 1,
  1369. str: frame.str.slice(1)
  1370. });
  1371. }
  1372. // transposition
  1373. // can only do a transposition if there are edits remaining
  1374. // and there are enough characters to transpose
  1375. if (frame.str.length > 1) {
  1376. var charA = frame.str.charAt(0),
  1377. charB = frame.str.charAt(1),
  1378. transposeNode;
  1379. if (charB in frame.node.edges) {
  1380. transposeNode = frame.node.edges[charB];
  1381. } else {
  1382. transposeNode = new lunr.TokenSet();
  1383. frame.node.edges[charB] = transposeNode;
  1384. }
  1385. if (frame.str.length == 1) {
  1386. transposeNode.final = true;
  1387. }
  1388. stack.push({
  1389. node: transposeNode,
  1390. editsRemaining: frame.editsRemaining - 1,
  1391. str: charA + frame.str.slice(2)
  1392. });
  1393. }
  1394. }
  1395. return root;
  1396. };
  1397. /**
  1398. * Creates a TokenSet from a string.
  1399. *
  1400. * The string may contain one or more wildcard characters (*)
  1401. * that will allow wildcard matching when intersecting with
  1402. * another TokenSet.
  1403. *
  1404. * @param {string} str - The string to create a TokenSet from.
  1405. * @returns {lunr.TokenSet}
  1406. */
  1407. lunr.TokenSet.fromString = function(str) {
  1408. var node = new lunr.TokenSet(),
  1409. root = node;
  1410. /*
  1411. * Iterates through all characters within the passed string
  1412. * appending a node for each character.
  1413. *
  1414. * When a wildcard character is found then a self
  1415. * referencing edge is introduced to continually match
  1416. * any number of any characters.
  1417. */
  1418. for (var i = 0, len = str.length; i < len; i++) {
  1419. var char = str[i],
  1420. final = i == len - 1;
  1421. if (char == "*") {
  1422. node.edges[char] = node;
  1423. node.final = final;
  1424. } else {
  1425. var next = new lunr.TokenSet();
  1426. next.final = final;
  1427. node.edges[char] = next;
  1428. node = next;
  1429. }
  1430. }
  1431. return root;
  1432. };
  1433. /**
  1434. * Converts this TokenSet into an array of strings
  1435. * contained within the TokenSet.
  1436. *
  1437. * This is not intended to be used on a TokenSet that
  1438. * contains wildcards, in these cases the results are
  1439. * undefined and are likely to cause an infinite loop.
  1440. *
  1441. * @returns {string[]}
  1442. */
  1443. lunr.TokenSet.prototype.toArray = function() {
  1444. var words = [];
  1445. var stack = [
  1446. {
  1447. prefix: "",
  1448. node: this
  1449. }
  1450. ];
  1451. while (stack.length) {
  1452. var frame = stack.pop(),
  1453. edges = Object.keys(frame.node.edges),
  1454. len = edges.length;
  1455. if (frame.node.final) {
  1456. /* In Safari, at this point the prefix is sometimes corrupted, see:
  1457. * https://github.com/olivernn/lunr.js/issues/279 Calling any
  1458. * String.prototype method forces Safari to "cast" this string to what
  1459. * it's supposed to be, fixing the bug. */
  1460. frame.prefix.charAt(0);
  1461. words.push(frame.prefix);
  1462. }
  1463. for (var i = 0; i < len; i++) {
  1464. var edge = edges[i];
  1465. stack.push({
  1466. prefix: frame.prefix.concat(edge),
  1467. node: frame.node.edges[edge]
  1468. });
  1469. }
  1470. }
  1471. return words;
  1472. };
  1473. /**
  1474. * Generates a string representation of a TokenSet.
  1475. *
  1476. * This is intended to allow TokenSets to be used as keys
  1477. * in objects, largely to aid the construction and minimisation
  1478. * of a TokenSet. As such it is not designed to be a human
  1479. * friendly representation of the TokenSet.
  1480. *
  1481. * @returns {string}
  1482. */
  1483. lunr.TokenSet.prototype.toString = function() {
  1484. // NOTE: Using Object.keys here as this.edges is very likely
  1485. // to enter 'hash-mode' with many keys being added
  1486. //
  1487. // avoiding a for-in loop here as it leads to the function
  1488. // being de-optimised (at least in V8). From some simple
  1489. // benchmarks the performance is comparable, but allowing
  1490. // V8 to optimize may mean easy performance wins in the future.
  1491. if (this._str) {
  1492. return this._str;
  1493. }
  1494. var str = this.final ? "1" : "0",
  1495. labels = Object.keys(this.edges).sort(),
  1496. len = labels.length;
  1497. for (var i = 0; i < len; i++) {
  1498. var label = labels[i],
  1499. node = this.edges[label];
  1500. str = str + label + node.id;
  1501. }
  1502. return str;
  1503. };
  1504. /**
  1505. * Returns a new TokenSet that is the intersection of
  1506. * this TokenSet and the passed TokenSet.
  1507. *
  1508. * This intersection will take into account any wildcards
  1509. * contained within the TokenSet.
  1510. *
  1511. * @param {lunr.TokenSet} b - An other TokenSet to intersect with.
  1512. * @returns {lunr.TokenSet}
  1513. */
  1514. lunr.TokenSet.prototype.intersect = function(b) {
  1515. var output = new lunr.TokenSet(),
  1516. frame = undefined;
  1517. var stack = [
  1518. {
  1519. qNode: b,
  1520. output: output,
  1521. node: this
  1522. }
  1523. ];
  1524. while (stack.length) {
  1525. frame = stack.pop();
  1526. // NOTE: As with the #toString method, we are using
  1527. // Object.keys and a for loop instead of a for-in loop
  1528. // as both of these objects enter 'hash' mode, causing
  1529. // the function to be de-optimised in V8
  1530. var qEdges = Object.keys(frame.qNode.edges),
  1531. qLen = qEdges.length,
  1532. nEdges = Object.keys(frame.node.edges),
  1533. nLen = nEdges.length;
  1534. for (var q = 0; q < qLen; q++) {
  1535. var qEdge = qEdges[q];
  1536. for (var n = 0; n < nLen; n++) {
  1537. var nEdge = nEdges[n];
  1538. if (nEdge == qEdge || qEdge == "*") {
  1539. var node = frame.node.edges[nEdge],
  1540. qNode = frame.qNode.edges[qEdge],
  1541. final = node.final && qNode.final,
  1542. next = undefined;
  1543. if (nEdge in frame.output.edges) {
  1544. // an edge already exists for this character
  1545. // no need to create a new node, just set the finality
  1546. // bit unless this node is already final
  1547. next = frame.output.edges[nEdge];
  1548. next.final = next.final || final;
  1549. } else {
  1550. // no edge exists yet, must create one
  1551. // set the finality bit and insert it
  1552. // into the output
  1553. next = new lunr.TokenSet();
  1554. next.final = final;
  1555. frame.output.edges[nEdge] = next;
  1556. }
  1557. stack.push({
  1558. qNode: qNode,
  1559. output: next,
  1560. node: node
  1561. });
  1562. }
  1563. }
  1564. }
  1565. }
  1566. return output;
  1567. };
  1568. lunr.TokenSet.Builder = function() {
  1569. this.previousWord = "";
  1570. this.root = new lunr.TokenSet();
  1571. this.uncheckedNodes = [];
  1572. this.minimizedNodes = {};
  1573. };
  1574. lunr.TokenSet.Builder.prototype.insert = function(word) {
  1575. var node,
  1576. commonPrefix = 0;
  1577. if (word < this.previousWord) {
  1578. throw new Error("Out of order word insertion");
  1579. }
  1580. for (var i = 0; i < word.length && i < this.previousWord.length; i++) {
  1581. if (word[i] != this.previousWord[i]) break;
  1582. commonPrefix++;
  1583. }
  1584. this.minimize(commonPrefix);
  1585. if (this.uncheckedNodes.length == 0) {
  1586. node = this.root;
  1587. } else {
  1588. node = this.uncheckedNodes[this.uncheckedNodes.length - 1].child;
  1589. }
  1590. for (var i = commonPrefix; i < word.length; i++) {
  1591. var nextNode = new lunr.TokenSet(),
  1592. char = word[i];
  1593. node.edges[char] = nextNode;
  1594. this.uncheckedNodes.push({
  1595. parent: node,
  1596. char: char,
  1597. child: nextNode
  1598. });
  1599. node = nextNode;
  1600. }
  1601. node.final = true;
  1602. this.previousWord = word;
  1603. };
  1604. lunr.TokenSet.Builder.prototype.finish = function() {
  1605. this.minimize(0);
  1606. };
  1607. lunr.TokenSet.Builder.prototype.minimize = function(downTo) {
  1608. for (var i = this.uncheckedNodes.length - 1; i >= downTo; i--) {
  1609. var node = this.uncheckedNodes[i],
  1610. childKey = node.child.toString();
  1611. if (childKey in this.minimizedNodes) {
  1612. node.parent.edges[node.char] = this.minimizedNodes[childKey];
  1613. } else {
  1614. // Cache the key for this node since
  1615. // we know it can't change anymore
  1616. node.child._str = childKey;
  1617. this.minimizedNodes[childKey] = node.child;
  1618. }
  1619. this.uncheckedNodes.pop();
  1620. }
  1621. };
  1622. /*!
  1623. * lunr.Index
  1624. * Copyright (C) 2019 Oliver Nightingale
  1625. */
  /**
   * An index holds the built index of all documents and provides the query
   * interface to it.
   *
   * Instances are not usually created with this constructor directly;
   * lunr.Builder should be used to construct new indexes, or lunr.Index.load
   * should be used to load previously built and serialized indexes.
   *
   * The constructor copies each of the attributes below from `attrs` onto
   * the new instance.
   *
   * @constructor
   * @param {Object} attrs - The attributes of the built search index.
   * @param {Object} attrs.invertedIndex - An index of term/field to document reference.
   * @param {Object<string, lunr.Vector>} attrs.fieldVectors - Field vectors
   * @param {lunr.TokenSet} attrs.tokenSet - A set of all corpus tokens.
   * @param {string[]} attrs.fields - The names of indexed document fields.
   * @param {lunr.Pipeline} attrs.pipeline - The pipeline to use for search terms.
   */
  lunr.Index = function(attrs) {
    var index = this,
      attributeNames = [
        "invertedIndex",
        "fieldVectors",
        "tokenSet",
        "fields",
        "pipeline"
      ];

    attributeNames.forEach(function(name) {
      index[name] = attrs[name];
    });
  };
  1649. /**
  1650. * A result contains details of a document matching a search query.
  1651. * @typedef {Object} lunr.Index~Result
  1652. * @property {string} ref - The reference of the document this result represents.
  1653. * @property {number} score - A number between 0 and 1 representing how similar this document is to the query.
  1654. * @property {lunr.MatchData} matchData - Contains metadata about this match including which term(s) caused the match.
  1655. */
  1656. /**
  1657. * Although lunr provides the ability to create queries using lunr.Query, it also provides a simple
  1658. * query language which itself is parsed into an instance of lunr.Query.
  1659. *
  1660. * For programmatically building queries it is advised to directly use lunr.Query, the query language
  1661. * is best used for human entered text rather than program generated text.
  1662. *
  1663. * At its simplest queries can just be a single term, e.g. `hello`, multiple terms are also supported
  1664. * and will be combined with OR, e.g `hello world` will match documents that contain either 'hello'
  1665. * or 'world', though those that contain both will rank higher in the results.
  1666. *
  1667. * Wildcards can be included in terms to match one or more unspecified characters, these wildcards can
  1668. * be inserted anywhere within the term, and more than one wildcard can exist in a single term. Adding
  1669. * wildcards will increase the number of documents that will be found but can also have a negative
  1670. * impact on query performance, especially with wildcards at the beginning of a term.
  1671. *
  1672. * Terms can be restricted to specific fields, e.g. `title:hello`, only documents with the term
  1673. * hello in the title field will match this query. Using a field not present in the index will lead
  1674. * to an error being thrown.
  1675. *
  1676. * Modifiers can also be added to terms, lunr supports edit distance and boost modifiers on terms. A term
  1677. * boost will make documents matching that term score higher, e.g. `foo^5`. Edit distance is also supported
  1678. * to provide fuzzy matching, e.g. 'hello~2' will match documents with hello with an edit distance of 2.
  1679. * Avoid large values for edit distance to improve query performance.
  1680. *
  1681. * Each term also supports a presence modifier. By default a term's presence in document is optional, however
  1682. * this can be changed to either required or prohibited. For a term's presence to be required in a document the
  1683. * term should be prefixed with a '+', e.g. `+foo bar` is a search for documents that must contain 'foo' and
  1684. * optionally contain 'bar'. Conversely a leading '-' sets the terms presence to prohibited, i.e. it must not
  1685. * appear in a document, e.g. `-foo bar` is a search for documents that do not contain 'foo' but may contain 'bar'.
  1686. *
   * To escape special characters the backslash character '\' can be used. This allows searches to include
   * characters that would normally be considered modifiers, e.g. `foo\~2` will search for a term "foo~2" instead
   * of attempting to apply an edit distance of 2 to the search term "foo".
  1690. *
  1691. * @typedef {string} lunr.Index~QueryString
  1692. * @example <caption>Simple single term query</caption>
  1693. * hello
  1694. * @example <caption>Multiple term query</caption>
  1695. * hello world
  1696. * @example <caption>term scoped to a field</caption>
  1697. * title:hello
  1698. * @example <caption>term with a boost of 10</caption>
  1699. * hello^10
  1700. * @example <caption>term with an edit distance of 2</caption>
  1701. * hello~2
  1702. * @example <caption>terms with presence modifiers</caption>
  1703. * -foo +bar baz
  1704. */
  /**
   * Performs a search against the index using lunr query syntax.
   *
   * The query string is parsed by a lunr.QueryParser into the lunr.Query
   * yielded by lunr.Index#query, and the results of that query are returned.
   *
   * Results will be returned sorted by their score, the most relevant results
   * will be returned first. For details on how the score is calculated, please see
   * the {@link https://lunrjs.com/guides/searching.html#scoring|guide}.
   *
   * For more programmatic querying use lunr.Index#query.
   *
   * @param {lunr.Index~QueryString} queryString - A string containing a lunr query.
   * @throws {lunr.QueryParseError} If the passed query string cannot be parsed.
   * @returns {lunr.Index~Result[]}
   */
  lunr.Index.prototype.search = function(queryString) {
    // Fills the yielded query object with the clauses parsed from queryString.
    var populateQuery = function(query) {
      new lunr.QueryParser(queryString, query).parse();
    };

    return this.query(populateQuery);
  };
  1724. /**
  1725. * A query builder callback provides a query object to be used to express
  1726. * the query to perform on the index.
  1727. *
  1728. * @callback lunr.Index~queryBuilder
  1729. * @param {lunr.Query} query - The query object to build up.
  1730. * @this lunr.Query
  1731. */
  1732. /**
  1733. * Performs a query against the index using the yielded lunr.Query object.
  1734. *
  1735. * If performing programmatic queries against the index, this method is preferred
  1736. * over lunr.Index#search so as to avoid the additional query parsing overhead.
  1737. *
  1738. * A query object is yielded to the supplied function which should be used to
  1739. * express the query to be run against the index.
  1740. *
  1741. * Note that although this function takes a callback parameter it is _not_ an
  1742. * asynchronous operation, the callback is just yielded a query object to be
  1743. * customized.
  1744. *
  1745. * @param {lunr.Index~queryBuilder} fn - A function that is used to build the query.
  1746. * @returns {lunr.Index~Result[]}
  1747. */
  1748. lunr.Index.prototype.query = function(fn) {
  1749. // for each query clause
  1750. // * process terms
  1751. // * expand terms from token set
  1752. // * find matching documents and metadata
  1753. // * get document vectors
  1754. // * score documents
  1755. var query = new lunr.Query(this.fields),
  1756. matchingFields = Object.create(null),
  1757. queryVectors = Object.create(null),
  1758. termFieldCache = Object.create(null),
  1759. requiredMatches = Object.create(null),
  1760. prohibitedMatches = Object.create(null);
  1761. /*
  1762. * To support field level boosts a query vector is created per
  1763. * field. An empty vector is eagerly created to support negated
  1764. * queries.
  1765. */
  1766. for (var i = 0; i < this.fields.length; i++) {
  1767. queryVectors[this.fields[i]] = new lunr.Vector();
  1768. }
  1769. fn.call(query, query);
  1770. for (var i = 0; i < query.clauses.length; i++) {
  1771. /*
  1772. * Unless the pipeline has been disabled for this term, which is
  1773. * the case for terms with wildcards, we need to pass the clause
  1774. * term through the search pipeline. A pipeline returns an array
  1775. * of processed terms. Pipeline functions may expand the passed
  1776. * term, which means we may end up performing multiple index lookups
  1777. * for a single query term.
  1778. */
  1779. var clause = query.clauses[i],
  1780. terms = null,
  1781. clauseMatches = lunr.Set.complete;
  1782. if (clause.usePipeline) {
  1783. terms = this.pipeline.runString(clause.term, {
  1784. fields: clause.fields
  1785. });
  1786. } else {
  1787. terms = [clause.term];
  1788. }
  1789. for (var m = 0; m < terms.length; m++) {
  1790. var term = terms[m];
  1791. /*
  1792. * Each term returned from the pipeline needs to use the same query
  1793. * clause object, e.g. the same boost and or edit distance. The
  1794. * simplest way to do this is to re-use the clause object but mutate
  1795. * its term property.
  1796. */
  1797. clause.term = term;
  1798. /*
  1799. * From the term in the clause we create a token set which will then
  1800. * be used to intersect the indexes token set to get a list of terms
  1801. * to lookup in the inverted index
  1802. */
  1803. var termTokenSet = lunr.TokenSet.fromClause(clause),
  1804. expandedTerms = this.tokenSet.intersect(termTokenSet).toArray();
  1805. /*
  1806. * If a term marked as required does not exist in the tokenSet it is
  1807. * impossible for the search to return any matches. We set all the field
  1808. * scoped required matches set to empty and stop examining any further
  1809. * clauses.
  1810. */
  1811. if (
  1812. expandedTerms.length === 0 &&
  1813. clause.presence === lunr.Query.presence.REQUIRED
  1814. ) {
  1815. for (var k = 0; k < clause.fields.length; k++) {
  1816. var field = clause.fields[k];
  1817. requiredMatches[field] = lunr.Set.empty;
  1818. }
  1819. break;
  1820. }
  1821. for (var j = 0; j < expandedTerms.length; j++) {
  1822. /*
  1823. * For each term get the posting and termIndex, this is required for
  1824. * building the query vector.
  1825. */
  1826. var expandedTerm = expandedTerms[j],
  1827. posting = this.invertedIndex[expandedTerm],
  1828. termIndex = posting._index;
  1829. for (var k = 0; k < clause.fields.length; k++) {
  1830. /*
  1831. * For each field that this query term is scoped by (by default
  1832. * all fields are in scope) we need to get all the document refs
  1833. * that have this term in that field.
  1834. *
  1835. * The posting is the entry in the invertedIndex for the matching
  1836. * term from above.
  1837. */
  1838. var field = clause.fields[k],
  1839. fieldPosting = posting[field],
  1840. matchingDocumentRefs = Object.keys(fieldPosting),
  1841. termField = expandedTerm + "/" + field,
  1842. matchingDocumentsSet = new lunr.Set(matchingDocumentRefs);
  1843. /*
  1844. * if the presence of this term is required ensure that the matching
  1845. * documents are added to the set of required matches for this clause.
  1846. *
  1847. */
  1848. if (clause.presence == lunr.Query.presence.REQUIRED) {
  1849. clauseMatches = clauseMatches.union(matchingDocumentsSet);
  1850. if (requiredMatches[field] === undefined) {
  1851. requiredMatches[field] = lunr.Set.complete;
  1852. }
  1853. }
  1854. /*
  1855. * if the presence of this term is prohibited ensure that the matching
  1856. * documents are added to the set of prohibited matches for this field,
  1857. * creating that set if it does not yet exist.
  1858. */
  1859. if (clause.presence == lunr.Query.presence.PROHIBITED) {
  1860. if (prohibitedMatches[field] === undefined) {
  1861. prohibitedMatches[field] = lunr.Set.empty;
  1862. }
  1863. prohibitedMatches[field] = prohibitedMatches[field].union(
  1864. matchingDocumentsSet
  1865. );
  1866. /*
  1867. * Prohibited matches should not be part of the query vector used for
  1868. * similarity scoring and no metadata should be extracted so we continue
  1869. * to the next field
  1870. */
  1871. continue;
  1872. }
  1873. /*
  1874. * The query field vector is populated using the termIndex found for
  1875. * the term and a unit value with the appropriate boost applied.
  1876. * Using upsert because there could already be an entry in the vector
  1877. * for the term we are working with. In that case we just add the scores
  1878. * together.
  1879. */
  1880. queryVectors[field].upsert(termIndex, clause.boost, function(a, b) {
  1881. return a + b;
  1882. });
  1883. /**
  1884. * If we've already seen this term, field combo then we've already collected
  1885. * the matching documents and metadata, no need to go through all that again
  1886. */
  1887. if (termFieldCache[termField]) {
  1888. continue;
  1889. }
  1890. for (var l = 0; l < matchingDocumentRefs.length; l++) {
  1891. /*
  1892. * All metadata for this term/field/document triple
  1893. * are then extracted and collected into an instance
  1894. * of lunr.MatchData ready to be returned in the query
  1895. * results
  1896. */
  1897. var matchingDocumentRef = matchingDocumentRefs[l],
  1898. matchingFieldRef = new lunr.FieldRef(
  1899. matchingDocumentRef,
  1900. field
  1901. ),
  1902. metadata = fieldPosting[matchingDocumentRef],
  1903. fieldMatch;
  1904. if (
  1905. (fieldMatch = matchingFields[matchingFieldRef]) === undefined
  1906. ) {
  1907. matchingFields[matchingFieldRef] = new lunr.MatchData(
  1908. expandedTerm,
  1909. field,
  1910. metadata
  1911. );
  1912. } else {
  1913. fieldMatch.add(expandedTerm, field, metadata);
  1914. }
  1915. }
  1916. termFieldCache[termField] = true;
  1917. }
  1918. }
  1919. }
  1920. /**
  1921. * If the presence was required we need to update the requiredMatches field sets.
  1922. * We do this after all fields for the term have collected their matches because
  1923. * the clause terms presence is required in _any_ of the fields not _all_ of the
  1924. * fields.
  1925. */
  1926. if (clause.presence === lunr.Query.presence.REQUIRED) {
  1927. for (var k = 0; k < clause.fields.length; k++) {
  1928. var field = clause.fields[k];
  1929. requiredMatches[field] = requiredMatches[field].intersect(
  1930. clauseMatches
  1931. );
  1932. }
  1933. }
  1934. }
  1935. /**
  1936. * Need to combine the field scoped required and prohibited
  1937. * matching documents into a global set of required and prohibited
  1938. * matches
  1939. */
  1940. var allRequiredMatches = lunr.Set.complete,
  1941. allProhibitedMatches = lunr.Set.empty;
  1942. for (var i = 0; i < this.fields.length; i++) {
  1943. var field = this.fields[i];
  1944. if (requiredMatches[field]) {
  1945. allRequiredMatches = allRequiredMatches.intersect(
  1946. requiredMatches[field]
  1947. );
  1948. }
  1949. if (prohibitedMatches[field]) {
  1950. allProhibitedMatches = allProhibitedMatches.union(
  1951. prohibitedMatches[field]
  1952. );
  1953. }
  1954. }
  1955. var matchingFieldRefs = Object.keys(matchingFields),
  1956. results = [],
  1957. matches = Object.create(null);
  1958. /*
  1959. * If the query is negated (contains only prohibited terms)
  1960. * we need to get _all_ fieldRefs currently existing in the
  1961. * index. This is only done when we know that the query is
  1962. * entirely prohibited terms to avoid any cost of getting all
  1963. * fieldRefs unnecessarily.
  1964. *
  1965. * Additionally, blank MatchData must be created to correctly
  1966. * populate the results.
  1967. */
  1968. if (query.isNegated()) {
  1969. matchingFieldRefs = Object.keys(this.fieldVectors);
  1970. for (var i = 0; i < matchingFieldRefs.length; i++) {
  1971. var matchingFieldRef = matchingFieldRefs[i];
  1972. var fieldRef = lunr.FieldRef.fromString(matchingFieldRef);
  1973. matchingFields[matchingFieldRef] = new lunr.MatchData();
  1974. }
  1975. }
  1976. for (var i = 0; i < matchingFieldRefs.length; i++) {
  1977. /*
  1978. * Currently we have document fields that match the query, but we
  1979. * need to return documents. The matchData and scores are combined
  1980. * from multiple fields belonging to the same document.
  1981. *
  1982. * Scores are calculated by field, using the query vectors created
  1983. * above, and combined into a final document score using addition.
  1984. */
  1985. var fieldRef = lunr.FieldRef.fromString(matchingFieldRefs[i]),
  1986. docRef = fieldRef.docRef;
  1987. if (!allRequiredMatches.contains(docRef)) {
  1988. continue;
  1989. }
  1990. if (allProhibitedMatches.contains(docRef)) {
  1991. continue;
  1992. }
  1993. var fieldVector = this.fieldVectors[fieldRef],
  1994. score = queryVectors[fieldRef.fieldName].similarity(fieldVector),
  1995. docMatch;
  1996. if ((docMatch = matches[docRef]) !== undefined) {
  1997. docMatch.score += score;
  1998. docMatch.matchData.combine(matchingFields[fieldRef]);
  1999. } else {
  2000. var match = {
  2001. ref: docRef,
  2002. score: score,
  2003. matchData: matchingFields[fieldRef]
  2004. };
  2005. matches[docRef] = match;
  2006. results.push(match);
  2007. }
  2008. }
  2009. /*
  2010. * Sort the results objects by score, highest first.
  2011. */
  2012. return results.sort(function(a, b) {
  2013. return b.score - a.score;
  2014. });
  2015. };
  /**
   * Produces a plain-object snapshot of this index that can be handed to
   * JSON.stringify.
   *
   * The inverted index is emitted as a list of [term, posting] pairs ordered
   * by term, and the field vectors as a list of [fieldRef, serialisedVector]
   * pairs. The current lunr version is recorded alongside the data.
   *
   * The schema for this JSON blob will be described in a
   * separate JSON schema file.
   *
   * @returns {Object}
   */
  lunr.Index.prototype.toJSON = function() {
    var self = this,
      sortedTerms = Object.keys(this.invertedIndex).sort(),
      vectorRefs = Object.keys(this.fieldVectors),
      postingPairs = [],
      vectorPairs = [];

    sortedTerms.forEach(function(term) {
      postingPairs.push([term, self.invertedIndex[term]]);
    });

    vectorRefs.forEach(function(ref) {
      vectorPairs.push([ref, self.fieldVectors[ref].toJSON()]);
    });

    return {
      version: lunr.version,
      fields: this.fields,
      fieldVectors: vectorPairs,
      invertedIndex: postingPairs,
      pipeline: this.pipeline.toJSON()
    };
  };
  /**
   * Reconstructs a lunr.Index from the object produced by
   * lunr.Index.prototype.toJSON.
   *
   * A warning is emitted through lunr.utils.warn when the serialized index was
   * written by a different lunr version; loading continues regardless.
   *
   * @param {Object} serializedIndex - A previously serialized lunr.Index
   * @returns {lunr.Index}
   */
  lunr.Index.load = function(serializedIndex) {
    var storedVectors = serializedIndex.fieldVectors,
      storedPostings = serializedIndex.invertedIndex,
      vectorsByRef = {},
      postingsByTerm = Object.create(null),
      tokenSetBuilder = new lunr.TokenSet.Builder(),
      searchPipeline = lunr.Pipeline.load(serializedIndex.pipeline),
      pair,
      n;

    if (serializedIndex.version != lunr.version) {
      lunr.utils.warn(
        "Version mismatch when loading serialised index. Current version of lunr '" +
          lunr.version +
          "' does not match serialized index '" +
          serializedIndex.version +
          "'"
      );
    }

    // Each stored vector is a [fieldRef, elements] pair.
    for (n = 0; n < storedVectors.length; n++) {
      pair = storedVectors[n];
      vectorsByRef[pair[0]] = new lunr.Vector(pair[1]);
    }

    // Each stored posting is a [term, posting] pair; every term is also fed
    // to the token set builder in the order it was stored.
    for (n = 0; n < storedPostings.length; n++) {
      pair = storedPostings[n];
      tokenSetBuilder.insert(pair[0]);
      postingsByTerm[pair[0]] = pair[1];
    }

    // finish() must run before the builder's root is read.
    tokenSetBuilder.finish();

    return new lunr.Index({
      fields: serializedIndex.fields,
      fieldVectors: vectorsByRef,
      invertedIndex: postingsByTerm,
      tokenSet: tokenSetBuilder.root,
      pipeline: searchPipeline
    });
  };
  2085. /*!
  2086. * lunr.Builder
  2087. * Copyright (C) 2019 Oliver Nightingale
  2088. */
  /**
   * lunr.Builder performs indexing on a set of documents and
   * returns instances of lunr.Index ready for querying.
   *
   * All configuration of the index is done via the builder: the
   * fields to index, the document reference, the text processing
   * pipeline and document scoring parameters are all set on the
   * builder before indexing.
   *
   * @constructor
   * @property {string} _ref - Name of the document property used as the document reference.
   * @property {object} _fields - Maps each field name to the attributes it was registered with.
   * @property {object} _documents - Maps each document ref to the attributes it was added with.
   * @property {object} invertedIndex - The inverted index maps terms to document fields.
   * @property {object} fieldTermFrequencies - Term frequencies, keyed by field ref.
   * @property {object} fieldLengths - Number of terms in each indexed field, keyed by field ref.
   * @property {lunr.tokenizer} tokenizer - Function for splitting strings into tokens for indexing.
   * @property {lunr.Pipeline} pipeline - The pipeline performs text processing on tokens before indexing.
   * @property {lunr.Pipeline} searchPipeline - A pipeline for processing search terms before querying the index.
   * @property {number} documentCount - Keeps track of the total number of documents indexed.
   * @property {number} _b - A parameter to control field length normalization, setting this to 0 disables normalization, 1 fully normalizes field lengths, the default value is 0.75.
   * @property {number} _k1 - A parameter to control how quickly an increase in term frequency results in term frequency saturation, the default value is 1.2.
   * @property {number} termIndex - A counter incremented for each unique term, used to identify a term's position in the vector space.
   * @property {array} metadataWhitelist - A list of metadata keys that have been whitelisted for entry in the index.
   */
  lunr.Builder = function() {
    // Schema: reference field, registered fields and per-document attributes.
    this._ref = "id";
    this._fields = Object.create(null);
    this._documents = Object.create(null);

    // Data accumulated while documents are added.
    this.invertedIndex = Object.create(null);
    this.fieldTermFrequencies = {};
    this.fieldLengths = {};

    // Text processing.
    this.tokenizer = lunr.tokenizer;
    this.pipeline = new lunr.Pipeline();
    this.searchPipeline = new lunr.Pipeline();

    // Counters and scoring parameters.
    this.documentCount = 0;
    this._b = 0.75;
    this._k1 = 1.2;
    this.termIndex = 0;
    this.metadataWhitelist = [];
  };
  /**
   * Chooses which document property acts as the document reference. Every
   * document must have this property. Its value should be a string; if it is
   * not a string it will be coerced into one by calling toString.
   *
   * The default ref is 'id'.
   *
   * Set the ref before adding any documents: changing it part way through
   * indexing can lead to inconsistent results.
   *
   * @param {string} ref - The name of the reference field in the document.
   */
  lunr.Builder.prototype.ref = function(ref) {
    var referenceField = ref;
    this._ref = referenceField;
  };
  2144. /**
  2145. * A function that is used to extract a field from a document.
  2146. *
  2147. * Lunr expects a field to be at the top level of a document, if however the field
  2148. * is deeply nested within a document an extractor function can be used to extract
  2149. * the right field for indexing.
  2150. *
  2151. * @callback fieldExtractor
  2152. * @param {object} doc - The document being added to the index.
  2153. * @returns {?(string|object|object[])} obj - The object that will be indexed for this field.
  2154. * @example <caption>Extracting a nested field</caption>
  2155. * function (doc) { return doc.nested.field }
  2156. */
  /**
   * Registers a document field to be indexed. Every document being indexed
   * should have this field. Null values for this field in indexed documents
   * will not cause errors but will limit the chance of that document being
   * retrieved by searches.
   *
   * Register all fields before adding documents: a field added later has no
   * effect on documents that were already indexed.
   *
   * Fields can be boosted at build time so that matches within one field count
   * for more than matches in other fields when ranking results.
   *
   * Registering the same field name again replaces its attributes.
   *
   * @param {string} fieldName - The name of a field to index in all documents.
   * @param {object} attributes - Optional attributes associated with this field.
   * @param {number} [attributes.boost=1] - Boost applied to all terms within this field.
   * @param {fieldExtractor} [attributes.extractor] - Function to extract a field from a document.
   * @throws {RangeError} fieldName cannot contain unsupported characters '/'
   */
  lunr.Builder.prototype.field = function(fieldName, attributes) {
    var containsSlash = /\//.test(fieldName);

    if (containsSlash) {
      throw new RangeError(
        "Field '" + fieldName + "' contains illegal character '/'"
      );
    }

    this._fields[fieldName] = attributes ? attributes : {};
  };
  /**
   * Sets the amount of field length normalisation applied when calculating
   * relevance scores. A value of 0 disables normalisation completely and a
   * value of 1 fully normalises field lengths. The default is 0.75.
   *
   * Values below 0 are stored as 0 and values above 1 are stored as 1; any
   * other value is stored as given.
   *
   * @param {number} number - The value to set for this tuning parameter.
   */
  lunr.Builder.prototype.b = function(number) {
    this._b = number < 0 ? 0 : number > 1 ? 1 : number;
  };
  /**
   * Sets the parameter controlling how quickly a rise in term frequency leads
   * to term frequency saturation. The default value is 1.2. A higher value
   * gives slower saturation, a lower value gives quicker saturation.
   *
   * The value is stored as given; no range check is applied.
   *
   * @param {number} number - The value to set for this tuning parameter.
   */
  lunr.Builder.prototype.k1 = function(number) {
    var saturation = number;
    this._k1 = saturation;
  };
  /**
   * Adds a document to the index.
   *
   * The builder should be fully set up before documents are added, with the
   * document ref and all fields to index already specified.
   *
   * The document must have a property named by the ref (by default 'id') and
   * should have all fields defined for indexing, though null or undefined
   * values will not cause errors.
   *
   * For every registered field the value (taken from the field's extractor
   * when one was registered, otherwise read directly from the document) is
   * tokenized and run through the indexing pipeline. The resulting terms
   * update the field length, the per-field term frequencies and the inverted
   * index, including any whitelisted token metadata.
   *
   * Entire documents can be boosted at build time. Applying a boost to a
   * document indicates that it should rank higher in search results than
   * other documents.
   *
   * @param {object} doc - The document to add to the index.
   * @param {object} attributes - Optional attributes associated with this document.
   * @param {number} [attributes.boost=1] - Boost applied to all terms within this document.
   */
  lunr.Builder.prototype.add = function(doc, attributes) {
    var docRef = doc[this._ref],
      fieldNames = Object.keys(this._fields);

    this._documents[docRef] = attributes || {};
    this.documentCount += 1;

    for (var f = 0; f < fieldNames.length; f++) {
      var fieldName = fieldNames[f],
        extractor = this._fields[fieldName].extractor,
        fieldValue = extractor ? extractor(doc) : doc[fieldName],
        tokens = this.tokenizer(fieldValue, {
          fields: [fieldName]
        }),
        terms = this.pipeline.run(tokens),
        fieldRef = new lunr.FieldRef(docRef, fieldName),
        frequencies = Object.create(null);

      this.fieldTermFrequencies[fieldRef] = frequencies;

      // the length of a field is the number of terms left after the pipeline
      this.fieldLengths[fieldRef] = terms.length;

      for (var t = 0; t < terms.length; t++) {
        var term = terms[t];

        // count this occurrence of the term within the field
        frequencies[term] =
          (frequencies[term] == undefined ? 0 : frequencies[term]) + 1;

        // the first time a term is seen it gets a posting holding its vector
        // position and an empty entry for every registered field
        var posting = this.invertedIndex[term];
        if (posting == undefined) {
          posting = Object.create(null);
          posting["_index"] = this.termIndex;
          this.termIndex += 1;

          for (var p = 0; p < fieldNames.length; p++) {
            posting[fieldNames[p]] = Object.create(null);
          }

          this.invertedIndex[term] = posting;
        }

        // make sure there is an entry for this term/fieldName/docRef
        var fieldPosting = posting[fieldName];
        if (fieldPosting[docRef] == undefined) {
          fieldPosting[docRef] = Object.create(null);
        }
        var docMetadata = fieldPosting[docRef];

        // append every whitelisted piece of token metadata to that entry
        for (var w = 0; w < this.metadataWhitelist.length; w++) {
          var metadataKey = this.metadataWhitelist[w],
            metadataValue = term.metadata[metadataKey];

          if (docMetadata[metadataKey] == undefined) {
            docMetadata[metadataKey] = [];
          }

          docMetadata[metadataKey].push(metadataValue);
        }
      }
    }
  };
  /**
   * Calculates the average length of each field across the documents that
   * have been added, storing the result on `averageFieldLength` keyed by
   * field name.
   *
   * The running totals are kept in prototype-less objects so that a field
   * whose name matches an Object.prototype member (for example 'constructor'
   * or 'toString') is accumulated like any other field instead of picking up
   * the inherited value.
   *
   * A registered field for which no field length has been recorded ends up
   * with an average of NaN.
   *
   * @private
   */
  lunr.Builder.prototype.calculateAverageFieldLengths = function() {
    var fieldRefs = Object.keys(this.fieldLengths),
      numberOfFields = fieldRefs.length,
      accumulator = Object.create(null),
      documentsWithField = Object.create(null);

    // Sum the lengths, and count the entries, for every field name.
    for (var i = 0; i < numberOfFields; i++) {
      var fieldRef = lunr.FieldRef.fromString(fieldRefs[i]),
        field = fieldRef.fieldName;

      documentsWithField[field] || (documentsWithField[field] = 0);
      documentsWithField[field] += 1;

      accumulator[field] || (accumulator[field] = 0);
      accumulator[field] += this.fieldLengths[fieldRef];
    }

    // Turn each total into an average, in place.
    var fields = Object.keys(this._fields);

    for (var j = 0; j < fields.length; j++) {
      var fieldName = fields[j];
      accumulator[fieldName] =
        accumulator[fieldName] / documentsWithField[fieldName];
    }

    this.averageFieldLength = accumulator;
  };
  /**
   * Builds a lunr.Vector for every indexed field of every document and stores
   * them on `fieldVectors`, keyed by field ref.
   *
   * Each vector element sits at the term's index in the inverted index and
   * holds a score derived from the term's idf, its frequency within the
   * field, the field's length relative to the average length of that field,
   * and the _k1 and _b tuning parameters. The score is then multiplied by the
   * field boost and the document boost (each defaulting to 1).
   *
   * Relies on `averageFieldLength` having been calculated already.
   *
   * @private
   */
  lunr.Builder.prototype.createFieldVectors = function() {
    var vectorsByRef = {},
      refStrings = Object.keys(this.fieldTermFrequencies),
      refCount = refStrings.length,
      idfByTerm = Object.create(null);

    for (var r = 0; r < refCount; r++) {
      var fieldRef = lunr.FieldRef.fromString(refStrings[r]),
        fieldName = fieldRef.fieldName,
        fieldLength = this.fieldLengths[fieldRef],
        vector = new lunr.Vector(),
        frequencies = this.fieldTermFrequencies[fieldRef],
        fieldTerms = Object.keys(frequencies),
        termCount = fieldTerms.length,
        fieldBoost = this._fields[fieldName].boost || 1,
        docBoost = this._documents[fieldRef.docRef].boost || 1;

      for (var t = 0; t < termCount; t++) {
        var term = fieldTerms[t],
          tf = frequencies[term],
          termIndex = this.invertedIndex[term]._index,
          idf = idfByTerm[term];

        // the idf of a term is the same for every field, so compute it once
        if (idf === undefined) {
          idf = lunr.idf(this.invertedIndex[term], this.documentCount);
          idfByTerm[term] = idf;
        }

        var numerator = idf * ((this._k1 + 1) * tf),
          lengthNorm =
            1 -
            this._b +
            this._b * (fieldLength / this.averageFieldLength[fieldName]),
          denominator = this._k1 * lengthNorm + tf,
          score = (numerator / denominator) * fieldBoost * docBoost;

        // Rounds to three decimal places, e.g. 1.23456789 becomes 1.235.
        // Reducing the precision so that the vectors take up less
        // space when serialised. Doing it now so that they behave
        // the same before and after serialisation.
        var roundedScore = Math.round(score * 1000) / 1000;

        vector.insert(termIndex, roundedScore);
      }

      vectorsByRef[fieldRef] = vector;
    }

    this.fieldVectors = vectorsByRef;
  };
  /**
   * Creates a lunr.TokenSet from every term in the inverted index, passing
   * the terms in sorted order, and stores it on `tokenSet`.
   *
   * @private
   */
  lunr.Builder.prototype.createTokenSet = function() {
    var sortedTerms = Object.keys(this.invertedIndex);
    sortedTerms.sort();

    this.tokenSet = lunr.TokenSet.fromArray(sortedTerms);
  };
  /**
   * Builds the index, creating an instance of lunr.Index.
   *
   * Average field lengths, field vectors and the token set are derived, in
   * that order, from the documents added so far. The resulting index is given
   * the builder's search pipeline as its pipeline.
   *
   * This completes the indexing process and should only be called
   * once all documents have been added to the index.
   *
   * @returns {lunr.Index}
   */
  lunr.Builder.prototype.build = function() {
    this.calculateAverageFieldLengths();
    this.createFieldVectors();
    this.createTokenSet();

    var indexAttributes = {
      invertedIndex: this.invertedIndex,
      fieldVectors: this.fieldVectors,
      tokenSet: this.tokenSet,
      fields: Object.keys(this._fields),
      pipeline: this.searchPipeline
    };

    return new lunr.Index(indexAttributes);
  };
  /**
   * Applies a plugin to the index builder.
   *
   * A plugin is just a function that encapsulates some custom behaviour to be
   * applied when building the index.
   *
   * The plugin is called with the index builder both as its context and as
   * its first argument; any additional arguments passed to `use` follow.
   *
   * @param {Function} fn The plugin to apply.
   */
  lunr.Builder.prototype.use = function(fn) {
    var extraArgs = Array.prototype.slice.call(arguments, 1),
      pluginArgs = [this].concat(extraArgs);

    fn.apply(this, pluginArgs);
  };
  /**
   * Contains and collects metadata about a matching document.
   * A single instance of lunr.MatchData is returned as part of every
   * lunr.Index~Result.
   *
   * When called without a term the instance starts out with empty metadata.
   *
   * @constructor
   * @param {string} term - The term this match data is associated with
   * @param {string} field - The field in which the term was found
   * @param {object} metadata - The metadata recorded about this term in this field
   * @property {object} metadata - A cloned collection of metadata associated with this document.
   * @see {@link lunr.Index~Result}
   */
  lunr.MatchData = function(term, field, metadata) {
    var source = metadata || {},
      copy = Object.create(null);

    // Each metadata value is copied with slice so that combining match data
    // later on cannot mutate the original that was passed in.
    Object.keys(source).forEach(function(key) {
      copy[key] = source[key].slice();
    });

    this.metadata = Object.create(null);

    if (term === undefined) {
      return;
    }

    this.metadata[term] = Object.create(null);
    this.metadata[term][field] = copy;
  };
  /**
   * An instance of lunr.MatchData will be created for every term that matches
   * a document. However only one instance is required in a lunr.Index~Result.
   * This method merges the metadata of another lunr.MatchData instance into
   * this object's metadata.
   *
   * For a term/field/key that this instance does not have yet the other
   * instance's value is adopted as is; otherwise the other value is
   * concatenated onto the existing one.
   *
   * @param {lunr.MatchData} otherMatchData - Another instance of match data to merge with this one.
   * @see {@link lunr.Index~Result}
   */
  lunr.MatchData.prototype.combine = function(otherMatchData) {
    var incoming = otherMatchData.metadata,
      own = this.metadata,
      incomingTerms = Object.keys(incoming);

    for (var t = 0; t < incomingTerms.length; t++) {
      var term = incomingTerms[t],
        incomingByField = incoming[term],
        fieldNames = Object.keys(incomingByField);

      if (own[term] == undefined) {
        own[term] = Object.create(null);
      }
      var ownByField = own[term];

      for (var f = 0; f < fieldNames.length; f++) {
        var field = fieldNames[f],
          incomingByKey = incomingByField[field],
          metadataKeys = Object.keys(incomingByKey);

        if (ownByField[field] == undefined) {
          ownByField[field] = Object.create(null);
        }
        var ownByKey = ownByField[field];

        for (var k = 0; k < metadataKeys.length; k++) {
          var key = metadataKeys[k];

          ownByKey[key] =
            ownByKey[key] == undefined
              ? incomingByKey[key]
              : ownByKey[key].concat(incomingByKey[key]);
        }
      }
    }
  };
  /**
   * Add metadata for a term/field pair to this instance of match data.
   *
   * The first time a term/field pair is seen the metadata is copied into a
   * fresh object (array values are copied with slice), mirroring what the
   * lunr.MatchData constructor does. Storing the caller's object directly
   * would let a later call for the same term/field write merged values into
   * it, mutating data owned by the caller.
   *
   * When the term/field pair already exists, each value is concatenated onto
   * the existing value for its key, or copied in when the key is new.
   *
   * @param {string} term - The term this match data is associated with
   * @param {string} field - The field in which the term was found
   * @param {object} metadata - The metadata recorded about this term in this field
   */
  lunr.MatchData.prototype.add = function(term, field, metadata) {
    var incoming = metadata || {},
      metadataKeys = Object.keys(incoming),
      i,
      key,
      value;

    if (!(term in this.metadata)) {
      this.metadata[term] = Object.create(null);
    }

    var byField = this.metadata[term];

    if (!(field in byField)) {
      // First sighting of this term/field pair: keep a private copy.
      var copy = Object.create(null);

      for (i = 0; i < metadataKeys.length; i++) {
        key = metadataKeys[i];
        value = incoming[key];
        copy[key] = Array.isArray(value) ? value.slice() : value;
      }

      byField[field] = copy;
      return;
    }

    var existing = byField[field];

    for (i = 0; i < metadataKeys.length; i++) {
      key = metadataKeys[i];
      value = incoming[key];

      if (key in existing) {
        // concat returns a new array, so neither input array is modified
        existing[key] = existing[key].concat(value);
      } else {
        existing[key] = Array.isArray(value) ? value.slice() : value;
      }
    }
  };
  /**
   * A lunr.Query provides a programmatic way of defining queries to be
   * performed against a {@link lunr.Index}.
   *
   * Prefer constructing a lunr.Query using the {@link lunr.Index#query} method
   * so the query object is pre-initialized with the right index fields.
   *
   * A new query starts out with no clauses.
   *
   * @constructor
   * @property {lunr.Query~Clause[]} clauses - An array of query clauses.
   * @property {string[]} allFields - An array of all available fields in a lunr.Index.
   */
  lunr.Query = function(allFields) {
    var noClauses = [];

    this.clauses = noClauses;
    this.allFields = allFields;
  };
  /**
   * Constants for indicating what kind of automatic wildcard insertion will be
   * used when constructing a query clause.
   *
   * This allows wildcards to be added to the beginning and end of a term
   * without having to manually do any string concatenation.
   *
   * The wildcard constants can be bitwise combined to select both leading and
   * trailing wildcards.
   *
   * The value is a String object wrapping "*", which lets it carry the
   * constants as properties while still being loosely comparable with the
   * wildcard character itself.
   *
   * @constant
   * @default
   * @property {number} wildcard.NONE - The term will have no wildcards inserted, this is the default behaviour
   * @property {number} wildcard.LEADING - Prepend the term with a wildcard, unless a leading wildcard already exists
   * @property {number} wildcard.TRAILING - Append a wildcard to the term, unless a trailing wildcard already exists
   * @see lunr.Query~Clause
   * @see lunr.Query#clause
   * @see lunr.Query#term
   * @example <caption>query term with trailing wildcard</caption>
   * query.term('foo', { wildcard: lunr.Query.wildcard.TRAILING })
   * @example <caption>query term with leading and trailing wildcard</caption>
   * query.term('foo', {
   *   wildcard: lunr.Query.wildcard.LEADING | lunr.Query.wildcard.TRAILING
   * })
   */
  lunr.Query.wildcard = (function() {
    var wildcard = new String("*");

    wildcard.NONE = 0;
    wildcard.LEADING = 1;
    wildcard.TRAILING = 2;

    return wildcard;
  })();
  /**
   * Constants for indicating what kind of presence a term must have in
   * matching documents.
   *
   * - OPTIONAL (1): the term's presence in a document is optional; this is
   *   the default value.
   * - REQUIRED (2): the term's presence in a document is required; documents
   *   that do not contain this term will not be returned.
   * - PROHIBITED (3): the term's presence in a document is prohibited;
   *   documents that do contain this term will not be returned.
   *
   * @constant
   * @enum {number}
   * @see lunr.Query~Clause
   * @see lunr.Query#clause
   * @see lunr.Query#term
   * @example <caption>query term with required presence</caption>
   * query.term('foo', { presence: lunr.Query.presence.REQUIRED })
   */
  lunr.Query.presence = {
    OPTIONAL: 1,
    REQUIRED: 2,
    PROHIBITED: 3
  };
  /**
   * A single clause in a {@link lunr.Query} contains a term and details on how to
   * match that term against a {@link lunr.Index}.
   *
   * @typedef {Object} lunr.Query~Clause
   * @property {string[]} fields - The fields in an index this clause should be matched against.
   * @property {number} [boost=1] - Any boost that should be applied when matching this clause.
   * @property {number} [editDistance] - Whether the term should have fuzzy matching applied, and how fuzzy the match should be.
   * @property {boolean} [usePipeline] - Whether the term should be passed through the search pipeline.
   * @property {number} [wildcard=lunr.Query.wildcard.NONE] - Whether the term should have wildcards appended or prepended.
   * @property {number} [presence=lunr.Query.presence.OPTIONAL] - The term's presence in any matching documents.
   */
  /**
   * Adds a {@link lunr.Query~Clause} to this query.
   *
   * The passed clause object is completed in place: any of `fields`, `boost`,
   * `usePipeline`, `wildcard` and `presence` that are missing receive their
   * defaults (all fields, 1, true, NONE and OPTIONAL respectively), and the
   * term gains a leading and/or trailing "*" when the wildcard flags ask for
   * one and it is not already there. The same object is then appended to
   * `this.clauses`.
   *
   * @param {lunr.Query~Clause} clause - The clause to add to this query.
   * @see lunr.Query~Clause
   * @returns {lunr.Query}
   */
  lunr.Query.prototype.clause = function(clause) {
    var wildcard = lunr.Query.wildcard;

    // `in` (rather than a truthiness test) keeps explicitly supplied falsy values.
    var applyDefault = function(key, value) {
      if (!(key in clause)) {
        clause[key] = value;
      }
    };

    applyDefault("fields", this.allFields);
    applyDefault("boost", 1);
    applyDefault("usePipeline", true);
    applyDefault("wildcard", wildcard.NONE);

    // The loose comparisons against the String object coerce it to "*".
    var wantsLeading = (clause.wildcard & wildcard.LEADING) !== 0;
    if (wantsLeading && clause.term.charAt(0) != wildcard) {
      clause.term = "*" + clause.term;
    }

    var wantsTrailing = (clause.wildcard & wildcard.TRAILING) !== 0;
    if (wantsTrailing && clause.term.slice(-1) != wildcard) {
      clause.term = "" + clause.term + "*";
    }

    applyDefault("presence", lunr.Query.presence.OPTIONAL);

    this.clauses.push(clause);
    return this;
  };
  /**
   * Reports whether this query is negated, i.e. whether every one of its
   * clauses has a presence of prohibited. Such queries require some special
   * processing to return the expected results. A query without any clauses
   * is reported as negated.
   *
   * @returns boolean
   */
  lunr.Query.prototype.isNegated = function() {
    var clauses = this.clauses;
    var idx = 0;

    while (idx < clauses.length) {
      // A single clause that is not prohibited makes the query non-negated.
      if (clauses[idx].presence != lunr.Query.presence.PROHIBITED) {
        return false;
      }
      idx += 1;
    }

    return true;
  };
  /**
   * Adds a term to the current query, under the covers this will create a {@link lunr.Query~Clause}
   * and add it to the list of clauses that make up this query.
   *
   * The term is used as is, i.e. no tokenization will be performed by this method. Instead conversion
   * to a token or token-like string should be done before calling this method.
   *
   * The term will be converted to a string by calling `toString`. Multiple terms can be passed as an
   * array, each term in the array will share the same options.
   *
   * The options object is never modified: its enumerable properties are copied onto a fresh clause
   * object, so the same options object can safely be reused across several calls.
   *
   * @param {object|object[]} term - The term(s) to add to the query.
   * @param {object} [options] - Any additional properties to add to the query clause.
   * @returns {lunr.Query}
   * @see lunr.Query#clause
   * @see lunr.Query~Clause
   * @example <caption>adding a single term to a query</caption>
   * query.term("foo")
   * @example <caption>adding a single term to a query and specifying search fields, term boost and automatic trailing wildcard</caption>
   * query.term("foo", {
   *   fields: ["title"],
   *   boost: 10,
   *   wildcard: lunr.Query.wildcard.TRAILING
   * })
   * @example <caption>using lunr.tokenizer to convert a string to tokens before using them as terms</caption>
   * query.term(lunr.tokenizer("foo bar"))
   */
  lunr.Query.prototype.term = function(term, options) {
    if (Array.isArray(term)) {
      term.forEach(function(t) {
        this.term(t, lunr.utils.clone(options));
      }, this);
      return this;
    }

    // Build a fresh clause instead of adopting the caller's object. Adopting
    // it would write `term` (and the clause defaults) into the caller's
    // options, and two calls sharing one options object would push the very
    // same clause object twice, the later term overwriting the earlier one.
    // `for...in` over null/undefined simply iterates nothing.
    var clause = {};
    for (var key in options) {
      clause[key] = options[key];
    }

    clause.term = term.toString();
    this.clause(clause);
    return this;
  };
  /**
   * Error thrown when a query string cannot be parsed.
   *
   * Instances inherit from Error and additionally carry the position of the
   * offending lexeme within the query string.
   *
   * @constructor
   * @param {string} message - Description of the parse failure.
   * @param {number} start - Start position of the offending lexeme.
   * @param {number} end - End position of the offending lexeme.
   */
  lunr.QueryParseError = function(message, start, end) {
    this.name = "QueryParseError";
    this.message = message;
    this.start = start;
    this.end = end;

    // Record a stack trace for this particular instance. Name and message are
    // assigned first so that the trace header can include them.
    if (typeof Error.captureStackTrace === "function") {
      Error.captureStackTrace(this, lunr.QueryParseError);
    } else {
      // Engines without captureStackTrace: borrow the stack of a throwaway
      // Error, kept non-enumerable like a native error's stack.
      Object.defineProperty(this, "stack", {
        value: new Error(message).stack,
        writable: true,
        configurable: true
      });
    }
  };

  // Inherit from Error.prototype directly rather than from an Error instance;
  // an instance would share the stack captured at load time with every
  // QueryParseError and leave `constructor` pointing at Error.
  lunr.QueryParseError.prototype = Object.create(Error.prototype, {
    constructor: {
      value: lunr.QueryParseError,
      writable: true,
      configurable: true
    }
  });
  /**
   * Lexer that splits a query string into lexemes.
   *
   * @constructor
   * @param {string} str - The query string to lex.
   */
  lunr.QueryLexer = function(str) {
    this.lexemes = []; // lexemes emitted so far
    this.str = str;
    this.length = str.length;
    this.pos = 0; // index of the next character to read
    this.start = 0; // index where the lexeme being built begins
    this.escapeCharPositions = []; // indexes of escape characters inside the current lexeme
  };

  /**
   * Runs the lexer: starting from lexText, each state function is called with
   * the lexer and returns the next state, until a falsy state is returned.
   */
  lunr.QueryLexer.prototype.run = function() {
    for (var state = lunr.QueryLexer.lexText; state; state = state(this)) {
      // all of the work happens in the state functions
    }
  };

  /**
   * Returns the text between `start` and `pos` with the characters at the
   * recorded escape positions left out, then clears the recorded positions.
   *
   * @returns {string}
   */
  lunr.QueryLexer.prototype.sliceString = function() {
    var text = "",
      from = this.start;

    for (var i = 0; i < this.escapeCharPositions.length; i++) {
      var escapeAt = this.escapeCharPositions[i];
      text += this.str.slice(from, escapeAt);
      from = escapeAt + 1; // resume right after the escape character
    }
    text += this.str.slice(from, this.pos);

    this.escapeCharPositions.length = 0;
    return text;
  };

  /**
   * Appends a lexeme of the given type covering `start`..`pos` to `lexemes`
   * and begins the next lexeme at the current position.
   *
   * @param {string} type - One of the lunr.QueryLexer lexeme type constants.
   */
  lunr.QueryLexer.prototype.emit = function(type) {
    var lexeme = {
      type: type,
      str: this.sliceString(),
      start: this.start,
      end: this.pos
    };
    this.lexemes.push(lexeme);
    this.start = this.pos;
  };

  /**
   * Records the index just before the current position as an escape
   * character and advances the position by one more character.
   */
  lunr.QueryLexer.prototype.escapeCharacter = function() {
    this.escapeCharPositions.push(this.pos - 1);
    this.pos += 1;
  };

  /**
   * Reads the character at the current position and advances past it.
   *
   * @returns {string} The character read, or lunr.QueryLexer.EOS once the
   * position has reached the end of the string.
   */
  lunr.QueryLexer.prototype.next = function() {
    if (this.pos >= this.length) {
      return lunr.QueryLexer.EOS;
    }
    return this.str.charAt(this.pos++);
  };

  /**
   * @returns {number} The number of characters consumed for the current lexeme.
   */
  lunr.QueryLexer.prototype.width = function() {
    return this.pos - this.start;
  };

  /**
   * Discards the text consumed so far. When nothing has been consumed the
   * position is first advanced by one character, so one character is skipped.
   */
  lunr.QueryLexer.prototype.ignore = function() {
    if (this.start == this.pos) {
      this.pos += 1;
    }
    this.start = this.pos;
  };

  /**
   * Steps the position back by one character.
   */
  lunr.QueryLexer.prototype.backup = function() {
    this.pos -= 1;
  };

  /**
   * Consumes a run of ASCII digits (character codes 48 to 57). The first
   * non-digit character read is handed back, unless it was the
   * end-of-string marker.
   */
  lunr.QueryLexer.prototype.acceptDigitRun = function() {
    var ch, code;

    do {
      ch = this.next();
      code = ch.charCodeAt(0);
    } while (code > 47 && code < 58);

    if (ch != lunr.QueryLexer.EOS) {
      this.backup();
    }
  };

  /**
   * @returns {boolean} Whether any characters remain to be read.
   */
  lunr.QueryLexer.prototype.more = function() {
    return this.pos < this.length;
  };
  // Lexeme type tags; the value of each tag is its own name. EOS is also the
  // marker returned by lunr.QueryLexer#next at the end of the string.
  ["EOS", "FIELD", "TERM", "EDIT_DISTANCE", "BOOST", "PRESENCE"].forEach(
    function(tag) {
      lunr.QueryLexer[tag] = tag;
    }
  );
  /**
   * Lexer state entered by lexText after reading a ":". Emits the text read
   * before the ":" as a FIELD lexeme and skips the ":" itself.
   *
   * @param {lunr.QueryLexer} lexer
   * @returns {function} The lexText state.
   */
  lunr.QueryLexer.lexField = function(lexer) {
    lexer.backup(); // step back onto the ":" so it is not part of the field name
    lexer.emit(lunr.QueryLexer.FIELD);
    lexer.ignore(); // nothing pending after emit, so this skips one character

    return lunr.QueryLexer.lexText;
  };
  /**
   * Lexer state entered by lexText after reading a term separator. When more
   * than the separator character itself has been consumed, the text before it
   * is emitted as a TERM lexeme; the separator is then discarded.
   *
   * @param {lunr.QueryLexer} lexer
   * @returns {function|undefined} The lexText state while input remains,
   * otherwise undefined, which ends lexing.
   */
  lunr.QueryLexer.lexTerm = function(lexer) {
    var hasTermText = lexer.width() > 1;

    if (hasTermText) {
      lexer.backup();
      lexer.emit(lunr.QueryLexer.TERM);
    }
    lexer.ignore();

    return lexer.more() ? lunr.QueryLexer.lexText : undefined;
  };
  /**
   * Lexer state entered by lexText on a "~". Skips that character, consumes
   * the digits that follow and emits them as an EDIT_DISTANCE lexeme.
   *
   * @param {lunr.QueryLexer} lexer
   * @returns {function} The lexText state.
   */
  lunr.QueryLexer.lexEditDistance = function(lexer) {
    var resume = lunr.QueryLexer.lexText;

    lexer.ignore(); // drop the "~"
    lexer.acceptDigitRun();
    lexer.emit(lunr.QueryLexer.EDIT_DISTANCE);

    return resume;
  };
  /**
   * Lexer state entered by lexText on a "^". Skips that character, consumes
   * the digits that follow and emits them as a BOOST lexeme.
   *
   * @param {lunr.QueryLexer} lexer
   * @returns {function} The lexText state.
   */
  lunr.QueryLexer.lexBoost = function(lexer) {
    var resume = lunr.QueryLexer.lexText;

    lexer.ignore(); // drop the "^"
    lexer.acceptDigitRun();
    lexer.emit(lunr.QueryLexer.BOOST);

    return resume;
  };
  /**
   * Lexer state entered by lexText at the end of the string. Emits any text
   * still pending as a final TERM lexeme. Returns nothing, which ends lexing.
   *
   * @param {lunr.QueryLexer} lexer
   */
  lunr.QueryLexer.lexEOS = function(lexer) {
    var pendingWidth = lexer.width();

    if (pendingWidth > 0) {
      lexer.emit(lunr.QueryLexer.TERM);
    }
  };
  // The separator used to split terms in a search string. It is the same
  // separator the tokenizer uses when tokenising the fields of a document;
  // the two have to agree, otherwise some tokens within a document cannot
  // be searched for.
  //
  // The tokenizer separator can be changed by the user, so it _might_ clash
  // with one of the special characters already used within the search
  // string, e.g. ":".
  //
  // A separator changed in such a way can therefore make some words
  // unsearchable using a search string.
  lunr.QueryLexer.termSeparator = lunr.tokenizer.separator;
  /**
   * Main lexer state. Reads characters until one with a special meaning is
   * found and returns the state that handles it:
   *
   * - end of string: lexEOS
   * - "\": the escaped character is skipped over and reading continues
   * - ":": lexField
   * - "~": pending text is emitted as a TERM, then lexEditDistance
   * - "^": pending text is emitted as a TERM, then lexBoost
   * - leading "+" or "-": emitted as a PRESENCE lexeme, then lexText again
   * - a character matching termSeparator: lexTerm
   *
   * @param {lunr.QueryLexer} lexer
   * @returns {function} The next lexer state.
   */
  lunr.QueryLexer.lexText = function(lexer) {
    // Hands back the modifier character just read and emits whatever text
    // precedes it as a TERM lexeme.
    var emitPendingTerm = function() {
      lexer.backup();
      if (lexer.width() > 0) {
        lexer.emit(lunr.QueryLexer.TERM);
      }
    };

    for (;;) {
      var ch = lexer.next();

      if (ch == lunr.QueryLexer.EOS) {
        return lunr.QueryLexer.lexEOS;
      }

      // Escape character is '\'
      if (ch === "\\") {
        lexer.escapeCharacter();
        continue;
      }

      if (ch == ":") {
        return lunr.QueryLexer.lexField;
      }

      if (ch == "~") {
        emitPendingTerm();
        return lunr.QueryLexer.lexEditDistance;
      }

      if (ch == "^") {
        emitPendingTerm();
        return lunr.QueryLexer.lexBoost;
      }

      // "+" indicates term presence is required and "-" that it is
      // prohibited; the width check ensures that only a leading "+" or "-"
      // is treated as a presence operator.
      if ((ch == "+" || ch == "-") && lexer.width() === 1) {
        lexer.emit(lunr.QueryLexer.PRESENCE);
        return lunr.QueryLexer.lexText;
      }

      if (ch.match(lunr.QueryLexer.termSeparator)) {
        return lunr.QueryLexer.lexTerm;
      }
    }
  };
  /**
   * Parser that turns a query string into clauses on a query object.
   *
   * @constructor
   * @param {string} str - The query string to parse.
   * @param {lunr.Query} query - The query that receives the parsed clauses.
   */
  lunr.QueryParser = function(str, query) {
    this.lexer = new lunr.QueryLexer(str);
    this.query = query;
    this.currentClause = {}; // clause being assembled from the lexemes
    this.lexemeIdx = 0; // index of the next lexeme to consume
  };

  /**
   * Lexes the query string, then runs the parser states: starting from
   * parseClause, each state function is called with the parser and returns
   * the next state, until a falsy state is returned.
   *
   * @returns {lunr.Query} The query passed to the constructor.
   */
  lunr.QueryParser.prototype.parse = function() {
    this.lexer.run();
    this.lexemes = this.lexer.lexemes;

    for (var state = lunr.QueryParser.parseClause; state; state = state(this)) {
      // all of the work happens in the state functions
    }

    return this.query;
  };

  /**
   * @returns {object|undefined} The next lexeme without consuming it, or
   * undefined when every lexeme has been consumed.
   */
  lunr.QueryParser.prototype.peekLexeme = function() {
    return this.lexemes[this.lexemeIdx];
  };

  /**
   * @returns {object|undefined} The next lexeme, after moving past it.
   */
  lunr.QueryParser.prototype.consumeLexeme = function() {
    var current = this.peekLexeme();
    this.lexemeIdx += 1;
    return current;
  };

  /**
   * Adds the clause assembled so far to the query and starts a new, empty one.
   */
  lunr.QueryParser.prototype.nextClause = function() {
    this.query.clause(this.currentClause);
    this.currentClause = {};
  };
  /**
   * Parser state at the start of a clause. Looks at the next lexeme without
   * consuming it and returns the state that handles its type.
   *
   * @param {lunr.QueryParser} parser
   * @returns {function|undefined} The next parser state, or undefined when
   * no lexemes remain.
   * @throws {lunr.QueryParseError} When the lexeme is not a presence
   * operator, a field or a term.
   */
  lunr.QueryParser.parseClause = function(parser) {
    var upcoming = parser.peekLexeme();

    if (upcoming == null) {
      return;
    }

    var type = upcoming.type;

    if (type === lunr.QueryLexer.PRESENCE) {
      return lunr.QueryParser.parsePresence;
    }
    if (type === lunr.QueryLexer.FIELD) {
      return lunr.QueryParser.parseField;
    }
    if (type === lunr.QueryLexer.TERM) {
      return lunr.QueryParser.parseTerm;
    }

    var errorMessage = "expected either a field or a term, found " + type;
    if (upcoming.str.length >= 1) {
      errorMessage += " with value '" + upcoming.str + "'";
    }
    throw new lunr.QueryParseError(errorMessage, upcoming.start, upcoming.end);
  };
  /**
   * Parser state for a presence operator. Consumes the operator lexeme and
   * records the matching presence on the clause being built: "-" maps to
   * PROHIBITED and "+" to REQUIRED. The operator must be followed by a field
   * or a term.
   *
   * @param {lunr.QueryParser} parser
   * @returns {function|undefined} parseField or parseTerm depending on the
   * lexeme that follows; undefined when there is no lexeme to consume.
   * @throws {lunr.QueryParseError} When the operator is not recognised, or
   * when it is not followed by a field or a term.
   */
  lunr.QueryParser.parsePresence = function(parser) {
    var lexeme = parser.consumeLexeme(),
      errorMessage;

    if (lexeme == undefined) {
      return;
    }

    switch (lexeme.str) {
      case "-":
        parser.currentClause.presence = lunr.Query.presence.PROHIBITED;
        break;
      case "+":
        parser.currentClause.presence = lunr.Query.presence.REQUIRED;
        break;
      default:
        // A space separates the message from the quoted operator, in line
        // with the other parse error messages.
        errorMessage = "unrecognised presence operator '" + lexeme.str + "'";
        throw new lunr.QueryParseError(errorMessage, lexeme.start, lexeme.end);
    }

    var nextLexeme = parser.peekLexeme();

    if (nextLexeme == undefined) {
      // Nothing follows, so the error points at the operator itself.
      errorMessage = "expecting term or field, found nothing";
      throw new lunr.QueryParseError(errorMessage, lexeme.start, lexeme.end);
    }

    switch (nextLexeme.type) {
      case lunr.QueryLexer.FIELD:
        return lunr.QueryParser.parseField;
      case lunr.QueryLexer.TERM:
        return lunr.QueryParser.parseTerm;
      default:
        errorMessage =
          "expecting term or field, found '" + nextLexeme.type + "'";
        throw new lunr.QueryParseError(
          errorMessage,
          nextLexeme.start,
          nextLexeme.end
        );
    }
  };
  /**
   * Parser state for a field lexeme. Consumes the field name, checks it
   * against the fields known to the query and restricts the clause being
   * built to that single field. A field must be followed by a term.
   *
   * @param {lunr.QueryParser} parser
   * @returns {function|undefined} parseTerm; undefined when there is no
   * lexeme to consume.
   * @throws {lunr.QueryParseError} When the field is not one of the query's
   * fields, or when it is not followed by a term.
   */
  lunr.QueryParser.parseField = function(parser) {
    var fieldLexeme = parser.consumeLexeme();

    if (fieldLexeme == null) {
      return;
    }

    var knownFields = parser.query.allFields;

    if (knownFields.indexOf(fieldLexeme.str) == -1) {
      var quotedFields = knownFields.map(function(fieldName) {
        return "'" + fieldName + "'";
      });
      throw new lunr.QueryParseError(
        "unrecognised field '" +
          fieldLexeme.str +
          "', possible fields: " +
          quotedFields.join(", "),
        fieldLexeme.start,
        fieldLexeme.end
      );
    }

    parser.currentClause.fields = [fieldLexeme.str];

    var upcoming = parser.peekLexeme();

    if (upcoming == null) {
      // Nothing follows, so the error points at the field itself.
      throw new lunr.QueryParseError(
        "expecting term, found nothing",
        fieldLexeme.start,
        fieldLexeme.end
      );
    }

    if (upcoming.type === lunr.QueryLexer.TERM) {
      return lunr.QueryParser.parseTerm;
    }

    throw new lunr.QueryParseError(
      "expecting term, found '" + upcoming.type + "'",
      upcoming.start,
      upcoming.end
    );
  };
  /**
   * Parser state for a term lexeme. Consumes the term, stores it lower-cased
   * on the clause being built and switches the pipeline off for the clause
   * when the term contains a "*". The clause is completed when the term is
   * the last lexeme or is followed by a term, field or presence operator; an
   * edit distance or boost that follows is applied to the same clause.
   *
   * @param {lunr.QueryParser} parser
   * @returns {function|undefined} The state handling the next lexeme, or
   * undefined when no lexemes remain.
   * @throws {lunr.QueryParseError} When the next lexeme has an unexpected type.
   */
  lunr.QueryParser.parseTerm = function(parser) {
    var termLexeme = parser.consumeLexeme();

    if (termLexeme == null) {
      return;
    }

    parser.currentClause.term = termLexeme.str.toLowerCase();

    if (termLexeme.str.indexOf("*") != -1) {
      parser.currentClause.usePipeline = false;
    }

    var upcoming = parser.peekLexeme();

    if (upcoming == null) {
      parser.nextClause();
      return;
    }

    var type = upcoming.type;

    if (type === lunr.QueryLexer.TERM) {
      parser.nextClause();
      return lunr.QueryParser.parseTerm;
    }
    if (type === lunr.QueryLexer.FIELD) {
      parser.nextClause();
      return lunr.QueryParser.parseField;
    }
    if (type === lunr.QueryLexer.EDIT_DISTANCE) {
      return lunr.QueryParser.parseEditDistance;
    }
    if (type === lunr.QueryLexer.BOOST) {
      return lunr.QueryParser.parseBoost;
    }
    if (type === lunr.QueryLexer.PRESENCE) {
      parser.nextClause();
      return lunr.QueryParser.parsePresence;
    }

    throw new lunr.QueryParseError(
      "Unexpected lexeme type '" + type + "'",
      upcoming.start,
      upcoming.end
    );
  };
  /**
   * Parser state for an edit distance lexeme. Consumes the lexeme, parses it
   * as a base-10 integer and stores it on the clause being built. The clause
   * is completed when this is the last lexeme or is followed by a term, field
   * or presence operator; a further edit distance or boost is applied to the
   * same clause.
   *
   * @param {lunr.QueryParser} parser
   * @returns {function|undefined} The state handling the next lexeme, or
   * undefined when no lexemes remain.
   * @throws {lunr.QueryParseError} When the value is not numeric or the next
   * lexeme has an unexpected type.
   */
  lunr.QueryParser.parseEditDistance = function(parser) {
    var distanceLexeme = parser.consumeLexeme();

    if (distanceLexeme == null) {
      return;
    }

    var distance = parseInt(distanceLexeme.str, 10);

    if (isNaN(distance)) {
      throw new lunr.QueryParseError(
        "edit distance must be numeric",
        distanceLexeme.start,
        distanceLexeme.end
      );
    }

    parser.currentClause.editDistance = distance;

    var upcoming = parser.peekLexeme();

    if (upcoming == null) {
      parser.nextClause();
      return;
    }

    var type = upcoming.type;

    if (type === lunr.QueryLexer.TERM) {
      parser.nextClause();
      return lunr.QueryParser.parseTerm;
    }
    if (type === lunr.QueryLexer.FIELD) {
      parser.nextClause();
      return lunr.QueryParser.parseField;
    }
    if (type === lunr.QueryLexer.EDIT_DISTANCE) {
      return lunr.QueryParser.parseEditDistance;
    }
    if (type === lunr.QueryLexer.BOOST) {
      return lunr.QueryParser.parseBoost;
    }
    if (type === lunr.QueryLexer.PRESENCE) {
      parser.nextClause();
      return lunr.QueryParser.parsePresence;
    }

    throw new lunr.QueryParseError(
      "Unexpected lexeme type '" + type + "'",
      upcoming.start,
      upcoming.end
    );
  };
  /**
   * Parser state for a boost lexeme. Consumes the lexeme, parses it as a
   * base-10 integer and stores it on the clause being built. The clause is
   * completed when this is the last lexeme or is followed by a term, field or
   * presence operator; a further edit distance or boost is applied to the
   * same clause.
   *
   * @param {lunr.QueryParser} parser
   * @returns {function|undefined} The state handling the next lexeme, or
   * undefined when no lexemes remain.
   * @throws {lunr.QueryParseError} When the value is not numeric or the next
   * lexeme has an unexpected type.
   */
  lunr.QueryParser.parseBoost = function(parser) {
    var boostLexeme = parser.consumeLexeme();

    if (boostLexeme == null) {
      return;
    }

    var boostValue = parseInt(boostLexeme.str, 10);

    if (isNaN(boostValue)) {
      throw new lunr.QueryParseError(
        "boost must be numeric",
        boostLexeme.start,
        boostLexeme.end
      );
    }

    parser.currentClause.boost = boostValue;

    var upcoming = parser.peekLexeme();

    if (upcoming == null) {
      parser.nextClause();
      return;
    }

    var type = upcoming.type;

    if (type === lunr.QueryLexer.TERM) {
      parser.nextClause();
      return lunr.QueryParser.parseTerm;
    }
    if (type === lunr.QueryLexer.FIELD) {
      parser.nextClause();
      return lunr.QueryParser.parseField;
    }
    if (type === lunr.QueryLexer.EDIT_DISTANCE) {
      return lunr.QueryParser.parseEditDistance;
    }
    if (type === lunr.QueryLexer.BOOST) {
      return lunr.QueryParser.parseBoost;
    }
    if (type === lunr.QueryLexer.PRESENCE) {
      parser.nextClause();
      return lunr.QueryParser.parsePresence;
    }

    throw new lunr.QueryParseError(
      "Unexpected lexeme type '" + type + "'",
      upcoming.start,
      upcoming.end
    );
  };
  /**
   * Export the module via AMD, CommonJS or as a browser global.
   * Export code from https://github.com/umdjs/umd/blob/master/returnExports.js
   */
  (function(root, factory) {
    var hasAmdLoader = typeof define === "function" && define.amd;

    if (hasAmdLoader) {
      // AMD. Register as an anonymous module.
      define(factory);
      return;
    }

    if (typeof exports === "object") {
      /**
       * Node. Does not work with strict CommonJS, but
       * only CommonJS-like environments that support module.exports,
       * like Node.
       */
      module.exports = factory();
      return;
    }

    // Browser globals (root is window)
    root.lunr = factory();
  })(this, function() {
    /**
     * The value returned here becomes the module export; for this
     * library that is the lunr function itself.
     */
    return lunr;
  });
  3128. })();