Finds a particular index at which one sequence occurs in another sequence. Both
the source sequence and the target sequence are expressed in terms other
sequences S’ and T’ with offset and length parameters. This function is designed
to wrap the KMP machinery in a sufficiently general way that all library
sequence searches can use it. It is unlikely you have cause to call it directly:
prefer functions such as StringBuilder#indexOf and Seq#lastIndexOf.
Finds a particular index at which one sequence occurs in another sequence. Like
indexOf
, but finds the latest occurrence rather than earliest.
/* __ *\
** ________ ___ / / ___ Scala API **
** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL **
** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ **
** /____/\___/_/ |_/____/_/ | | **
** |/ **
\* */
package scala
package collection
import immutable. { List , Range }
import generic._
import parallel.ParSeq
import scala.math.Ordering
/** A template trait for sequences of type `Seq[A]`
* $seqInfo
*
* @define seqInfo
* Sequences are special cases of iterable collections of class `Iterable`.
* Unlike iterables, sequences always have a defined order of elements.
* Sequences provide a method `apply` for indexing. Indices range from `0` up to the `length` of
* a sequence. Sequences support a number of methods to find occurrences of elements or subsequences, including
* `segmentLength`, `prefixLength`, `indexWhere`, `indexOf`, `lastIndexWhere`, `lastIndexOf`,
* `startsWith`, `endsWith`, `indexOfSlice`.
*
* Another way to see a sequence is as a `PartialFunction` from `Int` values
* to the element type of the sequence. The `isDefinedAt` method of a sequence
* returns `true` for the interval from `0` until `length`.
*
* Sequences can be accessed in reverse order of their elements, using methods
* `reverse` and `reverseIterator`.
*
* Sequences have two principal subtraits, `IndexedSeq` and `LinearSeq`, which give different guarantees for performance.
* An `IndexedSeq` provides fast random-access of elements and a fast `length` operation.
* A `LinearSeq` provides fast access only to the first element via `head`, but also
* has a fast `tail` operation.
*
* @tparam A the element type of the collection
* @tparam Repr the type of the actual collection containing the elements.
*
* @author Martin Odersky
* @author Matthias Zenger
* @version 1.0, 16/07/2003
* @since 2.8
*
* @define Coll `Seq`
* @define coll sequence
* @define thatinfo the class of the returned collection. Where possible, `That` is
* the same class as the current collection class `Repr`, but this
* depends on the element type `B` being admissible for that class,
* which means that an implicit instance of type `CanBuildFrom[Repr, B, That]`
* is found.
* @define bfinfo an implicit value of class `CanBuildFrom` which determines the
* result class `That` from the current representation type `Repr`
* and the new element type `B`.
* @define orderDependent
* @define orderDependentFold
*/
trait SeqLike [ +A , +Repr ] extends Any with IterableLike [ A , Repr ] with GenSeqLike [ A , Repr ] with Parallelizable [ A , ParSeq [ A ]] { self =>
override protected [ this ] def thisCollection : Seq [ A ] = this . asInstanceOf [ Seq [ A ]]
override protected [ this ] def toCollection ( repr : Repr ) : Seq [ A ] = repr . asInstanceOf [ Seq [ A ]]
def length : Int
def apply ( idx : Int ) : A
protected [ this ] override def parCombiner = ParSeq . newCombiner [ A ]
/** Compares the length of this $coll to a test value.
*
* @param len the test value that gets compared with the length.
* @return A value `x` where
* {{{
* x < 0 if this.length < len
* x == 0 if this.length == len
* x > 0 if this.length > len
* }}}
* The method as implemented here does not call `length` directly; its running time
* is `O(length min len)` instead of `O(length)`. The method should be overwritten
* if computing `length` is cheap.
*/
def lengthCompare ( len : Int ) : Int = {
if ( len < 0 ) 1
else {
var i = 0
val it = iterator
while ( it . hasNext ) {
if ( i == len ) return if ( it . hasNext ) 1 else 0
it . next ()
i += 1
}
i - len
}
}
override /*IterableLike*/ def isEmpty : Boolean = lengthCompare ( 0 ) == 0
/** The size of this $coll, equivalent to `length`.
*
* $willNotTerminateInf
*/
override def size = length
def segmentLength ( p : A => Boolean , from : Int ) : Int = {
var i = 0
val it = iterator . drop ( from )
while ( it . hasNext && p ( it . next ()))
i += 1
i
}
def indexWhere ( p : A => Boolean , from : Int ) : Int = {
var i = from
val it = iterator . drop ( from )
while ( it . hasNext ) {
if ( p ( it . next ())) return i
else i += 1
}
- 1
}
def lastIndexWhere ( p : A => Boolean , end : Int ) : Int = {
var i = length - 1
val it = reverseIterator
while ( it . hasNext && { val elem = it . next (); ( i > end || ! p ( elem )) }) i -= 1
i
}
/** Iterates over distinct permutations.
*
* @return An Iterator which traverses the distinct permutations of this $coll.
* @example `"abb".permutations = Iterator(abb, bab, bba)`
*/
def permutations : Iterator [ Repr ] =
if ( isEmpty ) Iterator ( repr )
else new PermutationsItr
/** Iterates over combinations. A _combination_ of length `n` is a subsequence of
* the original sequence, with the elements taken in order. Thus, `"xy"` and `"yy"`
* are both length-2 combinations of `"xyy"`, but `"yx"` is not. If there is
* more than one way to generate the same subsequence, only one will be returned.
*
* For example, `"xyyy"` has three different ways to generate `"xy"` depending on
* whether the first, second, or third `"y"` is selected. However, since all are
* identical, only one will be chosen. Which of the three will be taken is an
* implementation detail that is not defined.
*
* @return An Iterator which traverses the possible n-element combinations of this $coll.
* @example `"abbbc".combinations(2) = Iterator(ab, ac, bb, bc)`
*/
def combinations ( n : Int ) : Iterator [ Repr ] =
if ( n < 0 || n > size ) Iterator . empty
else new CombinationsItr ( n )
private class PermutationsItr extends AbstractIterator [ Repr ] {
private [ this ] val ( elms , idxs ) = init ()
private var _hasNext = true
def hasNext = _hasNext
def next () : Repr = {
if (! hasNext )
Iterator . empty . next ()
val forcedElms = new mutable . ArrayBuffer [ A ]( elms . size ) ++= elms
val result = ( self . newBuilder ++= forcedElms ). result ()
var i = idxs . length - 2
while ( i >= 0 && idxs ( i ) >= idxs ( i + 1 ))
i -= 1
if ( i < 0 )
_hasNext = false
else {
var j = idxs . length - 1
while ( idxs ( j ) <= idxs ( i )) j -= 1
swap ( i , j )
val len = ( idxs . length - i ) / 2
var k = 1
while ( k <= len ) {
swap ( i + k , idxs . length - k )
k += 1
}
}
result
}
private def swap ( i : Int , j : Int ) {
val tmpI = idxs ( i )
idxs ( i ) = idxs ( j )
idxs ( j ) = tmpI
val tmpE = elms ( i )
elms ( i ) = elms ( j )
elms ( j ) = tmpE
}
private [ this ] def init () = {
val m = mutable . HashMap [ A , Int ]()
val ( es , is ) = ( thisCollection map ( e => ( e , m . getOrElseUpdate ( e , m . size ))) sortBy ( _ . _2 )). unzip
( es . toBuffer , is . toArray )
}
}
private class CombinationsItr ( n : Int ) extends AbstractIterator [ Repr ] {
// generating all nums such that:
// (1) nums(0) + .. + nums(length-1) = n
// (2) 0 <= nums(i) <= cnts(i), where 0 <= i <= cnts.length-1
private val ( elms , cnts , nums ) = init ()
private val offs = cnts . scanLeft ( 0 )( _ + _ )
private var _hasNext = true
def hasNext = _hasNext
def next () : Repr = {
if (! hasNext )
Iterator . empty . next ()
/* Calculate this result. */
val buf = self . newBuilder
for ( k <- 0 until nums . length ; j <- 0 until nums ( k ))
buf += elms ( offs ( k )+ j )
val res = buf . result ()
/* Prepare for the next call to next. */
var idx = nums . length - 1
while ( idx >= 0 && nums ( idx ) == cnts ( idx ))
idx -= 1
idx = nums . lastIndexWhere ( _ > 0 , idx - 1 )
if ( idx < 0 )
_hasNext = false
else {
var sum = nums . slice ( idx + 1 , nums . length ). sum + 1
nums ( idx ) -= 1
for ( k <- ( idx + 1 ) until nums . length ) {
nums ( k ) = sum min cnts ( k )
sum -= nums ( k )
}
}
res
}
/** Rearrange seq to newSeq a0a0..a0a1..a1...ak..ak such that
* seq.count(_ == aj) == cnts(j)
*
* @return (newSeq,cnts,nums)
*/
private def init () : ( IndexedSeq [ A ], Array [ Int ], Array [ Int ]) = {
val m = mutable . HashMap [ A , Int ]()
// e => (e, weight(e))
val ( es , is ) = ( thisCollection map ( e => ( e , m . getOrElseUpdate ( e , m . size ))) sortBy ( _ . _2 )). unzip
val cs = new Array [ Int ]( m . size )
is foreach ( i => cs ( i ) += 1 )
val ns = new Array [ Int ]( cs . length )
var r = n
0 until ns . length foreach { k =>
ns ( k ) = r min cs ( k )
r -= ns ( k )
}
( es . toIndexedSeq , cs , ns )
}
}
def reverse : Repr = {
var xs : List [ A ] = List ()
for ( x <- this )
xs = x :: xs
val b = newBuilder
b . sizeHint ( this )
for ( x <- xs )
b += x
b . result ()
}
def reverseMap [ B , That ]( f : A => B )( implicit bf : CanBuildFrom [ Repr , B , That ]) : That = {
var xs : List [ A ] = List ()
for ( x <- this )
xs = x :: xs
val b = bf ( repr )
for ( x <- xs )
b += f ( x )
b . result ()
}
/** An iterator yielding elements in reversed order.
*
* $willNotTerminateInf
*
* Note: `xs.reverseIterator` is the same as `xs.reverse.iterator` but might be more efficient.
*
* @return an iterator yielding the elements of this $coll in reversed order
*/
def reverseIterator : Iterator [ A ] = toCollection ( reverse ). iterator
def startsWith [ B ]( that : GenSeq [ B ], offset : Int ) : Boolean = {
val i = this . iterator drop offset
val j = that . iterator
while ( j . hasNext && i . hasNext )
if ( i . next != j . next )
return false
! j . hasNext
}
def endsWith [ B ]( that : GenSeq [ B ]) : Boolean = {
val i = this . iterator . drop ( length - that . length )
val j = that . iterator
while ( i . hasNext && j . hasNext )
if ( i . next != j . next )
return false
! j . hasNext
}
/** Finds first index where this $coll contains a given sequence as a slice.
* $mayNotTerminateInf
* @param that the sequence to test
* @return the first index such that the elements of this $coll starting at this index
* match the elements of sequence `that`, or `-1` of no such subsequence exists.
*/
def indexOfSlice [ B >: A ]( that : GenSeq [ B ]) : Int = indexOfSlice ( that , 0 )
/** Finds first index after or at a start index where this $coll contains a given sequence as a slice.
* $mayNotTerminateInf
* @param that the sequence to test
* @param from the start index
* @return the first index `>= from` such that the elements of this $coll starting at this index
* match the elements of sequence `that`, or `-1` of no such subsequence exists.
*/
def indexOfSlice [ B >: A ]( that : GenSeq [ B ], from : Int ) : Int =
if ( this . hasDefiniteSize && that . hasDefiniteSize ) {
val l = length
val tl = that . length
val clippedFrom = math . max ( 0 , from )
if ( from > l ) - 1
else if ( tl < 1 ) clippedFrom
else if ( l < tl ) - 1
else SeqLike . kmpSearch ( thisCollection , clippedFrom , l , that . seq , 0 , tl , forward = true )
}
else {
var i = from
var s : Seq [ A ] = thisCollection drop i
while (! s . isEmpty ) {
if ( s startsWith that )
return i
i += 1
s = s . tail
}
- 1
}
/** Finds last index where this $coll contains a given sequence as a slice.
* $willNotTerminateInf
* @param that the sequence to test
* @return the last index such that the elements of this $coll starting a this index
* match the elements of sequence `that`, or `-1` of no such subsequence exists.
*/
def lastIndexOfSlice [ B >: A ]( that : GenSeq [ B ]) : Int = lastIndexOfSlice ( that , length )
/** Finds last index before or at a given end index where this $coll contains a given sequence as a slice.
* @param that the sequence to test
* @param end the end index
* @return the last index `<= end` such that the elements of this $coll starting at this index
* match the elements of sequence `that`, or `-1` of no such subsequence exists.
*/
def lastIndexOfSlice [ B >: A ]( that : GenSeq [ B ], end : Int ) : Int = {
val l = length
val tl = that . length
val clippedL = math . min ( l - tl , end )
if ( end < 0 ) - 1
else if ( tl < 1 ) clippedL
else if ( l < tl ) - 1
else SeqLike . kmpSearch ( thisCollection , 0 , clippedL + tl , that . seq , 0 , tl , forward = false )
}
/** Tests whether this $coll contains a given sequence as a slice.
* $mayNotTerminateInf
* @param that the sequence to test
* @return `true` if this $coll contains a slice with the same elements
* as `that`, otherwise `false`.
*/
def containsSlice [ B ]( that : GenSeq [ B ]) : Boolean = indexOfSlice ( that ) != - 1
/** Tests whether this $coll contains a given value as an element.
* $mayNotTerminateInf
*
* @param elem the element to test.
* @return `true` if this $coll has an element that is equal (as
* determined by `==`) to `elem`, `false` otherwise.
*/
def contains [ A1 >: A ]( elem : A1 ) : Boolean = exists ( _ == elem )
/** Produces a new sequence which contains all elements of this $coll and also all elements of
* a given sequence. `xs union ys` is equivalent to `xs ++ ys`.
*
* @param that the sequence to add.
* @tparam B the element type of the returned $coll.
* @tparam That $thatinfo
* @param bf $bfinfo
* @return a new collection of type `That` which contains all elements of this $coll
* followed by all elements of `that`.
* @usecase def union(that: Seq[A]): $Coll[A]
* @inheritdoc
*
* Another way to express this
* is that `xs union ys` computes the order-preserving multi-set union of `xs` and `ys`.
* `union` is hence a counter-part of `diff` and `intersect` which also work on multi-sets.
*
* $willNotTerminateInf
*
* @return a new $coll which contains all elements of this $coll
* followed by all elements of `that`.
*/
override def union [ B >: A , That ]( that : GenSeq [ B ])( implicit bf : CanBuildFrom [ Repr , B , That ]) : That =
this ++ that
/** Computes the multiset difference between this $coll and another sequence.
*
* @param that the sequence of elements to remove
* @tparam B the element type of the returned $coll.
* @return a new collection of type `That` which contains all elements of this $coll
* except some of occurrences of elements that also appear in `that`.
* If an element value `x` appears
* ''n'' times in `that`, then the first ''n'' occurrences of `x` will not form
* part of the result, but any following occurrences will.
* @usecase def diff(that: Seq[A]): $Coll[A]
* @inheritdoc
*
* $willNotTerminateInf
*
* @return a new $coll which contains all elements of this $coll
* except some of occurrences of elements that also appear in `that`.
* If an element value `x` appears
* ''n'' times in `that`, then the first ''n'' occurrences of `x` will not form
* part of the result, but any following occurrences will.
*/
def diff [ B >: A ]( that : GenSeq [ B ]) : Repr = {
val occ = occCounts ( that . seq )
val b = newBuilder
for ( x <- this ) {
val ox = occ ( x ) // Avoid multiple map lookups
if ( ox == 0 ) b += x
else occ ( x ) = ox - 1
}
b . result ()
}
/** Computes the multiset intersection between this $coll and another sequence.
*
* @param that the sequence of elements to intersect with.
* @tparam B the element type of the returned $coll.
* @return a new collection of type `That` which contains all elements of this $coll
* which also appear in `that`.
* If an element value `x` appears
* ''n'' times in `that`, then the first ''n'' occurrences of `x` will be retained
* in the result, but any following occurrences will be omitted.
* @usecase def intersect(that: Seq[A]): $Coll[A]
* @inheritdoc
*
* $mayNotTerminateInf
*
* @return a new $coll which contains all elements of this $coll
* which also appear in `that`.
* If an element value `x` appears
* ''n'' times in `that`, then the first ''n'' occurrences of `x` will be retained
* in the result, but any following occurrences will be omitted.
*/
def intersect [ B >: A ]( that : GenSeq [ B ]) : Repr = {
val occ = occCounts ( that . seq )
val b = newBuilder
for ( x <- this ) {
val ox = occ ( x ) // Avoid multiple map lookups
if ( ox > 0 ) {
b += x
occ ( x ) = ox - 1
}
}
b . result ()
}
private def occCounts [ B ]( sq : Seq [ B ]) : mutable.Map [ B , Int ] = {
val occ = new mutable . HashMap [ B , Int ] { override def default ( k : B ) = 0 }
for ( y <- sq ) occ ( y ) += 1
occ
}
/** Builds a new $coll from this $coll without any duplicate elements.
* $willNotTerminateInf
*
* @return A new $coll which contains the first occurrence of every element of this $coll.
*/
def distinct : Repr = {
val b = newBuilder
val seen = mutable . HashSet [ A ]()
for ( x <- this ) {
if (! seen ( x )) {
b += x
seen += x
}
}
b . result ()
}
def patch [ B >: A , That ]( from : Int , patch : GenSeq [ B ], replaced : Int )( implicit bf : CanBuildFrom [ Repr , B , That ]) : That = {
val b = bf ( repr )
var i = 0
val it = this . iterator
while ( i < from && it . hasNext ) {
b += it . next ()
i += 1
}
b ++= patch . seq
i = replaced
while ( i > 0 && it . hasNext ) {
it . next ()
i -= 1
}
while ( it . hasNext ) b += it . next ()
b . result ()
}
def updated [ B >: A , That ]( index : Int , elem : B )( implicit bf : CanBuildFrom [ Repr , B , That ]) : That = {
if ( index < 0 ) throw new IndexOutOfBoundsException ( index . toString )
val b = bf ( repr )
var i = 0
val it = this . iterator
while ( i < index && it . hasNext ) {
b += it . next ()
i += 1
}
if (! it . hasNext ) throw new IndexOutOfBoundsException ( index . toString )
b += elem
it . next ()
while ( it . hasNext ) b += it . next ()
b . result ()
}
def +:[ B >: A , That ]( elem : B )( implicit bf : CanBuildFrom [ Repr , B , That ]) : That = {
val b = bf ( repr )
b += elem
b ++= thisCollection
b . result ()
}
def :+[ B >: A , That ]( elem : B )( implicit bf : CanBuildFrom [ Repr , B , That ]) : That = {
val b = bf ( repr )
b ++= thisCollection
b += elem
b . result ()
}
def padTo [ B >: A , That ]( len : Int , elem : B )( implicit bf : CanBuildFrom [ Repr , B , That ]) : That = {
val b = bf ( repr )
val L = length
b . sizeHint ( math . max ( L , len ))
var diff = len - L
b ++= thisCollection
while ( diff > 0 ) {
b += elem
diff -= 1
}
b . result ()
}
def corresponds [ B ]( that : GenSeq [ B ])( p : ( A , B ) => Boolean ) : Boolean = {
val i = this . iterator
val j = that . iterator
while ( i . hasNext && j . hasNext )
if (! p ( i . next (), j . next ()))
return false
! i . hasNext && ! j . hasNext
}
/** Sorts this $coll according to a comparison function.
* $willNotTerminateInf
*
* The sort is stable. That is, elements that are equal (as determined by
* `lt`) appear in the same order in the sorted sequence as in the original.
*
* @param lt the comparison function which tests whether
* its first argument precedes its second argument in
* the desired ordering.
* @return a $coll consisting of the elements of this $coll
* sorted according to the comparison function `lt`.
* @example {{{
* List("Steve", "Tom", "John", "Bob").sortWith(_.compareTo(_) < 0) =
* List("Bob", "John", "Steve", "Tom")
* }}}
*/
def sortWith ( lt : ( A , A ) => Boolean ) : Repr = sorted ( Ordering fromLessThan lt )
/** Sorts this $Coll according to the Ordering which results from transforming
* an implicitly given Ordering with a transformation function.
* @see [[scala.math.Ordering]]
* $willNotTerminateInf
* @param f the transformation function mapping elements
* to some other domain `B`.
* @param ord the ordering assumed on domain `B`.
* @tparam B the target type of the transformation `f`, and the type where
* the ordering `ord` is defined.
* @return a $coll consisting of the elements of this $coll
* sorted according to the ordering where `x < y` if
* `ord.lt(f(x), f(y))`.
*
* @example {{{
* val words = "The quick brown fox jumped over the lazy dog".split(' ')
* // this works because scala.Ordering will implicitly provide an Ordering[Tuple2[Int, Char]]
* words.sortBy(x => (x.length, x.head))
* res0: Array[String] = Array(The, dog, fox, the, lazy, over, brown, quick, jumped)
* }}}
*/
def sortBy [ B ]( f : A => B )( implicit ord : Ordering [ B ]) : Repr = sorted ( ord on f )
/** Sorts this $coll according to an Ordering.
*
* The sort is stable. That is, elements that are equal (as determined by
* `lt`) appear in the same order in the sorted sequence as in the original.
*
* @see [[scala.math.Ordering]]
*
* @param ord the ordering to be used to compare elements.
* @return a $coll consisting of the elements of this $coll
* sorted according to the ordering `ord`.
*/
def sorted [ B >: A ]( implicit ord : Ordering [ B ]) : Repr = {
val len = this . length
val b = newBuilder
if ( len == 1 ) b ++= this
else if ( len > 1 ) {
b . sizeHint ( len )
val arr = new Array [ AnyRef ]( len ) // Previously used ArraySeq for more compact but slower code
var i = 0
for ( x <- this ) {
arr ( i ) = x . asInstanceOf [ AnyRef ]
i += 1
}
java . util . Arrays . sort ( arr , ord . asInstanceOf [ Ordering [ Object ]])
i = 0
while ( i < arr . length ) {
b += arr ( i ). asInstanceOf [ A ]
i += 1
}
}
b . result ()
}
/** Converts this $coll to a sequence.
* $willNotTerminateInf
*
* A new collection will not be built; in particular, lazy sequences will stay lazy.
*/
override def toSeq : Seq [ A ] = thisCollection
/** Produces the range of all indices of this sequence.
*
* @return a `Range` value from `0` to one less than the length of this $coll.
*/
def indices : Range = 0 until length
override def view = new SeqView [ A , Repr ] {
protected lazy val underlying = self . repr
override def iterator = self . iterator
override def length = self . length
override def apply ( idx : Int ) = self . apply ( idx )
}
override def view ( from : Int , until : Int ) = view . slice ( from , until )
/* Need to override string, so that it's not the Function1's string that gets mixed in.
*/
override def toString = super [ IterableLike ]. toString
}
/** The companion object for trait `SeqLike`.
*/
object SeqLike {
// KMP search utilities
/** Make sure a target sequence has fast, correctly-ordered indexing for KMP.
*
* @author Rex Kerr
* @since 2.10
* @param W The target sequence
* @param n0 The first element in the target sequence that we should use
* @param n1 The far end of the target sequence that we should use (exclusive)
* @return Target packed in an IndexedSeq (taken from iterator unless W already is an IndexedSeq)
*/
private def kmpOptimizeWord [ B ]( W : Seq [ B ], n0 : Int , n1 : Int , forward : Boolean ) = W match {
case iso : IndexedSeq [ _ ] =>
// Already optimized for indexing--use original (or custom view of original)
if ( forward && n0 == 0 && n1 == W . length ) iso . asInstanceOf [ IndexedSeq [ B ]]
else if ( forward ) new AbstractSeq [ B ] with IndexedSeq [ B ] {
val length = n1 - n0
def apply ( x : Int ) = iso ( n0 + x ). asInstanceOf [ B ]
}
else new AbstractSeq [ B ] with IndexedSeq [ B ] {
def length = n1 - n0
def apply ( x : Int ) = iso ( n1 - 1 - x ). asInstanceOf [ B ]
}
case _ =>
// W is probably bad at indexing. Pack in array (in correct orientation)
// Would be marginally faster to special-case each direction
new AbstractSeq [ B ] with IndexedSeq [ B ] {
private [ this ] val Warr = new Array [ AnyRef ]( n1 - n0 )
private [ this ] val delta = if ( forward ) 1 else - 1
private [ this ] val done = if ( forward ) n1 - n0 else - 1
val wit = W . iterator . drop ( n0 )
var i = if ( forward ) 0 else ( n1 - n0 - 1 )
while ( i != done ) {
Warr ( i ) = wit . next (). asInstanceOf [ AnyRef ]
i += delta
}
val length = n1 - n0
def apply ( x : Int ) = Warr ( x ). asInstanceOf [ B ]
}
}
/** Make a jump table for KMP search.
*
* @author paulp, Rex Kerr
* @since 2.10
* @param Wopt The target sequence, as at least an IndexedSeq
* @param wlen Just in case we're only IndexedSeq and not IndexedSeqOptimized
* @return KMP jump table for target sequence
*/
private def kmpJumpTable [ B ]( Wopt : IndexedSeq [ B ], wlen : Int ) = {
val arr = new Array [ Int ]( wlen )
var pos = 2
var cnd = 0
arr ( 0 ) = - 1
arr ( 1 ) = 0
while ( pos < wlen ) {
if ( Wopt ( pos - 1 ) == Wopt ( cnd )) {
arr ( pos ) = cnd + 1
pos += 1
cnd += 1
}
else if ( cnd > 0 ) {
cnd = arr ( cnd )
}
else {
arr ( pos ) = 0
pos += 1
}
}
arr
}
/** A KMP implementation, based on the undoubtedly reliable wikipedia entry.
* Note: I made this private to keep it from entering the API. That can be reviewed.
*
* @author paulp, Rex Kerr
* @since 2.10
* @param S Sequence that may contain target
* @param m0 First index of S to consider
* @param m1 Last index of S to consider (exclusive)
* @param W Target sequence
* @param n0 First index of W to match
* @param n1 Last index of W to match (exclusive)
* @param forward Direction of search (from beginning==true, from end==false)
* @return Index of start of sequence if found, -1 if not (relative to beginning of S, not m0).
*/
private def kmpSearch [ B ]( S : Seq [ B ], m0 : Int , m1 : Int , W : Seq [ B ], n0 : Int , n1 : Int , forward : Boolean ) : Int = {
// Check for redundant case when target has single valid element
def clipR ( x : Int , y : Int ) = if ( x < y ) x else - 1
def clipL ( x : Int , y : Int ) = if ( x > y ) x else - 1
if ( n1 == n0 + 1 ) {
if ( forward )
clipR ( S . indexOf ( W ( n0 ), m0 ), m1 )
else
clipL ( S . lastIndexOf ( W ( n0 ), m1 - 1 ), m0 - 1 )
}
// Check for redundant case when both sequences are same size
else if ( m1 - m0 == n1 - n0 ) {
// Accepting a little slowness for the uncommon case.
if ( S . view . slice ( m0 , m1 ) == W . view . slice ( n0 , n1 )) m0
else - 1
}
// Now we know we actually need KMP search, so do it
else S match {
case xs : IndexedSeq [ _ ] =>
// We can index into S directly; it should be adequately fast
val Wopt = kmpOptimizeWord ( W , n0 , n1 , forward )
val T = kmpJumpTable ( Wopt , n1 - n0 )
var i , m = 0
val zero = if ( forward ) m0 else m1 - 1
val delta = if ( forward ) 1 else - 1
while ( i + m < m1 - m0 ) {
if ( Wopt ( i ) == S ( zero + delta *( i + m ))) {
i += 1
if ( i == n1 - n0 ) return ( if ( forward ) m + m0 else m1 - m - i )
}
else {
val ti = T ( i )
m += i - ti
if ( i > 0 ) i = ti
}
}
- 1
case _ =>
// We had better not index into S directly!
val iter = S . iterator . drop ( m0 )
val Wopt = kmpOptimizeWord ( W , n0 , n1 , forward = true )
val T = kmpJumpTable ( Wopt , n1 - n0 )
val cache = new Array [ AnyRef ]( n1 - n0 ) // Ring buffer--need a quick way to do a look-behind
var largest = 0
var i , m = 0
var answer = - 1
while ( m + m0 + n1 - n0 <= m1 ) {
while ( i + m >= largest ) {
cache ( largest %( n1 - n0 )) = iter . next (). asInstanceOf [ AnyRef ]
largest += 1
}
if ( Wopt ( i ) == cache (( i + m )%( n1 - n0 ))) {
i += 1
if ( i == n1 - n0 ) {
if ( forward ) return m + m0
else {
i -= 1
answer = m + m0
val ti = T ( i )
m += i - ti
if ( i > 0 ) i = ti
}
}
}
else {
val ti = T ( i )
m += i - ti
if ( i > 0 ) i = ti
}
}
answer
}
}
/** Finds a particular index at which one sequence occurs in another sequence.
* Both the source sequence and the target sequence are expressed in terms
* other sequences S' and T' with offset and length parameters. This
* function is designed to wrap the KMP machinery in a sufficiently general
* way that all library sequence searches can use it. It is unlikely you
* have cause to call it directly: prefer functions such as StringBuilder#indexOf
* and Seq#lastIndexOf.
*
* @param source the sequence to search in
* @param sourceOffset the starting offset in source
* @param sourceCount the length beyond sourceOffset to search
* @param target the sequence being searched for
* @param targetOffset the starting offset in target
* @param targetCount the length beyond targetOffset which makes up the target string
* @param fromIndex the smallest index at which the target sequence may start
*
* @return the applicable index in source where target exists, or -1 if not found
*/
def indexOf [ B ](
source : Seq [ B ], sourceOffset : Int , sourceCount : Int ,
target : Seq [ B ], targetOffset : Int , targetCount : Int ,
fromIndex : Int
) : Int = {
// Fiddle with variables to match previous behavior and use kmpSearch
// Doing LOTS of max/min, both clearer and faster to use math._
val slen = source . length
val clippedFrom = math . max ( 0 , fromIndex )
val s0 = math . min ( slen , sourceOffset + clippedFrom )
val s1 = math . min ( slen , s0 + sourceCount )
val tlen = target . length
val t0 = math . min ( tlen , targetOffset )
val t1 = math . min ( tlen , t0 + targetCount )
// Error checking
if ( clippedFrom > slen - sourceOffset ) - 1 // Cannot return an index in range
else if ( t1 - t0 < 1 ) s0 // Empty, matches first available position
else if ( s1 - s0 < t1 - t0 ) - 1 // Source is too short to find target
else {
// Nontrivial search
val ans = kmpSearch ( source , s0 , s1 , target , t0 , t1 , forward = true )
if ( ans < 0 ) ans else ans - math . min ( slen , sourceOffset )
}
}
/** Finds a particular index at which one sequence occurs in another sequence.
* Like `indexOf`, but finds the latest occurrence rather than earliest.
*
* @see [[scala.collection.SeqLike]], method `indexOf`
*/
def lastIndexOf [ B ](
source : Seq [ B ], sourceOffset : Int , sourceCount : Int ,
target : Seq [ B ], targetOffset : Int , targetCount : Int ,
fromIndex : Int
) : Int = {
// Fiddle with variables to match previous behavior and use kmpSearch
// Doing LOTS of max/min, both clearer and faster to use math._
val slen = source . length
val tlen = target . length
val s0 = math . min ( slen , sourceOffset )
val s1 = math . min ( slen , s0 + sourceCount )
val clippedFrom = math . min ( s1 - s0 , fromIndex )
val t0 = math . min ( tlen , targetOffset )
val t1 = math . min ( tlen , t0 + targetCount )
val fixed_s1 = math . min ( s1 , s0 + clippedFrom + ( t1 - t0 ) - 1 )
// Error checking
if ( clippedFrom < 0 ) - 1 // Cannot return an index in range
else if ( t1 - t0 < 1 ) s0 + clippedFrom // Empty, matches last available position
else if ( fixed_s1 - s0 < t1 - t0 ) - 1 // Source is too short to find target
else {
// Nontrivial search
val ans = kmpSearch ( source , s0 , fixed_s1 , target , t0 , t1 , forward = false )
if ( ans < 0 ) ans else ans - s0
}
}
}