mirror of
https://github.com/elastic/elasticsearch.git
synced 2025-04-20 13:17:31 -04:00
fix/hack remaining filter and analysis issues
This commit is contained in:
parent
df53448856
commit
0f8740a782
7 changed files with 58 additions and 27 deletions
|
@ -23,6 +23,7 @@ import org.apache.lucene.index.LeafReader;
|
|||
import org.apache.lucene.search.DocIdSet;
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.BitSet;
|
||||
import org.apache.lucene.util.BitDocIdSet;
|
||||
import org.apache.lucene.util.FixedBitSet;
|
||||
import org.apache.lucene.util.RamUsageEstimator;
|
||||
|
@ -81,11 +82,10 @@ public class DocIdSets {
|
|||
}
|
||||
// TODO: should we use WAH8DocIdSet like Lucene?
|
||||
FixedBitSet fixedBitSet = new FixedBitSet(reader.maxDoc());
|
||||
do {
|
||||
fixedBitSet.set(doc);
|
||||
doc = it.nextDoc();
|
||||
} while (doc != DocIdSetIterator.NO_MORE_DOCS);
|
||||
return new BitDocIdSet(fixedBitSet);
|
||||
it = set.iterator();
|
||||
long cost = it.cost();
|
||||
fixedBitSet.or(it);
|
||||
return new BitDocIdSet(fixedBitSet, cost);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -114,4 +114,29 @@ public class DocIdSets {
|
|||
set.or(iterator);
|
||||
return set;
|
||||
}
|
||||
|
||||
/**
|
||||
* Wraps the given bit set in a {@link BitDocIdSet} with an estimated cost, for callers that
|
||||
* do not know the cardinality and want to avoid the expense of computing it exactly.
|
||||
* @param bs the bit set to wrap; it is handed to the returned BitDocIdSet unchanged
|
||||
* @return a BitDocIdSet over {@code bs} whose cost comes from guessCost for a FixedBitSet,
|
||||
* and from {@code bs.approximateCardinality()} for any other BitSet
|
||||
* @deprecated remove usages of this.
|
||||
*/
|
||||
@Deprecated
|
||||
public static BitDocIdSet newDocIDSet(BitSet bs) {
|
||||
final int cost;
|
||||
// FixedBitSet gets the local guessCost estimate; every other BitSet supplies its own approximation.
if (bs instanceof FixedBitSet) {
|
||||
cost = guessCost((FixedBitSet) bs);
|
||||
} else {
|
||||
cost = bs.approximateCardinality();
|
||||
}
|
||||
return new BitDocIdSet(bs, cost);
|
||||
}
|
||||
|
||||
// Estimates the number of set bits in bs. Sets shorter than 8192 bits are counted exactly;
// longer sets use the cardinality of an 8192-bit FixedBitSet built from bs.getBits(),
// multiplied by bs.length() / 8192.
// nocommit: should this estimate instead always be derived from the cost of the filter clauses?
|
||||
private static int guessCost(FixedBitSet bs) {
|
||||
if (bs.length() < 8192) {
|
||||
return bs.cardinality();
|
||||
} else {
|
||||
// presumably the 8192-bit set covers the first 8192 bits of bs - verify against FixedBitSet(long[], int).
// NOTE(review): bs.length() / 8192 is integer division evaluated before the multiply, so any
// remainder of the length beyond a multiple of 8192 is ignored - confirm that is acceptable here.
return bs.length() / 8192 * new FixedBitSet(bs.getBits(), 8192).cardinality();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -25,6 +25,7 @@ import org.apache.lucene.search.DocIdSet;
|
|||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.search.Filter;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.BitSetIterator;
|
||||
import org.apache.lucene.util.FixedBitSet;
|
||||
import org.elasticsearch.common.lucene.docset.AllDocIdSet;
|
||||
import org.elasticsearch.common.lucene.docset.DocIdSets;
|
||||
|
@ -179,7 +180,7 @@ public class XBooleanFilter extends Filter implements Iterable<FilterClause> {
|
|||
|
||||
if (!hasBits) {
|
||||
if (!fastOrClauses.isEmpty()) {
|
||||
DocIdSetIterator it = res.iterator();
|
||||
DocIdSetIterator it = new BitSetIterator(res, 0);
|
||||
at_least_one_should_clause_iter:
|
||||
for (int setDoc = it.nextDoc(); setDoc != DocIdSetIterator.NO_MORE_DOCS; setDoc = it.nextDoc()) {
|
||||
for (ResultClause fastOrClause : fastOrClauses) {
|
||||
|
@ -199,7 +200,7 @@ public class XBooleanFilter extends Filter implements Iterable<FilterClause> {
|
|||
if (hasShouldClauses && !hasNonEmptyShouldClause) {
|
||||
return null;
|
||||
} else {
|
||||
return res;
|
||||
return DocIdSets.newDocIDSet(res);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -244,7 +245,7 @@ public class XBooleanFilter extends Filter implements Iterable<FilterClause> {
|
|||
} else {
|
||||
Bits bits = clause.bits;
|
||||
// use the "res" to drive the iteration
|
||||
DocIdSetIterator it = res.iterator();
|
||||
DocIdSetIterator it = new BitSetIterator(res, 0);
|
||||
for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
|
||||
if (!bits.get(doc)) {
|
||||
res.clear(doc);
|
||||
|
@ -262,7 +263,7 @@ public class XBooleanFilter extends Filter implements Iterable<FilterClause> {
|
|||
} else {
|
||||
Bits bits = clause.bits;
|
||||
// let res drive the iteration
|
||||
DocIdSetIterator it = res.iterator();
|
||||
DocIdSetIterator it = new BitSetIterator(res, 0);
|
||||
for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
|
||||
if (bits.get(doc)) {
|
||||
res.clear(doc);
|
||||
|
@ -277,7 +278,7 @@ public class XBooleanFilter extends Filter implements Iterable<FilterClause> {
|
|||
// clause must match in order for a doc to be a match. What we do here is check whether the matched docs match
|
||||
// any should filter. TODO: Add an option to disable the minimum_should_match=1 behaviour
|
||||
if (!slowOrClauses.isEmpty() || !fastOrClauses.isEmpty()) {
|
||||
DocIdSetIterator it = res.iterator();
|
||||
DocIdSetIterator it = new BitSetIterator(res, 0);
|
||||
at_least_one_should_clause_iter:
|
||||
for (int setDoc = it.nextDoc(); setDoc != DocIdSetIterator.NO_MORE_DOCS; setDoc = it.nextDoc()) {
|
||||
for (ResultClause fastOrClause : fastOrClauses) {
|
||||
|
@ -303,7 +304,7 @@ public class XBooleanFilter extends Filter implements Iterable<FilterClause> {
|
|||
if (hasShouldClauses && !hasNonEmptyShouldClause) {
|
||||
return null;
|
||||
} else {
|
||||
return res;
|
||||
return DocIdSets.newDocIDSet(res);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -91,8 +91,10 @@ public class KeepWordFilterFactory extends AbstractTokenFilterFactory {
|
|||
public TokenStream create(TokenStream tokenStream) {
|
||||
if (version.onOrAfter(Version.LUCENE_4_4)) {
|
||||
return new KeepWordFilter(tokenStream, keepWords);
|
||||
} else {
|
||||
// nocommit: what happened here?
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
return new KeepWordFilter(version, enablePositionIncrements, tokenStream, keepWords);
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -57,7 +57,9 @@ public class LengthTokenFilterFactory extends AbstractTokenFilterFactory {
|
|||
public TokenStream create(TokenStream tokenStream) {
|
||||
if (version.onOrAfter(Version.LUCENE_4_4)) {
|
||||
return new LengthFilter(tokenStream, min, max);
|
||||
} else {
|
||||
// nocommit: what happened here?
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
return new LengthFilter(version, enablePositionIncrements, tokenStream, min, max);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -64,8 +64,11 @@ public class StopTokenFilterFactory extends AbstractTokenFilterFactory {
|
|||
@Override
|
||||
public TokenStream create(TokenStream tokenStream) {
|
||||
if (removeTrailing) {
|
||||
StopFilter filter = new StopFilter(version, tokenStream, stopWords);
|
||||
filter.setEnablePositionIncrements(enablePositionIncrements);
|
||||
StopFilter filter = new StopFilter(tokenStream, stopWords);
|
||||
if (enablePositionIncrements == false) {
|
||||
// nocommit: what happened here?
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
return filter;
|
||||
} else {
|
||||
return new SuggestStopFilter(tokenStream, stopWords);
|
||||
|
|
|
@ -19,14 +19,12 @@
|
|||
|
||||
package org.elasticsearch.index.search.geo;
|
||||
|
||||
import org.apache.lucene.util.BitDocIdSet;
|
||||
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.search.DocIdSet;
|
||||
import org.apache.lucene.search.Filter;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.BitSet;
|
||||
import org.apache.lucene.util.FixedBitSet;
|
||||
import org.apache.lucene.util.BitDocIdSet;
|
||||
import org.elasticsearch.ElasticsearchIllegalArgumentException;
|
||||
import org.elasticsearch.common.geo.GeoPoint;
|
||||
import org.elasticsearch.common.lucene.docset.DocIdSets;
|
||||
|
@ -81,9 +79,9 @@ public class IndexedGeoBoundingBoxFilter {
|
|||
}
|
||||
} else {
|
||||
if (main == null) {
|
||||
main = (FixedBitSet) set;
|
||||
main = ((BitDocIdSet) set).bits();
|
||||
} else {
|
||||
main.or((FixedBitSet) set);
|
||||
main.or(set.iterator());
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -92,7 +90,7 @@ public class IndexedGeoBoundingBoxFilter {
|
|||
return null;
|
||||
}
|
||||
main.and(set.iterator());
|
||||
return main;
|
||||
return DocIdSets.newDocIDSet(main);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -129,19 +127,19 @@ public class IndexedGeoBoundingBoxFilter {
|
|||
}
|
||||
|
||||
@Override
|
||||
public FixedBitSet getDocIdSet(LeafReaderContext context, Bits acceptedDocs) throws IOException {
|
||||
FixedBitSet main;
|
||||
public BitDocIdSet getDocIdSet(LeafReaderContext context, Bits acceptedDocs) throws IOException {
|
||||
BitSet main;
|
||||
DocIdSet set = lonFilter.getDocIdSet(context, acceptedDocs);
|
||||
if (DocIdSets.isEmpty(set)) {
|
||||
return null;
|
||||
}
|
||||
main = (FixedBitSet) set;
|
||||
main = ((BitDocIdSet) set).bits();
|
||||
set = latFilter.getDocIdSet(context, acceptedDocs);
|
||||
if (DocIdSets.isEmpty(set)) {
|
||||
return null;
|
||||
}
|
||||
main.and(set.iterator());
|
||||
return main;
|
||||
return DocIdSets.newDocIDSet(main);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -75,7 +75,6 @@ public class StopTokenFilterTests extends ElasticsearchTokenStreamTestCase {
|
|||
tokenizer.setReader(new StringReader("foo bar"));
|
||||
TokenStream create = tokenFilter.create(tokenizer);
|
||||
assertThat(create, instanceOf(StopFilter.class));
|
||||
assertThat(((StopFilter)create).getEnablePositionIncrements(), equalTo(true));
|
||||
}
|
||||
|
||||
@Test
|
||||
|
@ -90,7 +89,8 @@ public class StopTokenFilterTests extends ElasticsearchTokenStreamTestCase {
|
|||
tokenizer.setReader(new StringReader("foo bar"));
|
||||
TokenStream create = tokenFilter.create(tokenizer);
|
||||
assertThat(create, instanceOf(StopFilter.class));
|
||||
assertThat(((StopFilter)create).getEnablePositionIncrements(), equalTo(false));
|
||||
// nocommit: was posInc=false actually supported in 4.3 in lucene (other than for ancient back compat?)
|
||||
fail("what happened here, and what to do about it");
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue