博客
关于我
强烈建议你试试无所不能的chatGPT,快点击我
BroadcastShardOperationFailedException TooManyClauses[maxClauseCount is set to
阅读量:5945 次
发布时间:2019-06-19

本文共 17931 字,大约阅读时间需要 59 分钟。

hot3.png

BroadcastShardOperationFailedException TooManyClauses[maxClauseCount is set to 博客分类: 异常

Lucene 查询条件(子句)的数量不能大于 1024

package org.apache.lucene.search;/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements.  See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License.  You may obtain a copy of the License at * *     http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */import java.io.IOException;import java.util.ArrayList;import java.util.Iterator;import java.util.List;import java.util.Set;import org.apache.lucene.index.AtomicReaderContext;import org.apache.lucene.index.IndexReader;import org.apache.lucene.index.Term;import org.apache.lucene.search.BooleanClause.Occur;import org.apache.lucene.search.similarities.Similarity;import org.apache.lucene.util.Bits;import org.apache.lucene.util.ToStringUtils;/** A Query that matches documents matching boolean combinations of other  * queries, e.g. {@link TermQuery}s, {@link PhraseQuery}s or other  * BooleanQuerys.  */public class BooleanQuery extends Query implements Iterable
{ private static int maxClauseCount = 10240; /** Thrown when an attempt is made to add more than {@link * #getMaxClauseCount()} clauses. This typically happens if * a PrefixQuery, FuzzyQuery, WildcardQuery, or TermRangeQuery * is expanded to many terms during search. */ public static class TooManyClauses extends RuntimeException { public TooManyClauses() { super("maxClauseCount is set to " + maxClauseCount); } } /** Return the maximum number of clauses permitted, 1024 by default. * Attempts to add more than the permitted number of clauses cause {@link * TooManyClauses} to be thrown. * @see #setMaxClauseCount(int) */ public static int getMaxClauseCount() { return maxClauseCount; } /** * Set the maximum number of clauses permitted per BooleanQuery. * Default value is 1024. */ public static void setMaxClauseCount(int maxClauseCount) { if (maxClauseCount < 1) { throw new IllegalArgumentException("maxClauseCount must be >= 1"); } BooleanQuery.maxClauseCount = maxClauseCount; } private ArrayList
clauses = new ArrayList
(); private final boolean disableCoord; /** Constructs an empty boolean query. */ public BooleanQuery() { disableCoord = false; } /** Constructs an empty boolean query. * * {@link Similarity#coord(int,int)} may be disabled in scoring, as * appropriate. For example, this score factor does not make sense for most * automatically generated queries, like {@link WildcardQuery} and {@link * FuzzyQuery}. * * @param disableCoord disables {@link Similarity#coord(int,int)} in scoring. */ public BooleanQuery(boolean disableCoord) { this.disableCoord = disableCoord; } /** Returns true iff {@link Similarity#coord(int,int)} is disabled in * scoring for this query instance. * @see #BooleanQuery(boolean) */ public boolean isCoordDisabled() { return disableCoord; } /** * Specifies a minimum number of the optional BooleanClauses * which must be satisfied. * *

* By default no optional clauses are necessary for a match * (unless there are no required clauses). If this method is used, * then the specified number of clauses is required. *

*

* Use of this method is totally independent of specifying that * any specific clauses are required (or prohibited). This number will * only be compared against the number of matching optional clauses. *

* * @param min the number of optional clauses that must match */ public void setMinimumNumberShouldMatch(int min) { this.minNrShouldMatch = min; } protected int minNrShouldMatch = 0; /** * Gets the minimum number of the optional BooleanClauses * which must be satisfied. */ public int getMinimumNumberShouldMatch() { return minNrShouldMatch; } /** Adds a clause to a boolean query. * * @throws TooManyClauses if the new number of clauses exceeds the maximum clause number * @see #getMaxClauseCount() */ public void add(Query query, BooleanClause.Occur occur) { add(new BooleanClause(query, occur)); } /** Adds a clause to a boolean query. * @throws TooManyClauses if the new number of clauses exceeds the maximum clause number * @see #getMaxClauseCount() */ public void add(BooleanClause clause) { if (clauses.size() >= maxClauseCount) { throw new TooManyClauses(); } clauses.add(clause); } /** Returns the set of clauses in this query. */ public BooleanClause[] getClauses() { return clauses.toArray(new BooleanClause[clauses.size()]); } /** Returns the list of clauses in this query. */ public List
clauses() { return clauses; } /** Returns an iterator on the clauses in this query. It implements the {@link Iterable} interface to * make it possible to do: *
for (BooleanClause clause : booleanQuery) {}
*/ @Override public final Iterator
iterator() { return clauses().iterator(); } /** * Expert: the Weight for BooleanQuery, used to * normalize, score and explain these queries. * *

NOTE: this API and implementation is subject to * change suddenly in the next release.

*/ protected class BooleanWeight extends Weight { /** The Similarity implementation. */ protected Similarity similarity; protected ArrayList
weights; protected int maxCoord; // num optional + num required private final boolean disableCoord; public BooleanWeight(IndexSearcher searcher, boolean disableCoord) throws IOException { this.similarity = searcher.getSimilarity(); this.disableCoord = disableCoord; weights = new ArrayList
(clauses.size()); for (int i = 0 ; i < clauses.size(); i++) { BooleanClause c = clauses.get(i); Weight w = c.getQuery().createWeight(searcher); weights.add(w); if (!c.isProhibited()) { maxCoord++; } } } @Override public Query getQuery() { return BooleanQuery.this; } @Override public float getValueForNormalization() throws IOException { float sum = 0.0f; for (int i = 0 ; i < weights.size(); i++) { // call sumOfSquaredWeights for all clauses in case of side effects float s = weights.get(i).getValueForNormalization(); // sum sub weights if (!clauses.get(i).isProhibited()) { // only add to sum for non-prohibited clauses sum += s; } } sum *= getBoost() * getBoost(); // boost each sub-weight return sum ; } public float coord(int overlap, int maxOverlap) { // LUCENE-4300: in most cases of maxOverlap=1, BQ rewrites itself away, // so coord() is not applied. But when BQ cannot optimize itself away // for a single clause (minNrShouldMatch, prohibited clauses, etc), its // important not to apply coord(1,1) for consistency, it might not be 1.0F return maxOverlap == 1 ? 1F : similarity.coord(overlap, maxOverlap); } @Override public void normalize(float norm, float topLevelBoost) { topLevelBoost *= getBoost(); // incorporate boost for (Weight w : weights) { // normalize all clauses, (even if prohibited in case of side affects) w.normalize(norm, topLevelBoost); } } @Override public Explanation explain(AtomicReaderContext context, int doc) throws IOException { final int minShouldMatch = BooleanQuery.this.getMinimumNumberShouldMatch(); ComplexExplanation sumExpl = new ComplexExplanation(); sumExpl.setDescription("sum of:"); int coord = 0; float sum = 0.0f; boolean fail = false; int shouldMatchCount = 0; Iterator
cIter = clauses.iterator(); for (Iterator
wIter = weights.iterator(); wIter.hasNext();) { Weight w = wIter.next(); BooleanClause c = cIter.next(); if (w.scorer(context, true, true, context.reader().getLiveDocs()) == null) { if (c.isRequired()) { fail = true; Explanation r = new Explanation(0.0f, "no match on required clause (" + c.getQuery().toString() + ")"); sumExpl.addDetail(r); } continue; } Explanation e = w.explain(context, doc); if (e.isMatch()) { if (!c.isProhibited()) { sumExpl.addDetail(e); sum += e.getValue(); coord++; } else { Explanation r = new Explanation(0.0f, "match on prohibited clause (" + c.getQuery().toString() + ")"); r.addDetail(e); sumExpl.addDetail(r); fail = true; } if (c.getOccur() == Occur.SHOULD) { shouldMatchCount++; } } else if (c.isRequired()) { Explanation r = new Explanation(0.0f, "no match on required clause (" + c.getQuery().toString() + ")"); r.addDetail(e); sumExpl.addDetail(r); fail = true; } } if (fail) { sumExpl.setMatch(Boolean.FALSE); sumExpl.setValue(0.0f); sumExpl.setDescription ("Failure to meet condition(s) of required/prohibited clause(s)"); return sumExpl; } else if (shouldMatchCount < minShouldMatch) { sumExpl.setMatch(Boolean.FALSE); sumExpl.setValue(0.0f); sumExpl.setDescription("Failure to match minimum number "+ "of optional clauses: " + minShouldMatch); return sumExpl; } sumExpl.setMatch(0 < coord ? Boolean.TRUE : Boolean.FALSE); sumExpl.setValue(sum); final float coordFactor = disableCoord ? 1.0f : coord(coord, maxCoord); if (coordFactor == 1.0f) { return sumExpl; // eliminate wrapper } else { ComplexExplanation result = new ComplexExplanation(sumExpl.isMatch(), sum*coordFactor, "product of:"); result.addDetail(sumExpl); result.addDetail(new Explanation(coordFactor, "coord("+coord+"/"+maxCoord+")")); return result; } } @Override public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder, boolean topScorer, Bits acceptDocs) throws IOException { List
required = new ArrayList
(); List
prohibited = new ArrayList
(); List
optional = new ArrayList
(); Iterator
cIter = clauses.iterator(); for (Weight w : weights) { BooleanClause c = cIter.next(); Scorer subScorer = w.scorer(context, true, false, acceptDocs); if (subScorer == null) { if (c.isRequired()) { return null; } } else if (c.isRequired()) { required.add(subScorer); } else if (c.isProhibited()) { prohibited.add(subScorer); } else { optional.add(subScorer); } } // NOTE: we could also use BooleanScorer, if we knew // this BooleanQuery was embedded in another // BooleanQuery that was also using BooleanScorer (ie, // BooleanScorer can nest). But this is hard to // detect and we never do so today... (ie, we only // return BooleanScorer for topScorer): // Check if we can and should return a BooleanScorer // TODO: (LUCENE-4872) in some cases BooleanScorer may be faster for minNrShouldMatch // but the same is even true of pure conjunctions... if (!scoreDocsInOrder && topScorer && required.size() == 0 && minNrShouldMatch <= 1) { return new BooleanScorer(this, disableCoord, minNrShouldMatch, optional, prohibited, maxCoord); } if (required.size() == 0 && optional.size() == 0) { // no required and optional clauses. return null; } else if (optional.size() < minNrShouldMatch) { // either >1 req scorer, or there are 0 req scorers and at least 1 // optional scorer. Therefore if there are not enough optional scorers // no documents will be matched by the query return null; } // simple conjunction if (optional.size() == 0 && prohibited.size() == 0) { float coord = disableCoord ? 1.0f : coord(required.size(), maxCoord); return new ConjunctionScorer(this, required.toArray(new Scorer[required.size()]), coord); } // simple disjunction if (required.size() == 0 && prohibited.size() == 0 && minNrShouldMatch <= 1 && optional.size() > 1) { float coord[] = new float[optional.size()+1]; for (int i = 0; i < coord.length; i++) { coord[i] = disableCoord ? 
1.0f : coord(i, maxCoord); } return new DisjunctionSumScorer(this, optional.toArray(new Scorer[optional.size()]), coord); } // Return a BooleanScorer2 return new BooleanScorer2(this, disableCoord, minNrShouldMatch, required, prohibited, optional, maxCoord); } @Override public boolean scoresDocsOutOfOrder() { for (BooleanClause c : clauses) { if (c.isRequired()) { return false; // BS2 (in-order) will be used by scorer() } } // scorer() will return an out-of-order scorer if requested. return true; } } @Override public Weight createWeight(IndexSearcher searcher) throws IOException { return new BooleanWeight(searcher, disableCoord); } @Override public Query rewrite(IndexReader reader) throws IOException { if (minNrShouldMatch == 0 && clauses.size() == 1) { // optimize 1-clause queries BooleanClause c = clauses.get(0); if (!c.isProhibited()) { // just return clause Query query = c.getQuery().rewrite(reader); // rewrite first if (getBoost() != 1.0f) { // incorporate boost if (query == c.getQuery()) { // if rewrite was no-op query = query.clone(); // then clone before boost } // Since the BooleanQuery only has 1 clause, the BooleanQuery will be // written out. Therefore the rewritten Query's boost must incorporate both // the clause's boost, and the boost of the BooleanQuery itself query.setBoost(getBoost() * query.getBoost()); } return query; } } BooleanQuery clone = null; // recursively rewrite for (int i = 0 ; i < clauses.size(); i++) { BooleanClause c = clauses.get(i); Query query = c.getQuery().rewrite(reader); if (query != c.getQuery()) { // clause rewrote: must clone if (clone == null) { // The BooleanQuery clone is lazily initialized so only initialize // it if a rewritten clause differs from the original clause (and hasn't been // initialized already). 
If nothing differs, the clone isn't needlessly created clone = this.clone(); } clone.clauses.set(i, new BooleanClause(query, c.getOccur())); } } if (clone != null) { return clone; // some clauses rewrote } else { return this; // no clauses rewrote } } // inherit javadoc @Override public void extractTerms(Set
terms) { for (BooleanClause clause : clauses) { if (clause.getOccur() != Occur.MUST_NOT) { clause.getQuery().extractTerms(terms); } } } @Override @SuppressWarnings("unchecked") public BooleanQuery clone() { BooleanQuery clone = (BooleanQuery)super.clone(); clone.clauses = (ArrayList
) this.clauses.clone(); return clone; } /** Prints a user-readable version of this query. */ @Override public String toString(String field) { StringBuilder buffer = new StringBuilder(); boolean needParens= getBoost() != 1.0 || getMinimumNumberShouldMatch() > 0; if (needParens) { buffer.append("("); } for (int i = 0 ; i < clauses.size(); i++) { BooleanClause c = clauses.get(i); if (c.isProhibited()) { buffer.append("-"); } else if (c.isRequired()) { buffer.append("+"); } Query subQuery = c.getQuery(); if (subQuery != null) { if (subQuery instanceof BooleanQuery) { // wrap sub-bools in parens buffer.append("("); buffer.append(subQuery.toString(field)); buffer.append(")"); } else { buffer.append(subQuery.toString(field)); } } else { buffer.append("null"); } if (i != clauses.size()-1) { buffer.append(" "); } } if (needParens) { buffer.append(")"); } if (getMinimumNumberShouldMatch()>0) { buffer.append('~'); buffer.append(getMinimumNumberShouldMatch()); } if (getBoost() != 1.0f) { buffer.append(ToStringUtils.boost(getBoost())); } return buffer.toString(); } /** Returns true iff
o is equal to this. */ @Override public boolean equals(Object o) { if (!(o instanceof BooleanQuery)) { return false; } BooleanQuery other = (BooleanQuery)o; return this.getBoost() == other.getBoost() && this.clauses.equals(other.clauses) && this.getMinimumNumberShouldMatch() == other.getMinimumNumberShouldMatch() && this.disableCoord == other.disableCoord; } /** Returns a hash code value for this object.*/ @Override public int hashCode() { return Float.floatToIntBits(getBoost()) ^ clauses.hashCode() + getMinimumNumberShouldMatch() + (disableCoord ? 17:0); } }

   

下面是在网上看到的一个解决方法: 

可以通过设置: 
BooleanQuery.setMaxClauseCount(10000); 
来解决问题。但是这样会使内存开销加大,容易出现 OutOfMemory 异常,所以需要非常谨慎地处理。 

Lucene 在做大量 term 值查询时,如果 term 过多(超过 1024 个),会出现

TooManyClauses[maxClauseCount is set to 1024] 的异常。因此建议在 term 过多的情况下采用 filter,而不是 query。

以下是该情形在ES中的测试。

 

 

Java代码  
收藏代码
  1. Settings defaultSettings = ImmutableSettings.settingsBuilder().put("client.transport.sniff"true).build();  
  2.         Settings finalSettings = ImmutableSettings.settingsBuilder().put(defaultSettings)  
  3.                 .put("name", NetworkUtils.getLocalAddress().getHostName()).build();  
  4.         TransportClient tmp = new TransportClient(finalSettings);  
  5.         Client client = tmp.addTransportAddress(new InetSocketTransportAddress("127.0.0.1"9300));  
  6.         //demo 100万数据  
  7.         for (int i = 0; i < 1000000; i++)  
  8.         {  
  9.             client.prepareIndex("test2""book",String.valueOf(i)).setSource("bookid", String.valueOf(i), "booktype", String.valueOf(i%10000)).execute()  
  10.             .actionGet();  
  11.         }  
  12.         //demo 近1万个term  
  13.         String[] values = new String[10000];  
  14.         for (int i = 1; i < 10000; i++)  
  15.         {  
  16.             values[i] = String.valueOf(i);  
  17.         }  
  18.         //terms query  
  19.         //TermsQueryBuilder termQueryBuilder = new TermsQueryBuilder("booktype", values);  
  20.         TermsFilterBuilder termsFilterBuilder = new TermsFilterBuilder("booktype", values);  
  21. //      SearchResponse searchResponse = client.prepareSearch().setIndices("test2").setQuery(termQueryBuilder)  
  22. //              .setFrom(0).setSize(100).execute().actionGet();  
  23.         //terms filter  
  24.         SearchResponse searchResponse = client.prepareSearch().setIndices("test2").setQuery(QueryBuilders.matchAllQuery()).setFilter(termsFilterBuilder)  
  25.                 .setFrom(0).setSize(100).execute().actionGet();  
  26.         SearchHits hits = searchResponse.getHits();  
  27.         System.out.println(hits.totalHits());  
  28.         for (SearchHit searchHit : hits)  
  29.         {  
  30.             System.out.println(searchHit.getId() + ":" + searchHit.getSource().get("booktype"));  
  31.         }  

 从上述结果可以发现:用 TermsQueryBuilder 查询时会出现 TooManyClauses 异常,因为设置了 9999 个 term 值。因此,当 term 过多时,建议采用 filter,而不是 query。 

 http://lucene-group.group.iteye.com/group/topic/10555

http://maxrocray.iteye.com/blog/1860946

转载于:https://my.oschina.net/xiaominmin/blog/1597101

你可能感兴趣的文章
(转)Windows xp 下如何压缩tar.gz格式
查看>>
《JavaScript高级程序设计》阅读笔记(六):ECMAScript中的运算符(二)
查看>>
玩Linux的第一天
查看>>
QQ互联OAuth2.0 .NET SDK 发布以及网站QQ登陆示例代码
查看>>
计算当天时间过去了几秒
查看>>
POJ 1082 Calendar Game
查看>>
js实现表格隔行变色,鼠标在该行放上移走的变色效果,还有全选,反选等
查看>>
android之JSON解析(二)
查看>>
Sharepoint学习笔记—Authentication-- 更改现有Sharepoint网站的认证方式,让其支持FBA:2.修改阶段...
查看>>
主流浏览器版本发布历史
查看>>
从源代码编译里程碑的 ICS ROM
查看>>
c#实现DES加密与解密
查看>>
"remote:error:refusing to update checked out branch:refs/heads/master"的解决办法
查看>>
百度域名信息
查看>>
php数据库配置文件一般做法
查看>>
【线性代数】正交投影
查看>>
CentOS下挂载U盘
查看>>
基于SQL和PYTHON的数据库数据查询select语句
查看>>
Java使用iText生成word文件的完美解决方案(亲测可行)
查看>>
如何高效地向Redis插入大量的数据
查看>>