CodeForge QQ客服 CodeForge 400电话 客服电话 4006316121

Estimate.java ( 文件浏览 )

  • 发布于2016-05-17
  • 浏览次数:0
  • 下载次数:0
  • 下载需 1 积分
  • 侵权举报
			package net.paoding.analysis.analyzer.estimate;

import java.io.IOException;
import java.io.PrintStream;
import java.io.Reader;
import java.io.StringReader;
import java.util.Iterator;
import java.util.LinkedList;

import net.paoding.analysis.analyzer.PaodingTokenizer;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;

/**
 * Small harness that runs an {@link Analyzer} over some text, prints a
 * selectable subset of the produced tokens and then a short summary
 * (analyzer class, input length, token count, elapsed time).
 *
 * <p>Which tokens are printed is controlled by the "print" specification, see
 * {@link #setPrint(String)}. Tokens are printed ten per row and the
 * specification is expressed in rows.
 */
public class Estimate {

	/** Number of tokens printed per output row; the print spec is expressed in rows of this size. */
	private static final int TOKENS_PER_LINE = 10;

	private Analyzer analyzer;
	private String print;
	private PrintGate printGate;

	/**
	 * Creates an estimate without an analyzer; one must be supplied through
	 * {@link #setAnalyzer(Analyzer)} before calling any {@code test} method.
	 */
	public Estimate() {
		this.setPrint("50"); // by default only the first 50 rows of tokens are printed
	}

	/**
	 * Creates an estimate for the given analyzer.
	 *
	 * @param analyzer the analyzer to exercise
	 */
	public Estimate(Analyzer analyzer) {
		setAnalyzer(analyzer);
		this.setPrint("50"); // by default only the first 50 rows of tokens are printed
	}

	/**
	 * @param analyzer the analyzer used by the {@code test} methods
	 */
	public void setAnalyzer(Analyzer analyzer) {
		this.analyzer = analyzer;
	}

	/**
	 * @return the analyzer used by the {@code test} methods, possibly null
	 */
	public Analyzer getAnalyzer() {
		return analyzer;
	}

	/**
	 * Sets which rows of tokens are printed.
	 *
	 * <p>{@code null}, the empty string, {@code "null"} and {@code "no"}
	 * (case-insensitive) disable token printing. Anything else is a
	 * comma-separated list whose items are either a row count {@code "N"}
	 * (rows 1..N) or a row range {@code "A-B"} (rows A..B, inclusive).
	 *
	 * @param print the print specification
	 * @throws NumberFormatException if an item is not numeric; in that case the
	 *         previously configured specification stays in effect
	 */
	public void setPrint(String print) {
		if (print == null || print.length() == 0 || print.equalsIgnoreCase("null") || print.equalsIgnoreCase("no")) {
			printGate = null;
			this.print = null;
			return;
		}
		// Parse into a local first so that a malformed specification cannot
		// leave a half-initialised gate behind in the field.
		PrintGate gate = new LinePrintGate();
		gate.setPrint(print, TOKENS_PER_LINE);
		printGate = gate;
		this.print = print;
	}

	/**
	 * @return the current print specification, or null when printing is disabled
	 */
	public String getPrint() {
		return print;
	}

	/**
	 * Tokenizes {@code input} and reports to {@code System.out}.
	 *
	 * @param input the text to tokenize
	 */
	public void test(String input) {
		this.test(System.out, input);
	}

	/**
	 * Tokenizes {@code input} and reports to {@code out}.
	 *
	 * @param out   destination of the report
	 * @param input the text to tokenize
	 */
	public void test(PrintStream out, String input) {
		// StringReaderEx remembers the input length so the summary can show it.
		Reader reader = new StringReaderEx(input);
		this.test(out, reader);
	}

	/**
	 * Tokenizes everything read from {@code reader}, prints the tokens selected
	 * by the print specification (ten per row, each followed by "/") and then a
	 * summary. The reader is always closed before this method returns.
	 *
	 * <p>An {@link IOException} raised while tokenizing is not propagated; its
	 * stack trace is printed instead.
	 *
	 * @param out    destination of the whole report (token rows and summary)
	 * @param reader source of the text to tokenize
	 */
	public void test(PrintStream out, Reader reader) {
		try {
			// Only tokenization is timed; printing happens after "end" is taken.
			long begin = System.currentTimeMillis();
			TokenStream ts = analyzer.tokenStream("", reader);
			Token token;
			LinkedList list = new LinkedList();
			int wordsCount = 0;
			while ((token = ts.next()) != null) {
				if (printGate != null && printGate.filter(wordsCount)) {
					list.add(new CToken(token, wordsCount));
				}
				wordsCount++;
			}
			long end = System.currentTimeMillis();

			// c ends up as the index of the last printed token (0 if none were printed).
			int c = 0;
			Iterator iter = list.iterator();
			while (iter.hasNext()) {
				CToken ctoken = (CToken) iter.next();
				c = ctoken.i;
				token = ctoken.t;
				if (c % TOKENS_PER_LINE == 0) {
					// First token of a row: finish the previous row, then print the 1-based row number.
					if (c != 0) {
						out.println();
					}
					out.print((c / TOKENS_PER_LINE + 1) + ":\t");
				}
				out.print(token.termText() + "/");
			}

			if (wordsCount == 0) {
				out.println("\tAll are noise characters or words");
			} else {
				// NOTE(review): this condition is kept exactly as in the original; it only
				// decides whether an extra line break precedes the summary. It is unclear
				// why index 1 is special - confirm the intended layout.
				if (c % TOKENS_PER_LINE != 1) {
					out.println();
				}
				String inputLength = "<未知>";
				if (reader instanceof StringReaderEx) {
					inputLength = "" + ((StringReaderEx) reader).inputLength;
				} else if (ts instanceof PaodingTokenizer) {
					inputLength = "" + ((PaodingTokenizer) ts).getInputLength();
				}
				out.println();
				out.println("\t分词器" + analyzer.getClass().getName());
				out.println("\t内容长度 " + inputLength + "字符, 分 " + wordsCount
						+ "个词");
				out.println("\t分词耗时 " + (end - begin) + "ms ");
			}
		} catch (IOException e) {
			e.printStackTrace();
		} finally {
			try {
				reader.close();
			} catch (IOException ignored) {
				// Best effort: the report is already complete and nothing useful
				// can be done if closing the reader fails.
			}
		}
	}

	//-------------------------------------------

	/** A token paired with its zero-based position in the token stream. */
	static class CToken {
		Token t;
		int i;

		CToken(Token t, int i) {
			this.t = t;
			this.i = i;
		}
	}

	/** Decides, by zero-based token index, whether a token is printed. */
	static interface PrintGate {

		/**
		 * Configures the gate.
		 *
		 * @param print    the specification text
		 * @param unitSize number of tokens that make up one row
		 */
		public void setPrint(String print, int unitSize);

		/**
		 * @param count zero-based token index
		 * @return true if the token at this index should be printed
		 */
		boolean filter(int count);
	}

	/**
	 * Gate for a single specification item: either "N" (rows 1..N) or "A-B"
	 * (rows A..B). Internally stored as a half-open token index range
	 * [begin, end).
	 */
	static class PrintGateToken implements PrintGate {

		private int begin;
		private int end;

		public void setBegin(int begin) {
			this.begin = begin;
		}

		public void setEnd(int end) {
			this.end = end;
		}

		/**
		 * Parses one item of the specification. Surrounding whitespace is
		 * ignored and the sign of each number is discarded.
		 *
		 * @throws NumberFormatException if the item is not numeric
		 */
		public void setPrint(String print, int unitSize) {
			String spec = print.trim();
			int i = spec.indexOf('-');
			// i > 0 (not >= 0): a leading '-' is a sign, not a range separator.
			if (i > 0) {
				int bv = Integer.parseInt(spec.substring(0, i).trim());
				int ev = Integer.parseInt(spec.substring(i + 1).trim());
				setBegin(unitSize * (Math.abs(bv) - 1)); // e.g. row 5 starts at token index 40
				setEnd(unitSize * Math.abs(ev)); // e.g. up to row 10 ends at index 100 (exclusive)
			} else {
				setBegin(0);
				int v = Integer.parseInt(spec);
				setEnd(unitSize * (Math.abs(v)));
			}
		}

		public boolean filter(int count) {
			return count >= begin && count < end;
		}
	}

	/**
	 * Gate for a full comma-separated specification; a token passes if any of
	 * the individual items accepts it.
	 */
	static class LinePrintGate implements PrintGate {

		private PrintGate[] list;

		public void setPrint(String print, int unitSize) {
			String[] prints = print.split(",");
			// Fill a local array so a parse failure leaves any earlier configuration intact.
			PrintGate[] gates = new PrintGate[prints.length];
			for (int i = 0; i < prints.length; i++) {
				PrintGateToken pg = new PrintGateToken();
				pg.setPrint(prints[i], unitSize);
				gates[i] = pg;
			}
			list = gates;
		}

		public boolean filter(int count) {
			if (list == null) {
				// Not configured yet: nothing is selected.
				return false;
			}
			for (int i = 0; i < list.length; i++) {
				if (list[i].filter(count)) {
					return true;
				}
			}
			return false;
		}
	}

	/** A {@link StringReader} that remembers the length of the string it reads. */
	static class StringReaderEx extends StringReader {

		private int inputLength;

		public StringReaderEx(String s) {
			super(s);
			inputLength = s.length();
		}
	}

}
			
...
展开> <收缩

下载源码到电脑,阅读使用更方便

1 积分

快速下载
还剩0行未阅读,继续阅读
Ʋ

源码文件列表

温馨提示: 点击源码文件名可预览文件内容哦 ^_^
...
名称 大小 修改日期
SimpleReadListener2.java.svn-base2.53 kB2012-10-10 10:55
SimpleReadListener.java.svn-base2.72 kB2012-10-10 10:55
ReadListener.java.svn-base936.00 B2012-10-10 10:55
FileWordsReader.java.svn-base3.95 kB2012-10-10 10:55
Difference.java.svn-base2.82 kB2012-10-10 10:55
Detector.java.svn-base3.14 kB2012-10-10 10:55
Node.java.svn-base1.88 kB2012-10-10 10:55
DifferenceListener.java.svn-base879.00 B2012-10-10 10:55
Snapshot.java.svn-base5.98 kB2012-10-10 10:55
ExtensionFileFilter.java.svn-base1.22 kB2012-10-10 10:55
Estimate.java.svn-base4.94 kB2012-10-10 10:55
TryPaodingAnalyzer.java.svn-base10.66 kB2012-10-10 10:55
MaxWordLengthTokenCollector.java.svn-base2.43 kB2012-10-10 10:55
MostWordsTokenCollector.java.svn-base2.88 kB2012-10-10 10:55
SortingDictionariesCompiler.java.svn-base7.04 kB2012-10-10 10:55
CompiledFileDictionaries.java.svn-base8.25 kB2012-10-10 10:55
MostWordsModeDictionariesCompiler.java.svn-base9.05 kB2012-10-10 10:55
all-wcprops854.00 B2012-10-10 10:55
format2.00 B2012-10-10 10:55
entries847.00 B2012-10-10 10:55
all-wcprops1.13 kB2012-10-10 10:55
format2.00 B2012-10-10 10:55
entries1.07 kB2012-10-10 10:55
all-wcprops449.00 B2012-10-10 10:55
format2.00 B2012-10-10 10:55
entries555.00 B2012-10-10 10:55
TokenCollector.java.svn-base966.00 B2012-10-10 10:55
PaodingAnalyzerBean.java.svn-base4.05 kB2012-10-10 10:55
PaodingAnalyzer.java.svn-base4.46 kB2012-10-10 10:55
PaodingTokenizer.java.svn-base5.00 kB2012-10-10 10:55
all-wcprops1.03 kB2012-10-10 10:55
format2.00 B2012-10-10 10:55
entries1.00 kB2012-10-10 10:55
PaodingAnalysisException.java.svn-base1.16 kB2012-10-10 10:55
KnifeBox.java.svn-base2.47 kB2012-10-10 10:55
LetterKnife.java.svn-base1.50 kB2012-10-10 10:55
PaodingMaker.java.svn-base21.26 kB2012-10-10 10:55
CharSet.java.svn-base2.13 kB2012-10-10 10:55
CombinatoricsKnife.java.svn-base10.65 kB2012-10-10 10:55
DictionariesCompiler.java.svn-base1.28 kB2012-10-10 10:55
FileDictionaries.java.svn-base12.74 kB2012-10-10 10:55
Dictionaries.java.svn-base1.85 kB2012-10-10 10:55
Knife.java.svn-base5.80 kB2012-10-10 10:55
Beef.java.svn-base3.84 kB2012-10-10 10:55
SmartKnifeBox.java.svn-base974.00 B2012-10-10 10:55
Collector.java.svn-base1.55 kB2012-10-10 10:55
Paoding.java.svn-base1.35 kB2012-10-10 10:55
FakeKnife.java.svn-base2.08 kB2012-10-10 10:55
CJKKnife.java.svn-base14.72 kB2012-10-10 10:55
NumberKnife.java.svn-base4.38 kB2012-10-10 10:55
DictionariesWare.java.svn-base853.00 B2012-10-10 10:55
FileDictionariesDifferenceListener.java.svn-base2.42 kB2012-10-10 10:55
CollectorStdoutImpl.java.svn-base1.18 kB2012-10-10 10:55
all-wcprops125.00 B2012-10-10 10:55
format2.00 B2012-10-10 10:55
entries318.00 B2012-10-10 10:55
ReadListener.java936.00 B2012-10-10 10:55
FileWordsReader.java3.95 kB2012-10-10 10:55
SimpleReadListener.java2.72 kB2012-10-10 10:55
SimpleReadListener2.java2.53 kB2012-10-10 10:55
Detector.java3.14 kB2012-10-10 10:55
Node.java1.88 kB2012-10-10 10:55
ExtensionFileFilter.java1.22 kB2012-10-10 10:55
Difference.java2.82 kB2012-10-10 10:55
DifferenceListener.java879.00 B2012-10-10 10:55
Snapshot.java5.98 kB2012-10-10 10:55
HashBinaryDictionary.java.svn-base6.67 kB2012-10-10 10:55
Word.java.svn-base1.84 kB2012-10-10 10:55
Dictionary.java.svn-base1.71 kB2012-10-10 10:55
BinaryDictionary.java.svn-base3.15 kB2012-10-10 10:55
Hit.java.svn-base5.01 kB2012-10-10 10:55
DictionaryDelegate.java.svn-base1.30 kB2012-10-10 10:55
TryPaodingAnalyzer.java10.66 kB2012-10-10 10:55
Estimate.java4.94 kB2012-10-10 10:55
all-wcprops750.00 B2012-10-10 10:55
format2.00 B2012-10-10 10:55
entries853.00 B2012-10-10 10:55
MaxWordLengthTokenCollector.java2.43 kB2012-10-10 10:55
CompiledFileDictionaries.java8.25 kB2012-10-10 10:55
SortingDictionariesCompiler.java7.04 kB2012-10-10 10:55
MostWordsModeDictionariesCompiler.java9.05 kB2012-10-10 10:55
MostWordsTokenCollector.java2.88 kB2012-10-10 10:55
all-wcprops291.00 B2012-10-10 10:55
format2.00 B2012-10-10 10:55
entries421.00 B2012-10-10 10:55
Constants.java.svn-base4.78 kB2012-10-10 10:55
all-wcprops2.88 kB2012-10-10 10:55
format2.00 B2012-10-10 10:55
entries2.76 kB2012-10-10 10:55
all-wcprops1.01 kB2012-10-10 10:55
format2.00 B2012-10-10 10:55
entries1.07 kB2012-10-10 10:55
PaodingTokenizer.java5.00 kB2012-10-10 10:55
TokenCollector.java966.00 B2012-10-10 10:55
PaodingAnalyzer.java4.46 kB2012-10-10 10:55
PaodingAnalyzerBean.java4.05 kB2012-10-10 10:55
ChineseTokenizerFactory.java1.63 kB2012-10-10 11:06
SolrPaodingTokenizer.java1.09 kB2012-10-10 11:06
PaodingAnalysisException.java1.16 kB2012-10-10 10:55
all-wcprops242.00 B2012-10-10 10:55
format2.00 B2012-10-10 10:55
entries458.00 B2012-10-10 10:55
SmartKnifeBox.java974.00 B2012-10-10 10:55
PaodingMaker.java21.26 kB2012-10-10 10:55
CJKKnife.java14.72 kB2012-10-10 10:55
DictionariesWare.java853.00 B2012-10-10 10:55
Knife.java5.80 kB2012-10-10 10:55
Collector.java1.55 kB2012-10-10 10:55
Paoding.java1.35 kB2012-10-10 10:55
FileDictionariesDifferenceListener.java2.42 kB2012-10-10 10:55
FakeKnife.java2.08 kB2012-10-10 10:55
CharSet.java2.13 kB2012-10-10 10:55
Dictionaries.java1.85 kB2012-10-10 10:55
CollectorStdoutImpl.java1.18 kB2012-10-10 10:55
FileDictionaries.java12.74 kB2012-10-10 10:55
CombinatoricsKnife.java10.65 kB2012-10-10 10:55
KnifeBox.java2.47 kB2012-10-10 10:55
Beef.java3.84 kB2012-10-10 10:55
LetterKnife.java1.50 kB2012-10-10 10:55
DictionariesCompiler.java1.28 kB2012-10-10 10:55
NumberKnife.java4.38 kB2012-10-10 10:55
BinaryDictionary.java3.15 kB2012-10-10 10:55
Dictionary.java1.71 kB2012-10-10 10:55
DictionaryDelegate.java1.30 kB2012-10-10 10:55
Word.java1.84 kB2012-10-10 10:55
HashBinaryDictionary.java6.67 kB2012-10-10 10:55
Hit.java5.01 kB2012-10-10 10:55
all-wcprops97.00 B2012-10-10 10:55
format2.00 B2012-10-10 10:55
entries273.00 B2012-10-10 10:55
Constants.java4.78 kB2012-10-10 10:55
all-wcprops89.00 B2012-10-10 10:55
format2.00 B2012-10-10 10:55
entries264.00 B2012-10-10 10:55
readme36.00 B2012-10-11 16:29
paoding-analysis.properties187.00 B2012-10-10 10:55
paoding-analysis-default.properties220.00 B2012-10-10 10:55
paoding-analyzer.properties389.00 B2012-10-10 10:55
paoding-dic-home.properties450.00 B2012-10-11 11:36
paoding-knives.properties212.00 B2012-10-10 10:55
paoding-knives-user.properties260.00 B2012-10-10 10:55
pom.xml2.99 kB2012-10-13 14:56
zh-solr-se-solr-paoding-analysis-0.1.jar103.19 kB2012-10-13 14:19
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
Ʋ

Estimate.java (316.60 kB)

需要 1 积分
您持有 积分

CodeForge积分(原CF币)全新升级,功能更强大,使用更便捷,不仅可以用来下载海量源代码马上还可兑换精美小礼品了 了解更多

您的积分不足

支付宝优惠套餐快速获取 30 积分

订单支付完成后,积分将自动加入到您的账号。以下是优惠期的人民币价格,优惠期过后将恢复美元价格。

更多付款方式:网银PayPal

上传代码,免费获取

您本次下载所消耗的积分将转交上传作者。

同一源码,30天内重复下载,只扣除一次积分。

登录 CodeForge

还没有CodeForge账号? 立即注册
关注微博
联系客服

Switch to the English version?

Yes
CodeForge 英文版
No
CodeForge 中文版

完善个人资料,获价值¥30元积分奖励!

^_^"呃 ...

Sorry!这位大神很神秘,未开通博客呢,请浏览一下其他的吧
好的