CodeForge QQ客服 CodeForge 400电话 客服电话 4006316121

NumberKnife.java ( 文件浏览 )

  • 发布于2016-05-17
  • 浏览次数:0
  • 下载次数:0
  • 下载需 1 积分
  • 侵权举报
			/**
 * Copyright 2007 The Apache Software Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package net.paoding.analysis.knife;

import net.paoding.analysis.dictionary.Dictionary;
import net.paoding.analysis.dictionary.Hit;

/**
 * 
 * @author Zhiliang Wang [qieqie.wang@gmail.com]
 * 
 */
public class NumberKnife extends CombinatoricsKnife implements DictionariesWare {


	private Dictionary units;
	
	public NumberKnife() {

	
}

	public NumberKnife(Dictionaries dictionaries) {

		setDictionaries(dictionaries);
	
}

	public void setDictionaries(Dictionaries dictionaries) {

		super.setDictionaries(dictionaries);
		units = dictionaries.getUnitsDictionary();
	
}
	

	public int assignable(Beef beef, int offset, int index) {

		char ch = beef.charAt(index);
		if (CharSet.isArabianNumber(ch))
			return ASSIGNED;
		if (index > offset) {

			if (CharSet.isLantingLetter(ch) || ch == '.' || ch == '-' || ch == '_') {

				if (CharSet.isLantingLetter(ch)
						|| !CharSet.isArabianNumber(beef.charAt(index + 1))) {

					//分词效果
					//123.456		->123.456/
					//123.abc.34	->123/123.abc.34/abc/34/	["abc"、"abc/34"系由LetterKnife分出,非NumberKnife]
					//没有或判断!CharSet.isArabianNumber(beef.charAt(index + 1)),则分出"123.",而非"123"
					//123.abc.34	->123./123.abc.34/abc/34/
					return POINT;
				
}
				return ASSIGNED;
			
}
		
}
		return LIMIT;
	
}
	
	protected int collectLimit(Collector collector, Beef beef,
			int offset, int point, int limit, int dicWordVote) {

		// "123abc"的直接调用super的
		if (point != -1) {

			return super.collectLimit(collector, beef, offset, point, limit, dicWordVote);
		
}
		// 
		// 2.2两
		//    ^=_point
		//     
		final int _point = limit;
		// 当前尝试判断的字符的位置
		int curTail = offset;
		int number1 = -1;
		int number2 = -1;
		int bitValue = 0;
		int maxUnit = 0;
		//TODO:这里又重复从curTail(其值为offset)判断,重新遍历判断是否为数字,算是一个重复计算
		//但考虑这个计算对中文分词性能影响微乎其微暂时先不优化
		for (; (bitValue = CharSet.toNumber(beef.charAt(curTail))) >= 0; curTail++) {

			// 
			if (bitValue == 2
					&& (beef.charAt(curTail) == '两' || beef.charAt(curTail) == '俩' || beef
							.charAt(curTail) == '倆')) {

				if (curTail != offset) {

					break;
				
}
			
}
			// 处理连续汉字个位值的数字:"三四五六"	->"3456"
			if (bitValue >= 0 && bitValue < 10) {

				if (number2 < 0)
					number2 = bitValue;
				else {

					number2 *= 10;
					number2 += bitValue;
				
}
			
} else {

				if (number2 < 0) {

					if (number1 < 0) {

						number1 = 1;
					
}
					number1 *= bitValue;
				
} else {

					if (number1 < 0) {

						number1 = 0;
					
}
					if (bitValue >= maxUnit) {

						number1 += number2;
						number1 *= bitValue;
						maxUnit = bitValue;
					
} else {

						number1 += number2 * bitValue;
					
}
				
}
				number2 = -1;
			
}
		
}
		if (number2 > 0) {

			if (number1 < 0) {

				number1 = number2;
			
} else {

				number1 += number2;
			
}
		
}
		if (number1 >= 0 && curTail > _point) {

			doCollect(collector, String.valueOf(number1), beef, offset, curTail);
		
}
		else {

			super.collectLimit(collector, beef, offset, point, limit, dicWordVote);
		
}
		
		curTail = curTail > limit ? curTail : limit;
		
		//
		// 后面可能跟了计量单位
		if (units != null && CharSet.isCjkUnifiedIdeographs(beef.charAt(curTail))) {

			Hit wd = null;
			Hit wd2 = null;
			int i = curTail + 1;
			while ((wd = units.search(beef, curTail, i - curTail)).isHit()) {

				wd2 = wd;
				i++;
				if (!wd.isUnclosed()) {

					break;
				
}
			
}
			i --;
			if (wd2 != null) {

				collector.collect(wd2.getWord().getText(), curTail, i);
				return i;
			
}
		
}
		//
		
		return curTail > limit ? curTail : -1;
	
}



}
			
...
展开> <收缩

下载源码到电脑,阅读使用更方便

1 积分

快速下载
还剩0行未阅读,继续阅读
Ʋ

源码文件列表

温馨提示: 点击源码文件名可预览文件内容哦 ^_^
...
名称 大小 修改日期
SimpleReadListener2.java.svn-base2.53 kB2012-10-10 10:55
SimpleReadListener.java.svn-base2.72 kB2012-10-10 10:55
ReadListener.java.svn-base936.00 B2012-10-10 10:55
FileWordsReader.java.svn-base3.95 kB2012-10-10 10:55
Difference.java.svn-base2.82 kB2012-10-10 10:55
Detector.java.svn-base3.14 kB2012-10-10 10:55
Node.java.svn-base1.88 kB2012-10-10 10:55
DifferenceListener.java.svn-base879.00 B2012-10-10 10:55
Snapshot.java.svn-base5.98 kB2012-10-10 10:55
ExtensionFileFilter.java.svn-base1.22 kB2012-10-10 10:55
Estimate.java.svn-base4.94 kB2012-10-10 10:55
TryPaodingAnalyzer.java.svn-base10.66 kB2012-10-10 10:55
MaxWordLengthTokenCollector.java.svn-base2.43 kB2012-10-10 10:55
MostWordsTokenCollector.java.svn-base2.88 kB2012-10-10 10:55
SortingDictionariesCompiler.java.svn-base7.04 kB2012-10-10 10:55
CompiledFileDictionaries.java.svn-base8.25 kB2012-10-10 10:55
MostWordsModeDictionariesCompiler.java.svn-base9.05 kB2012-10-10 10:55
all-wcprops854.00 B2012-10-10 10:55
format2.00 B2012-10-10 10:55
entries847.00 B2012-10-10 10:55
all-wcprops1.13 kB2012-10-10 10:55
format2.00 B2012-10-10 10:55
entries1.07 kB2012-10-10 10:55
all-wcprops449.00 B2012-10-10 10:55
format2.00 B2012-10-10 10:55
entries555.00 B2012-10-10 10:55
TokenCollector.java.svn-base966.00 B2012-10-10 10:55
PaodingAnalyzerBean.java.svn-base4.05 kB2012-10-10 10:55
PaodingAnalyzer.java.svn-base4.46 kB2012-10-10 10:55
PaodingTokenizer.java.svn-base5.00 kB2012-10-10 10:55
all-wcprops1.03 kB2012-10-10 10:55
format2.00 B2012-10-10 10:55
entries1.00 kB2012-10-10 10:55
PaodingAnalysisException.java.svn-base1.16 kB2012-10-10 10:55
KnifeBox.java.svn-base2.47 kB2012-10-10 10:55
LetterKnife.java.svn-base1.50 kB2012-10-10 10:55
PaodingMaker.java.svn-base21.26 kB2012-10-10 10:55
CharSet.java.svn-base2.13 kB2012-10-10 10:55
CombinatoricsKnife.java.svn-base10.65 kB2012-10-10 10:55
DictionariesCompiler.java.svn-base1.28 kB2012-10-10 10:55
FileDictionaries.java.svn-base12.74 kB2012-10-10 10:55
Dictionaries.java.svn-base1.85 kB2012-10-10 10:55
Knife.java.svn-base5.80 kB2012-10-10 10:55
Beef.java.svn-base3.84 kB2012-10-10 10:55
SmartKnifeBox.java.svn-base974.00 B2012-10-10 10:55
Collector.java.svn-base1.55 kB2012-10-10 10:55
Paoding.java.svn-base1.35 kB2012-10-10 10:55
FakeKnife.java.svn-base2.08 kB2012-10-10 10:55
CJKKnife.java.svn-base14.72 kB2012-10-10 10:55
NumberKnife.java.svn-base4.38 kB2012-10-10 10:55
DictionariesWare.java.svn-base853.00 B2012-10-10 10:55
FileDictionariesDifferenceListener.java.svn-base2.42 kB2012-10-10 10:55
CollectorStdoutImpl.java.svn-base1.18 kB2012-10-10 10:55
all-wcprops125.00 B2012-10-10 10:55
format2.00 B2012-10-10 10:55
entries318.00 B2012-10-10 10:55
ReadListener.java936.00 B2012-10-10 10:55
FileWordsReader.java3.95 kB2012-10-10 10:55
SimpleReadListener.java2.72 kB2012-10-10 10:55
SimpleReadListener2.java2.53 kB2012-10-10 10:55
Detector.java3.14 kB2012-10-10 10:55
Node.java1.88 kB2012-10-10 10:55
ExtensionFileFilter.java1.22 kB2012-10-10 10:55
Difference.java2.82 kB2012-10-10 10:55
DifferenceListener.java879.00 B2012-10-10 10:55
Snapshot.java5.98 kB2012-10-10 10:55
HashBinaryDictionary.java.svn-base6.67 kB2012-10-10 10:55
Word.java.svn-base1.84 kB2012-10-10 10:55
Dictionary.java.svn-base1.71 kB2012-10-10 10:55
BinaryDictionary.java.svn-base3.15 kB2012-10-10 10:55
Hit.java.svn-base5.01 kB2012-10-10 10:55
DictionaryDelegate.java.svn-base1.30 kB2012-10-10 10:55
TryPaodingAnalyzer.java10.66 kB2012-10-10 10:55
Estimate.java4.94 kB2012-10-10 10:55
all-wcprops750.00 B2012-10-10 10:55
format2.00 B2012-10-10 10:55
entries853.00 B2012-10-10 10:55
MaxWordLengthTokenCollector.java2.43 kB2012-10-10 10:55
CompiledFileDictionaries.java8.25 kB2012-10-10 10:55
SortingDictionariesCompiler.java7.04 kB2012-10-10 10:55
MostWordsModeDictionariesCompiler.java9.05 kB2012-10-10 10:55
MostWordsTokenCollector.java2.88 kB2012-10-10 10:55
all-wcprops291.00 B2012-10-10 10:55
format2.00 B2012-10-10 10:55
entries421.00 B2012-10-10 10:55
Constants.java.svn-base4.78 kB2012-10-10 10:55
all-wcprops2.88 kB2012-10-10 10:55
format2.00 B2012-10-10 10:55
entries2.76 kB2012-10-10 10:55
all-wcprops1.01 kB2012-10-10 10:55
format2.00 B2012-10-10 10:55
entries1.07 kB2012-10-10 10:55
PaodingTokenizer.java5.00 kB2012-10-10 10:55
TokenCollector.java966.00 B2012-10-10 10:55
PaodingAnalyzer.java4.46 kB2012-10-10 10:55
PaodingAnalyzerBean.java4.05 kB2012-10-10 10:55
ChineseTokenizerFactory.java1.63 kB2012-10-10 11:06
SolrPaodingTokenizer.java1.09 kB2012-10-10 11:06
PaodingAnalysisException.java1.16 kB2012-10-10 10:55
all-wcprops242.00 B2012-10-10 10:55
format2.00 B2012-10-10 10:55
entries458.00 B2012-10-10 10:55
SmartKnifeBox.java974.00 B2012-10-10 10:55
PaodingMaker.java21.26 kB2012-10-10 10:55
CJKKnife.java14.72 kB2012-10-10 10:55
DictionariesWare.java853.00 B2012-10-10 10:55
Knife.java5.80 kB2012-10-10 10:55
Collector.java1.55 kB2012-10-10 10:55
Paoding.java1.35 kB2012-10-10 10:55
FileDictionariesDifferenceListener.java2.42 kB2012-10-10 10:55
FakeKnife.java2.08 kB2012-10-10 10:55
CharSet.java2.13 kB2012-10-10 10:55
Dictionaries.java1.85 kB2012-10-10 10:55
CollectorStdoutImpl.java1.18 kB2012-10-10 10:55
FileDictionaries.java12.74 kB2012-10-10 10:55
CombinatoricsKnife.java10.65 kB2012-10-10 10:55
KnifeBox.java2.47 kB2012-10-10 10:55
Beef.java3.84 kB2012-10-10 10:55
LetterKnife.java1.50 kB2012-10-10 10:55
DictionariesCompiler.java1.28 kB2012-10-10 10:55
NumberKnife.java4.38 kB2012-10-10 10:55
BinaryDictionary.java3.15 kB2012-10-10 10:55
Dictionary.java1.71 kB2012-10-10 10:55
DictionaryDelegate.java1.30 kB2012-10-10 10:55
Word.java1.84 kB2012-10-10 10:55
HashBinaryDictionary.java6.67 kB2012-10-10 10:55
Hit.java5.01 kB2012-10-10 10:55
all-wcprops97.00 B2012-10-10 10:55
format2.00 B2012-10-10 10:55
entries273.00 B2012-10-10 10:55
Constants.java4.78 kB2012-10-10 10:55
all-wcprops89.00 B2012-10-10 10:55
format2.00 B2012-10-10 10:55
entries264.00 B2012-10-10 10:55
readme36.00 B2012-10-11 16:29
paoding-analysis.properties187.00 B2012-10-10 10:55
paoding-analysis-default.properties220.00 B2012-10-10 10:55
paoding-analyzer.properties389.00 B2012-10-10 10:55
paoding-dic-home.properties450.00 B2012-10-11 11:36
paoding-knives.properties212.00 B2012-10-10 10:55
paoding-knives-user.properties260.00 B2012-10-10 10:55
pom.xml2.99 kB2012-10-13 14:56
zh-solr-se-solr-paoding-analysis-0.1.jar103.19 kB2012-10-13 14:19
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
Ʋ

NumberKnife.java (316.60 kB)

需要 1 积分
您持有 积分

CodeForge积分(原CF币)全新升级,功能更强大,使用更便捷,不仅可以用来下载海量源代码马上还可兑换精美小礼品了 了解更多

您的积分不足

支付宝优惠套餐快速获取 30 积分

订单支付完成后,积分将自动加入到您的账号。以下是优惠期的人民币价格,优惠期过后将恢复美元价格。

更多付款方式:网银PayPal

上传代码,免费获取

您本次下载所消耗的积分将转交上传作者。

同一源码,30天内重复下载,只扣除一次积分。

登录 CodeForge

还没有CodeForge账号? 立即注册
关注微博
联系客服

Switch to the English version?

Yes
CodeForge 英文版
No
CodeForge 中文版

完善个人资料,获价值¥30元积分奖励!

^_^"呃 ...

Sorry!这位大神很神秘,未开通博客呢,请浏览一下其他的吧
好的