TryPaodingAnalyzer.java ( File view )

  • By 2016-05-17
  • View(s):0
  • Download(s):0
  • Point(s): 1
			package net.paoding.analysis.analyzer.estimate;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.net.URL;

import net.paoding.analysis.analyzer.PaodingAnalyzer;
import net.paoding.analysis.knife.PaodingMaker;

import org.apache.lucene.analysis.Analyzer;

public class TryPaodingAnalyzer {

	private static final String ARGS_TIP = ":";
	static String input = null;
	static String file = null;
	static Reader reader = null;
	static String charset = null;
	static String mode = null;
	static String analyzerName = null;
	static String print = null;
	static String properties = PaodingMaker.DEFAULT_PROPERTIES_PATH;
	
	public static void main(String[] args) {

		try {

			resetArgs();
			
			int inInput = 0;
			for (int i = 0; i < args.length; i++) {

				if (args[i] == null || (args[i] = args[i].trim()).length() == 0) {

					continue;
				
}
				if (args[i].equals("--file") || args[i].equals("-f")) {

					file = args[++i];
				
} else if (args[i].equals("--charset") || args[i].equals("-c")) {

					charset = args[++i];
				
} else if (args[i].equals("--mode") || args[i].equals("-m")) {

					mode = args[++i];
				
} else if (args[i].equals("--properties") || args[i].equals("-p")) {

					properties = args[++i];
				
} else if (args[i].equals("--analyzer") || args[i].equals("-a")) {

					analyzerName = args[++i];
				
} else if (args[i].equals("--print") || args[i].equals("-P")) {

					print = args[++i];
				
} else if (args[i].equals("--input") || args[i].equals("-i")) {

					inInput++;
				
} else if (args[i].equals("--help") || args[i].equals("-h")
						|| args[i].equals("?")) {

					printHelp();
					return;
				
} else {

					// 非选项的参数数组视为input
					if (!args[i].startsWith("-")
							&& (i == 0 || args[i - 1].equals("-i") || args[i - 1].equals("--input") || !args[i - 1].startsWith("-"))) {

						if (input == null) {

							input = args[i];// !!没有++i
						
} else {

							input = input + ' ' + args[i];// !!没有++i
						
}
						inInput++;
					
}
				
}
			
}
			if (file != null) {

				input = null;
				reader = getReader(file, charset);
			
}
			//
			analysing();
		
} catch (Exception e1) {

			resetArgs();
			e1.printStackTrace();
		
}
	
}



	private static void resetArgs() {

		input = null;
		file = null;
		reader = null;
		charset = null;
		mode = null;
		print = null;
		analyzerName = null;
		properties = PaodingMaker.DEFAULT_PROPERTIES_PATH;
	
}
	

	
	private static void analysing() throws Exception {

		Analyzer analyzer;
		if (analyzerName == null || analyzerName.length() == 0 || analyzerName.equalsIgnoreCase("paoding")) {

			//properties==null等同于new new PaodingAnalyzer();
			analyzer = new PaodingAnalyzer(properties);
			if (mode != null) {

				((PaodingAnalyzer) analyzer).setMode(mode);
			
}
		
}
		else {

			Class clz;
			if (analyzerName.equalsIgnoreCase("standard")) {

				analyzerName = "org.apache.lucene.analysis.standard.StandardAnalyzer";
			
}
			else if (analyzerName.equalsIgnoreCase("cjk")) {

				analyzerName = "org.apache.lucene.analysis.cjk.CJKAnalyzer";
			
}
			else if (analyzerName.equalsIgnoreCase("cn") || analyzerName.equalsIgnoreCase("chinese")) {

				analyzerName = "org.apache.lucene.analysis.cn.ChineseAnalyzer";
			
}
			else if (analyzerName.equalsIgnoreCase("st") || analyzerName.equalsIgnoreCase("standard")) {

				analyzerName = "org.apache.lucene.analysis.standard.StandardAnalyzer";
			
}
			clz = Class.forName(analyzerName);
			analyzer = (Analyzer) clz.newInstance();
		
}
		boolean readInputFromConsle = false;
		Estimate estimate = new Estimate(analyzer);
		if (print != null) {

			estimate.setPrint(print);
		
}
		while (true) {

			if (reader == null) {

				if (input == null || input.length() == 0 || readInputFromConsle) {

					input = getInputFromConsole();
					readInputFromConsle = true;
				
}
				if (input == null || input.length() == 0) {

					System.out.println("Warn: none charactors you input!!");
					continue;
				
}
				else if (input.startsWith(ARGS_TIP)) {

					String argsStr = input.substring(ARGS_TIP.length());
					main(argsStr.split(" "));
					continue;
				
}
			
}
			if (reader != null) {

				estimate.test(System.out, reader);
				reader = null;
			
}
			else {

				estimate.test(System.out, input);
				input = null;
			
}
			System.out.println("--------------------------------------------------");
			if (false == readInputFromConsle) {

				return;
			
}
		
}
	
}

	private static String getInputFromConsole() throws IOException {

		printTitleIfNotPrinted("");
		String input = null;
		BufferedReader reader = new BufferedReader(new InputStreamReader(
				System.in));
		String line;
		do {

			System.out.print("paoding> ");
			line = reader.readLine();
			if (line == null || line.length() == 0) {

				continue;
			
}
			if (line.equals(ARGS_TIP + "clear") || line.equals(ARGS_TIP + "c")) {

				input = null;
				System.out.println("paoding> Cleared");
				return getInputFromConsole();
			
}
			else if (line.equals(ARGS_TIP + "exit") || line.equals(ARGS_TIP + "quit") || line.equals(ARGS_TIP + "e") || line.equals(ARGS_TIP + "q") ) {

				System.out.println("Bye!");
				System.exit(0);
			
}
			else if (input == null && line.startsWith(ARGS_TIP)) {

				input = line;
				break;
			
}
			else {

				if (line.endsWith(";")) {

					if (line.length() > ";".length()) {

						input = line.substring(0, line.length() - ";".length());
					
}
					break;
				
}
				else {

					if (input == null) {

						input = line;
					
} else {

						input = input + "\n" + line;
					
}
				
}
			
}
		
} while (true);
		return input == null ? null : input.trim();
	
}

	private static void printHelp() {

		String app = System.getProperty("paoding.try.app",
				"TryPaodingAnalyzer");
		String cmd = System.getProperty("paoding.try.cmd", "java "
				+ TryPaodingAnalyzer.class.getName());
		System.out.println(app + "的用法:");
		System.out.println("\t" + cmd + " [OPTIONS] [text_content]");
		System.out.println("\nOPTIONS:");
		System.out.println("\t--file, -f:\n\t\t文章以文件的形式输入,在前缀加上\"classpath:\"表示从类路径中寻找该文件。");
		System.out.println("\t--charset, -c:\n\t\t文章的字符集编码,比如gbk,utf-8等。如果没有设置该选项,则使用Java环境默认的字符集编码。");
		System.out.println("\t--properties, -p:\n\t\t不读取默认的类路径下的庖丁分词属性文件,而使用指定的文件,在前缀加上\"classpath:\"表示从类路径中寻找该文件。");
		System.out.println("\t--mode, -m:\n\t\t强制使用给定的mode的分词器;可以设定为default,most-words,max-word-length或指定类名的其他mode(指定类名的,需要加前缀\"class:\")。");
		System.out.println("\t--input, -i:\n\t\t要被分词的文章内容;当没有通过-f或--file指定文章输入文件时可选择这个选项指定要被分词的内容。");
		System.out.println("\t--analyzer, -a:\n\t\t测试其他分词器,通过--analyzer或-a指定其完整类名。特别地,paoding、cjk、chinese、st分别对应PaodingAnalyzer、CJKAnalyzer、ChineseAnalyzer、StandardAnalyzer");
		System.out.println("\t--print, -P:\n\t\t 是否打印分词结果。默认打印前50行。规则:no表示不打印;50等价于1-50行;1-50表示打印1至50行;可以以逗号组合使用,如20,40-50表示打印1-20以及40-50行");
		System.out.println("\n示例:");
		System.out.println("\t" + cmd);
		System.out.println("\t" + cmd + " ?");
		System.out.println("\t" + cmd + " 中华人民共和国");
		System.out.println("\t" + cmd + " -m max 中华人民共和国");
		System.out.println("\t" + cmd + " -f e:/content.txt -c utf8");
		System.out.println("\t" + cmd + " -f e:/content.txt -c utf8 -m max-word-length");
		System.out.println("\t" + cmd + " -f e:/content.txt -c utf8 -a cjk");
		System.out.println("\n若是控制台进入\"paoding>\"后:");
		titlePrinted = false;
		printTitleIfNotPrinted("\t");
	
}
	
	
	private static boolean titlePrinted = false;
	private static boolean welcomePrinted = false;
	private static void printTitleIfNotPrinted(String prefix) {

		if (!titlePrinted) {

			System.out.println();
			if (!welcomePrinted) {

				System.out.println("Welcome to Paoding Analyser(2.0.4-alpha2)");
				System.out.println();
				welcomePrinted = true;
			
}
			System.out.println(prefix + "直接输入或粘贴要被分词的内容,以分号;结束,回车后开始分词。");
			System.out.println(prefix + "另起一行输入:clear或:c,使此次输入无效,用以重新输入。");
			System.out.println(prefix + "要使用命令行参数读入文件内容或其他参数请以冒号:开始,然后输入参数选项。");
			System.out.println(prefix + "退出,请输入:quit或:q、:exit、:e");
			System.out.println(prefix + "需要帮助,请输入:?");
			System.out.println(prefix + "注意:指定对文件分词之前要了解该文件的编码,如果系统编码和文件编码不一致,要通过-c指定文件的编码。");
			System.out.println();
			titlePrinted = true;
		
}
	
}
	
		
	static String getContent(String path, String encoding) throws IOException {

		return (String) read(path, encoding, true);
	
}
	
	static Reader getReader(String path, String encoding) throws IOException {

		return (Reader) read(path, encoding, false);
	
}
	
	static Object read(String path, String encoding, boolean return_string) throws IOException {

		InputStream in;
		if (path.startsWith("classpath:")) {

			path = path.substring("classpath:".length());
			URL url = Estimate.class.getClassLoader().getResource(path);
			if (url == null) {

				throw new IllegalArgumentException("Not found " + path
						+ " in classpath.");
			
}
			System.out.println("read content from:" + url.getFile());
			in = url.openStream();
		
} else {

			File f = new File(path);
			if (!f.exists()) {

			
...
...
(Not finished, please download and read the complete file)
			
...
Expand> <Close

Want complete source code? Download it here

Point(s): 1

Download
0 lines left, continue to read
Sponsored links

File list

Tips: You can preview the content of files by clicking file names^_^
Name Size Date
SimpleReadListener2.java.svn-base2.53 kB2012-10-10 10:55
SimpleReadListener.java.svn-base2.72 kB2012-10-10 10:55
ReadListener.java.svn-base936.00 B2012-10-10 10:55
FileWordsReader.java.svn-base3.95 kB2012-10-10 10:55
Difference.java.svn-base2.82 kB2012-10-10 10:55
Detector.java.svn-base3.14 kB2012-10-10 10:55
Node.java.svn-base1.88 kB2012-10-10 10:55
DifferenceListener.java.svn-base879.00 B2012-10-10 10:55
Snapshot.java.svn-base5.98 kB2012-10-10 10:55
ExtensionFileFilter.java.svn-base1.22 kB2012-10-10 10:55
Estimate.java.svn-base4.94 kB2012-10-10 10:55
TryPaodingAnalyzer.java.svn-base10.66 kB2012-10-10 10:55
MaxWordLengthTokenCollector.java.svn-base2.43 kB2012-10-10 10:55
MostWordsTokenCollector.java.svn-base2.88 kB2012-10-10 10:55
SortingDictionariesCompiler.java.svn-base7.04 kB2012-10-10 10:55
CompiledFileDictionaries.java.svn-base8.25 kB2012-10-10 10:55
MostWordsModeDictionariesCompiler.java.svn-base9.05 kB2012-10-10 10:55
all-wcprops854.00 B2012-10-10 10:55
format2.00 B2012-10-10 10:55
entries847.00 B2012-10-10 10:55
all-wcprops1.13 kB2012-10-10 10:55
format2.00 B2012-10-10 10:55
entries1.07 kB2012-10-10 10:55
all-wcprops449.00 B2012-10-10 10:55
format2.00 B2012-10-10 10:55
entries555.00 B2012-10-10 10:55
TokenCollector.java.svn-base966.00 B2012-10-10 10:55
PaodingAnalyzerBean.java.svn-base4.05 kB2012-10-10 10:55
PaodingAnalyzer.java.svn-base4.46 kB2012-10-10 10:55
PaodingTokenizer.java.svn-base5.00 kB2012-10-10 10:55
all-wcprops1.03 kB2012-10-10 10:55
format2.00 B2012-10-10 10:55
entries1.00 kB2012-10-10 10:55
PaodingAnalysisException.java.svn-base1.16 kB2012-10-10 10:55
KnifeBox.java.svn-base2.47 kB2012-10-10 10:55
LetterKnife.java.svn-base1.50 kB2012-10-10 10:55
PaodingMaker.java.svn-base21.26 kB2012-10-10 10:55
CharSet.java.svn-base2.13 kB2012-10-10 10:55
CombinatoricsKnife.java.svn-base10.65 kB2012-10-10 10:55
DictionariesCompiler.java.svn-base1.28 kB2012-10-10 10:55
FileDictionaries.java.svn-base12.74 kB2012-10-10 10:55
Dictionaries.java.svn-base1.85 kB2012-10-10 10:55
Knife.java.svn-base5.80 kB2012-10-10 10:55
Beef.java.svn-base3.84 kB2012-10-10 10:55
SmartKnifeBox.java.svn-base974.00 B2012-10-10 10:55
Collector.java.svn-base1.55 kB2012-10-10 10:55
Paoding.java.svn-base1.35 kB2012-10-10 10:55
FakeKnife.java.svn-base2.08 kB2012-10-10 10:55
CJKKnife.java.svn-base14.72 kB2012-10-10 10:55
NumberKnife.java.svn-base4.38 kB2012-10-10 10:55
DictionariesWare.java.svn-base853.00 B2012-10-10 10:55
FileDictionariesDifferenceListener.java.svn-base2.42 kB2012-10-10 10:55
CollectorStdoutImpl.java.svn-base1.18 kB2012-10-10 10:55
all-wcprops125.00 B2012-10-10 10:55
format2.00 B2012-10-10 10:55
entries318.00 B2012-10-10 10:55
ReadListener.java936.00 B2012-10-10 10:55
FileWordsReader.java3.95 kB2012-10-10 10:55
SimpleReadListener.java2.72 kB2012-10-10 10:55
SimpleReadListener2.java2.53 kB2012-10-10 10:55
Detector.java3.14 kB2012-10-10 10:55
Node.java1.88 kB2012-10-10 10:55
ExtensionFileFilter.java1.22 kB2012-10-10 10:55
Difference.java2.82 kB2012-10-10 10:55
DifferenceListener.java879.00 B2012-10-10 10:55
Snapshot.java5.98 kB2012-10-10 10:55
HashBinaryDictionary.java.svn-base6.67 kB2012-10-10 10:55
Word.java.svn-base1.84 kB2012-10-10 10:55
Dictionary.java.svn-base1.71 kB2012-10-10 10:55
BinaryDictionary.java.svn-base3.15 kB2012-10-10 10:55
Hit.java.svn-base5.01 kB2012-10-10 10:55
DictionaryDelegate.java.svn-base1.30 kB2012-10-10 10:55
TryPaodingAnalyzer.java10.66 kB2012-10-10 10:55
Estimate.java4.94 kB2012-10-10 10:55
all-wcprops750.00 B2012-10-10 10:55
format2.00 B2012-10-10 10:55
entries853.00 B2012-10-10 10:55
MaxWordLengthTokenCollector.java2.43 kB2012-10-10 10:55
CompiledFileDictionaries.java8.25 kB2012-10-10 10:55
SortingDictionariesCompiler.java7.04 kB2012-10-10 10:55
MostWordsModeDictionariesCompiler.java9.05 kB2012-10-10 10:55
MostWordsTokenCollector.java2.88 kB2012-10-10 10:55
all-wcprops291.00 B2012-10-10 10:55
format2.00 B2012-10-10 10:55
entries421.00 B2012-10-10 10:55
Constants.java.svn-base4.78 kB2012-10-10 10:55
all-wcprops2.88 kB2012-10-10 10:55
format2.00 B2012-10-10 10:55
entries2.76 kB2012-10-10 10:55
all-wcprops1.01 kB2012-10-10 10:55
format2.00 B2012-10-10 10:55
entries1.07 kB2012-10-10 10:55
PaodingTokenizer.java5.00 kB2012-10-10 10:55
TokenCollector.java966.00 B2012-10-10 10:55
PaodingAnalyzer.java4.46 kB2012-10-10 10:55
PaodingAnalyzerBean.java4.05 kB2012-10-10 10:55
ChineseTokenizerFactory.java1.63 kB2012-10-10 11:06
SolrPaodingTokenizer.java1.09 kB2012-10-10 11:06
PaodingAnalysisException.java1.16 kB2012-10-10 10:55
all-wcprops242.00 B2012-10-10 10:55
format2.00 B2012-10-10 10:55
entries458.00 B2012-10-10 10:55
SmartKnifeBox.java974.00 B2012-10-10 10:55
PaodingMaker.java21.26 kB2012-10-10 10:55
CJKKnife.java14.72 kB2012-10-10 10:55
DictionariesWare.java853.00 B2012-10-10 10:55
Knife.java5.80 kB2012-10-10 10:55
Collector.java1.55 kB2012-10-10 10:55
Paoding.java1.35 kB2012-10-10 10:55
FileDictionariesDifferenceListener.java2.42 kB2012-10-10 10:55
FakeKnife.java2.08 kB2012-10-10 10:55
CharSet.java2.13 kB2012-10-10 10:55
Dictionaries.java1.85 kB2012-10-10 10:55
CollectorStdoutImpl.java1.18 kB2012-10-10 10:55
FileDictionaries.java12.74 kB2012-10-10 10:55
CombinatoricsKnife.java10.65 kB2012-10-10 10:55
KnifeBox.java2.47 kB2012-10-10 10:55
Beef.java3.84 kB2012-10-10 10:55
LetterKnife.java1.50 kB2012-10-10 10:55
DictionariesCompiler.java1.28 kB2012-10-10 10:55
NumberKnife.java4.38 kB2012-10-10 10:55
BinaryDictionary.java3.15 kB2012-10-10 10:55
Dictionary.java1.71 kB2012-10-10 10:55
DictionaryDelegate.java1.30 kB2012-10-10 10:55
Word.java1.84 kB2012-10-10 10:55
HashBinaryDictionary.java6.67 kB2012-10-10 10:55
Hit.java5.01 kB2012-10-10 10:55
all-wcprops97.00 B2012-10-10 10:55
format2.00 B2012-10-10 10:55
entries273.00 B2012-10-10 10:55
Constants.java4.78 kB2012-10-10 10:55
all-wcprops89.00 B2012-10-10 10:55
format2.00 B2012-10-10 10:55
entries264.00 B2012-10-10 10:55
readme36.00 B2012-10-11 16:29
paoding-analysis.properties187.00 B2012-10-10 10:55
paoding-analysis-default.properties220.00 B2012-10-10 10:55
paoding-analyzer.properties389.00 B2012-10-10 10:55
paoding-dic-home.properties450.00 B2012-10-11 11:36
paoding-knives.properties212.00 B2012-10-10 10:55
paoding-knives-user.properties260.00 B2012-10-10 10:55
pom.xml2.99 kB2012-10-13 14:56
zh-solr-se-solr-paoding-analysis-0.1.jar103.19 kB2012-10-13 14:19
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
01.96 kB
...
Sponsored links

TryPaodingAnalyzer.java (316.60 kB)

Need 1 point
Your Point(s)

Your Point isn't enough.

Get point immediately by PayPal

More(Debit card / Credit card / PayPal Credit / Online Banking)

Submit your source codes. Get more point

LOGIN

Don't have an account? Register now
Need any help?
Mail to: support@codeforge.com

切换到中文版?

CodeForge Chinese Version
CodeForge English Version

Where are you going?

^_^"Oops ...

Sorry!This guy is mysterious, its blog hasn't been opened, try another, please!
OK

Warm tip!

CodeForge to FavoriteFavorite by Ctrl+D