CodeForge QQ客服 CodeForge 400电话 客服电话 4006316121

SortingDictionariesCompiler.java ( 文件浏览 )

  • 发布于2016-05-17
  • 浏览次数:0
  • 下载次数:0
  • 下载需 1 积分
  • 侵权举报
			package net.paoding.analysis.analyzer.impl;

import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.UnsupportedEncodingException;
import java.util.Properties;

import net.paoding.analysis.Constants;
import net.paoding.analysis.dictionary.Dictionary;
import net.paoding.analysis.dictionary.Word;
import net.paoding.analysis.knife.Dictionaries;
import net.paoding.analysis.knife.DictionariesCompiler;
import net.paoding.analysis.knife.Knife;

/**
 * {@link DictionariesCompiler} that writes each dictionary to a plain text
 * file under <code>&lt;dic home&gt;/.compiled/sorting</code>, one word per
 * line in the order the {@link Dictionary} returns them, and records a
 * <code>.metadata</code> properties file describing the inputs the compiled
 * output was produced from.
 */
public class SortingDictionariesCompiler implements DictionariesCompiler {

	/** Compiler version stamped into the metadata file and compared by {@link #shouldCompile(Properties)}. */
	public static final String VERSION = "2";

	private static final String LAST_MODIFIEDS_KEY = "paoding.analysis.properties.lastModifieds";
	private static final String FILES_KEY = "paoding.analysis.properties.files";
	private static final String DIC_HOME_KEY = "paoding.dic.home.absolute.path";
	private static final String COMPILER_CLASS_KEY = "paoding.analysis.compiler.class";
	private static final String COMPILER_VERSION_KEY = "paoding.analysis.compiler.version";

	/**
	 * Decides whether the dictionaries have to be (re)compiled.
	 *
	 * @param p runtime properties; read for the dictionary home path, the
	 *          list of properties files and their last-modified stamps
	 * @return <code>false</code> only when a metadata file exists and its
	 *         recorded last-modified stamps, files list, compiler class name
	 *         and compiler version all match the current ones;
	 *         <code>true</code> otherwise
	 * @throws Exception if the metadata file exists but cannot be read
	 */
	public boolean shouldCompile(Properties p) throws Exception {
		String lastModifieds = p.getProperty(LAST_MODIFIEDS_KEY);
		String files = p.getProperty(FILES_KEY);
		String dicHome = p.getProperty(DIC_HOME_KEY);
		File dicHomeFile = new File(dicHome);
		File compliedMetadataFile = new File(dicHomeFile, ".compiled/sorting/.metadata");
		if (!compliedMetadataFile.exists() || !compliedMetadataFile.isFile()) {
			return true;
		}

		Properties compiledProperties = new Properties();
		InputStream compiledPropertiesInput = new FileInputStream(compliedMetadataFile);
		try {
			compiledProperties.load(compiledPropertiesInput);
		} finally {
			// Always release the file handle, even when load() fails.
			compiledPropertiesInput.close();
		}

		// Without the current stamps nothing can be proven up to date, so recompile.
		if (lastModifieds == null || files == null) {
			return true;
		}

		String compiledLastModifieds = compiledProperties.getProperty(LAST_MODIFIEDS_KEY);
		String compiledFiles = compiledProperties.getProperty(FILES_KEY);
		String clazz = compiledProperties.getProperty(COMPILER_CLASS_KEY);
		String version = compiledProperties.getProperty(COMPILER_VERSION_KEY);
		boolean upToDate = lastModifieds.equals(compiledLastModifieds)
				&& files.equals(compiledFiles)
				&& this.getClass().getName().equalsIgnoreCase(clazz)
				&& VERSION.equalsIgnoreCase(version);
		return !upToDate;
	}

	/**
	 * Writes every dictionary to its compiled file and then rewrites the
	 * metadata file. The metadata is written last, after all dictionary
	 * files have been written.
	 *
	 * @param dictionaries source of the dictionaries to write
	 * @param knife        not used by this compiler
	 * @param p            runtime properties (dictionary home, dictionary
	 *                     names, charset, files list and last-modified stamps)
	 * @throws Exception if any compiled file or the metadata file cannot be written
	 */
	public void compile(Dictionaries dictionaries, Knife knife, Properties p) throws Exception {
		String dicHome = p.getProperty(DIC_HOME_KEY);
		String noiseCharactor = getProperty(p, Constants.DIC_NOISE_CHARACTOR);
		String noiseWord = getProperty(p, Constants.DIC_NOISE_WORD);
		String unit = getProperty(p, Constants.DIC_UNIT);
		String confucianFamilyName = getProperty(p, Constants.DIC_CONFUCIAN_FAMILY_NAME);
		String combinatorics = getProperty(p, Constants.DIC_FOR_COMBINATORICS);
		String charsetName = getProperty(p, Constants.DIC_CHARSET);

		File dicHomeFile = new File(dicHome);
		File compiledDicHomeFile = new File(dicHomeFile, ".compiled/sorting");
		compiledDicHomeFile.mkdirs();

		// vocabulary
		sortCompile(dictionaries.getVocabularyDictionary(),
				new File(compiledDicHomeFile, "vocabulary.dic.compiled"), charsetName);
		// noise characters
		sortCompile(dictionaries.getNoiseCharactorsDictionary(),
				new File(compiledDicHomeFile, noiseCharactor + ".dic.compiled"), charsetName);
		// noise words
		sortCompile(dictionaries.getNoiseWordsDictionary(),
				new File(compiledDicHomeFile, noiseWord + ".dic.compiled"), charsetName);
		// units
		sortCompile(dictionaries.getUnitsDictionary(),
				new File(compiledDicHomeFile, unit + ".dic.compiled"), charsetName);
		// confucian family names
		sortCompile(dictionaries.getConfucianFamilyNamesDictionary(),
				new File(compiledDicHomeFile, confucianFamilyName + ".dic.compiled"), charsetName);
		// combinatorics
		sortCompile(dictionaries.getCombinatoricsDictionary(),
				new File(compiledDicHomeFile, combinatorics + ".dic.compiled"), charsetName);

		// metadata
		File compliedMetadataFile = new File(dicHomeFile, ".compiled/sorting/.metadata");
		if (compliedMetadataFile.exists()) {
			//compliedMetadataFile.setWritable(true);
			compliedMetadataFile.delete();
		} else {
			compliedMetadataFile.getParentFile().mkdirs();
		}

		Properties compiledProperties = new Properties();
		copyIfPresent(p, compiledProperties, LAST_MODIFIEDS_KEY);
		copyIfPresent(p, compiledProperties, FILES_KEY);
		compiledProperties.setProperty(COMPILER_CLASS_KEY, this.getClass().getName());
		compiledProperties.setProperty(COMPILER_VERSION_KEY, VERSION);

		OutputStream compiledPropertiesOutput = new FileOutputStream(compliedMetadataFile);
		try {
			compiledProperties.store(compiledPropertiesOutput,
					"dont edit it! this file was auto generated by paoding.");
		} finally {
			// Always release the file handle, even when store() fails.
			compiledPropertiesOutput.close();
		}
		compliedMetadataFile.setReadOnly();
	}

	/**
	 * Copies one property from <code>source</code> to <code>target</code>,
	 * skipping it when absent because {@link Properties#setProperty} rejects
	 * <code>null</code> values.
	 */
	private static void copyIfPresent(Properties source, Properties target, String key) {
		String value = source.getProperty(key);
		if (value != null) {
			target.setProperty(key, value);
		}
	}

	/**
	 * Writes the words of <code>dictionary</code> to <code>dicFile</code>, one
	 * per line terminated by CR LF, in index order. A word whose modifiers
	 * differ from {@link Word#DEFAUL} gets a <code>[m=&lt;modifiers&gt;]</code>
	 * suffix. Any existing file is deleted first and the new file is marked
	 * read-only afterwards.
	 *
	 * @param dictionary  words to write
	 * @param dicFile     destination file
	 * @param charsetName charset used to encode every line
	 * @throws FileNotFoundException        if the destination cannot be opened for writing
	 * @throws UnsupportedEncodingException if <code>charsetName</code> is not supported
	 * @throws IOException                  if writing fails
	 */
	private void sortCompile(final Dictionary dictionary, 
			File dicFile, String charsetName) throws FileNotFoundException,
			IOException, UnsupportedEncodingException {
		int wordsSize = dictionary.size();
		if (dicFile.exists()) {
			//dicFile.setWritable(true);
			dicFile.delete();
		}
		BufferedOutputStream out = new BufferedOutputStream(
				new FileOutputStream(dicFile), 1024 * 16);
		try {
			for (int i = 0; i < wordsSize; i++) {
				Word word = dictionary.get(i);
				out.write(word.getText().getBytes(charsetName));
				if (word.getModifiers() != Word.DEFAUL) {
					// Encode the suffix with the same charset as the word text
					// rather than the platform default.
					String suffix = "[m=" + String.valueOf(word.getModifiers()) + "]";
					out.write(suffix.getBytes(charsetName));
				}
				out.write('\r');
				out.write('\n');
			}
			out.flush();
		} finally {
			// Always release the file handle, even when a write fails midway.
			out.close();
		}
		dicFile.setReadOnly();
	}

	/**
	 * Builds a {@link Dictionaries} over the files in
	 * <code>&lt;dic home&gt;/.compiled/sorting</code>.
	 *
	 * @param p runtime properties (dictionary home, dictionary names, charset)
	 * @return a {@link CompiledFileDictionaries} configured from <code>p</code>
	 */
	public Dictionaries readCompliedDictionaries(Properties p) {
		String dicHomeAbsolutePath = p.getProperty(DIC_HOME_KEY);
		String noiseCharactor = getProperty(p, Constants.DIC_NOISE_CHARACTOR);
		String noiseWord = getProperty(p, Constants.DIC_NOISE_WORD);
		String unit = getProperty(p, Constants.DIC_UNIT);
		String confucianFamilyName = getProperty(p, Constants.DIC_CONFUCIAN_FAMILY_NAME);
		String combinatorics = getProperty(p, Constants.DIC_FOR_COMBINATORICS);
		String charsetName = getProperty(p, Constants.DIC_CHARSET);
		return new CompiledFileDictionaries(
				dicHomeAbsolutePath + "/.compiled/sorting",
				noiseCharactor, noiseWord, unit,
				confucianFamilyName, combinatorics, charsetName);
	}

	/** Looks up <code>name</code> by delegating to {@link Constants#getProperty}. */
	private static String getProperty(Properties p, String name) {
		return Constants.getProperty(p, name);
	}

}
			
...
展开> <收缩

下载源码到电脑,阅读使用更方便

1 积分

快速下载
还剩0行未阅读,继续阅读
Ʋ

源码文件列表

温馨提示: 点击源码文件名可预览文件内容哦 ^_^
名称 大小 修改日期
SimpleReadListener2.java.svn-base2.53 kB2012-10-10 10:55
SimpleReadListener.java.svn-base2.72 kB2012-10-10 10:55
ReadListener.java.svn-base936.00 B2012-10-10 10:55
FileWordsReader.java.svn-base3.95 kB2012-10-10 10:55
Difference.java.svn-base2.82 kB2012-10-10 10:55
Detector.java.svn-base3.14 kB2012-10-10 10:55
Node.java.svn-base1.88 kB2012-10-10 10:55
DifferenceListener.java.svn-base879.00 B2012-10-10 10:55
Snapshot.java.svn-base5.98 kB2012-10-10 10:55
ExtensionFileFilter.java.svn-base1.22 kB2012-10-10 10:55
Estimate.java.svn-base4.94 kB2012-10-10 10:55
TryPaodingAnalyzer.java.svn-base10.66 kB2012-10-10 10:55
MaxWordLengthTokenCollector.java.svn-base2.43 kB2012-10-10 10:55
MostWordsTokenCollector.java.svn-base2.88 kB2012-10-10 10:55
SortingDictionariesCompiler.java.svn-base7.04 kB2012-10-10 10:55
CompiledFileDictionaries.java.svn-base8.25 kB2012-10-10 10:55
MostWordsModeDictionariesCompiler.java.svn-base9.05 kB2012-10-10 10:55
all-wcprops854.00 B2012-10-10 10:55
format2.00 B2012-10-10 10:55
entries847.00 B2012-10-10 10:55
all-wcprops1.13 kB2012-10-10 10:55
format2.00 B2012-10-10 10:55
entries1.07 kB2012-10-10 10:55
all-wcprops449.00 B2012-10-10 10:55
format2.00 B2012-10-10 10:55
entries555.00 B2012-10-10 10:55
TokenCollector.java.svn-base966.00 B2012-10-10 10:55
PaodingAnalyzerBean.java.svn-base4.05 kB2012-10-10 10:55
PaodingAnalyzer.java.svn-base4.46 kB2012-10-10 10:55
PaodingTokenizer.java.svn-base5.00 kB2012-10-10 10:55
all-wcprops1.03 kB2012-10-10 10:55
format2.00 B2012-10-10 10:55
entries1.00 kB2012-10-10 10:55
PaodingAnalysisException.java.svn-base1.16 kB2012-10-10 10:55
KnifeBox.java.svn-base2.47 kB2012-10-10 10:55
LetterKnife.java.svn-base1.50 kB2012-10-10 10:55
PaodingMaker.java.svn-base21.26 kB2012-10-10 10:55
CharSet.java.svn-base2.13 kB2012-10-10 10:55
CombinatoricsKnife.java.svn-base10.65 kB2012-10-10 10:55
DictionariesCompiler.java.svn-base1.28 kB2012-10-10 10:55
FileDictionaries.java.svn-base12.74 kB2012-10-10 10:55
Dictionaries.java.svn-base1.85 kB2012-10-10 10:55
Knife.java.svn-base5.80 kB2012-10-10 10:55
Beef.java.svn-base3.84 kB2012-10-10 10:55
SmartKnifeBox.java.svn-base974.00 B2012-10-10 10:55
Collector.java.svn-base1.55 kB2012-10-10 10:55
Paoding.java.svn-base1.35 kB2012-10-10 10:55
FakeKnife.java.svn-base2.08 kB2012-10-10 10:55
CJKKnife.java.svn-base14.72 kB2012-10-10 10:55
NumberKnife.java.svn-base4.38 kB2012-10-10 10:55
DictionariesWare.java.svn-base853.00 B2012-10-10 10:55
FileDictionariesDifferenceListener.java.svn-base2.42 kB2012-10-10 10:55
CollectorStdoutImpl.java.svn-base1.18 kB2012-10-10 10:55
all-wcprops125.00 B2012-10-10 10:55
format2.00 B2012-10-10 10:55
entries318.00 B2012-10-10 10:55
ReadListener.java936.00 B2012-10-10 10:55
FileWordsReader.java3.95 kB2012-10-10 10:55
SimpleReadListener.java2.72 kB2012-10-10 10:55
SimpleReadListener2.java2.53 kB2012-10-10 10:55
Detector.java3.14 kB2012-10-10 10:55
Node.java1.88 kB2012-10-10 10:55
ExtensionFileFilter.java1.22 kB2012-10-10 10:55
Difference.java2.82 kB2012-10-10 10:55
DifferenceListener.java879.00 B2012-10-10 10:55
Snapshot.java5.98 kB2012-10-10 10:55
HashBinaryDictionary.java.svn-base6.67 kB2012-10-10 10:55
Word.java.svn-base1.84 kB2012-10-10 10:55
Dictionary.java.svn-base1.71 kB2012-10-10 10:55
BinaryDictionary.java.svn-base3.15 kB2012-10-10 10:55
Hit.java.svn-base5.01 kB2012-10-10 10:55
DictionaryDelegate.java.svn-base1.30 kB2012-10-10 10:55
TryPaodingAnalyzer.java10.66 kB2012-10-10 10:55
Estimate.java4.94 kB2012-10-10 10:55
all-wcprops750.00 B2012-10-10 10:55
format2.00 B2012-10-10 10:55
entries853.00 B2012-10-10 10:55
MaxWordLengthTokenCollector.java2.43 kB2012-10-10 10:55
CompiledFileDictionaries.java8.25 kB2012-10-10 10:55
SortingDictionariesCompiler.java7.04 kB2012-10-10 10:55
MostWordsModeDictionariesCompiler.java9.05 kB2012-10-10 10:55
MostWordsTokenCollector.java2.88 kB2012-10-10 10:55
all-wcprops291.00 B2012-10-10 10:55
format2.00 B2012-10-10 10:55
entries421.00 B2012-10-10 10:55
Constants.java.svn-base4.78 kB2012-10-10 10:55
all-wcprops2.88 kB2012-10-10 10:55
format2.00 B2012-10-10 10:55
entries2.76 kB2012-10-10 10:55
all-wcprops1.01 kB2012-10-10 10:55
format2.00 B2012-10-10 10:55
entries1.07 kB2012-10-10 10:55
PaodingTokenizer.java5.00 kB2012-10-10 10:55
TokenCollector.java966.00 B2012-10-10 10:55
PaodingAnalyzer.java4.46 kB2012-10-10 10:55
PaodingAnalyzerBean.java4.05 kB2012-10-10 10:55
ChineseTokenizerFactory.java1.63 kB2012-10-10 11:06
SolrPaodingTokenizer.java1.09 kB2012-10-10 11:06
PaodingAnalysisException.java1.16 kB2012-10-10 10:55
all-wcprops242.00 B2012-10-10 10:55
format2.00 B2012-10-10 10:55
entries458.00 B2012-10-10 10:55
SmartKnifeBox.java974.00 B2012-10-10 10:55
PaodingMaker.java21.26 kB2012-10-10 10:55
CJKKnife.java14.72 kB2012-10-10 10:55
DictionariesWare.java853.00 B2012-10-10 10:55
Knife.java5.80 kB2012-10-10 10:55
Collector.java1.55 kB2012-10-10 10:55
Paoding.java1.35 kB2012-10-10 10:55
FileDictionariesDifferenceListener.java2.42 kB2012-10-10 10:55
FakeKnife.java2.08 kB2012-10-10 10:55
CharSet.java2.13 kB2012-10-10 10:55
Dictionaries.java1.85 kB2012-10-10 10:55
CollectorStdoutImpl.java1.18 kB2012-10-10 10:55
FileDictionaries.java12.74 kB2012-10-10 10:55
CombinatoricsKnife.java10.65 kB2012-10-10 10:55
KnifeBox.java2.47 kB2012-10-10 10:55
Beef.java3.84 kB2012-10-10 10:55
LetterKnife.java1.50 kB2012-10-10 10:55
DictionariesCompiler.java1.28 kB2012-10-10 10:55
NumberKnife.java4.38 kB2012-10-10 10:55
BinaryDictionary.java3.15 kB2012-10-10 10:55
Dictionary.java1.71 kB2012-10-10 10:55
DictionaryDelegate.java1.30 kB2012-10-10 10:55
Word.java1.84 kB2012-10-10 10:55
HashBinaryDictionary.java6.67 kB2012-10-10 10:55
Hit.java5.01 kB2012-10-10 10:55
all-wcprops97.00 B2012-10-10 10:55
format2.00 B2012-10-10 10:55
entries273.00 B2012-10-10 10:55
Constants.java4.78 kB2012-10-10 10:55
all-wcprops89.00 B2012-10-10 10:55
format2.00 B2012-10-10 10:55
entries264.00 B2012-10-10 10:55
readme36.00 B2012-10-11 16:29
paoding-analysis.properties187.00 B2012-10-10 10:55
paoding-analysis-default.properties220.00 B2012-10-10 10:55
paoding-analyzer.properties389.00 B2012-10-10 10:55
paoding-dic-