中文分词开源项目 JAVA中文分词

时间:2012-09-04 13:51:46
【文件属性】:
文件名称:中文分词开源项目 JAVA中文分词
文件大小:382KB
文件格式:RAR
更新时间:2012-09-04 13:51:46
java,JAVA中文分词,项目 import WordSegment.*; import java.awt.event.ActionEvent; import java.awt.event.ActionListener; import java.awt.*; import java.io.File; import java.util.Vector; import javax.swing.*; /** * */ /** * Swing demo window for the word segmenter. The menus let the user load a dictionary, choose between the forward (FMM) and backward (BMM) maximum-matching strategies, train a dictionary from a corpus file and save it; the text field and button feed a sentence to the segmenter. The frame is its own ActionListener for every menu item and for the button. * @author Truman * */ public class WordSegDemoFrame extends JFrame implements ActionListener { /** Selector values accepted by setAlgo(int). */ final static int ALGO_FMM = 1; final static int ALGO_BMM = 2; private JMenuBar menuBar = new JMenuBar(); private JMenuItem openDicItem, closeItem; private JRadioButtonMenuItem fmmItem, bmmItem; private JMenuItem openTrainFileItem, saveDicItem, aboutItem; private JButton btSeg; private JTextField tfInput; private JTextArea taOutput; private JPanel panel; /** Status labels placed in the bottom panel: dictionary state and current algorithm. */ JLabel infoDic, infoAlgo; private WordSegment seger; private DicTrainer trainer = new DicTrainer(); /** Builds the whole UI: the four menus and their items, a top panel holding the input field and the segment button, a center panel holding the output text area, and a bottom panel holding the two status labels. Registers this frame as listener on every menu item and on the button. The two algorithm items share one ButtonGroup with the forward item preselected, and the save-dictionary item starts disabled. */ private void initFrame() { setTitle("Mini分词器"); setDefaultCloseOperation(EXIT_ON_CLOSE); setJMenuBar(menuBar); JMenu fileMenu = new JMenu("文件"); JMenu algorithmMenu = new JMenu("分词算法"); JMenu trainMenu = new JMenu("训练语料"); JMenu helpMenu = new JMenu("帮助"); openDicItem = fileMenu.add("载入词典"); fileMenu.addSeparator(); closeItem = fileMenu.add("退出"); algorithmMenu.add(fmmItem = new JRadioButtonMenuItem("正向最大匹配", true)); algorithmMenu.add(bmmItem = new JRadioButtonMenuItem("逆向最大匹配", false)); ButtonGroup algorithms = new ButtonGroup(); algorithms.add(fmmItem); algorithms.add(bmmItem); openTrainFileItem = trainMenu.add("载入并训练语料"); saveDicItem = trainMenu.add("保存词典"); aboutItem = helpMenu.add("关于Word Segment Demo"); menuBar.add(fileMenu); menuBar.add(algorithmMenu); menuBar.add(trainMenu); menuBar.add(helpMenu); openDicItem.addActionListener(this); closeItem.addActionListener(this); openTrainFileItem.addActionListener(this); saveDicItem.addActionListener(this); aboutItem.addActionListener(this); fmmItem.addActionListener(this); bmmItem.addActionListener(this); JPanel topPanel = new JPanel(); topPanel.setLayout(new FlowLayout()); JPanel centerPanel = new JPanel(); centerPanel.setLayout(new GridLayout()); JPanel bottomPanel = 
new JPanel(); this.getContentPane().add(topPanel, BorderLayout.NORTH); this.getContentPane().add(centerPanel, BorderLayout.CENTER); this.getContentPane().add(bottomPanel, BorderLayout.SOUTH); btSeg = new JButton("分词"); tfInput = new JTextField("", 30); taOutput = new JTextArea(); topPanel.add(tfInput); topPanel.add(btSeg); centerPanel.add(taOutput); infoDic = new JLabel(); infoAlgo = new JLabel(); bottomPanel.add(infoDic); bottomPanel.add(infoAlgo); saveDicItem.setEnabled(false); btSeg.addActionListener(this); } /** Builds the UI, creates the segmenter, loads the dictionary named "dic.dat" and selects the forward (FMM) strategy. NOTE(review): "dic.dat" is a relative name, so presumably it is resolved against the working directory - confirm in WordSegment.SetDic. */ public WordSegDemoFrame() { initFrame(); seger = new WordSegment(); loadDic("dic.dat"); setAlgo(ALGO_FMM); } /** Hands fileName to the segmenter via SetDic and then updates the dictionary status label. The label is set unconditionally; no failure from SetDic is checked here. */ private void loadDic(String fileName) { seger.SetDic(fileName); infoDic.setText("词典 " + fileName + "已载入"); } /** Installs the strategy selected by type (ALGO_FMM or ALGO_BMM) on the segmenter and shows its short name in the algorithm status label. For any other value no strategy is installed and the label ends with the text "null", because algo is never assigned. */ private void setAlgo(int type) { String algo = null; switch(type) { case ALGO_FMM: seger.setStrategy(new FMM()); algo = "FMM"; break; case ALGO_BMM: seger.setStrategy(new BMM()); algo = "BMM"; break; } infoAlgo.setText("分词算法:" + algo); } /** Shows an open-file dialog. Returns the selected file when it is a readable regular file. Returns null when the dialog is not approved, or - after showing an error dialog - when the selection is not a readable regular file. */ private File openFile() { JFileChooser chooser = new JFileChooser(); int ret = chooser.showOpenDialog(this); if (ret != JFileChooser.APPROVE_OPTION) { return null; } File f = chooser.getSelectedFile(); if (f.isFile() && f.canRead()) { return f; } else { JOptionPane.showMessageDialog(this, "Could not open file: " + f, "Error opening file", JOptionPane.ERROR_MESSAGE); return null; } } /** Trains on the file at f's absolute path, passes trainer.getDic() to the segmenter, updates the status label and enables the save-dictionary item. */ private void trainDic(File f) { trainer.Train(f.getAbsolutePath()); seger.SetDic(trainer.getDic()); infoDic.setText("训练完成,新的词典已载入"); saveDicItem.setEnabled(true); } /** Shows a save-file dialog; when approved, calls trainer.SaveDic with the chosen absolute path and reports that path in the status label. Does nothing when the dialog is not approved. */ private void saveDic() { JFileChooser chooser = new JFileChooser(); int ret = chooser.showSaveDialog(this); if (ret != JFileChooser.APPROVE_OPTION) { return; } File f = chooser.getSelectedFile(); trainer.SaveDic(f.getAbsolutePath()); infoDic.setText("词典已保存到" + f.getAbsolutePath()); } /* (non-Javadoc) * @see java.awt.event.ActionListener#actionPerformed(java.awt.event.ActionEvent) */ /* Dispatches on the event source by identity, one branch per menu item plus the segment button; each branch returns when done. NOTE(review): this listing is cut off inside the final branch's for loop, so the rest of the method and the class is not visible here. */ public void actionPerformed(ActionEvent e) { if(e.getSource() == openDicItem) { File 
dicFile = openFile(); if(dicFile == null) return; loadDic(dicFile.getAbsolutePath()); saveDicItem.setEnabled(false); return; } if(e.getSource() == closeItem) { dispose(); System.exit(0); return; } if(e.getSource() == openTrainFileItem) { File trainFile = openFile(); if(trainFile == null) return; else trainDic(trainFile); return; } if(e.getSource() == saveDicItem) { saveDic(); return; } if(e.getSource() == aboutItem) { JOptionPane.showMessageDialog(this, "作者:Truman\nemail: trumanhe@gmail.com", "关于Mini分词器", JOptionPane.INFORMATION_MESSAGE); return; } if(e.getSource() == fmmItem) { setAlgo(ALGO_FMM); return; } if(e.getSource() == bmmItem) { setAlgo(ALGO_BMM); return; } if(e.getSource() == btSeg) { String sentence = tfInput.getText(); Vector vec = seger.Segment(sentence); taOutput.setText(""); for(int i=0;i 立即下载

【文件预览】:
Mini分词
----Mini分词器文档.pdf(155KB)
----bin()
--------WordSegment()
--------dic.dat(731KB)
--------WordSegDemoFrame.class(7KB)
----run.bat(29B)
----语料示例.txt(420B)
----Copy (2) of New Folder()
----src()
--------WordSegment()
--------WordSegDemoFrame.java(6KB)
----Copy (3) of New Folder()
----Copy of New Folder()
----计算机编程网.txt(268B)
----New Folder()

网友评论

  • 不错,思路挺清晰的,还可以。
  • 不错的资源,可以使用
  • 这个真不错~~
  • 思路比较清晰吧 还行
  • 可以,虽然很多不懂
  • 基本算法,思路清晰,不错
  • 基本算法,思路清晰,歧义以及非中文处理需要加强~~