Python 正向最大匹配分词和逆向最大匹配分词的实例
正向最大匹配
# -*- coding: utf-8 -*-
"""Forward maximum matching (FMM) word segmentation example.

Runs on both Python 2 and Python 3.
"""
import logging

CODEC = 'utf-8'

logger = logging.getLogger(__name__)

try:
    text_type = unicode  # Python 2
except NameError:
    text_type = str  # Python 3


def u(s, encoding):
    """Return *s* as a text (unicode) string.

    Text input is returned unchanged; anything else is decoded with
    *encoding*.
    """
    if isinstance(s, text_type):
        return s
    return text_type(s, encoding)


def fwd_mm_seg(wordDict, maxLen, str):
    """Segment *str* by forward maximum matching.

    Starting at the left edge, the longest prefix of at most *maxLen*
    characters that is a key of *wordDict* is taken as the next word.
    When no prefix of two or more characters matches, a single character
    is emitted so the scan always advances.

    Args:
        wordDict: container of known words; only ``in`` membership is used.
        maxLen: maximum candidate word length, must be >= 1.
        str: the text to segment (name kept for backward compatibility,
            it shadows the builtin inside this function only).

    Returns:
        The list of segments in left-to-right order; ``[]`` for empty input.

    Raises:
        ValueError: if *maxLen* is smaller than 1. Such a value used to
            make the scan stop consuming input and loop forever.
    """
    if maxLen < 1:
        raise ValueError('maxLen must be >= 1, got %r' % (maxLen,))

    wordList = []
    pos = 0
    total = len(str)
    while pos < total:
        # Never look past the end of the text.
        wordLen = min(maxLen, total - pos)
        # Shrink the candidate from the right until it is a known word;
        # a one-character candidate is accepted unconditionally.
        while wordLen > 1 and str[pos:pos + wordLen] not in wordDict:
            wordLen -= 1
        word = str[pos:pos + wordLen]
        logger.debug('segment at offset %d: %r', pos, word)
        wordList.append(word)
        pos += wordLen
    return wordList


def main():
    """Segment a sample sentence using the words listed in ``words.dic``."""
    # Show the per-segment trace when run as a demo script.
    logging.basicConfig(level=logging.DEBUG, format='%(message)s')

    wordDict = {}
    # Read bytes and decode explicitly so the behaviour is the same on
    # Python 2 and 3; the context manager guarantees the file is closed.
    with open('words.dic', 'rb') as fp_dict:
        for eachWord in fp_dict:
            word = u(eachWord.strip(), CODEC)
            if word:  # skip blank lines
                wordDict[word] = 1
    logger.debug('loaded %d dictionary words', len(wordDict))

    segStr = u'你好世界helloworld'
    print(segStr)
    wordList = fwd_mm_seg(wordDict, 10, segStr)
    print("==".join(wordList))


if __name__ == '__main__':
    main()
逆向最大匹配
# -*- coding: utf-8 -*-
"""Backward (reverse) maximum matching word segmentation example.

Runs on both Python 2 and Python 3.
"""
import logging

CODEC = 'utf-8'

logger = logging.getLogger(__name__)

try:
    text_type = unicode  # Python 2
except NameError:
    text_type = str  # Python 3


def u(s, encoding):
    """Return *s* as a text (unicode) string.

    Text input is returned unchanged; anything else is decoded with
    *encoding*.
    """
    if isinstance(s, text_type):
        return s
    return text_type(s, encoding)


def bwd_mm_seg(wordDict, maxLen, str):
    """Segment *str* by backward maximum matching.

    Starting at the right edge, the longest suffix of at most *maxLen*
    characters that is a key of *wordDict* is taken as the next word.
    When no suffix of two or more characters matches, a single character
    is emitted so the scan always advances.

    Args:
        wordDict: container of known words; only ``in`` membership is used.
        maxLen: maximum candidate word length, must be >= 1.
        str: the text to segment (name kept for backward compatibility,
            it shadows the builtin inside this function only).

    Returns:
        The list of segments in left-to-right reading order; ``[]`` for
        empty input.

    Raises:
        ValueError: if *maxLen* is smaller than 1. Such a value used to
            make the scan stop consuming input and loop forever.
    """
    if maxLen < 1:
        raise ValueError('maxLen must be >= 1, got %r' % (maxLen,))

    wordList = []
    end = len(str)
    while end > 0:
        # Never look past the start of the text.
        wordLen = min(maxLen, end)
        # Shrink the candidate from the left until it is a known word;
        # a one-character candidate is accepted unconditionally.
        while wordLen > 1 and str[end - wordLen:end] not in wordDict:
            wordLen -= 1
        word = str[end - wordLen:end]
        logger.debug('segment ending at offset %d: %r', end, word)
        wordList.append(word)
        end -= wordLen
    # Words were collected right-to-left; restore reading order.
    wordList.reverse()
    return wordList


def main():
    """Segment a sample sentence using the words listed in ``words.dic``."""
    # Show the per-segment trace when run as a demo script.
    logging.basicConfig(level=logging.DEBUG, format='%(message)s')

    wordDict = {}
    # Read bytes and decode explicitly so the behaviour is the same on
    # Python 2 and 3; the context manager guarantees the file is closed.
    with open('words.dic', 'rb') as fp_dict:
        for eachWord in fp_dict:
            word = u(eachWord.strip(), CODEC)
            if word:  # skip blank lines
                wordDict[word] = 1
    logger.debug('loaded %d dictionary words', len(wordDict))

    # Plain u'' literal: the original ur'' prefix is a syntax error on Python 3.
    segStr = u'你好世界helloworld'
    print(segStr)
    wordList = bwd_mm_seg(wordDict, 10, segStr)
    print("==".join(wordList))


if __name__ == '__main__':
    main()
以上这篇 Python 正向最大匹配分词和逆向最大匹配分词的实例就是小编分享给大家的全部内容了，希望能给大家一个参考，也希望大家多多支持毛票票。