# for i,s in enumerate(contents):
#     r2=pinyin(s,style=Style.TONE3,neutral_tone_with_five=True)
#     if r2[0][0][:-1]!=r1[i][0][:-1]:
#         print(contents[i-5:i+6],r1[i],r2[0])
#         count+=1
#         if count==10:
#             sys.exit(1)
# Collect every maximal run of consecutive characters of `contents` whose
# reading `r1[i]` is one of the target readings below, then print the distinct
# runs twice: first as a raw set, then as a list ordered longest-first.
#
# `contents` and `r1` come from earlier in the file.
# NOTE(review): r1 looks like a per-character pinyin result (one-element lists
# such as ['zui4']) aligned index-for-index with contents -- confirm.

# Built once instead of on every loop iteration. Kept as a list of lists
# because r1[i] is compared by equality against these one-element lists.
_target_readings = [
    ['zui4'], ['jin4'], ['yi4'], ['zhi2'], ['you3'], ['ge4'],
    ['bu4'], ['da4'], ['xiao3'], ['de5'], ['kun4'], ['huo4'],
]

nstr = ''
cset = set()
for i, s in enumerate(contents):
    if r1[i] in _target_readings:
        # Still inside a run of target characters: extend it.
        nstr += s
    else:
        # A non-target character closes the current run. When no run is open
        # this adds the empty string, exactly as before.
        cset.add(nstr)
        nstr = ''

# A run that reaches the very end of `contents` has no following character to
# close it, so it used to be dropped; record it here.
if nstr:
    cset.add(nstr)

print(cset)

# Longest runs first; the sort is stable, so equal-length runs keep the set's
# iteration order.
cset_l = sorted(cset, key=len, reverse=True)
print(cset_l)