# -*- coding: utf-8 -*-
"""Collect string literals containing Chinese characters from Lua sources.

Walks ``base_path`` recursively, scans every file whose extension is listed in
``file_type`` and records each double-quoted or ``[[...]]`` literal that
contains at least one character in the range U+4E00..U+9FA5.

When run as a script, three files are written to the current directory:

* ``all_word_short.txt`` -- ``text<TAB>short2 key<TAB>location`` per entry,
  one entry per distinct ``short2`` key, sorted by location.
* ``all_word_base.txt``  -- the ``short2`` keys, in the same order.
* ``all_word.txt``       -- every distinct literal text.

The code runs on both Python 2.7 and Python 3.
"""
import codecs
import os
import re

try:
    from time import perf_counter as now  # Python 3.3+
except ImportError:
    # Python 2 has no perf_counter; time.clock was removed in Python 3.8.
    from time import clock as now
#import boot as PATH

# A double-quoted literal containing at least one Chinese character.
zhPattern = re.compile(u'"[^"]*[\u4e00-\u9fa5]+[^"]*"')
# A [[...]] literal (no quotes or brackets inside) containing at least one
# Chinese character. Backslashes are doubled so that the regex escapes are
# spelled explicitly instead of relying on invalid string escapes; the
# \uXXXX escapes are still resolved by the string literal itself, which keeps
# the pattern valid on Python 2 as well.
zhPattern2 = re.compile(
    u'\\[\\[[^"\\[\\]]*[\u4e00-\u9fa5]+[^"\\[\\]]*\\]\\]')

# Compiled once instead of on every call / every scanned line.
_NUMBER_RUN = re.compile(u"[0-9.]+")
_NON_CHINESE_RUN = re.compile(u"[^\u4e00-\u9fa5]+")
_COMMENT_TAIL = re.compile("--.+")
_PRINT_TAIL = re.compile("print.+")

base_path = os.path.abspath("../src")
base_path2 = os.path.abspath("done")
file_type = [".lua"]

# text -> record describing that text (see addOne for the keys).
all_chinese = {}
# short2 key -> text; a later text with the same key replaces the earlier one.
all_chinese_short = {}

start = now()


def addOne(one, file_name):
    """Record one extracted literal in the module-level tables.

    Args:
        one: The literal text with its delimiters already removed.
        file_name: Location label stored with the text.

    The record stored in ``all_chinese[one]`` has these keys:
        base:      the text itself
        short:     the text with every run of digits/dots replaced by "@@"
        args:      the digit/dot runs that were replaced
        len:       length of the text
        short2:    the text with every run of non-Chinese characters
                   replaced by "##"
        args2:     the non-Chinese runs that were replaced
        file_name: the location label

    ``all_chinese_short`` is updated to map ``short2`` back to the text.
    """
    short2 = _NON_CHINESE_RUN.sub("##", one)
    all_chinese[one] = {
        "base": one,
        "short": _NUMBER_RUN.sub("@@", one),
        "args": _NUMBER_RUN.findall(one),
        "len": len(one),
        "args2": _NON_CHINESE_RUN.findall(one),
        "short2": short2,
        "file_name": file_name,
    }
    all_chinese_short[short2] = one


def check(file_name, fn):
    """Scan one UTF-8 file and record every Chinese literal found in it.

    Args:
        file_name: Path of the file to read.
        fn: Label for the file. Its last three characters are dropped and the
            1-based line number is appended to form the location stored with
            each literal (e.g. "ui/main.lua" on line 12 gives "ui/main.12").
    """
    print(file_name)
    label = fn[:-3]
    # The context manager closes the handle even if decoding fails midway;
    # previously one handle was leaked per scanned file.
    with codecs.open(file_name, "r", "utf-8") as source:
        for line_no, line in enumerate(source, 1):
            # Drop everything from "--" (Lua comment marker) and from "print"
            # to the end of the line. This is purely textual, so it also
            # truncates lines where those sequences occur inside a literal.
            line = _COMMENT_TAIL.sub("", line)
            line = _PRINT_TAIL.sub("", line)
            location = label + str(line_no)
            for quoted in zhPattern.findall(line):
                addOne(quoted[1:-1], location)  # strip the surrounding "..."
            for bracketed in zhPattern2.findall(line):
                addOne(bracketed[2:-2], location)  # strip [[ and ]]


def loop_path(path):
    """Recursively scan ``path`` for files whose extension is in file_type.

    Each matching file is passed to ``check`` with a label built from the
    part of ``path`` that follows ``base_path``, a "/" and the file name.
    """
    for fn in os.listdir(path):
        full_path = os.path.join(path, fn)
        if os.path.isdir(full_path):
            loop_path(full_path)
        elif os.path.splitext(fn)[1] in file_type:
            check(full_path, path[len(base_path) + 1:] + "/" + fn)


def main():
    """Scan ``base_path``, write the three word lists and report the time."""
    loop_path(base_path)

    # One entry per short2 key: [location, text]. Texts sharing a key
    # overwrite each other, so only one of them is kept.
    all_short = {}
    for key in all_chinese:
        record = all_chinese[key]
        all_short[record["short2"]] = [record["file_name"], key]
    # items() instead of the Python-2-only iteritems(); sorted by location.
    ordered = sorted(all_short.items(), key=lambda item: item[1][0])

    with codecs.open("all_word_short.txt", "w", "utf-8") as f2, \
            codecs.open("all_word_base.txt", "w", "utf-8") as f3:
        for short2, (location, text) in ordered:
            f2.write("%s\t%s\t%s\n" % (text, short2, location))
            f3.write("%s\n" % (short2))

    with codecs.open("all_word.txt", "w", "utf-8") as f:
        for key in all_chinese:
            f.write("%s\n" % (all_chinese[key]["base"]))

    print("finish!")
    time_last = now() - start
    print("use time: %s sec" % (time_last,))


# Running the file as a script behaves as before; the guard only stops the
# scan from firing when the module is imported.
if __name__ == "__main__":
    main()