# -*- coding: utf-8 -*-
|
import re, os
|
import codecs
|
from time import clock as now
|
|
#import boot as PATH
|
# Double-quoted literal containing at least one character in the range
# U+4E00..U+9FA5, e.g. "hp 攻击 10".
zhPattern = re.compile(u'"[^"]*[\u4e00-\u9fa5]+[^"]*"')

# [[...]] literal containing at least one character in the same range and
# no double quote or square bracket inside.  The regex backslashes are
# doubled so the literal contains no invalid string escapes ("\[", "\]");
# the compiled pattern is identical to the previous one.
zhPattern2 = re.compile(
    u'\\[\\[[^"\\[\\]]*[\u4e00-\u9fa5]+[^"\\[\\]]*\\]\\]')

# Root directory that loop_path() walks; resolved against the current
# working directory when the script starts.
base_path = os.path.abspath("../src")

# Not referenced anywhere else in the visible part of this script.
base_path2 = os.path.abspath("done")

# File extensions (including the dot) that are scanned.
file_type = [".lua"]

# Extracted string -> record dict describing it; filled by addOne().
all_chinese = {}

# "short2" template of a string -> one extracted string having that
# template; filled by addOne().
all_chinese_short = {}

# Start of the wall-clock measurement reported at the end of the run.
# NOTE(review): ``now`` is time.clock (see the imports), which was removed
# in Python 3.8 -- this script currently targets Python 2.
start = now()
|
# Compiled once at import time instead of on every addOne() call.
_NUMBER_RUN = re.compile(u"[0-9.]+")
_NON_CHINESE_RUN = re.compile(u"[^\u4e00-\u9fa5]+")


def addOne(one, file_name):
    """Register one extracted string in the module-level registries.

    Stores a record in ``all_chinese[one]`` with these keys:

    * ``base``      -- the string itself
    * ``short``     -- the string with every run of digits/dots replaced
      by ``@@``
    * ``args``      -- the digit/dot runs that were replaced (may be empty)
    * ``len``       -- ``len(one)``
    * ``short2``    -- the string with every run of characters outside
      U+4E00..U+9FA5 replaced by ``##``
    * ``args2``     -- the runs that were replaced to build ``short2``
    * ``file_name`` -- the location label passed in by the caller

    and maps ``short2`` to ``one`` in ``all_chinese_short``.  Registering
    the same string, or a string with the same ``short2``, again
    overwrites the earlier entry.

    :param one: the extracted text, without its surrounding delimiters
    :param file_name: label describing where the text was found
    """
    short2 = _NON_CHINESE_RUN.sub("##", one)
    # Both dicts are only mutated, never rebound, so no ``global``
    # declaration is needed.  findall() always returns a list, so the
    # result needs no empty-list fallback.
    all_chinese[one] = {
        "base": one,
        "short": _NUMBER_RUN.sub("@@", one),
        "args": _NUMBER_RUN.findall(one),
        "len": len(one),
        "args2": _NON_CHINESE_RUN.findall(one),
        "short2": short2,
        "file_name": file_name,
    }
    all_chinese_short[short2] = one
|
|
def check(file_name, fn):
    """Scan one UTF-8 text file and register every Chinese literal in it.

    The file is read line by line.  On each line, everything from ``--``
    to the end of the line and everything from ``print`` to the end of the
    line is discarded first; what remains is searched with ``zhPattern``
    (double-quoted literals) and ``zhPattern2`` (``[[...]]`` literals).
    Each hit is passed to addOne() without its delimiters.

    The location label given to addOne() is ``fn`` minus its last three
    characters, followed by the 1-based line number.  For a name ending
    in ``.lua`` the dot is kept, e.g. ``ui/main.lua`` line 12 becomes
    ``ui/main.12``.

    :param file_name: path of the file to open
    :param fn: display name of the file, used to build location labels
    """
    print(file_name)
    # ``with`` closes the handle even if decoding or matching raises; it
    # was previously never closed explicitly.
    with codecs.open(file_name, "r", "utf-8") as source:
        for line_no, line in enumerate(source, 1):
            # NOTE: these substitutions are purely textual, so a ``--`` or
            # ``print`` inside a string literal also truncates the line.
            line = re.sub("--.+", "", line)
            line = re.sub("print.+", "", line)
            where = fn[:-3] + str(line_no)
            for quoted in zhPattern.findall(line):
                addOne(quoted[1:-1], where)  # strip the two quotes
            for bracketed in zhPattern2.findall(line):
                addOne(bracketed[2:-2], where)  # strip "[[" and "]]"
|
|
def loop_path(path):
    """Recursively walk ``path`` and run check() on every matching file.

    Sub-directories are descended into.  A regular entry is scanned when
    its extension is listed in ``file_type``.  check() receives the full
    path plus a display name built from the directory's path relative to
    ``base_path``, a ``/`` and the file name.  Files directly inside
    ``base_path`` therefore get a display name that starts with ``/``.

    :param path: directory to scan; expected to be ``base_path`` or a
        directory below it, since the display name is derived by cutting
        ``len(base_path) + 1`` characters off the front of ``path``
    """
    # os.listdir() returns entries in arbitrary order; sorting makes the
    # order in which files are scanned reproducible.
    for fn in sorted(os.listdir(path)):
        full_path = os.path.join(path, fn)
        if os.path.isdir(full_path):
            loop_path(full_path)
        elif os.path.splitext(fn)[1] in file_type:
            check(full_path, path[len(base_path) + 1:] + "/" + fn)
|
# Scan every matching file under base_path; check() hands each literal it
# finds to addOne(), which fills all_chinese.
loop_path(base_path)

# Index the collected strings by their "short2" template so that strings
# sharing a template collapse into one entry (the last one iterated wins).
all_short = {}
for key, info in all_chinese.items():
    all_short[info["short2"]] = [info["file_name"], key]

# List of (short2, [file_name, base]) pairs ordered by location label.
all_short = sorted(all_short.items(), key=lambda item: item[1][0])

# all_word_short.txt: base <TAB> short2 <TAB> location, one line per template.
# all_word_base.txt:  the short2 template alone, in the same order.
# ``with`` guarantees both files are closed even if a write fails.
with codecs.open("all_word_short.txt", "w", "utf-8") as f2:
    with codecs.open("all_word_base.txt", "w", "utf-8") as f3:
        for short, (file_name, base) in all_short:
            f2.write("%s\t%s\t%s\n" % (base, short, file_name))
            f3.write("%s\n" % (short))

# all_word.txt: every distinct extracted string, one per line.
with codecs.open("all_word.txt", "w", "utf-8") as f:
    for key in all_chinese:
        f.write("%s\n" % (all_chinese[key]["base"]))

# Single-argument print(...) and %-formatting produce the same output on
# Python 2 and Python 3.
print("finish!")

end = now()
time_last = end - start
print("use time: %s sec" % time_last)
|
|