一段简单的 Java 代码, 方便提取日文文字: 递归扫描指定目录下的 .js 文件, 把含有日文等非 Latin-1 字符的非注释行 (行号、文件路径、行内容) 追加写入结果文件.
点击(此处)折叠或打开
- package com.test;
- import java.io.BufferedReader;
- import java.io.BufferedWriter;
- import java.io.File;
- import java.io.FileReader;
- import java.io.FileWriter;
- import java.util.ArrayList;
- import java.util.List;
- import java.util.regex.Matcher;
- import java.util.regex.Pattern;
- public class GetJPText {
- static String filePath = "F:\\proj\\src";
- static File resultFile = new File("F:\\result-js.txt");
- /**
- * @param args
- */
- public static void main(String[] args) throws Exception {
- GetJPText.exec();
- }
- // Check exist JP text
- public static boolean checkJPTextExist(String s) {
- if (null == s || s.length() < 1) {
- return false;
- }
- for (int i = 0; i < s.length(); i++) {
- if (s.charAt(i) > 256 && (int) s.charAt(i) != 65279) {
- System.out.println((int) s.charAt(i));
- return true;
- }
- }
- return false;
- }
- // Comment line check
- public static boolean checkCommentLine(String lineStr) {
- Pattern pattern = Pattern.compile("^(//|/\\*|\\*|trace).+");
- Matcher m = pattern.matcher(lineStr);
- if (m.matches()) {
- return true;
- }
- return false;
- }
- public static void write2File(int line, String filename, String lineStr) throws Exception {
- BufferedWriter out = new BufferedWriter(new FileWriter(resultFile, true));
- // for()
- out.write(line + "\t" + filename + "\t" + lineStr);
- out.newLine();
- out.close();
- out = null;
- }
- // Read file
- public static void readFileLine(File file) {
- BufferedReader reader = null;
- try {
- reader = new BufferedReader(new FileReader(file));
- String tmpStr = null;
- int line = 1;
- while ((tmpStr = reader.readLine()) != null) {
- tmpStr = tmpStr.replaceAll("\\t", " ");
- tmpStr = tmpStr.trim();
- if (!checkCommentLine(tmpStr)) {
- if (checkJPTextExist(tmpStr)) {
- write2File(line, file.getPath(), tmpStr);
- }
- }
- line++;
- }
- reader.close();
- } catch (Exception e) {
- e.printStackTrace();
- } finally {
- if (reader != null) {
- try {
- reader.close();
- } catch (Exception e) {
- e.printStackTrace();
- }
- }
- }
- }
- //
- public static void exec() throws Exception {
- List<File> listf = new ArrayList<File>();
- try {
- GetJPText.getAsFileList(listf, new File(filePath));
- } catch (Exception e) {
- e.printStackTrace();
- }
- System.out.println("List size: " + listf.size());
- for (File file : listf) {
- readFileLine(file);
- }
- }
- public static List<File> getAsFileList(List<File> list, File f) throws Exception {
- if (f.isDirectory()) {
- String[] fileListTmp = f.list();
- for (String str : fileListTmp) {
- Pattern pattern = Pattern.compile(".+\\.js$");
- Matcher m = pattern.matcher(str);
- if (m.matches()) {
- list.add(new File(f.getPath() + "\\" + str));
- } else {
- File tmp = new File(f.getPath() + "\\" + str);
- if (tmp.isDirectory() && !str.equals(".svn")) {
- getAsFileList(list, tmp);
- }
- }
- }
- } else {
- if (f.getName().matches(".js")) {
- list.add(f);
- }
- }
- // System.out.println("List size: " + list.size());
- return list;
- }
- }