关于bash下的词典的改进-lys5300-ChinaUnix博客

前段时间写过一个终端下的词典，最近突发奇想，对其数据的组织方式做了改进。主要改进有以下几点。

1. 将操作函数化。

2. 将数据进行重组。把相同首字母的单词放在一个文件下。利于以后对查询结果的再利用。

3. 对单词进行标记：记录每个单词的查询次数，并将查询次数多的单词优先排序，即排在文件的开始部分。

4. 舍弃对中文的存储。

5. 增加网络发音，而不是单纯的espeak。

source code。

#!/bin/bash
# A dictionary for the terminal.
# Looked-up words are cached under ~/.tdic_data/, in one file per first
# character of the word.

# Prompt shown by the `select` menu when suggestions are offered.
PS3='Your choice is : '
# The whole command line is treated as the word (or phrase) to look up.
word="$*"
# First character of the first argument; it names the cache file.
# Parameter expansion is character-aware and is not confused by arguments
# such as "-n" the way `echo "$1" | cut -c 1` is.
first_alp=${1:0:1}
# Use the invoking user's home directory instead of a hard-coded /home/<user>.
cache="${HOME}/.tdic_data/${first_alp}"
#######################################
# Pronounce a word, then terminate the script.
# mplayer is tried first; espeak is used as a fallback when mplayer fails.
# Arguments: $1 - the word to pronounce
# Exits:     0 always (this function never returns to its caller)
# NOTE(review): mplayer is invoked with an empty argument; presumably the
# online pronunciation URL was lost from this listing — restore it before
# relying on the mplayer path.
#######################################
read_word() {
  local re_word="$1"

  # Branch on mplayer's exit status directly. A test such as `[ $? ]` only
  # checks that the string is non-empty, so it is true for every status and
  # the fallback below would never run.
  if ! mplayer "" >/dev/null 2>&1; then
    espeak -s 130 "$re_word" >/dev/null 2>&1
  fi
  exit 0
}
#######################################
# Ask whether the word should be read aloud and act on the answer.
# An empty answer (just Enter, or end of input) defaults to "y".
# Both "y" and "Y" are accepted, matching the retry prompt in net_search.
# Arguments: $1 - the word to pronounce
# Exits:     5 when the user declines; otherwise control passes to
#            read_word.
#######################################
judge_read() {
  local readword="$1"
  local clew="Read it?(y/n)y:"
  local answer

  # Magenta prompt, no trailing newline so the answer is typed on the same line.
  printf '\033[35;1m%s\033[00m' "$clew"
  read -r answer

  case "${answer:-y}" in
    y | Y) read_word "$readword" ;;
    *) exit 5 ;;
  esac
}
#######################################
# Look the word up in the local cache; fall back to net_search on a miss.
#
# A cache file is a sequence of entries. Each entry is a header line
# "<count> <word>", followed by definition lines and a terminating blank line.
# On a hit the definition is printed, the entry's count is incremented, the
# cache is re-sorted in the background and the user is asked whether the
# word should be read aloud.
#
# Globals:   word (read), cache (read; the file it names is rewritten)
# Arguments: none
# Exits:     always — 0 here, 1 if the cache directory cannot be created,
#            or whatever net_search / judge_read exit with.
#######################################
local_search() {
  local cache_dir=${cache%/*}
  local entry='' hits tmp

  if [[ ! -d "$cache_dir" ]]; then
    # Fail loudly instead of retrying forever when the directory cannot be made.
    mkdir -p -- "$cache_dir" || {
      printf 'Cannot create cache directory: %s\n' "$cache_dir" >&2
      exit 1
    }
  fi

  if [[ -f "$cache" ]]; then
    # The word is compared as a plain string (passed through the environment),
    # never spliced into a regex: "cat" must not match the "cat food" entry,
    # and regex metacharacters in the word are harmless.
    entry=$(TDIC_WORD="$word" awk '
      BEGIN { target = ENVIRON["TDIC_WORD"] }
      $0 == "" { if (hit) exit; inside = 0; next }
      !inside && /^[0-9]+ / {
        inside = 1
        head = $0
        sub(/^[0-9]+ /, "", head)
        if (head == target) hit = 1
      }
      hit { print }
    ' "$cache")
  fi

  if [[ -z "$entry" ]]; then
    net_search
    exit 0
  fi

  # Leading digits of the header line are the lookup counter.
  hits=${entry%%[!0-9]*}
  # Show the entry without the counter; only the header line is altered, so
  # definition lines that begin with digits are printed intact.
  printf '\033[34;1m%s\033[0m\n\n' "${entry#"$hits"}"

  # Bump the counter by rewriting the file through a temp file in the same
  # directory, so the replacement is a single rename.
  if tmp=$(mktemp "${cache}.XXXXXX"); then
    if TDIC_WORD="$word" awk -v n="$((10#$hits + 1))" '
        BEGIN { target = ENVIRON["TDIC_WORD"] }
        $0 == "" { inside = 0; print; next }
        !inside && /^[0-9]+ / {
          inside = 1
          head = $0
          sub(/^[0-9]+ /, "", head)
          if (head == target) { print n " " head; next }
        }
        { print }
      ' "$cache" >"$tmp" && [[ -s "$tmp" ]]; then
      mv -f -- "$tmp" "$cache"
    else
      rm -f -- "$tmp"
    fi
  fi

  # Re-sorting is not needed for this lookup, so it runs in the background.
  sort_file &
  judge_read "$word"
  exit 0
}
#######################################
# Look the word up online by fetching the lookup page with wget.
#
# If the response contains suggestions ("sugg"), the user picks one from a
# menu and the dictionary is re-run (as `tdic`) on that choice. Otherwise the
# definition is extracted, printed, appended to the cache and the user is
# asked whether the word should be read aloud.
#
# Globals:   word (read), cache (read; the file it names is appended to)
# Arguments: none
# Exits:     2 after looking up a chosen suggestion, 3 when the user declines
#            to choose again, 4 when nothing could be extracted, otherwise 0
#            or whatever judge_read exits with.
# NOTE(review): wget is given an empty URL; presumably the lookup URL was
# lost from this listing — restore it before use.
# NOTE(review): the tag patterns assume the response carries literal
# <def>/<sent>/<orig>/<trans>/<em> markup — confirm against the real service.
#######################################
net_search() {
  local wordf word1 guessw choice total line
  local -a suggestions=()

  # Fetch the page.
  wordf=$(wget -q "" -O -)

  # If the service returned suggestions instead of a match, offer them;
  # otherwise extract the definition.
  if grep -q 'sugg' <<<"$wordf"; then
    echo "You may want to search these words."
    # One suggestion per line, kept in an array so that multi-word
    # suggestions and glob characters survive intact.
    while IFS= read -r line; do
      suggestions+=("$line")
    done < <(printf '%s\n' "$wordf" | sed -e 's/<[\/]*sugg>//g' | grep '^[^<]')

    select guessw in "${suggestions[@]}"; do
      if [[ -n "$guessw" ]]; then
        printf '\033[36;1m%s\033[0m\n' "$guessw"
        tdic "$guessw"
        exit 2
      else
        read -r -n 1 -p "Do you want to try again:(y/n)y:" choice
        : "${choice:=y}"
        echo
        if [[ "$choice" == [yY] ]]; then
          echo 'Please input again:'
        else
          exit 3
        fi
      fi
    done
  else
    # Strip the structural tags, render <em>x</em> as "( x )", blank out any
    # remaining angle brackets, and drop empty lines.
    word1=$(printf '%s\n' "$wordf" \
      | sed -e 's/<[\/]*\(def\|sent\|orig\|trans\)>//g' \
        -e 's/<em>\(.*\)<\/em>/( \1 )/g' \
        -e 's/[<>]/ /g' \
      | grep '^[^<]')

    # A failed download or an unparsable page must not be cached: an empty
    # entry would shadow the word in every later local lookup.
    if [[ -z "$word1" ]]; then
      printf 'No definition found for: %s\n' "$word" >&2
      exit 4
    fi

    printf '\033[32;1m%s\033[0m\n' "$word1"

    # Only cache lookups whose text ends in ASCII letters.
    if [[ "$word" =~ [a-zA-Z]+$ ]]; then
      printf '1 %s\n%s\n\n' "$word" "$word1" >>"$cache"
    fi
    echo

    # Warn when the cache directory exceeds 100 MiB. This has to happen
    # before judge_read, which terminates the script.
    total=$(du -sk "${cache%/*}" 2>/dev/null | cut -f 1)
    if [[ "$total" =~ ^[0-9]+$ ]] && ((total > 102400)); then
      echo
      echo -e "\tThe dictionary cache is beyond 100M.Maybe you can release some space."
    fi

    judge_read "$word"
  fi
  exit 0
}
#######################################
# Re-order the cache file so that the most frequently looked-up entries come
# first.
#
# An entry starts at a header line "<count> <word>" and runs through the next
# blank line. Entries are sorted by count, highest first; entries with equal
# counts keep their existing relative order. Lines that belong to no entry
# are dropped.
#
# Globals:   cache (read; the file it names is replaced)
# Arguments: none
# Returns:   0 on success or when there is nothing to sort, 1 on failure
#            (the cache is left untouched in that case).
#######################################
sort_file() {
  local tmp

  [[ -f "$cache" ]] || return 0
  # Build the result next to the cache so the final replacement is a rename
  # and the name is not predictable.
  tmp=$(mktemp "${cache}.XXXXXX") || return 1

  # Tag every line with (count, entry number, line number), sort on those
  # keys, then strip the tags. Nothing read from the file is ever used as a
  # pattern, and the function does not depend on its standard input.
  awk -v OFS='\t' '
    !inside && /^[0-9]+ / { inside = 1; id++; count = $1 }
    inside { print count, id, NR, $0 }
    inside && $0 == "" { inside = 0 }
    # Terminate a trailing entry that lacks its blank line.
    END { if (inside) print count, id, NR + 1, "" }
  ' "$cache" \
    | LC_ALL=C sort -t $'\t' -k1,1nr -k2,2n -k3,3n \
    | cut -f4- >"$tmp"

  # Never replace the cache with an empty result.
  if [[ -s "$tmp" ]]; then
    mv -f -- "$tmp" "$cache"
  else
    rm -f -- "$tmp"
    return 1
  fi
}
# Entry point: a word is required on the command line.
if (($# < 1)) || [[ -z "$1" ]]; then
  echo
  echo "usage: ${0##*/} [word]"
  echo
  exit 1
fi
# Search the local cache first; local_search falls back to the network on a
# miss.
local_search
exit 0