Project

General

Profile

1 7431 aaronmk
#!/bin/sh
2
# Translates a Redmine HTML page to a thesaurus
3
# Usage: self <page
4
5
# Choose the letter of sed's extended-regex option for this platform:
# E when uname reports Darwin, r on everything else.
case "$(uname)" in
    Darwin) sedEreFlag=E ;;
    *)      sedEreFlag=r ;;
esac
6
7
# Runs the real sed with the extended-regex option ($sedEreFlag) prepended,
# so callers can write ( ) + unescaped in their patterns.
# `command` skips this function when resolving the name (no recursion) and,
# unlike `which`, is a POSIX builtin: no extra process per call and no empty
# command name if the lookup tool is missing.
# Arguments: passed through unchanged to sed after the flag
sed () { command sed -"$sedEreFlag" "$@"; }
8
9 7442 aaronmk
# Reads "<level> <name>" lines (one per heading) on stdin and writes
# thesaurus entries, one "from,to" pair per line, on stdout:
# - a level-3 heading is a synonym of the current term (the most recent
#   heading of any other level) and yields "synonym,term"
# - a term starting with ? is ambiguous; every following term that starts
#   with "<name without the ?>_" is one of its alternatives and yields
#   "?ambiguous,alternative"; any other term ends the ambiguous run
# Only the first whitespace-delimited word of each name is used.
headingsToThesaurus ()
{
    ambigTerm= ambigPrefix= term=
    # read splits on IFS but, unlike an unquoted expansion, performs no
    # pathname expansion, so names such as ?foo or a* are never replaced by
    # matching file names from the current directory
    while read -r level name rest; do
        # Handle synonyms
        if test "$level" = 3; then printf '%s,%s\n' "$name" "$term"
        else
            term="$name"

            # Handle ambiguous terms
            termName="${term#\?}"
            if test "$termName" != "$term"; then # ambiguous term (starts with ?)
                ambigTerm="$term"
                ambigPrefix="${termName}_"
            # the prefix is quoted so it is matched literally, not as a glob
            elif test -n "$ambigTerm" && test "${term#"$ambigPrefix"}" != "$term"
            then
                # alternative of ambiguous term (starts with $ambigPrefix)
                printf '%s,%s\n' "$ambigTerm" "$term"
            else # term not related to ambiguous terms
                ambigTerm= ambigPrefix= # clear any ambiguous term in effect
            fi
        fi
    done
}

# Extract "<level> <text>" from each h1-h4 heading that wraps an anchor link
sed -n 's/^.*<h([1-4])[^>]*><a href="#[^>]+>([^<]+).*$/\1 \2/p'\
|headingsToThesaurus