Project

General

Profile

1
#!/bin/sh
# Translates a Redmine HTML page to a thesaurus.
# Usage: self <page
#
# Input (stdin): HTML; a line is picked up when it contains
#     <hN ...><a href="#...">NAME
# with N in 1-4. sed works line by line, so a line yields at most one heading.
# Only the first whitespace-delimited word of NAME is used.
#
# Output (stdout): one comma-separated pair per line:
#   SYNONYM,TERM      for each level-3 heading; TERM is the most recent heading
#                     of any other level (empty if there was none yet).
#   ?NAME,ALTERNATIVE for each term starting with "NAME_" that is seen while
#                     the ambiguous term "?NAME" is in effect.

# Option letter that turns on extended regular expressions in sed:
# "E" on Darwin, "r" everywhere else.
case "$(uname)" in
    Darwin) sedEreFlag=E ;;
    *)      sedEreFlag=r ;;
esac

# Wrapper so every sed call below uses extended regular expressions.
# `command` bypasses this function and runs the real utility, which avoids
# the non-standard `which` lookup.
sed () { command sed -"$sedEreFlag" "$@"; }

# Reads the HTML page on stdin and writes the thesaurus pairs to stdout.
redmine_to_thesaurus () {
    # Reduce every recognised heading line to "LEVEL NAME".
    sed -n 's/^.*<h([1-4])[^>]*><a href="#[^>]+>([^<]+).*$/\1 \2/p' |
    {
        # Per-run state, reset on every invocation.
        ambigTerm=      # ambiguous term currently in effect (with its "?")
        ambigPrefix=    # "NAME_" prefix that marks its alternatives
        term=           # most recent non-synonym heading

        # `read` splits on IFS like the former `set -- $line`, but without
        # pathname expansion, so names such as "?foo" or "a*" stay literal.
        # The trailing `_` swallows any further words of the heading text.
        while read -r level name _; do
            # Handle synonyms
            if test "$level" = 3; then
                printf '%s,%s\n' "$name" "$term"
                continue
            fi

            term=$name

            # Handle ambiguous terms
            termName=${term#\?}
            if test "$termName" != "$term"; then
                # ambiguous term (starts with ?)
                ambigTerm=$term
                ambigPrefix=${termName}_
            elif test -n "$ambigTerm" &&
                 test "${term#"$ambigPrefix"}" != "$term"; then
                # alternative of the ambiguous term (starts with its prefix);
                # the inner quotes make the prefix match literally, not as a
                # glob pattern
                printf '%s,%s\n' "$ambigTerm" "$term"
            else
                # term not related to ambiguous terms:
                # clear any ambiguous term in effect
                ambigTerm= ambigPrefix=
            fi
        done
    }
}

redmine_to_thesaurus
(57-57/79)