Project

General

Profile

1
#!/bin/sh
2
# Translates a Redmine HTML page to a thesaurus
3
# Usage: self <page
4

    
5
# Choose the sed option letter that enables extended regular expressions
# (ERE); it is consumed as -"$sedEreFlag" by the sed wrapper in this script.
# The sed binary is probed directly instead of guessing from the OS name:
# -E is tried first (the spelling macOS sed needs), and -r is the fallback
# for sed builds that only accept the GNU spelling.
# `command` guarantees the real utility is probed even if a shell function
# named sed is already defined.
if printf 'ab\n' | command sed -E 's/(a)b/\1/' >/dev/null 2>&1; then
    sedEreFlag=E
else
    sedEreFlag=r
fi
6

    
7
# Wrapper so that every `sed` call in this script uses extended regular
# expressions: it prepends -$sedEreFlag (E or r) to the caller's arguments.
# `command` skips shell-function lookup, so the real sed utility runs instead
# of recursing into this wrapper, and no external `which` lookup is needed.
sed () { command sed -"$sedEreFlag" "$@"; }
8

    
9
# State carried across headings while scanning the page:
#   term      - name of the most recent heading that was not a level-3 one
#   ambigTerm - most recent ambiguous term (a name starting with "?"), or
#               empty when none is in effect
ambigTerm=
term=

# Reduce every line holding an <h1>..<h4> heading (optionally preceded by an
# <img> tag) whose content starts with an <a href="#..."> anchor to
# "<level> <anchor text>", then turn those pairs into "left,right" thesaurus
# lines on stdout. Relies on the ERE-enabled sed wrapper of this script.
sed -n 's/^.*<h([1-4])[^>]*>(<img [^>]*> *)?<a href="#[^>]+>([^<]+).*$/\1 \3/p' |
while read -r level name _; do
    # `read` does the IFS splitting: level is the first word, name the second,
    # and any further words are discarded into `_`. Unlike an unquoted
    # `set -- $line`, this performs no pathname expansion, so names that
    # contain glob characters (ambiguous terms start with "?") stay literal.

    # Handle synonyms: a level-3 heading is emitted as "synonym,term" and
    # does not become the current term itself.
    if test "$level" = 3; then
        printf '%s,%s\n' "$name" "$term"
    else
        term=$name

        # Handle ambiguous terms
        case $term in
            \?*)
                # Ambiguous term (starts with ?); remembered with its "?".
                ambigTerm=$term
                ;;
            *)
                if test "$level" = 4; then
                    # Alternative of the ambiguous term in effect.
                    # NOTE(review): if no ambiguous term is in effect this
                    # prints ",<term>" - confirm whether that is intended.
                    printf '%s,%s\n' "$ambigTerm" "$term"
                else
                    # Term not related to ambiguous terms: clear any
                    # ambiguous term in effect.
                    ambigTerm=
                fi
                ;;
        esac
    fi
done
(57-57/79)