summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Ulrich Müller <ulm@gentoo.org> 2019-03-11 22:29:48 +0100
committer: Ulrich Müller <ulm@gentoo.org> 2019-03-11 22:29:48 +0100
commit: 3db08691f11a7e6e823120073b11bd578acec57e (patch)
tree: 2616dff6d30e1170ca98566a2edbbe7ecedeb463 /Makefile
parent: ebuild-functions.tex: S to WORKDIR fallback is conditional for src_test. (diff)
download: pms-3db08691f11a7e6e823120073b11bd578acec57e.tar.gz
pms-3db08691f11a7e6e823120073b11bd578acec57e.tar.bz2
pms-3db08691f11a7e6e823120073b11bd578acec57e.zip
Makefile: Change encoding of HTML file to UTF-8.
This will allow dropping the dependency on app-text/recode. Replace ligatures in tex4ht output by their components, because they would interfere with text search. Update sed expression for the list of tables workaround.

Signed-off-by: Ulrich Müller <ulm@gentoo.org>
Diffstat (limited to 'Makefile')
-rw-r--r-- Makefile 16
1 file changed, 8 insertions, 8 deletions
diff --git a/Makefile b/Makefile
index 5359342..612af4c 100644
--- a/Makefile
+++ b/Makefile
@@ -44,20 +44,20 @@ pms.dvi: $(LATEXFILES) pms.bbl $(COMMITINFO)
pms.html: $(LATEXFILES) pms.bbl $(COMMITINFO)
set -e; sum=''; \
while true; do \
- mk4ht xhlatex pms xhtml,fn-in; \
+ mk4ht xhlatex pms 'xhtml,fn-in,charset=utf-8' ' -cunihtf -utf8'; \
oldsum=$${sum}; sum=$$(cksum $@); \
test "$${sum}" != "$${oldsum}" || break; \
done
- @# some www servers ignore meta tags, resulting in a wrong charset.
- @# therefore recode the very few non-ascii characters
- recode -d l1..h3 $@
- @# declare encoding as utf-8, although it is pure ascii
- LC_ALL=C sed -i -e '/<?xml\|<meta/s/iso-8859-1/utf-8/' $@
+ @# replace ligatures by their component letters
+ LC_ALL=C sed -i "$$(printf 's/\\xef\\xac\\x8%s/%s/g;' \
+ 0 ff 1 fi 2 fl 3 ffi 4 ffl)" $@
@# work around irregularity in how links to longtables are
@# formatted in the List of Tables
- LC_ALL=C sed -i -e '/<span class="lotToc" >&#x00A0;/{N;N;s/\(&#x00A0;<a \nhref="[^"]\+">\)\([0-9A-Z.]\+\)[ \n]\+/\2\1/}' $@
+ LC_ALL=C sed -Ei '/<span class="lotToc" *>\B/{N;N;'\
+ 's/([^>]*<a\s+href="[^"]+">)([0-9A-Z.]+)\s+/\2\1/;}' $@
@# remove redundant span elements
- LC_ALL=C sed -i -e ':x;/<span\(\s\+[^>]*\)\?$$/{N;bx;};:y;s/\(<span\s\+[^>]*>\)\([^<]*\)<\/span>\1/\1\2/;ty' $@
+ LC_ALL=C sed -Ei ':x;/<span(\s+[^>]*)?$$/{N;bx;};'\
+ ':y;s,(<span\s+[^>]*>)([^<]*)</span>\1,\1\2,;ty' $@
pms.bbl: pms.bib $(LATEXFILES) $(COMMITINFO)
$(aux-clean)