From 1b2b7c3eb939724696894f31ff27db97b0cac84c Mon Sep 17 00:00:00 2001 From: Andreas Widen Date: Sat, 25 Nov 2023 17:21:37 +0100 Subject: Initial commit. Signed-off-by: Andreas Widen --- sexp-xgettext.scm | 514 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 514 insertions(+) create mode 100644 sexp-xgettext.scm (limited to 'sexp-xgettext.scm') diff --git a/sexp-xgettext.scm b/sexp-xgettext.scm new file mode 100644 index 0000000..7277e4b --- /dev/null +++ b/sexp-xgettext.scm @@ -0,0 +1,514 @@ +;;; Harmonic Flow web site + +(define-module (sexp-xgettext) + #:use-module (ice-9 match) + #:use-module (ice-9 peg) + #:use-module (ice-9 rdelim) + #:use-module (ice-9 receive) + #:use-module (ice-9 regex) + #:use-module (srfi srfi-1) ;lists + #:use-module (srfi srfi-9) ;records + #:export (set-complex-keywords! + set-simple-keywords! + sgettext + sngettext + spgettext + snpgettext + %linguas)) + +(define %complex-keywords + ;; Use set-complex-keywords! to change this to a list of keywords + ;; for sexp-xgettext functions other than sgettext. + (make-parameter '())) + +(define (set-complex-keywords! kw) + (%complex-keywords kw)) + +(define %simple-keywords + ;; Use set-simple-keywords! to change this to a list of keywords + ;; for sgettext. + (make-parameter '())) + +(define (set-simple-keywords! kw) + (%simple-keywords kw)) + +(define (gettext-keyword? id) + (or (member id (%complex-keywords)) + (member id (%simple-keywords)))) + +;;COPIED FROM scripts/sexp-xgettext.scm: +(define* (tag counter prefix #:key (flavor 'start)) + "Formats the number COUNTER as a tag according to FLAVOR, which is +either 'start, 'end or 'empty for a start, end or empty tag, +respectively." + (string-append "<" + (if (eq? flavor 'end) "/" "") + prefix + (number->string counter) + (if (eq? flavor 'empty) "/" "") + ">")) +;;END COPIED FROM scripts/sexp-xgettext.scm + +;;ADAPTED FROM scripts/sexp-xgettext.scm +(define-record-type + (make-construct-fold-state msgid-string maybe-part counter) + construct-fold-state? + ;; msgid constructed so far; #f if none, "" if only empty string + (msgid-string construct-fold-state-msgid-string) + ;; only append this if string follows: + (maybe-part construct-fold-state-maybe-part) + ;; counter for next tag: + (counter construct-fold-state-counter)) +;;END ADAPTED FROM scripts/sexp-xgettext.scm + +(define (sexp->msgid exp) + "Return the msgid as constructed by construct-msgid-and-po-entries +in scripts/sexp-xgettext.scm from the expression EXP." + (let loop ((exp exp) + (prefix "")) + (match exp + (() "") + ((or ('quote inner-exp) + ('quasiquote inner-exp) + ('unquote inner-exp) + ('unquote-splicing inner-exp)) + (loop inner-exp prefix)) + ((first-component . components) + (cond + ((gettext-keyword? first-component) + (error "Double-marked for translation." exp)) + (else + (or + (construct-fold-state-msgid-string + (fold + (lambda (component prev-state) + (match prev-state + (($ msgid-string maybe-part counter) + (let inner-loop ((exp component)) + (match exp + ((or (? symbol?) (? keyword?)) + (if (not msgid-string) + ;; ignore symbols at the beginning + prev-state + ;; else make a tag for the symbol + (make-construct-fold-state + msgid-string + (string-append maybe-part + (tag counter prefix #:flavor 'empty)) + (1+ counter)))) + ((? string?) + (make-construct-fold-state + (string-append (or msgid-string "") + maybe-part exp) + "" counter)) + ((? list?) + (match exp + (() ;ignore empty list + prev-state) + ((or (singleton) + ('quote singleton) + ('quasiquote singleton) + ('unquote singleton) + ('unquote-splicing singleton)) + (inner-loop singleton)) + ((components ...) + (cond + ((and (not (null? components)) + (member (car components) (%simple-keywords))) + ;; if marked for translation, insert inside tag + (make-construct-fold-state + (string-append (or msgid-string "") + maybe-part + (tag counter prefix #:flavor 'start) + (loop (cadr components) + (string-append + prefix + (number->string counter) + ".")) + (tag counter prefix #:flavor 'end)) + "" + (1+ counter))) + ;; else ignore if first + ((not msgid-string) + prev-state) + ;; else make empty tag + (else (make-construct-fold-state + msgid-string + (string-append + maybe-part + (tag counter prefix #:flavor 'empty)) + (1+ counter)))))))))))) + (make-construct-fold-state #f "" 1) + exp)) + (error "Marking for translation yields empty msgid." exp))))) + ((? string?) exp) + (else (error "Single symbol marked for translation." exp))))) + +(define-record-type + (make-deconstruct-fold-state tagged maybe-tagged counter) + deconstruct-fold-state? + ;; XML-tagged expressions as an association list name->expression: + (tagged deconstruct-fold-state-tagged) + ;; associate this not-yet-tagged expression with pre if string + ;; follows, with post if not: + (maybe-tagged deconstruct-fold-state-maybe-tagged) + ;; counter for next tag: + (counter deconstruct-fold-state-counter)) + +(define (deconstruct exp msgstr) + "Return an s-expression like EXP, but filled with the content from +MSGSTR." + (define (find-empty-element msgstr name) + "Return the regex match structure for the empty tag for XML +element of type NAME inside MSGSTR. If the element does not exist or +is more than the empty tag, #f is returned." + (string-match (string-append "<" (regexp-quote name) "/>") msgstr)) + (define (find-element-with-content msgstr name) + "Return the regex match structure for the non-empty XML element of +type NAME inside MSGSTR. Submatch 1 is its content. If the element +does not exist or is just the empty tag, #f is returned." + (string-match (string-append "<" (regexp-quote name) ">" + "(.*)" + "") + msgstr)) + (define (get-first-element-name prefix msgstr) + "Return the name of the first XML element in MSGSTR whose name +begins with PREFIX, or #f if there is none." + (let ((m (string-match + (string-append "<(" (regexp-quote prefix) "[^>/.]+)/?>") msgstr))) + (and m (match:substring m 1)))) + (define (prefix+counter prefix counter) + "Return PREFIX with the number COUNTER appended." + (string-append prefix (number->string counter))) + (let loop ((exp exp) + (msgstr msgstr) + (prefix "")) + (define (unwrap-marked-expression exp) + "Return two values for an expression EXP containing a (possibly +quoted/unquoted) marking for translation with a simple keyword at its +root. The first return value is a list with the inner expression, the +second is a procedure to wrap the processed inner expression in the +same quotes or unquotes again." + (match exp + (('quote inner-exp) + (receive (unwrapped quotation) + (unwrap-marked-expression inner-exp) + (values unwrapped + (lambda (res) + (list 'quote (quotation res)))))) + (('quasiquote inner-exp) + (receive (unwrapped quotation) + (unwrap-marked-expression inner-exp) + (values unwrapped + (lambda (res) + (list 'quasiquote (quotation res)))))) + (('unquote inner-exp) + (receive (unwrapped quotation) + (unwrap-marked-expression inner-exp) + (values unwrapped + (lambda (res) + (list 'unquote (quotation res)))))) + (('unquote-splicing inner-exp) + (receive (unwrapped quotation) + (unwrap-marked-expression inner-exp) + (values unwrapped + (lambda (res) + (list 'unquote-splicing (quotation res)))))) + ((marking . rest) ;list with marking as car + ;; assume arg to translate is first argument to marking: + (values (list-ref rest 0) identity)))) + (define (assemble-parenthesized-expression prefix tagged) + "Return a parenthesized expression deconstructed from MSGSTR +with the meaning of XML elements taken from the name->expression +association list TAGGED. The special tags [prefix]pre and +[prefix]post are associated with a list of expressions before or after +all others in the parenthesized expression with the prefix, +respectively, in reverse order." + (append ;prepend pre elements to what is in msgstr + (reverse (or (assoc-ref tagged (string-append prefix "pre")) '())) + (let assemble ((rest msgstr)) + (let ((name (get-first-element-name prefix rest))) + (cond + ((and name (find-empty-element rest name)) => + ;; first XML element in rest is empty element + (lambda (m) + (cons* + (match:prefix m) ;prepend string before name + (assoc-ref tagged name) ;and expression for name + (assemble (match:suffix m))))) + ((and name (find-element-with-content rest name)) => + ;; first XML element in rest has content + (lambda (m) + (receive (unwrapped quotation) + (unwrap-marked-expression (assoc-ref tagged name)) + (cons* + (match:prefix m) ;prepend string before name + ;; and the deconstructed element with the content as msgstr: + (quotation + (loop + unwrapped + (match:substring m 1) + (string-append name "."))) + (assemble (match:suffix m)))))) + (else + ;; there is no first element + (cons + rest ;return remaining string + (reverse ;and post expressions + (or (assoc-ref tagged (string-append prefix "post")) '()))))))))) + (match exp + (() '()) + (('quote singleton) + (cons 'quote (list (loop singleton msgstr prefix)))) + (('quasiquote singleton) + (cons 'quasiquote (list (loop singleton msgstr prefix)))) + (('unquote singleton) + (cons 'unquote (list (loop singleton msgstr prefix)))) + (('unquote-splicing singleton) + (cons 'unquote-splicing (list (loop singleton msgstr prefix)))) + ((singleton) + (list (loop singleton msgstr prefix))) + ((first-component . components) + (cond + ((gettext-keyword? first-component) + ;; another marking for translation + ;; -> should be an error anyway; just retain exp + exp) + (else + ;; This handles a single level of a parenthesized expression. + ;; assemble-parenthesized-expression will call loop to + ;; recurse to deeper levels. + (let ((tagged-state + (fold + (lambda (component prev-state) + (match prev-state + (($ tagged maybe-tagged counter) + (let inner-loop ((exp component) ;sexp to handle + (quoting identity)) ;for wrapping state + (define (tagged-with-maybes) + "Return the value of tagged after adding all +maybe-tagged expressions. This should be used as the base value for +tagged when a string or marked expression is seen." + (match counter + (#f + (alist-cons (string-append prefix "pre") + maybe-tagged + tagged)) + ((? number?) + (let accumulate ((prev-counter counter) + (maybes (reverse maybe-tagged))) + (match maybes + (() tagged) + ((head . tail) + (alist-cons + (prefix+counter prefix prev-counter) + head + (accumulate (1+ prev-counter) tail)))))))) + (define (add-maybe exp) + "Return a deconstruct-fold-state with EXP +added to maybe-tagged. This should be used for expressions that are +neither strings nor marked for translation with a simple keyword." + (make-deconstruct-fold-state + tagged + (cons (quoting exp) maybe-tagged) + counter)) + (define (counter-with-maybes) + "Return the old counter value incremented by +one for each expression in maybe-tagged. This should be used together +with tagged-with-maybes." + (match counter + ((? number?) + (+ counter (length maybe-tagged))) + (#f + 1))) + (define (add-tagged exp) + "Return a deconstruct-fold-state with an +added association in tagged from the current counter to EXP. If +MAYBE-TAGGED is not empty, associations for its expressions are added +to pre or their respective counter. This should be used for +expressions marked for translation with a simple keyword." + (let ((c (counter-with-maybes))) + (make-deconstruct-fold-state + (alist-cons + (prefix+counter prefix c) + (quoting exp) + (tagged-with-maybes)) + '() + (1+ c)))) + (match exp + (('quote inner-exp) + (inner-loop inner-exp + (lambda (res) + (list 'quote res)))) + (('quasiquote inner-exp) + (inner-loop inner-exp + (lambda (res) + (list 'quasiquote res)))) + (('unquote inner-exp) + (inner-loop inner-exp + (lambda (res) + (list 'unquote res)))) + (('unquote-splicing inner-exp) + (inner-loop inner-exp + (lambda (res) + (list 'unquote-splicing res)))) + (((? gettext-keyword?) . rest) + (add-tagged exp)) + ((or (? symbol?) (? keyword?) (? list?)) + (add-maybe exp)) + ((? string?) + ;; elements in maybe-tagged appear between strings + (let ((c (counter-with-maybes))) + (make-deconstruct-fold-state + (tagged-with-maybes) + '() + c)))))))) + (make-deconstruct-fold-state '() '() #f) + exp))) + (match tagged-state + (($ tagged maybe-tagged counter) + (assemble-parenthesized-expression + prefix + (match maybe-tagged + (() tagged) + (else ;associate maybe-tagged with pre or post + (alist-cons + (cond ;if there already is a pre, use post + ((assoc-ref tagged (string-append prefix "pre")) + (string-append prefix "post")) + (else (string-append prefix "pre"))) + maybe-tagged + tagged)))))))))) + ((? string?) msgstr) + (else (error "Single symbol marked for translation." exp))))) + +;; NOTE: The sgettext macros have no hygiene because they use +;; datum->syntax and do not preserve the semantics of anything looking +;; like an sgettext macro. This is an exceptional use case; do not +;; try this at home. + +(define (sgettext x) + "After choosing an identifier for marking s-expressions for +translation, make it usable by defining a macro with it calling +sgettext. If for example the chosen identifier is G_, +use (define-syntax G_ sgettext)." + (syntax-case x () + ((id exp) + (let* ((msgid (sexp->msgid (syntax->datum #'exp))) + (new-exp (deconstruct (syntax->datum #'exp) + (gettext msgid)))) + (datum->syntax #'id new-exp))))) + +;; gettext’s share/gettext/gettext.h tells us we can prepend a msgctxt +;; and #\eot before a msgid in a gettext call. + +(define (spgettext x) + "After choosing an identifier for behavior similar to pgettext:1c,2, +make it usable like (define-syntax C_ spgettext)." + (syntax-case x () + ((id msgctxt exp) + (let* ((gettext-context-glue #\eot) ;as defined in gettext.h + (lookup (string-append (syntax->datum #'msgctxt) + (string gettext-context-glue) + (sexp->msgid (syntax->datum #'exp)))) + (msgstr (car (reverse (string-split (gettext lookup) + gettext-context-glue)))) + (new-exp (deconstruct (syntax->datum #'exp) + msgstr))) + (datum->syntax #'id new-exp))))) + +(define %plural-numbers + ;; Hard-coded list of input numbers such that for each language’s + ;; plural formula, for each possible output grammatical number, + ;; there is an n among %plural-numbers that yields this output (for + ;; any language documented when running “info "(gettext) Plural + ;; forms"”), except 1 is omitted from this list because it is a + ;; special case for sngettext. That is, calling ngettext with each + ;; number from %plural-numbers and with 1 in any locale is + ;; guaranteed to return each plural form at least once. It would be + ;; more resilient towards new languages if instead of hard-coding we + ;; computed this from the Plural-Forms in the MO file header entry, + ;; but that is not worth the incurred code complexity. + '(0 2 3 11 100)) + +(define (sngettext x) + "After choosing an identifier for behavior similar to ngettext:1,2, +make it usable like (define-syntax N_ sngettext). sngettext takes +into account that not all languages have only singular and plural +forms." + (syntax-case x () + ((id exp1 exp2 n) + (let* ((msgid1 (sexp->msgid (syntax->datum #'exp1))) + (msgid2 (sexp->msgid (syntax->datum #'exp2))) + (msgstr1 (ngettext msgid1 msgid2 1)) + (result (acons ;return an association list msgstr->deconstructed + ;; msgstr for n=1: + msgstr1 + `(,'unquote ,(deconstruct (syntax->datum #'exp1) + msgstr1)) + ;; other msgstr for n of each plural form: + (map + (lambda (n) + (let ((msgstr (ngettext msgid1 msgid2 n))) + (cons msgstr `(,'unquote + ,(deconstruct (syntax->datum #'exp2) + msgstr))))) + %plural-numbers)))) + (datum->syntax + #'id + `(,assoc-ref (,'quasiquote ,result) + (,ngettext ,msgid1 ,msgid2 ,(syntax->datum #'n)))))))) + +(define (snpgettext x) + "After choosing an identifier for behavior similar to npgettext:1c,2,3, +make it usable like (define-syntax NC_ snpgettext)." + (syntax-case x () + ((id msgctxt exp1 exp2 n) + (let* ((gettext-context-glue #\eot) ;as defined in gettext.h + (msgid1 (string-append (syntax->datum #'msgctxt) + (string gettext-context-glue) + (sexp->msgid (syntax->datum #'exp1)))) + ;; gettext.h implementation shows: msgctxt is only part of msgid1. + (msgid2 (sexp->msgid (syntax->datum #'exp2))) + (msgstr1 (car + (reverse + (string-split + (ngettext msgid1 msgid2 1) + gettext-context-glue)))) + (result (acons ;return an association list msgstr->deconstructed + ;; msgstr for n=1: + msgstr1 + `(,'unquote ,(deconstruct (syntax->datum #'exp1) + msgstr1)) + ;; other msgstr for n of each plural form: + (map + (lambda (n) + (let ((msgstr (car + (reverse + (string-split + (ngettext msgid1 msgid2 n) + gettext-context-glue))))) + (cons msgstr `(,'unquote + ,(deconstruct (syntax->datum #'exp2) + msgstr))))) + %plural-numbers)))) + (datum->syntax + #'id + `(,assoc-ref (,'quasiquote ,result) + (,car + (,reverse + (,string-split + (,ngettext ,msgid1 ,msgid2 ,(syntax->datum #'n)) + ,gettext-context-glue))))))))) + +(define %linguas + (with-input-from-file "po/LINGUAS" + (lambda _ + (let loop ((line (read-line))) + (if (eof-object? line) + '() + ;; else read linguas before comment + (let ((before-comment (car (string-split line #\#)))) + (append + (map match:substring (list-matches "[^ \t]+" before-comment)) + (loop (read-line))))))))) -- cgit v1.2.3-54-g00ecf