This experiment shows the use of an Application-Specific Language defined to help users author a TEI representation of a manuscript. This version of MTSS (Manuscript Transcription Simple Syntax) supports line breaks, sentence splitting, abbreviation expansion and word hyphenation.
The editor (with syntax and current sentence highlighting) is developed with CodeMirror.
(function() {
// noprotect
;
var current_sentence, editor, update_code;
// Register the 'mtss' highlighting grammar with CodeMirror.
// The grammar has two states:
//   start - plain text; recognises the '||' sentence mark, the
//           [abbr](expansion) construct and the '{{' opener, which
//           switches to state 'w'
//   w     - inside '{{ ... }}'; every character is a 'w_content' token
//           until '}}' switches back to 'start'
CodeMirror.defineSimpleMode('mtss', (function() {
  // Build a single rule object; `next` is only set on rules that change state.
  var rule = function(pattern, token, next) {
    var built = {
      regex: new RegExp(pattern),
      token: token
    };
    if (next != null) {
      built.next = next;
    }
    return built;
  };

  // One token name per capture group of the [abbr](expansion) pattern.
  var choice_tokens = ['choice_square', 'choice_abbr', 'choice_square', 'choice_round', 'choice_expan', 'choice_round'];

  return {
    start: [
      rule('\\|\\|', 'sentence'),
      rule('(\\[)([^\\]]*)(\\])(\\()([^\\)]*)(\\))', choice_tokens),
      rule('{{', 'w', 'w')
    ],
    w: [
      rule('}}', 'w', 'start'),
      rule('.', 'w_content')
    ]
  };
})());
// Create the CodeMirror editor from the #editor textarea, using the 'mtss'
// mode with line numbers and soft line wrapping.
editor = CodeMirror.fromTextArea(
  document.getElementById('editor'),
  { mode: 'mtss', lineNumbers: true, lineWrapping: true }
);

// Regenerate the TEI output after every edit. update_code is looked up when
// the handler runs, so it may be assigned after this registration.
editor.on('change', function on_editor_change() {
  return update_code();
});
/**
 * Translate the MTSS text currently in the editor into TEI and display it.
 *
 * Reads the source with `editor.getValue()`, converts the MTSS markers
 * (newline, `||`, `{{ }}`, `[abbr](expansion)`) into `<lb/>`, `<s>`, `<w>`
 * and `<choice>` elements, numbers every `<lb/>` and every `<s>` in document
 * order, writes the result as text into `#code > code` and re-runs
 * highlight.js on that element.
 *
 * Numbering covers all tags, however many there are (numbers are zero-padded
 * to at least two digits by d3's '02d' format).
 *
 * @returns {*} the value returned by `hljs.highlightBlock`.
 */
update_code = function() {
  var code_el, lb_count, s_count, tei, two_digits;

  // The document always opens inside a first sentence, on a first line.
  tei = '<s class="sentence">\n <lb/>' + editor.getValue()
    // line break
    .replace(new RegExp('\n', 'g'), '\n <lb/>')
    // sentence end mark: close the current sentence and open the next one
    .replace(new RegExp('\\|\\|', 'g'), '\n</s>\n<s class="sentence">')
    // <w> tag
    .replace(new RegExp('{{', 'g'), '<w>')
    .replace(new RegExp('}}', 'g'), '</w>')
    // <choice> tag
    .replace(new RegExp('\\[([^\\]]*)\\]\\(([^\\)]*)\\)', 'g'), '<choice><abbr>$1</abbr><expan>$2</expan></choice>');

  // Close the last sentence.
  tei += '\n</s>';

  // Build the formatter once instead of once per numbered tag.
  two_digits = d3.format('02d');

  // Number the tags in a single pass each; a replacer function is used so the
  // counter advances once per match, with no upper bound on the tag count.
  lb_count = 0;
  tei = tei.replace(new RegExp('<lb/>', 'g'), function() {
    lb_count += 1;
    return "<lb n=\"" + two_digits(lb_count) + "\"/>";
  });

  s_count = 0;
  tei = tei.replace(new RegExp('<s class="sentence">', 'g'), function() {
    s_count += 1;
    return "<s class=\"sentence\" n=\"s_" + two_digits(s_count) + "\">";
  });

  code_el = d3.select('#code > code');
  code_el.text(tei);

  // update syntax highlighting
  return hljs.highlightBlock(code_el.node());
};
// Render the TEI once for the initial editor content.
update_code();

// Text marker covering the sentence under the cursor (null until the first
// cursor movement).
current_sentence = null;

// On every cursor movement, highlight the span between the '||' mark found
// before the cursor and the next one found after it.
// NOTE(review): when there is no '||' on one side, the range depends on where
// the search cursor leaves `pos` after a failed find - confirm against the
// searchcursor addon.
editor.on('cursorActivity', function on_cursor_activity() {
  var finder, sentence_end, sentence_start;

  finder = editor.getSearchCursor('||', editor.getCursor());

  // The sentence starts where the previous mark ends...
  finder.findPrevious();
  sentence_start = finder.pos.to;

  // ...and ends where the following mark begins.
  finder.findNext();
  sentence_end = finder.pos.from;

  // Drop the previous highlight before creating the new one.
  if (current_sentence != null) {
    current_sentence.clear();
  }

  current_sentence = editor.markText(sentence_start, sentence_end, {
    className: 'sentence_highlight'
  });
  return current_sentence;
});
}).call(this);
<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="utf-8">
  <meta name="description" content="Manuscript Transcription Simple Syntax" />
  <title>Manuscript Transcription Simple Syntax</title>
  <!-- Stylesheets: CodeMirror base styles, the highlight.js GitHub theme, then the page's own rules. -->
  <link type="text/css" href="//cdnjs.cloudflare.com/ajax/libs/codemirror/4.6.0/codemirror.min.css" rel="stylesheet"/>
  <link type="text/css" href="//cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/styles/github.min.css" rel="stylesheet"/>
  <link type="text/css" href="index.css" rel="stylesheet"/>
  <!-- Libraries used by index.js: d3 (element selection and number formatting) and CodeMirror with two addons. -->
  <script src="//d3js.org/d3.v3.min.js"></script>
  <script src="//cdnjs.cloudflare.com/ajax/libs/codemirror/4.6.0/codemirror.min.js"></script>
  <!-- NOTE(review): presumably provides CodeMirror.defineSimpleMode, which index.js calls; it is served from a "tmp" path, so confirm it is still reachable. -->
  <script src="//wafi.iit.cnr.it/webvis/tmp/codemirror_mode_simple.js"></script>
  <!-- Search-cursor addon: index.js calls editor.getSearchCursor to find the '||' sentence marks. -->
  <script src="//cdnjs.cloudflare.com/ajax/libs/codemirror/4.6.0/addon/search/searchcursor.min.js"></script>
</head>
<body>
<!-- MTSS source. index.js turns this textarea into a CodeMirror editor; its text is the initial document, so the lines below must stay unindented. -->
<textarea id="editor">This is a sample text written in MTSS (Manuscript
Transcription Simple Syntax), a simple language that
can be automatically translated into TEI.||Sentences can be
terminated with double pipes.||Line breaks are simply
defined by inserting
newline characters.||A word that's split by a line break
can be marked by using two curly braces, as in this {{exam
ple}}.||Abbreviated words can be annotated with the
corresponding expansion by using a combination of square
and round brackets: [abbr.](expansion).||
Play with this code to see how the TEI is updated.</textarea>
<!-- TEI output. index.js writes the generated markup into the <code> element and highlights it with highlight.js. -->
<pre id="code"><code class="xml"></code></pre>
<!-- highlight.js must be loaded before index.js, which calls hljs.highlightBlock as soon as it runs. -->
<script src="//cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/highlight.min.js"></script>
<script src="index.js"></script>
</body>
</html>
`
// noprotect
`
# MTSS EDITOR

# One token name per capture group of the [abbr](expansion) pattern.
choice_tokens = ['choice_square','choice_abbr','choice_square','choice_round','choice_expan','choice_round']

# Highlighting grammar with two states:
#   start - plain text; recognises '||', [abbr](expansion) and the '{{'
#           opener, which switches to state 'w'
#   w     - inside '{{ ... }}'; every character is 'w_content' until '}}'
#           switches back to 'start'
mtss_grammar =
  start: [
    {regex: new RegExp('\\|\\|'), token: 'sentence'}
    {regex: new RegExp('(\\[)([^\\]]*)(\\])(\\()([^\\)]*)(\\))'), token: choice_tokens}
    {regex: new RegExp('{{'), token: 'w', next: 'w'}
  ]
  w: [
    {regex: new RegExp('}}'), token: 'w', next: 'start'}
    {regex: new RegExp('.'), token: 'w_content'}
  ]

CodeMirror.defineSimpleMode('mtss', mtss_grammar)

# Create the editor from the #editor textarea, using the 'mtss' mode with
# line numbers and soft line wrapping.
editor_options =
  mode: 'mtss'
  lineNumbers: true
  lineWrapping: true

editor = CodeMirror.fromTextArea(document.getElementById('editor'), editor_options)
# TEI translation

# Regenerate the TEI output after every edit.
editor.on 'change', () -> update_code()

# Translate the MTSS text currently in the editor into TEI and display it.
#
# Converts the MTSS markers (newline, '||', '{{ }}', '[abbr](expansion)') into
# <lb/>, <s>, <w> and <choice> elements, numbers every <lb/> and every <s> in
# document order (no upper bound on their count), writes the result as text
# into '#code > code' and re-runs highlight.js on that element.
#
# Returns whatever hljs.highlightBlock returns.
update_code = () ->
  mtss = editor.getValue()

  # Conversion steps, in order:
  #   - opening sentence in the first folio
  #   - line break
  #   - sentence end mark
  #   - <w> tag
  #   - <choice> tag
  tei = '<s class="sentence">\n <lb/>' + mtss
    .replace(new RegExp('\n','g'), '\n <lb/>')
    .replace(new RegExp('\\|\\|','g'), '\n</s>\n<s class="sentence">')
    .replace(new RegExp('{{','g'), '<w>')
    .replace(new RegExp('}}','g'), '</w>')
    .replace(new RegExp('\\[([^\\]]*)\\]\\(([^\\)]*)\\)','g'), '<choice><abbr>$1</abbr><expan>$2</expan></choice>')

  # closing sentence in the last folio
  tei += '\n</s>'

  # Build the formatter once instead of once per numbered tag.
  two_digits = d3.format('02d')

  # lb numbering: one pass, the counter advances once per match
  lb_count = 0
  tei = tei.replace new RegExp('<lb/>', 'g'), () ->
    lb_count += 1
    "<lb n=\"#{two_digits(lb_count)}\"/>"

  # s numbering: same approach
  s_count = 0
  tei = tei.replace new RegExp('<s class="sentence">', 'g'), () ->
    s_count += 1
    "<s class=\"sentence\" n=\"s_#{two_digits(s_count)}\">"

  code_el = d3.select('#code > code')
  code_el.text(tei)

  # update syntax highlighting
  hljs.highlightBlock(code_el.node())
# Render the TEI once for the initial editor content.
update_code()

# Sentence highlighting
# Text marker covering the sentence under the cursor (null until the first
# cursor movement).
current_sentence = null

# On every cursor movement, highlight the span between the '||' mark found
# before the cursor and the next one found after it.
# NOTE(review): when there is no '||' on one side, the range depends on where
# the search cursor leaves `pos` after a failed find - confirm against the
# searchcursor addon.
editor.on 'cursorActivity', () ->
  finder = editor.getSearchCursor('||', editor.getCursor())

  # The sentence starts where the previous mark ends...
  finder.findPrevious()
  sentence_start = finder.pos.to

  # ...and ends where the following mark begins.
  finder.findNext()
  sentence_end = finder.pos.from

  # Drop the previous highlight before creating the new one.
  current_sentence?.clear()
  current_sentence = editor.markText(sentence_start, sentence_end, {className: 'sentence_highlight'})
/* Stylesheet for the MTSS editor page: token colours for the 'mtss'
   CodeMirror mode, the two-pane layout and the current-sentence highlight. */

/* NOTE(review): no <svg> element appears in the page markup - possibly a
   leftover from another experiment; confirm before removing. */
svg {
background: white;
}

/* Token styles. CodeMirror renders a token named x with the class cm-x, so
   these selectors correspond to the token names of the 'mtss' mode. */

/* '||' sentence mark */
.cm-sentence{
font-weight: bold;
color: #F70;
}
/* Brackets of the [abbr](expansion) construct */
.cm-choice_square, .cm-choice_round {
font-weight: bold;
color: #07F;
}
/* '{{' and '}}' delimiters of a split word */
.cm-w {
font-weight: bold;
color: #092;
}
/* Text between '{{' and '}}' */
.cm-w_content {
color: #092;
}
/* Expansion text inside the round brackets (the abbreviation text,
   choice_abbr, has no rule of its own) */
.cm-choice_expan {
font-style: italic;
color: #777;
}
/* NOTE(review): the mode shown in index.js emits no 'sentence-2' token -
   presumably unused; confirm. */
.cm-sentence-2 {
background: yellow;
}

/* Layout: body is a flex row; the editor and the output pane each take an
   equal share of the width and are both 500px tall. */

/* The original textarea */
#editor {
flex: 1;
}
/* Root element of the CodeMirror editor */
.CodeMirror {
flex: 1;
height: 500px;
line-height: normal;
}
/* TEI output pane (<pre id="code">): wraps long lines and scrolls vertically */
#code {
margin: 0;
border-left: 2px solid gray;
background: #EEE;
white-space: pre-wrap;
overflow-y: scroll;
height: 500px;
flex: 1;
}
/* Flex container, with vendor-prefixed fallbacks listed from oldest to newest
   so that the last declaration a browser understands wins */
body {
display: -webkit-box; /* OLD - iOS 6-, Safari 3.1-6 */
display: -moz-box; /* OLD - Firefox 19- (buggy but mostly works) */
display: -ms-flexbox; /* TWEENER - IE 10 */
display: -webkit-flex; /* NEW - Chrome */
display: flex; /* NEW, Spec - Opera 12.1, Firefox 20+ */
-ms-flex-flow: row;
-webkit-flex-flow: row;
flex-flow: row;
}
/* Class passed to editor.markText for the sentence under the cursor;
   semi-transparent so token colours stay readable */
.sentence_highlight {
background: rgba(255,255,0,0.15);
}