block by nitaku 9135a0f974d115e83e90

Manuscript Transcription Simple Syntax

Full Screen

This experiment shows the use of an Application-Specific Language defined to help users author a TEI representation of a manuscript. This version of MTSS (Manuscript Transcription Simple Syntax) supports line breaks, sentence splitting, abbreviation expansion and word hyphenation.

The editor (with syntax and current sentence highlighting) is developed with CodeMirror.

index.js

(function() {
  
// noprotect
;
  var current_sentence, editor, update_code;

  // Syntax-highlighting mode for MTSS, built with the CodeMirror simple-mode addon.
  //   state `start`: `||` is a 'sentence' token; `[abbr](expan)` yields one token
  //                  per capture group (brackets, abbreviation, parentheses,
  //                  expansion); `{{` is a 'w' token and switches to state `w`.
  //   state `w`:     `}}` is a 'w' token and switches back to `start`; any other
  //                  single character is a 'w_content' token.
  var mtss_states = {};

  mtss_states.start = [
    { regex: new RegExp('\\|\\|'), token: 'sentence' },
    {
      regex: new RegExp('(\\[)([^\\]]*)(\\])(\\()([^\\)]*)(\\))'),
      token: [
        'choice_square', 'choice_abbr', 'choice_square',
        'choice_round', 'choice_expan', 'choice_round'
      ]
    },
    { regex: new RegExp('{{'), token: 'w', next: 'w' }
  ];

  // The closing `}}` rule is listed before the catch-all `.` rule.
  mtss_states.w = [
    { regex: new RegExp('}}'), token: 'w', next: 'start' },
    { regex: new RegExp('.'), token: 'w_content' }
  ];

  CodeMirror.defineSimpleMode('mtss', mtss_states);

  // Build the CodeMirror editor from the #editor textarea, using the 'mtss'
  // mode with line numbers and soft line wrapping.
  editor = CodeMirror.fromTextArea(
    document.getElementById('editor'),
    { mode: 'mtss', lineNumbers: true, lineWrapping: true }
  );

  // Regenerate the TEI on every edit. The wrapper function is required:
  // update_code is only assigned after this listener is registered, so it has
  // to be looked up when the event fires rather than passed directly.
  editor.on('change', function onEditorChange() {
    return update_code();
  });

  /**
   * Translate the MTSS text currently in the editor into TEI markup and show
   * it, syntax-highlighted, in the `#code > code` element.
   *
   * Translation rules:
   *   - every line (the first one included) starts with an <lb/> line break;
   *   - `||` closes the current <s> sentence and opens the next one;
   *   - `{{` and `}}` become <w> and </w>;
   *   - `[abbr](expan)` becomes
   *     <choice><abbr>abbr</abbr><expan>expan</expan></choice>.
   *
   * The <lb/> and <s> elements are then numbered in document order, starting
   * at 1 and formatted with d3.format('02d'). Numbering is a single global
   * replace with a running counter, so any number of lines and sentences is
   * numbered (fixed 1..99 loops would leave everything past the 99th
   * occurrence without an `n` attribute).
   *
   * NOTE(review): the editor text is not XML-escaped, so `<` and `&` typed by
   * the user are copied verbatim into the generated TEI.
   *
   * @returns whatever hljs.highlightBlock returns for the code element
   */
  update_code = function() {
    var code_el, lb_count, mtss, pad, s_count, tei;

    mtss = editor.getValue();

    // The opening <s> and first <lb/> are prepended after the substitutions
    // have run on the user text only.
    tei = '<s class="sentence">\n    <lb/>' + mtss
      .replace(new RegExp('\n', 'g'), '\n    <lb/>')
      .replace(new RegExp('\\|\\|', 'g'), '\n</s>\n<s class="sentence">')
      .replace(new RegExp('{{', 'g'), '<w>')
      .replace(new RegExp('}}', 'g'), '</w>')
      .replace(new RegExp('\\[([^\\]]*)\\]\\(([^\\)]*)\\)', 'g'), '<choice><abbr>$1</abbr><expan>$2</expan></choice>');
    tei += '\n</s>';

    // Build the number formatter once instead of once per occurrence.
    pad = d3.format('02d');

    // A function replacement is used so the counter advances per match and the
    // returned text is inserted literally.
    lb_count = 0;
    tei = tei.replace(new RegExp('<lb/>', 'g'), function() {
      lb_count += 1;
      return '<lb n="' + pad(lb_count) + '"/>';
    });

    s_count = 0;
    tei = tei.replace(new RegExp('<s class="sentence">', 'g'), function() {
      s_count += 1;
      return '<s class="sentence" n="s_' + pad(s_count) + '">';
    });

    code_el = d3.select('#code > code');
    code_el.text(tei);

    // Re-run highlight.js on the freshly replaced text.
    return hljs.highlightBlock(code_el.node());
  };

  // Render the TEI once for the initial content of the editor; later renders
  // are triggered by the editor's 'change' event.
  update_code();

  // Text marker (as returned by editor.markText) of the sentence currently
  // highlighted; stays null until the first 'cursorActivity' event.
  current_sentence = null;

  // Highlight the sentence that contains the cursor: the text between the
  // nearest `||` before the cursor and the nearest `||` after it.
  // NOTE(review): the results of findPrevious()/findNext() are not checked, so
  // this relies on the search cursor's `pos` still holding usable positions
  // when no `||` is found in that direction - confirm against the searchcursor
  // addon.
  editor.on('cursorActivity', function() {
    var boundaries, sentence_end, sentence_start;

    boundaries = editor.getSearchCursor('||', editor.getCursor());

    // The sentence starts right after the previous separator...
    boundaries.findPrevious();
    sentence_start = boundaries.pos.to;

    // ...and ends right before the next one.
    boundaries.findNext();
    sentence_end = boundaries.pos.from;

    // Drop the previous highlight before creating the new one.
    if (current_sentence !== null && current_sentence !== undefined) {
      current_sentence.clear();
    }

    current_sentence = editor.markText(sentence_start, sentence_end, {
      className: 'sentence_highlight'
    });
    return current_sentence;
  });

}).call(this);

index.html

<!DOCTYPE html>
<html>
	<head>
        <meta charset="utf-8">
        <meta name="description" content="Manuscript Transcription Simple Syntax" />
        <title>Manuscript Transcription Simple Syntax</title>
        <!-- Stylesheets: CodeMirror core, the highlight.js "github" theme, then the page's own rules. -->
        <link type="text/css" href="//cdnjs.cloudflare.com/ajax/libs/codemirror/4.6.0/codemirror.min.css" rel="stylesheet"/>
        <link type="text/css" href="//cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/styles/github.min.css" rel="stylesheet"/>
        <link type="text/css" href="index.css" rel="stylesheet"/>
        <!-- Libraries used by index.js: d3 (d3.select, d3.format), CodeMirror core,
             the simple-mode addon (CodeMirror.defineSimpleMode) and the
             searchcursor addon (editor.getSearchCursor). -->
        <script src="//d3js.org/d3.v3.min.js"></script>
        <script src="//cdnjs.cloudflare.com/ajax/libs/codemirror/4.6.0/codemirror.min.js"></script>
        <script src="//wafi.iit.cnr.it/webvis/tmp/codemirror_mode_simple.js"></script>
        <script src="//cdnjs.cloudflare.com/ajax/libs/codemirror/4.6.0/addon/search/searchcursor.min.js"></script>
	</head>
	<body>
      <!-- MTSS source; index.js turns this textarea into the CodeMirror editor. Its text is the initial sample document. -->
      <textarea id="editor">This is a sample text written in MTSS (Manuscript
Transcription Simple Syntax), a simple language that
can be automatically translated into TEI.||Sentences can be
terminated with double pipes.||Line breaks are simply
defined by inserting
newline characters.||A word that's splitted by a line break
can be marked by using two curly braces, as in this {{exam
ple}}.||Abbreviated words can be annotated with the
corresponding expansion by using a combination of square
and round brackets: [abbr.](expansion).||

Play with this code to see how the TEI is updated.</textarea>
      <!-- Generated TEI; index.js writes into the inner <code> element and highlights it with highlight.js. -->
      <pre id="code"><code class="xml"></code></pre>
      <!-- highlight.js is loaded before index.js, which calls hljs.highlightBlock as soon as it runs. -->
      <script src="//cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/highlight.min.js"></script>
      <script src="index.js"></script>
	</body>
</html>

index.coffee

`
// noprotect
`

# MTSS EDITOR

# Syntax-highlighting mode for MTSS, built with the CodeMirror simple-mode addon.
#   state `start`: `||` is a 'sentence' token; `[abbr](expan)` yields one token
#                  per capture group; `{{` is a 'w' token and switches to state `w`.
#   state `w`:     `}}` is a 'w' token and switches back to `start`; any other
#                  single character is a 'w_content' token.
mtss_states =
  start: [
    { regex: new RegExp('\\|\\|'), token: 'sentence' }
    {
      regex: new RegExp('(\\[)([^\\]]*)(\\])(\\()([^\\)]*)(\\))')
      token: ['choice_square', 'choice_abbr', 'choice_square', 'choice_round', 'choice_expan', 'choice_round']
    }
    { regex: new RegExp('{{'), token: 'w', next: 'w' }
  ]
  # the closing `}}` rule is listed before the catch-all `.` rule
  w: [
    { regex: new RegExp('}}'), token: 'w', next: 'start' }
    { regex: new RegExp('.'), token: 'w_content' }
  ]

CodeMirror.defineSimpleMode 'mtss', mtss_states


# Build the CodeMirror editor from the #editor textarea, using the 'mtss' mode
# with line numbers and soft line wrapping.
editor_options =
  mode: 'mtss'
  lineNumbers: true
  lineWrapping: true

editor = CodeMirror.fromTextArea(document.getElementById('editor'), editor_options)


# TEI translation

# Regenerate the TEI on every edit. The wrapper function is required:
# update_code is only assigned after this listener is registered, so it has to
# be looked up when the event fires rather than passed directly.
editor.on 'change', -> update_code()
  
# Translate the MTSS text currently in the editor into TEI markup and show it,
# syntax-highlighted, in the `#code > code` element.
#
# Translation rules:
#   - every line (the first one included) starts with an <lb/> line break
#   - `||` closes the current <s> sentence and opens the next one
#   - `{{` and `}}` become <w> and </w>
#   - `[abbr](expan)` becomes <choice><abbr>abbr</abbr><expan>expan</expan></choice>
#
# The <lb/> and <s> elements are then numbered in document order, starting at 1
# and formatted with d3.format('02d'). Numbering is a single global replace with
# a running counter, so any number of lines and sentences is numbered (fixed
# 1..99 loops would leave everything past the 99th occurrence without `n`).
#
# NOTE(review): the editor text is not XML-escaped, so `<` and `&` typed by the
# user are copied verbatim into the generated TEI.
#
# Returns whatever hljs.highlightBlock returns for the code element.
update_code = () ->
  mtss = editor.getValue()

  # opening sentence in the first folio
  tei = '<s class="sentence">\n    <lb/>' + mtss
    # line break
    .replace(new RegExp('\n','g'), '\n    <lb/>')
    # sentence end mark
    .replace(new RegExp('\\|\\|','g'), '\n</s>\n<s class="sentence">')
    # <w> tag
    .replace(new RegExp('{{','g'), '<w>')
    .replace(new RegExp('}}','g'), '</w>')
    # <choice> tag
    .replace(new RegExp('\\[([^\\]]*)\\]\\(([^\\)]*)\\)','g'), '<choice><abbr>$1</abbr><expan>$2</expan></choice>')

  # closing sentence in the last folio
  tei += '\n</s>'

  # build the number formatter once instead of once per occurrence
  pad = d3.format('02d')

  # lb numbering: the function replacement advances the counter per match
  lb_count = 0
  tei = tei.replace new RegExp('<lb/>', 'g'), () ->
    lb_count += 1
    "<lb n=\"#{pad(lb_count)}\"/>"

  # s numbering
  s_count = 0
  tei = tei.replace new RegExp('<s class="sentence">', 'g'), () ->
    s_count += 1
    "<s class=\"sentence\" n=\"s_#{pad(s_count)}\">"

  code_el = d3.select('#code > code')

  code_el.text(tei)

  # update syntax highlighting
  hljs.highlightBlock(code_el.node())
  
# render the TEI once for the initial content of the editor; later renders are
# triggered by the editor's 'change' event
update_code()


# Sentence highlighting

# text marker (as returned by editor.markText) of the sentence currently
# highlighted; stays null until the first 'cursorActivity' event
current_sentence = null

# Highlight the sentence that contains the cursor: the text between the nearest
# `||` before the cursor and the nearest `||` after it.
# NOTE(review): the results of findPrevious()/findNext() are not checked, so
# this relies on the search cursor's `pos` still holding usable positions when
# no `||` is found in that direction - confirm against the searchcursor addon.
editor.on 'cursorActivity', ->
  boundaries = editor.getSearchCursor '||', editor.getCursor()

  # the sentence starts right after the previous separator...
  boundaries.findPrevious()
  sentence_start = boundaries.pos.to

  # ...and ends right before the next one
  boundaries.findNext()
  sentence_end = boundaries.pos.from

  # drop the previous highlight before creating the new one
  current_sentence?.clear()

  current_sentence = editor.markText sentence_start, sentence_end, { className: 'sentence_highlight' }
  

index.css

/* NOTE(review): index.html contains no <svg> element, so this rule appears unused. */
svg {
  background: white;
}
/* Token styles for the 'mtss' editor mode: each class is one of the mode's
   token names with CodeMirror's cm- prefix. */
/* '||' sentence separator */
.cm-sentence{
  font-weight: bold;
  color: #F70;
}
/* square and round brackets of an [abbr](expansion) annotation */
.cm-choice_square, .cm-choice_round {
  font-weight: bold;
  color: #07F;
}
/* '{{' and '}}' delimiters of a word split by a line break */
.cm-w {
  font-weight: bold;
  color: #092;
}
/* text between '{{' and '}}' */
.cm-w_content {
  color: #092;
}
/* expansion text inside the round brackets */
.cm-choice_expan {
  font-style: italic;
  color: #777;
}
/* NOTE(review): the mode in index.js defines no 'sentence-2' token, so this rule appears unused. */
.cm-sentence-2 {
  background: yellow;
}

/* Two-pane layout: the editor and the generated TEI are flex items of <body>,
   each with flex: 1 and the same fixed height. */
/* the source textarea that index.js hands to CodeMirror.fromTextArea */
#editor {
  flex: 1;
}
/* the CodeMirror editor element (left pane) */
.CodeMirror {
  flex: 1;
  height: 500px;
  line-height: normal;
}
/* the <pre> holding the generated TEI (right pane) */
#code {
  margin: 0;
  border-left: 2px solid gray;
  background: #EEE;
  white-space: pre-wrap;
  overflow-y: scroll;
  
  height: 500px;
  flex: 1;
}

/* <body> is the flex container that lays the two panes out in a row; the
   vendor-prefixed declarations are fallbacks for older browsers, with the
   standard ones last. */
body {
  display: -webkit-box;      /* OLD - iOS 6-, Safari 3.1-6 */
  display: -moz-box;         /* OLD - Firefox 19- (buggy but mostly works) */
  display: -ms-flexbox;      /* TWEENER - IE 10 */
  display: -webkit-flex;     /* NEW - Chrome */
  display: flex;             /* NEW, Spec - Opera 12.1, Firefox 20+ */
  
  -ms-flex-flow: row;
  -webkit-flex-flow: row;
  flex-flow: row;
}

/* Class applied by index.js (editor.markText) to the sentence that contains
   the cursor: a translucent yellow background. */
.sentence_highlight {
  background: rgba(255,255,0,0.15);
}