block by nitaku 1fc26ae44b5865700d22

Arc Diagram III

Full Screen

Like the previous example, but with the option to provide a custom string. The application relies on a server-side computation that uses Oliver Mader’s code to compute the essential matching pairs.

The example shows the first 93 digits of PI.

A translucent white stroke has also been introduced to avoid mistaking palindrome repetition of matching subsequences with large subsequences (e.g. 793238 and 383279 on the left side of the visualization).

index.js

// Generated by CoffeeScript 1.10.0
(function() {
  var height, redraw, svg, width;

  svg = d3.select('svg');

  width = svg.node().getBoundingClientRect().width;

  height = svg.node().getBoundingClientRect().height;

  svg.attr({
    viewBox: (-width / 2) + " " + (-height + 60) + " " + width + " " + height
  });

  width -= 60;

  redraw = function() {
    var j, ref, results, sequence, x;
    sequence = d3.select('#sequence_input').node().value;
    x = d3.scale.ordinal().domain((function() {
      results = [];
      for (var j = 0, ref = sequence.length; 0 <= ref ? j <= ref : j >= ref; 0 <= ref ? j++ : j--){ results.push(j); }
      return results;
    }).apply(this)).rangeBands([-width / 2, width / 2]);
    return d3.json("main.php?sequence=" + sequence, function(matches) {
      var arcs, characters, enter_arcs, subsequence;
      svg.selectAll('.character').remove();
      svg.selectAll('.arc').remove();
      svg.select('.subsequence').remove();
      characters = svg.selectAll('.character').data(sequence);
      characters.enter().append('text').text(function(d) {
        return d;
      }).attr({
        "class": 'character',
        x: function(d, i) {
          return x(i) + x.rangeBand() / 2;
        },
        dy: '1em'
      });
      arcs = svg.selectAll('.arc').data(matches.filter(function(m) {
        return m.length > 1;
      }));
      enter_arcs = arcs.enter().append('path').attr({
        "class": 'arc',
        d: function(match) {
          var big_radius, end_left, end_right, small_radius, start_left, start_right;
          start_left = x(match.source);
          start_right = x(match.source + match.length);
          end_left = x(match.target);
          end_right = x(match.target + match.length);
          big_radius = (end_right - start_left) / 2;
          small_radius = (end_left - start_right) / 2;
          return "M" + start_left + " 0 A" + big_radius + " " + big_radius + " 0 1 1 " + end_right + " 0 L" + end_left + " 0 A" + small_radius + " " + small_radius + " 0 1 0 " + start_right + " 0  z";
        }
      });
      subsequence = svg.append('text').attr({
        "class": 'subsequence',
        y: -350
      });
      enter_arcs.on('click', function(d1) {
        subsequence.text(d1.subsequence);
        svg.selectAll('.arc').classed('selected', function(d2) {
          return d1.subsequence === d2.subsequence;
        }).classed('dimmed', function(d2) {
          return d1.subsequence !== d2.subsequence;
        });
        return d3.event.stopPropagation();
      });
      return svg.on('click', function() {
        subsequence.text('');
        return svg.selectAll('.arc').classed('selected', false).classed('dimmed', false);
      });
    });
  };

  redraw();

  d3.select('#sequence_input').on('change', function() {
    console.log('change');
    return redraw();
  });

}).call(this);

index.html

<!DOCTYPE html>
<html>
<head>
  <meta charset="utf-8">
  <title>Arc Diagram III</title>
  <link type="text/css" href="index.css" rel="stylesheet"/>
  <script src="//d3js.org/d3.v3.min.js"></script>
</head>
<body>
  <svg></svg>
  <div id="input_box">
    <label for="sequence_input">Sequence to be analyzed (press ENTER to redraw):</label>
    <input id="sequence_input" type="text" value="314159265358979323846264338327950288419716939937510582097494459230781640628620899862803482534">
  </div>
  <script src="index.js"></script>
</body>
</html>

algorithm.py

# Copyright (C) 2013 Oliver Mader <b52@reaktor42.de>

# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:

# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.

# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.

# -*- coding: utf-8 -*-

import naive_algorithm

from collections import defaultdict
from itertools import product, islice

from suffix_tree import SuffixTree

__all__ = [
    'maximal_matching_pairs',
    'repetition_regions',
    'essential_matching_pairs'
]


def maximal_matching_pairs(tree):
    """
    Find all substring pairs fulfilling the properties specified in
    definition 1, namely _identical_, _non-overlapping_, _consecutive_ and
    _maximal_.

    Args:
        tree (SuffixTree): A suffix tree, build from the string to be searched.

    Returns:
        generator. A generator of tuples, each describing one matching pair as
                   composed of the start of the first and the second substring,
                   as well as the length of the substring.
    """
    s = tree.string
    substrings = defaultdict(set)

    # get all substring starts repeated at least once
    for leaf in tree.leaves:
        node = leaf.parent
        end = leaf.start
        while node is not None and \
              (node.pathLabel not in substrings or end - len(node.pathLabel) \
                      not in substrings[node.pathLabel]) and \
              node.edgeLabel != '':
            for i in range(len(node.pathLabel)):
                substrings[node.pathLabel[:i+1]].add(end - len(node.pathLabel))
            end -= len(node.edgeLabel)
            node = node.parent

    def contained(x, y, l, largers):
        for large in largers:
            starts = substrings[large]
            idx = list(find_all(large, sub))
            for i,j in product(idx, idx):
                if (x - i) + len(large) <= (y - j) and (x - i) in starts and \
                   (y - j) in starts:
                    return True
        return False

    # apply constraints and yield legit pairs
    for sub in sorted(substrings, key=len, reverse=True):
        starts = sorted(substrings[sub])
        l = len(sub)
        cs = [k for k in substrings if len(k) > len(sub) and
              k.startswith(sub) and k.endswith(sub)]

        for x, y in zip(starts, islice(starts, 1, None)):
            # overlapping
            if x + l > y:
                continue

            # non maximal: left and right expansion
            if (x > 0 and x + l < y and s[x-1] == s[y-1]) or \
               (y + l < len(s) and x + l < y and s[x+l] == s[y+l]):
                continue

            # not maximal: inner and outer expansion
            if contained(x, y, l, cs):
                continue

            yield x, y, l


def repetition_regions(tree):
    """
    Find all repetition regions as specified in definition 2 and the following
    further limiting rules:

    2.1) _Minimal_: There does not exist any other repetition region R'
         containing R, with the fundamental substring P' containing P.
    2.2) _Leftmost_: There doesn't exist another repetition region R',
         originating from R shifted one character to the left, with equal
         length of the region and of the fundamental substring.

    Args:
        tree (SuffixTree): A suffix tree, build from the string to be searched.

    Returns:
        list. A list of tuples, each describing one repetition region build
              from multiple matching pairs. The tuples contain the start of
              the region, the end of the region not inclusive and the length
              of the fundamental substring, respectively.
    """
    return naive_algorithm.repetition_regions(tree.string)

    s = tree.string
    substrings = defaultdict(set)

    for leaf in tree.leaves:
        node = leaf.parent
        end = leaf.start
        while node is not None and \
              (node.pathLabel not in substrings or end - len(node.pathLabel) \
                      not in substrings[node.pathLabel]) and \
              node.edgeLabel != '':
            for i in range(len(node.pathLabel)):
                substrings[node.pathLabel[:i+1]].add(end - len(node.pathLabel))
            end -= len(node.edgeLabel)
            node = node.parent

    regions = []

    for sub in sorted(substrings, key=len):
        starts = sorted(substrings[sub])
        l = len(sub)

        a = 0
        while a < len(starts) - 1:
            f = starts[a]
            e = f + l
            b = a + 1
            while b < len(starts) and starts[b] - starts[b - 1] == l:
                e += l
                b += 1

            if b - a > 1:
                regions.append((f, e, l))

            a = b

    return regions


def essential_matching_pairs(string):
    """
    Find all essential matching pairs as specified in definition 3. These
    pairs might be used to build arc diagrams from.

    Args:
        string (str): The string to be searched.

    Returns:
        generator. Yields tuples, each describing one matching pair as composed
                   of the start of the first and the second substring, as well
                   as the length of the substring.
    """
    tree = SuffixTree(string)

    regions = repetition_regions(tree)

    # definition 3.1 and 3.2
    for x,y,l in maximal_matching_pairs(tree):
        if not any(x >= r and y + l <= e for r,e,_ in regions) or \
           any(int((x - r)/f) == int((y + l - r - 1)/f) for r,_,f in regions):
            yield x, y, l

    # definition 3.3
    for r,e,l in regions:
        for x in range(r, e - l, l):
            yield x, x + l, l


def children(node):
    """ Returns a generator to iterate all direct node children. """
    c = node.firstChild
    while c is not None:
        yield c
        c = c.next


def find_all(a_str, sub):
    """ Yield all occurences of a substring. """
    start = 0
    while True:
        start = a_str.find(sub, start)
        if start == -1: return
        yield start
        start += 1

# vim: set expandtab shiftwidth=4 softtabstop=4 textwidth=79:

algorithm.pyc

�
!SVc@s�ddlZddlmZddlmZmZddlmZdddgZd�Z	d	�Z
d
�Zd�Zd�Z
dS(
i����N(tdefaultdict(tproducttislice(t
SuffixTreetmaximal_matching_pairstrepetition_regionstessential_matching_pairsc
#s�|j}tt��x�|jD]�}|j}|j}x�|dk	r�|j�ksu|t|j��|jkr�|j	dkr�xFt
t|j��D]/}�|j|d j|t|j��q�W|t|j	�8}|j}q:WqW��fd�}xvt�dtdt
�D]\�t���}t��}g�D]B}	t|	�t��krE|	j��rE|	j��rE|	^qE}
x�t|t|dd��D]�\}}|||kr�q�n|dkr|||kr||d||dks�||t|�krK|||krK||||||krKq�n|||||
�rfq�n|||fVq�WqWdS(s�
    Find all substring pairs fulfilling the properties specified in
    definition 1, namely _identical_, _non-overlapping_, _consecutive_ and
    _maximal_.

    Args:
        tree (SuffixTree): A suffix tree, build from the string to be searched.

    Returns:
        generator. A generator of tuples, each describing one matching pair as
                   composed of the start of the first and the second substring,
                   as well as the length of the substring.
    tic	s�x�|D]�}�|}tt|���}x_t||�D]N\}}||t|�||kr<|||kr<|||kr<tSq<WqWtS(N(tlisttfind_allRtlentTruetFalse(	txtytltlargerstlargetstartstidxtitj(tsubt
substrings(s8/var/www/lab/workspace/1fc26ae44b5865700d22/algorithm.pyt	containedCs

.tkeytreverseiN(tstringRtsettleavestparenttstarttNonet	pathLabelR
t	edgeLabeltrangetaddtsortedRt
startswithtendswithtzipR(
ttreetstleaftnodetendRRRRtktcsR
R((RRs8/var/www/lab/workspace/1fc26ae44b5865700d22/algorithm.pyR%s8			-%*(8Bc	Cs�tj|j�S|j}tt�}x�|jD]�}|j}|j}x�|dk	r�|j	|ks�|t
|j	�||j	kr�|jdkr�xFtt
|j	��D]/}||j	|d j
|t
|j	��q�W|t
|j�8}|j}qJWq/Wg}x�t|dt
�D]�}t||�}	t
|�}
d}x�|t
|	�dkr�|	|}||
}
|d}xF|t
|	�kr�|	||	|d|
kr�|
|
7}
|d7}q|W||dkr�|j||
|
f�n|}qEWqW|S(sY
    Find all repetition regions as specified in definition 2 and the following
    further limiting rules:

    2.1) _Minimal_: There does not exist any other repetition region R'
         containing R, with the fundamental substring P' containing P.
    2.2) _Leftmost_: There doesn't exist another repetition region R',
         originating from R shifted one character to the left, with equal
         length of the region and of the fundamental substring.

    Args:
        tree (SuffixTree): A suffix tree, build from the string to be searched.

    Returns:
        list. A list of tuples, each describing one repetition region build
              from multiple matching pairs. The tuples contain the start of
              the region, the end of the region not inclusive and the length
              of the fundamental substring, respectively.
    RiRiN(tnaive_algorithmRRRRRRRR R!R
R"R#R$R%tappend(R)R*RR+R,R-RtregionsRRRtatftetb((s8/var/www/lab/workspace/1fc26ae44b5865700d22/algorithm.pyRes<			-


1
c#s�t|�}t|�}xst|�D]e\���t���fd�|D��syt���fd�|D��r%���fVq%q%WxJ|D]B\}}�x0t||���D]�����fVq�Wq�WdS(s�
    Find all essential matching pairs as specified in definition 3. These
    pairs might be used to build arc diagrams from.

    Args:
        string (str): The string to be searched.

    Returns:
        generator. Yields tuples, each describing one matching pair as composed
                   of the start of the first and the second substring, as well
                   as the length of the substring.
    c3s4|]*\}}}�|ko+��|kVqdS(N((t.0trR5t_(RR
R(s8/var/www/lab/workspace/1fc26ae44b5865700d22/algorithm.pys	<genexpr>�sc3sH|]>\}}}t�||�t��|d|�kVqdS(iN(tint(R7R8R9R4(RR
R(s8/var/www/lab/workspace/1fc26ae44b5865700d22/algorithm.pys	<genexpr>�sN(RRRtanyR#(RR)R2R8R5((RR
Rs8/var/www/lab/workspace/1fc26ae44b5865700d22/algorithm.pyR�s
#"ccs.|j}x|dk	r)|V|j}qWdS(s: Returns a generator to iterate all direct node children. N(t
firstChildR tnext(R,tc((s8/var/www/lab/workspace/1fc26ae44b5865700d22/algorithm.pytchildren�s	ccsHd}x;trC|j||�}|dkr1dS|V|d7}q	WdS(s& Yield all occurences of a substring. ii����Ni(Rtfind(ta_strRR((s8/var/www/lab/workspace/1fc26ae44b5865700d22/algorithm.pyR	�s	(R0tcollectionsRt	itertoolsRRtsuffix_treeRt__all__RRRR?R	(((s8/var/www/lab/workspace/1fc26ae44b5865700d22/algorithm.pyt<module>s		@	<		

index.coffee

svg = d3.select('svg')
width = svg.node().getBoundingClientRect().width
height = svg.node().getBoundingClientRect().height

# translate the viewBox to have (0,0) at the center of the vis
svg
 .attr
   viewBox: "#{-width/2} #{-height+60} #{width} #{height}"
  
width -= 60

redraw = () ->
  sequence = d3.select('#sequence_input').node().value
    
  x = d3.scale.ordinal()
    .domain([0..sequence.length])
    .rangeBands([-width/2,width/2])

  d3.json "main.php?sequence=#{sequence}", (matches) ->
    svg.selectAll('.character').remove()
    svg.selectAll('.arc').remove()
    svg.select('.subsequence').remove()
    
    characters = svg.selectAll('.character')
      .data(sequence)

    characters.enter().append('text')
      .text (d) -> d
      .attr
        class: 'character'
        x: (d,i) -> x(i) + x.rangeBand()/2
        dy: '1em'

    arcs = svg.selectAll('.arc')
      .data(matches.filter (m) -> m.length > 1)

    enter_arcs = arcs.enter().append('path')
      .attr
        class: 'arc'
        d: (match) ->
          start_left = x(match.source)
          start_right = x(match.source + match.length)
          end_left = x(match.target)
          end_right = x(match.target + match.length)
          big_radius = (end_right-start_left)/2
          small_radius = (end_left-start_right)/2
          return "M#{start_left} 0 A#{big_radius} #{big_radius} 0 1 1 #{end_right} 0 L#{end_left} 0 A#{small_radius} #{small_radius} 0 1 0 #{start_right} 0  z"

    # subsequence selection
    subsequence = svg.append 'text'
      .attr
        class: 'subsequence'
        y: -350

    enter_arcs.on 'click', (d1) ->
      subsequence
        .text d1.subsequence

      svg.selectAll('.arc')
        .classed 'selected', (d2) -> d1.subsequence is d2.subsequence
        .classed 'dimmed', (d2) -> d1.subsequence isnt d2.subsequence

      d3.event.stopPropagation()

    svg.on 'click', () ->
      subsequence
        .text ''

      svg.selectAll('.arc')
        .classed 'selected', false
        .classed 'dimmed', false

redraw()

d3.select('#sequence_input').on 'change', () ->
  console.log 'change'
  redraw()
  

index.css

body, html {
  padding: 0;
  margin: 0;
  height: 100%;
  font-family: sans-serif;
  font-size: 12px;
}
svg {
  width: 100%;
  height: 100%;
  background: white;
}
.character {
  text-anchor: middle;
  font-size: 16px;
  font-family: Georgia;
}
.arc {
  opacity: 0.2;
  cursor: pointer;
  stroke: white
}
.arc.selected {
  fill: steelblue;
  opacity: 0.4;
}
.arc.dimmed {
  opacity: 0.07;
}

.subsequence {
  fill: steelblue;
  text-anchor: middle;
  font-size: 40px;
  font-family: Georgia;
  text-transform: uppercase;
}
#input_box {
  position: absolute;
  top: 6px;
  left: 6px;
  width: 100%
}
#sequence_input {
  width: 50%;
}

main.php

<?php
  echo shell_exec('python main.py ' . $_GET['sequence']);
?>

main.py

import sys
import algorithm as arc
import json

sequence = sys.argv[1]

matches = arc.essential_matching_pairs(sequence)

result = [{'source': match[0], 'target': match[1], 'length': match[2], 'subsequence': sequence[match[0]:match[0]+match[2]]} for match in matches if match[2] > 1]

print json.dumps(result)

naive_algorithm.py

# Copyright (C) 2013 Oliver Mader <b52@reaktor42.de>

# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:

# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.

# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.

# -*- coding: utf-8 -*-

def maximal_matching_pairs(string):
    """
    Find all substring pairs fulfilling the properties specified in
    definition 1, namely _identical_, _non-overlapping_, _consecutive_ and
    _maximal_.

    Args:
        string (str): The string to be searched.

    Returns:
        generator. A generator of tuples, each describing one matching pair as
                   composed of the start of the first and the second substring,
                   as well as the length of the substring.
    """
    n = len(string)
    for x in range(0, n - 1):
        for l in range(int((n - x)/2) + 1, 0, -1):
            c = string[x:x+l]
            y = string.find(c, x + 1)

            # not found or not consecutive
            if y == -1 or y < x + l:
                continue

            # not maximal: left or right expansion
            if (y + l < n and x + l < y and string[x+l] == string[y+l]) or \
               (x - 1 >= 0 and x + l < y and string[x-1] == string[y-1]):
                continue

            # not maximal: inner expansion
            if any(string[x:x+l+i] == string[y-i:y+l]
                   for i in range(1, (y - x - l)/2 + 1)):
                continue

            # not maximal: outer expansion
            if any(string[x-i:x+l] == string[y:y+l+i]
                   for i in range(1, max(x, n - y - l) + 1)):
                continue

            yield x, y, l


def repetition_regions(string):
    """
    Find all repetition regions as specified in definition 2 and the following
    further limiting rules:

    2.1) _Minimal_: There doesn't exist any other repetition region R'
         containing R, with an equal or smaller fundamental substring P',
         contained in P.
    2.2) _Leftmost_: There doesn't exist another repetition region R',
         originating from R shifted one character to the left, with equal
         length of the region and of the fundamental substring.

    Args:
        string (str): The string to be searched.

    Returns:
        list. A list of tuples, each describing one repetition region build
              from multiple matching pairs. The tuples contain the start of
              the region, the end of the region not inclusive and the length
              of the fundamental substring, respectively.
    """
    n = len(string)
    s = 0
    regions = []
    while s < n - 1:
        for l in range(1, (n - s)/2 + 1):
            candidate = string[s:s+l]

            end = s + l
            while string.find(candidate, end, end + l) == end:
                end += l

            # not enough pairs
            if end == s + l:
                continue

            # not left most
            if (s-1, end-1, len(candidate)) in regions:
                continue

            # not minimal
            if any(r[0] <= s and r[1] >= end and r[2] <= len(candidate)
                   for r in regions):
                continue

            regions.append((s, end, len(candidate)))
        s += 1
    return regions


def essential_matching_pairs(string):
    """
    Find all essential matching pairs as specified in definition 3. These
    pairs might be used to build arc diagrams from.

    Args:
        string (str): The string to be searched.

    Returns:
        generator. Yields tuples, each describing one matching pair as composed
                   of the start of the first and the second substring, as well
                   as the length of the substring.
    """
    regions = repetition_regions(string)

    # definition 3.1 and 3.2
    for x,y,l in maximal_matching_pairs(string):
        if not any(x >= r and y + l <= e for r,e,_ in regions) or \
           any(int((x - r)/f) == int((y + l - r - 1)/f) for r,_,f in regions):
            yield (x, y, l)

    # definition 3.3
    for r,e,l in regions:
        for x in range(r, e - l, l):
            yield (x, x + l, l)

# vim: set expandtab shiftwidth=4 softtabstop=4 textwidth=79:

naive_algorithm.pyc

�
!SVc@sd�Zd�Zd�ZdS(c#s�t��}x�td|d�D]��x�tt|�d�ddd�D]h�����!}�j|�d���dksK���kr�qKn��|kr����kr�������ksK�ddkr���kr��d��dkrqKnt����fd�td���dd�D��r\qKnt����fd�tdt�|���d�D��r�qKn���fVqKWq WdS(s�
    Find all substring pairs fulfilling the properties specified in
    definition 1, namely _identical_, _non-overlapping_, _consecutive_ and
    _maximal_.

    Args:
        string (str): The string to be searched.

    Returns:
        generator. A generator of tuples, each describing one matching pair as
                   composed of the start of the first and the second substring,
                   as well as the length of the substring.
    iiii����c3s9|]/}����|!��|��!kVqdS(N((t.0ti(tltstringtxty(s>/var/www/lab/workspace/1fc26ae44b5865700d22/naive_algorithm.pys	<genexpr>5sc3s9|]/}��|��!����|!kVqdS(N((RR(RRRR(s>/var/www/lab/workspace/1fc26ae44b5865700d22/naive_algorithm.pys	<genexpr>:sN(tlentrangetinttfindtanytmax(Rtntc((RRRRs>/var/www/lab/workspace/1fc26ae44b5865700d22/naive_algorithm.pytmaximal_matching_pairss"+<<&+cs1t|�}d�g}x�|dkr,x�td|�dd�D]�}|��|!��|�x-|j���|��kr��|7�qkW��|kr�qGn�d�dt��f|kr�qGnt���fd�|D��r�qGn|j��t��f�qGW�d7�qW|S(sU
    Find all repetition regions as specified in definition 2 and the following
    further limiting rules:

    2.1) _Minimal_: There doesn't exist any other repetition region R'
         containing R, with an equal or smaller fundamental substring P',
         contained in P.
    2.2) _Leftmost_: There doesn't exist another repetition region R',
         originating from R shifted one character to the left, with equal
         length of the region and of the fundamental substring.

    Args:
        string (str): The string to be searched.

    Returns:
        list. A list of tuples, each describing one repetition region build
              from multiple matching pairs. The tuples contain the start of
              the region, the end of the region not inclusive and the length
              of the fundamental substring, respectively.
    iiic3sE|];}|d�ko<|d�ko<|dt��kVqdS(iiiN(R(Rtr(t	candidatetendts(s>/var/www/lab/workspace/1fc26ae44b5865700d22/naive_algorithm.pys	<genexpr>js(RRR	R
tappend(RRtregionsR((RRRs>/var/www/lab/workspace/1fc26ae44b5865700d22/naive_algorithm.pytrepetition_regionsAs&"
"#
 c#s�t|�}xst|�D]e\���t���fd�|D��smt���fd�|D��r���fVqqWxJ|D]B\}}�x0t||���D]�����fVq�Wq�WdS(s�
    Find all essential matching pairs as specified in definition 3. These
    pairs might be used to build arc diagrams from.

    Args:
        string (str): The string to be searched.

    Returns:
        generator. Yields tuples, each describing one matching pair as composed
                   of the start of the first and the second substring, as well
                   as the length of the substring.
    c3s4|]*\}}}�|ko+��|kVqdS(N((RRtet_(RRR(s>/var/www/lab/workspace/1fc26ae44b5865700d22/naive_algorithm.pys	<genexpr>�sc3sH|]>\}}}t�||�t��|d|�kVqdS(iN(R(RRRtf(RRR(s>/var/www/lab/workspace/1fc26ae44b5865700d22/naive_algorithm.pys	<genexpr>�sN(RRR
R(RRRR((RRRs>/var/www/lab/workspace/1fc26ae44b5865700d22/naive_algorithm.pytessential_matching_pairsss
#"N(RRR(((s>/var/www/lab/workspace/1fc26ae44b5865700d22/naive_algorithm.pyt<module>s	*	2