将文本拆分为相等长度的字符串,保持字不变

时间:2023-01-14 21:40:08

I have this code that breaks longer lines into an array of equal-length strings while keeping words intact. It also takes formatting such as [[u;#fff;]some text] into account, and it splits the text so that each string can be converted into HTML independently:

我有这个代码,将较长的行分成等长字符串的数组,保持单词,它还考虑了[[u; #fff;]某些文本]的格式,它分割文本,因此每个字符串可以独立转换为html:

// Matches one formatting span of the form [[<flags>;<field>;<field>...]<text>]
// where <flags> is any combination of the characters ! g b i u s o.
// Capture 1 is the format specification (everything between "[[" and the first
// "]"), capture 2 is the text that follows it; the final closing "]" is optional.
var format_re = /\[\[([!gbiuso]*;[^;\]]*;[^;\]]*(?:;|[^\]()]*);?[^\]]*)\]([^\]]*\\\][^\]]*|[^\]]*|[^\[]*\[[^\]]*)\]?/gi;
// Captures the opening part of a formatting span: "[[<flags>;...;...]".
var format_begin_re = /(\[\[[!gbiuso]*;[^;]*;[^\]]*\])/i;
// Matches an opening formatting tag located at the very end of a string
// (its closing "]" may be missing).
var format_last_re = /\[\[[!gbiuso]*;[^;]*;[^\]]*\]?$/i;
/**
 * Split a string into chunks of `length` counted characters, keeping
 * [[...]...] formatting spans valid in every chunk.
 *
 * Characters that belong to the format specification are not counted, and an
 * HTML entity (&...;) is treated as one character. When a formatting span is
 * cut by a chunk boundary, the chunk is closed with "]" and the opening tag is
 * re-inserted at the beginning of the next chunk.
 *
 * @param {string} str - input text; "\n" separates lines that are processed
 *     one by one.
 * @param {number} length - number of counted characters per chunk.
 * @param {boolean} words - when truthy, chunks are cut at the last seen space
 *     (" " or "&nbsp;") where possible and surrounding whitespace is trimmed.
 * @returns {string[]} the resulting chunks; an empty input line yields ''.
 * @throws {Error} when a "&" is not followed by a terminating ";".
 */
$.terminal.split_equal = function(str, length, words) {
  // true from the "[[" that opens a span until the "]" that closes its text
  var formatting = false;
  // true while inside the text part of a span (after the first "]")
  var in_text = false;
  // opening tag carried over to the next chunk when a span was cut
  var prev_format = '';
  var result = [];
  // add format text as 5th parameter to formatting it's used for
  // data attribute in format function
  var array = str.replace(format_re, function(_, format, text) {
    var semicolons = format.match(/;/g).length;
    // missing semicolons
    // (pad the specification so that the appended text lands in the 5th field)
    if (semicolons == 2) {
      semicolons = ';;';
    } else if (semicolons == 3) {
      semicolons = ';';
    } else {
      semicolons = '';
    }
    // return '[[' + format + ']' + text + ']';
    // closing bracket will break formatting so we need to escape
    // those using html entity equivalent
    // NOTE(review): as written, "\]" is replaced with a plain "]", not with an
    // entity - confirm against the intended source.
    return '[[' + format + semicolons +
      text.replace(/\\\]/g, ']').replace(/\n/g, '\\n') + ']' +
      text + ']';
  }).split(/\n/g);
  for (var i = 0, len = array.length; i < len; ++i) {
    if (array[i] === '') {
      result.push('');
      continue;
    }
    var line = array[i];
    // index in `line` where the current chunk starts
    var first_index = 0;
    // counted characters collected for the current chunk
    var count = 0;
    // index just after the last seen space, -1 when there is none
    var space = -1;
    for (var j=0, jlen=line.length; j<jlen; ++j) {
      if (line[j] === '[' && line[j+1] === '[') {
        formatting = true;
      } else if (formatting && line[j] === ']') {
        // first "]" ends the format specification, second one ends the span
        if (in_text) {
          formatting = false;
          in_text = false;
        } else {
          in_text = true;
        }
      } else if ((formatting && in_text) || !formatting) {
        if (line[j] === '&') { // treat entity as one character
          var m = line.substring(j).match(/^(&[^;]+;)/);
          if (!m) {
            // should never happen if used by terminal,
            // because it always calls $.terminal.encode
            // before this function
            throw new Error("Unclosed html entity in line " +
                            (i+1) + ' at char ' + (j+1));
          }
          j+=m[1].length-2; // because continue adds 1 to j
          // if entity is at the end there is no next loop
          // issue #77
          // NOTE(review): `output` is the hoisted var declared further down;
          // here it still holds the value from the previously emitted chunk
          // (or undefined) - confirm this is intended.
          if (j === jlen-1) {
            result.push(output + m[1]);
          }
          continue;
        } else if (line[j] === ']' && line[j-1] === '\\') {
          // escape \] counts as one character
          --count;
        } else {
          ++count;
        }
      }
      // true when the text directly before position j is "&nbsp;" or " "
      // (declared inside the loop; reads the current `line` and `j`)
      function is_space() {
        return line.substring(j-6, j) == '&nbsp;' ||
          line.substring(j-1, j) == ' ';
      }
      if (is_space() && ((formatting && in_text) || !formatting)) {
        space = j;
      }
      // emit a chunk when enough characters were counted or the line ended
      if ((count === length || j === jlen-1) &&
          ((formatting && in_text) || !formatting)) {
        var output;
        // look ahead from the last space to decide whether the upcoming text
        // contains whitespace; jQuery is used to strip markup/decode entities
        var after = line.substring(space, j+length+1);
        var text = $('<span>' + after + '</span>').text();
        var can_break = text.match(/\s/);
        if (words && space != -1 && j !== jlen-1 && can_break) {
          // get text to last space
          output = line.substring(first_index, space);
          // rewind so that scanning resumes at the last space
          j = space-1;
          space = -1;
        } else {
          output = line.substring(first_index, j+1);
        }
        if (words) {
          output = output.replace(/^(&nbsp;|\s)+|(&nbsp;|\s)+$/g, '');
        }
        first_index = j+1;
        count = 0;
        if (prev_format) {
          // re-open the span that was cut by the previous chunk
          output = prev_format + output;
          // the carried tag is dropped once this chunk contains a "]"
          if (output.match(']')) {
            prev_format = '';
          }
        }
        // Fix output if formatting not closed
        var matched = output.match(format_re);
        if (matched) {
          var last = matched[matched.length-1];
          if (last[last.length-1] !== ']') {
            // last span is unterminated: remember its tag and close it here
            prev_format = last.match(format_begin_re)[1];
            output += ']';
          } else if (output.match(format_last_re)) {
            // chunk ends with a bare opening tag: move it to the next chunk
            // NOTE(review): line_len is not read anywhere in the visible code.
            var line_len = output.length;
            // why this line ???
            //var f_len = line_len-last[last.length-1].length;
            output = output.replace(format_last_re, '');
            prev_format = last.match(format_begin_re)[1];
          }
        }
        result.push(output);
      }
    }
  }
  return result;
};

It works almost correctly, but some lines are shorter than they should be, for example:

它工作得几乎正确,但有些线路比它应该更短:

is cracker.The term

In this FIDDLE, it works correctly when you strip the formatting by checking the checkbox. I have worked on this for a couple of hours and have no clue why that line is shorter; any help would be much appreciated.

在这个FIDDLE中,当你去除格式化,检查复选框时,它可以正常工作。我在这工作了几个小时,并且不知道为什么这条线更短,任何帮助都将非常感激。

2 个解决方案

#1


4  

Here's how to fix the original code:

以下是修复原始代码的方法:

Add the following after line 40:

在第40行之后添加以下内容:

in_text = false;

The in_text flag is used by the code to determine if the current position is in regular text. However, the code was not clearing the flag when it entered a region of formatting markup. This was the cause of the main issue described in the question: the ultra-short line.

代码使用in_text标志来确定当前位置是否是常规文本。但是,当它进入格式化标记区域时,它没有清除标记。这是超短线问题中描述的主要问题的原因。

Change the if statement at line 76/77 to:

将第76/77行的if语句更改为:

if (is_space() && ((formatting && in_text) || !formatting || (line[j] === '[' && line[j+1] === '['))) {

This takes care of a lesser problem where line breaks were not happening on spaces between regular text and formatted text.

这解决了在常规文本和格式化文本之间的空格上没有发生换行的较小问题。

Working fiddle here: https://jsfiddle.net/2w10xp3m/1/

在这里工作小提琴:https://jsfiddle.net/2w10xp3m/1/

#2


4  

I think I've solved the problem using a much simpler approach. First break up all words, then re-assemble the lines while keeping track of the current format. See JsFiddle.

我想我已经用一种更简单的方法解决了这个问题。首先分解所有单词,然后重新组合行,同时跟踪当前格式。见JsFiddle。

JavaScript

/**
 * Break `str` into lines of at most `length` visible characters without
 * splitting words, re-applying the active [[...] format to every word so that
 * each resulting line can be rendered on its own.
 *
 * Words are delimited by "&nbsp;" in the input and joined with a plain space
 * in the output. A single word longer than `length` is kept whole on its own
 * line.
 *
 * @param {string} str - text in which words are separated by "&nbsp;".
 * @param {number} length - maximum number of visible characters per line
 *     (formatting markup is not counted).
 * @param {boolean} words - kept for interface compatibility; this
 *     implementation always breaks on word boundaries.
 * @returns {string[]} the assembled lines.
 */
$.terminal.split_equal = function(str, length, words) {
  var result = [],
    currentFormat = null,
    currentLine = '',
    currentLineLengthWithoutFormatting = 0;

  // 1. Split words on &nbsp; (a local is used so the `words` argument is
  //    not overwritten)
  var tokens = str.split(/&nbsp;/g);

  // 2. Re-assemble lines while keeping track of current formats
  tokens.forEach(function(word) {
    // Keep track of current format
    var format = word.match(/^\[\[([^\]]+)\]/g),
      wordWithFormatting, wordLength, separatorLength;
    if (format !== null && format[0]) {
      currentFormat = format[0];
      word = word.slice(format[0].length);
    }
    // Apply current format to each word separately
    wordLength = word.length;
    wordWithFormatting = (currentFormat || '') + word;
    if (currentFormat) {
      if (word.indexOf(']') !== -1) {
        // the closing bracket is markup, not a visible character
        wordLength--;
        currentFormat = null;
      } else {
        wordWithFormatting += ']';
      }
    }
    // Assemble line; the joining space counts towards the line length,
    // otherwise a line could end up one character longer than `length`
    separatorLength = currentLineLengthWithoutFormatting > 0 ? 1 : 0;
    if (currentLineLengthWithoutFormatting + separatorLength + wordLength <=
        length) {
      // Word still fits on current line
      if (separatorLength > 0) {
        currentLine += ' ';
        currentLineLengthWithoutFormatting++;
      }
    } else {
      // Need to start new line; skip the push when nothing was collected
      // yet (over-long first word) so no spurious empty line is emitted
      if (currentLine !== '') {
        result.push(currentLine);
      }
      currentLine = '';
      currentLineLengthWithoutFormatting = 0;
    }

    currentLine += wordWithFormatting;
    currentLineLengthWithoutFormatting += wordLength;
  });

  if (currentLineLengthWithoutFormatting > 0) {
    result.push(currentLine);
  }

  return result;
};

#1


4  

Here's how to fix the original code:

以下是修复原始代码的方法:

Add the following after line 40:

在第40行之后添加以下内容:

in_text = false;

The in_text flag is used by the code to determine if the current position is in regular text. However, the code was not clearing the flag when it entered a region of formatting markup. This was the cause of the main issue described in the question: the ultra-short line.

代码使用in_text标志来确定当前位置是否是常规文本。但是,当它进入格式化标记区域时,它没有清除标记。这是超短线问题中描述的主要问题的原因。

Change the if statement at line 76/77 to:

将第76/77行的if语句更改为:

if (is_space() && ((formatting && in_text) || !formatting || (line[j] === '[' && line[j+1] === '['))) {

This takes care of a lesser problem where line breaks were not happening on spaces between regular text and formatted text.

这解决了在常规文本和格式化文本之间的空格上没有发生换行的较小问题。

Working fiddle here: https://jsfiddle.net/2w10xp3m/1/

在这里工作小提琴:https://jsfiddle.net/2w10xp3m/1/

#2


4  

I think I've solved the problem using a much simpler approach. First break up all words, then re-assemble the lines while keeping track of the current format. See JsFiddle.

我想我已经用一种更简单的方法解决了这个问题。首先分解所有单词,然后重新组合行,同时跟踪当前格式。见JsFiddle。

JavaScript

/**
 * Break `str` into lines of at most `length` visible characters without
 * splitting words, re-applying the active [[...] format to every word so that
 * each resulting line can be rendered on its own.
 *
 * Words are delimited by "&nbsp;" in the input and joined with a plain space
 * in the output. A single word longer than `length` is kept whole on its own
 * line.
 *
 * @param {string} str - text in which words are separated by "&nbsp;".
 * @param {number} length - maximum number of visible characters per line
 *     (formatting markup is not counted).
 * @param {boolean} words - kept for interface compatibility; this
 *     implementation always breaks on word boundaries.
 * @returns {string[]} the assembled lines.
 */
$.terminal.split_equal = function(str, length, words) {
  var result = [],
    currentFormat = null,
    currentLine = '',
    currentLineLengthWithoutFormatting = 0;

  // 1. Split words on &nbsp; (a local is used so the `words` argument is
  //    not overwritten)
  var tokens = str.split(/&nbsp;/g);

  // 2. Re-assemble lines while keeping track of current formats
  tokens.forEach(function(word) {
    // Keep track of current format
    var format = word.match(/^\[\[([^\]]+)\]/g),
      wordWithFormatting, wordLength, separatorLength;
    if (format !== null && format[0]) {
      currentFormat = format[0];
      word = word.slice(format[0].length);
    }
    // Apply current format to each word separately
    wordLength = word.length;
    wordWithFormatting = (currentFormat || '') + word;
    if (currentFormat) {
      if (word.indexOf(']') !== -1) {
        // the closing bracket is markup, not a visible character
        wordLength--;
        currentFormat = null;
      } else {
        wordWithFormatting += ']';
      }
    }
    // Assemble line; the joining space counts towards the line length,
    // otherwise a line could end up one character longer than `length`
    separatorLength = currentLineLengthWithoutFormatting > 0 ? 1 : 0;
    if (currentLineLengthWithoutFormatting + separatorLength + wordLength <=
        length) {
      // Word still fits on current line
      if (separatorLength > 0) {
        currentLine += ' ';
        currentLineLengthWithoutFormatting++;
      }
    } else {
      // Need to start new line; skip the push when nothing was collected
      // yet (over-long first word) so no spurious empty line is emitted
      if (currentLine !== '') {
        result.push(currentLine);
      }
      currentLine = '';
      currentLineLengthWithoutFormatting = 0;
    }

    currentLine += wordWithFormatting;
    currentLineLengthWithoutFormatting += wordLength;
  });

  if (currentLineLengthWithoutFormatting > 0) {
    result.push(currentLine);
  }

  return result;
};