diff --git a/lib/rdoc/cross_reference.rb b/lib/rdoc/cross_reference.rb index 1f33538b73..2251f24c9b 100644 --- a/lib/rdoc/cross_reference.rb +++ b/lib/rdoc/cross_reference.rb @@ -1,7 +1,5 @@ # frozen_string_literal: true -require_relative 'markup/attribute_manager' # for PROTECT_ATTR - ## # RDoc::CrossReference is a reusable way to create cross references for names. @@ -33,7 +31,7 @@ class RDoc::CrossReference # See CLASS_REGEXP_STR METHOD_REGEXP_STR = /( - (?!\d)[\w#{RDoc::Markup::AttributeManager::PROTECT_ATTR}]+[!?=]?| + (?!\d)[\w]+[!?=]?| %|=(?:==?|~)|![=~]|\[\]=?|<(?:<|=>?)?|>[>=]?|[-+!]@?|\*\*?|[\/%\`|&^~] )#{METHOD_ARGS_REGEXP_STR}/.source.delete("\n ").freeze diff --git a/lib/rdoc/markup.rb b/lib/rdoc/markup.rb index 8307874461..379db63bdb 100644 --- a/lib/rdoc/markup.rb +++ b/lib/rdoc/markup.rb @@ -79,7 +79,7 @@ # # class WikiHtml < RDoc::Markup::ToHtml # def handle_regexp_WIKIWORD(target) -# "" + target.text + "" +# "" + target + "" # end # end # @@ -110,10 +110,10 @@ class RDoc::Markup - ## - # An AttributeManager which handles inline markup. + # Array of regexp handling pattern and its name. A regexp handling + # sequence is something like a WikiWord - attr_reader :attribute_manager + attr_reader :regexp_handlings ## # Parses +str+ into an RDoc::Markup::Document. @@ -148,27 +148,11 @@ def self.parse(str) # structure (paragraphs, lists, and so on). Invoke an event handler as we # identify significant chunks. - def initialize(attribute_manager = nil) - @attribute_manager = attribute_manager || RDoc::Markup::AttributeManager.new + def initialize + @regexp_handlings = [] @output = nil end - ## - # Add to the sequences used to add formatting to an individual word (such - # as *bold*). Matching entries will generate attributes that the output - # formatters can recognize by their +name+. - - def add_word_pair(start, stop, name) - @attribute_manager.add_word_pair(start, stop, name) - end - - ## - # Add to the sequences recognized as general markup. - - def add_html(tag, name) - @attribute_manager.add_html(tag, name) - end - ## # Add to other inline sequences. For example, we could add WikiWords using # something like: @@ -178,7 +162,7 @@ def add_html(tag, name) # Each wiki word will be presented to the output formatter. def add_regexp_handling(pattern, name) - @attribute_manager.add_regexp_handling(pattern, name) + @regexp_handlings << [pattern, name] end ## @@ -197,15 +181,9 @@ def convert(input, formatter) end autoload :Parser, "#{__dir__}/markup/parser" + autoload :InlineParser, "#{__dir__}/markup/inline_parser" autoload :PreProcess, "#{__dir__}/markup/pre_process" - # Inline markup classes - autoload :AttrChanger, "#{__dir__}/markup/attr_changer" - autoload :AttrSpan, "#{__dir__}/markup/attr_span" - autoload :Attributes, "#{__dir__}/markup/attributes" - autoload :AttributeManager, "#{__dir__}/markup/attribute_manager" - autoload :RegexpHandling, "#{__dir__}/markup/regexp_handling" - # RDoc::Markup AST autoload :BlankLine, "#{__dir__}/markup/blank_line" autoload :BlockQuote, "#{__dir__}/markup/block_quote" diff --git a/lib/rdoc/markup/attr_changer.rb b/lib/rdoc/markup/attr_changer.rb deleted file mode 100644 index e5ba470bb6..0000000000 --- a/lib/rdoc/markup/attr_changer.rb +++ /dev/null @@ -1,22 +0,0 @@ -# frozen_string_literal: true -class RDoc::Markup - - AttrChanger = Struct.new :turn_on, :turn_off # :nodoc: - -end - -## -# An AttrChanger records a change in attributes. It contains a bitmap of the -# attributes to turn on, and a bitmap of those to turn off. - -class RDoc::Markup::AttrChanger - - def to_s # :nodoc: - "Attr: +#{turn_on}/-#{turn_off}" - end - - def inspect # :nodoc: - '+%d/-%d' % [turn_on, turn_off] - end - -end diff --git a/lib/rdoc/markup/attr_span.rb b/lib/rdoc/markup/attr_span.rb deleted file mode 100644 index f1fabf1c3b..0000000000 --- a/lib/rdoc/markup/attr_span.rb +++ /dev/null @@ -1,35 +0,0 @@ -# frozen_string_literal: true -## -# An array of attributes which parallels the characters in a string. - -class RDoc::Markup::AttrSpan - - ## - # Creates a new AttrSpan for +length+ characters - - def initialize(length, exclusive) - @attrs = Array.new(length, 0) - @exclusive = exclusive - end - - ## - # Toggles +bits+ from +start+ to +length+ - def set_attrs(start, length, bits) - updated = false - for i in start ... (start+length) - if (@exclusive & @attrs[i]) == 0 || (@exclusive & bits) != 0 - @attrs[i] |= bits - updated = true - end - end - updated - end - - ## - # Accesses flags for character +n+ - - def [](n) - @attrs[n] - end - -end diff --git a/lib/rdoc/markup/attribute_manager.rb b/lib/rdoc/markup/attribute_manager.rb deleted file mode 100644 index 79df273c13..0000000000 --- a/lib/rdoc/markup/attribute_manager.rb +++ /dev/null @@ -1,432 +0,0 @@ -# frozen_string_literal: true - -## -# Manages changes of attributes in a block of text - -class RDoc::Markup::AttributeManager - unless ::MatchData.method_defined?(:match_length) - using ::Module.new { - refine(::MatchData) { - def match_length(nth) # :nodoc: - b, e = offset(nth) - e - b if b - end - } - } - end - - ## - # The NUL character - - NULL = "\000".freeze - - #-- - # We work by substituting non-printing characters in to the text. For now - # I'm assuming that I can substitute a character in the range 0..8 for a 7 - # bit character without damaging the encoded string, but this might be - # optimistic - #++ - - A_PROTECT = 004 # :nodoc: - - ## - # Special mask character to prevent inline markup handling - - PROTECT_ATTR = A_PROTECT.chr # :nodoc: - - ## - # The attributes enabled for this markup object. - - attr_reader :attributes - - ## - # This maps delimiters that occur around words (such as *bold* or +tt+) - # where the start and end delimiters and the same. This lets us optimize - # the regexp - - attr_reader :matching_word_pairs - - ## - # And this is used when the delimiters aren't the same. In this case the - # hash maps a pattern to the attribute character - - attr_reader :word_pair_map - - ## - # This maps HTML tags to the corresponding attribute char - - attr_reader :html_tags - - ## - # A \ in front of a character that would normally be processed turns off - # processing. We do this by turning \< into <#{PROTECT} - - attr_reader :protectable - - ## - # And this maps _regexp handling_ sequences to a name. A regexp handling - # sequence is something like a WikiWord - - attr_reader :regexp_handlings - - ## - # A bits of exclusive maps - attr_reader :exclusive_bitmap - - ## - # Creates a new attribute manager that understands bold, emphasized and - # teletype text. - - def initialize - @html_tags = {} - @matching_word_pairs = {} - @protectable = %w[<] - @regexp_handlings = [] - @word_pair_map = {} - @exclusive_bitmap = 0 - @attributes = RDoc::Markup::Attributes.new - - add_word_pair "*", "*", :BOLD, true - add_word_pair "_", "_", :EM, true - add_word_pair "+", "+", :TT, true - add_word_pair "`", "`", :TT, true - - add_html "em", :EM, true - add_html "i", :EM, true - add_html "b", :BOLD, true - add_html "tt", :TT, true - add_html "code", :TT, true - add_html "s", :STRIKE, true - add_html "del", :STRIKE, true - - @word_pair_chars = @matching_word_pairs.keys.join - - # Matches a word pair delimiter (*, _, +, `) that is NOT already protected. - # Used by #protect_code_markup to escape delimiters inside / tags. - @unprotected_word_pair_regexp = /([#{@word_pair_chars}])(?!#{PROTECT_ATTR})/ - end - - ## - # Return an attribute object with the given turn_on and turn_off bits set - - def attribute(turn_on, turn_off) - RDoc::Markup::AttrChanger.new turn_on, turn_off - end - - ## - # Changes the current attribute from +current+ to +new+ - - def change_attribute(current, new) - diff = current ^ new - attribute(new & diff, current & diff) - end - - ## - # Used by the tests to change attributes by name from +current_set+ to - # +new_set+ - - def changed_attribute_by_name(current_set, new_set) - current = new = 0 - current_set.each do |name| - current |= @attributes.bitmap_for(name) - end - - new_set.each do |name| - new |= @attributes.bitmap_for(name) - end - - change_attribute(current, new) - end - - ## - # Copies +start_pos+ to +end_pos+ from the current string - - def copy_string(start_pos, end_pos) - res = @str[start_pos...end_pos] - res.gsub!(/\000/, '') - res - end - - # :nodoc: - def exclusive?(attr) - (attr & @exclusive_bitmap) != 0 - end - - NON_PRINTING_START = "\1" # :nodoc: - NON_PRINTING_END = "\2" # :nodoc: - - ## - # Map attributes like textto the sequence - # \001\002\001\003, where is a per-attribute specific - # character - - def convert_attrs(str, attrs, exclusive = false) - convert_attrs_matching_word_pairs(str, attrs, exclusive) - convert_attrs_word_pair_map(str, attrs, exclusive) - end - - # :nodoc: - def convert_attrs_matching_word_pairs(str, attrs, exclusive) - # first do matching ones - tags = @matching_word_pairs.select { |start, bitmap| - exclusive == exclusive?(bitmap) - }.keys - return if tags.empty? - tags = "[#{tags.join("")}](?!#{PROTECT_ATTR})" - all_tags = "[#{@word_pair_chars}](?!#{PROTECT_ATTR})" - - re = /(?:^|\W|#{all_tags})\K(#{tags})(\1*[#\\]?[\w:#{PROTECT_ATTR}.\/\[\]-]+?\S?)\1(?!\1)(?=#{all_tags}|\W|$)/ - - 1 while str.gsub!(re) { |orig| - a, w = (m = $~).values_at(1, 2) - attr = @matching_word_pairs[a] - if attrs.set_attrs(m.begin(2), w.length, attr) - a = NULL * a.length - else - a = NON_PRINTING_START + a + NON_PRINTING_END - end - a + w + a - } - str.delete!(NON_PRINTING_START + NON_PRINTING_END) - end - - # :nodoc: - def convert_attrs_word_pair_map(str, attrs, exclusive) - # then non-matching - unless @word_pair_map.empty? then - @word_pair_map.each do |regexp, attr| - next unless exclusive == exclusive?(attr) - 1 while str.gsub!(regexp) { |orig| - w = (m = ($~))[2] - updated = attrs.set_attrs(m.begin(2), w.length, attr) - if updated - NULL * m.match_length(1) + w + NULL * m.match_length(3) - else - orig - end - } - end - end - end - - ## - # Converts HTML tags to RDoc attributes - - def convert_html(str, attrs, exclusive = false) - tags = @html_tags.select { |start, bitmap| - exclusive == exclusive?(bitmap) - }.keys.join '|' - - 1 while str.gsub!(/<(#{tags})>(.*?)<\/\1>/i) { |orig| - attr = @html_tags[$1.downcase] - html_length = $~.match_length(1) + 2 # "<>".length - seq = NULL * html_length - attrs.set_attrs($~.begin(2), $~.match_length(2), attr) - seq + $2 + seq + NULL - } - end - - ## - # Converts regexp handling sequences to RDoc attributes - - def convert_regexp_handlings(str, attrs, exclusive = false) - @regexp_handlings.each do |regexp, attribute| - next unless exclusive == exclusive?(attribute) - str.scan(regexp) do - capture = $~.size == 1 ? 0 : 1 - - s, e = $~.offset capture - - attrs.set_attrs s, e - s, attribute | @attributes.regexp_handling - end - end - end - - ## - # Escapes regexp handling sequences of text to prevent conversion to RDoc - - def mask_protected_sequences - # protect __send__, __FILE__, etc. - @str.gsub!(/__([a-z]+)__/i, - "_#{PROTECT_ATTR}_#{PROTECT_ATTR}\\1_#{PROTECT_ATTR}_#{PROTECT_ATTR}") - @str.gsub!(/(\A|[^\\])\\([#{Regexp.escape @protectable.join}])/m, - "\\1\\2#{PROTECT_ATTR}") - @str.gsub!(/\\(\\[#{Regexp.escape @protectable.join}])/m, "\\1") - end - - ## - # Protects word pair delimiters (*, _, +) inside - # and tags from being processed as inline formatting. - # For example, *bold* in +*bold*+ will NOT be rendered as bold. - - def protect_code_markup - @str.gsub!(/<(code|tt)>(.*?)<\/\1>/im) do - tag = $1 - content = $2 - # Protect word pair delimiters (*, _, +) from being processed - escaped = content.gsub(@unprotected_word_pair_regexp, "\\1#{PROTECT_ATTR}") - # Protect HTML-like tags from being processed (e.g., inside code) - escaped = escaped.gsub(/<(?!#{PROTECT_ATTR})/, "<#{PROTECT_ATTR}") - "<#{tag}>#{escaped}" - end - end - - ## - # Unescapes regexp handling sequences of text - - def unmask_protected_sequences - @str.gsub!(/(.)#{PROTECT_ATTR}/, "\\1\000") - end - - ## - # Adds a markup class with +name+ for words wrapped in the +start+ and - # +stop+ character. To make words wrapped with "*" bold: - # - # am.add_word_pair '*', '*', :BOLD - - def add_word_pair(start, stop, name, exclusive = false) - raise ArgumentError, "Word flags may not start with '<'" if - start[0, 1] == '<' - - bitmap = @attributes.bitmap_for name - - if start == stop then - @matching_word_pairs[start] = bitmap - else - pattern = /(#{Regexp.escape start})(\S+)(#{Regexp.escape stop})/ - @word_pair_map[pattern] = bitmap - end - - @protectable << start[0, 1] - @protectable.uniq! - - @exclusive_bitmap |= bitmap if exclusive - end - - ## - # Adds a markup class with +name+ for words surrounded by HTML tag +tag+. - # To process emphasis tags: - # - # am.add_html 'em', :EM - - def add_html(tag, name, exclusive = false) - bitmap = @attributes.bitmap_for name - @html_tags[tag.downcase] = bitmap - @exclusive_bitmap |= bitmap if exclusive - end - - ## - # Adds a regexp handling for +pattern+ with +name+. A simple URL handler - # would be: - # - # @am.add_regexp_handling(/((https?:)\S+\w)/, :HYPERLINK) - - def add_regexp_handling(pattern, name, exclusive = false) - bitmap = @attributes.bitmap_for(name) - @regexp_handlings << [pattern, bitmap] - @exclusive_bitmap |= bitmap if exclusive - end - - ## - # Processes +str+ converting attributes, HTML and regexp handlings - - def flow(str) - @str = str.dup - - mask_protected_sequences - protect_code_markup - - @attrs = RDoc::Markup::AttrSpan.new @str.length, @exclusive_bitmap - - convert_attrs @str, @attrs, true - convert_html @str, @attrs, true - convert_regexp_handlings @str, @attrs, true - convert_attrs @str, @attrs - convert_html @str, @attrs - convert_regexp_handlings @str, @attrs - - unmask_protected_sequences - - split_into_flow - end - - ## - # Debug method that prints a string along with its attributes - - def display_attributes - puts - puts @str.tr(NULL, "!") - bit = 1 - 16.times do |bno| - line = "" - @str.length.times do |i| - if (@attrs[i] & bit) == 0 - line << " " - else - if bno.zero? - line << "S" - else - line << ("%d" % (bno+1)) - end - end - end - puts(line) unless line =~ /^ *$/ - bit <<= 1 - end - end - - ## - # Splits the string into chunks by attribute change - - def split_into_flow - res = [] - current_attr = 0 - - str_len = @str.length - - # skip leading invisible text - i = 0 - i += 1 while i < str_len and @str[i].chr == "\0" - start_pos = i - - # then scan the string, chunking it on attribute changes - while i < str_len - new_attr = @attrs[i] - if new_attr != current_attr - if i > start_pos - res << copy_string(start_pos, i) - start_pos = i - end - - res << change_attribute(current_attr, new_attr) - current_attr = new_attr - - if (current_attr & @attributes.regexp_handling) != 0 then - i += 1 while - i < str_len and (@attrs[i] & @attributes.regexp_handling) != 0 - - res << RDoc::Markup::RegexpHandling.new(current_attr, - copy_string(start_pos, i)) - start_pos = i - next - end - end - - # move on, skipping any invisible characters - begin - i += 1 - end while i < str_len and @str[i].chr == "\0" - end - - # tidy up trailing text - if start_pos < str_len - res << copy_string(start_pos, str_len) - end - - # and reset to all attributes off - res << change_attribute(current_attr, 0) if current_attr != 0 - - res - end - -end diff --git a/lib/rdoc/markup/attributes.rb b/lib/rdoc/markup/attributes.rb deleted file mode 100644 index fcdc61fd85..0000000000 --- a/lib/rdoc/markup/attributes.rb +++ /dev/null @@ -1,70 +0,0 @@ -# frozen_string_literal: true -## -# We manage a set of attributes. Each attribute has a symbol name and a bit -# value. - -class RDoc::Markup::Attributes - - ## - # The regexp handling attribute type. See RDoc::Markup#add_regexp_handling - - attr_reader :regexp_handling - - ## - # Creates a new attributes set. - - def initialize - @regexp_handling = 1 - - @name_to_bitmap = [ - [:_REGEXP_HANDLING_, @regexp_handling], - ] - - @next_bitmap = @regexp_handling << 1 - end - - ## - # Returns a unique bit for +name+ - - def bitmap_for(name) - bitmap = @name_to_bitmap.assoc name - - unless bitmap then - bitmap = @next_bitmap - @next_bitmap <<= 1 - @name_to_bitmap << [name, bitmap] - else - bitmap = bitmap.last - end - - bitmap - end - - ## - # Returns a string representation of +bitmap+ - - def as_string(bitmap) - return 'none' if bitmap.zero? - res = [] - - @name_to_bitmap.each do |name, bit| - res << name if (bitmap & bit) != 0 - end - - res.join ',' - end - - ## - # yields each attribute name in +bitmap+ - - def each_name_of(bitmap) - return enum_for __method__, bitmap unless block_given? - - @name_to_bitmap.each do |name, bit| - next if bit == @regexp_handling - - yield name.to_s if (bitmap & bit) != 0 - end - end - -end diff --git a/lib/rdoc/markup/formatter.rb b/lib/rdoc/markup/formatter.rb index 8c55a37b06..cb3b9b823e 100644 --- a/lib/rdoc/markup/formatter.rb +++ b/lib/rdoc/markup/formatter.rb @@ -10,6 +10,8 @@ # RDoc::Markup::FormatterTestCase. If you're writing a text-output formatter # use RDoc::Markup::TextFormatterTestCase which provides extra test cases. +require 'rdoc/markup/inline_parser' + class RDoc::Markup::Formatter ## @@ -18,6 +20,7 @@ class RDoc::Markup::Formatter InlineTag = Struct.new(:bit, :on, :off) + ## # Converts a target url to one that is relative to a given path @@ -49,17 +52,7 @@ def initialize(options, markup = nil) @options = options @markup = markup || RDoc::Markup.new - @am = @markup.attribute_manager - @am.add_regexp_handling(/
/, :HARD_BREAK) - - @attributes = @am.attributes - - @attr_tags = [] - - @in_tt = 0 - @tt_bit = @attributes.bitmap_for :TT - @hard_break = '' @from_path = '.' end @@ -84,29 +77,6 @@ def add_regexp_handling_RDOCLINK @markup.add_regexp_handling(/rdoc-[a-z]+:[^\s\]]+/, :RDOCLINK) end - ## - # Adds a regexp handling for links of the form {}[] and - # [] - - def add_regexp_handling_TIDYLINK - @markup.add_regexp_handling(/(?: - \{[^{}]*\} | # multi-word label - \b[^\s{}]+? # single-word label - ) - - \[\S+?\] # link target - /x, :TIDYLINK) - end - - ## - # Add a new set of tags for an attribute. We allow separate start and end - # tags for flexibility - - def add_tag(name, start, stop) - attr = @attributes.bitmap_for name - @attr_tags << InlineTag.new(attr, start, stop) - end - ## # Allows +tag+ to be decorated with additional information. @@ -121,117 +91,170 @@ def convert(content) @markup.convert content, self end - ## - # Converts flow items +flow+ - - def convert_flow(flow) - res = [] - - flow.each do |item| - case item - when String then - res << convert_string(item) - when RDoc::Markup::AttrChanger then - off_tags res, item - on_tags res, item - when RDoc::Markup::RegexpHandling then - res << convert_regexp_handling(item) + # Applies regexp handling to +text+ and returns an array of [text, converted?] pairs. + + def apply_regexp_handling(text) + output = [] + start = 0 + loop do + pos = text.size + matched_name = matched_text = nil + @markup.regexp_handlings.each do |pattern, name| + m = text.match(pattern, start) + next unless m + idx = m[1] ? 1 : 0 + if m.begin(idx) < pos + pos = m.begin(idx) + matched_text = m[idx] + matched_name = name + end + end + output << [text[start...pos], false] if pos > start + if matched_name + handled = public_send(:"handle_regexp_#{matched_name}", matched_text) + output << [handled, true] + start = pos + matched_text.size else - raise "Unknown flow element: #{item.inspect}" + start = pos end + break if pos == text.size end - - res.join + output end - ## - # Converts added regexp handlings. See RDoc::Markup#add_regexp_handling + # Called when processing plain text while traversing inline nodes from handle_inline. + # +text+ may need proper escaping. - def convert_regexp_handling(target) - return target.text if in_tt? + def handle_PLAIN_TEXT(text) + end - handled = false + # Called when processing regexp-handling-processed text while traversing inline nodes from handle_inline. + # +text+ may contain markup tags. - @attributes.each_name_of target.type do |name| - method_name = "handle_regexp_#{name}" + def handle_REGEXP_HANDLING_TEXT(text) + end + + # Called when processing text node while traversing inline nodes from handle_inline. + # Apply regexp handling and dispatch to the appropriate handler: handle_REGEXP_HANDLING_TEXT or handle_PLAIN_TEXT. - if respond_to? method_name then - target.text = public_send method_name, target - handled = true + def handle_TEXT(text) + apply_regexp_handling(text).each do |part, converted| + if converted + handle_REGEXP_HANDLING_TEXT(part) + else + handle_PLAIN_TEXT(part) end end + end - unless handled then - target_name = @attributes.as_string target.type + # Called when processing a hard break while traversing inline nodes from handle_inline. - raise RDoc::Error, "Unhandled regexp handling #{target_name}: #{target}" - end + def handle_HARD_BREAK + end - target.text + # Called when processing bold nodes while traversing inline nodes from handle_inline. + # Traverse the children nodes and dispatch to the appropriate handlers. + + def handle_BOLD(nodes) + traverse_inline_nodes(nodes) end - ## - # Converts a string to be fancier if desired + # Called when processing emphasis nodes while traversing inline nodes from handle_inline. + # Traverse the children nodes and dispatch to the appropriate handlers. - def convert_string(string) - string + def handle_EM(nodes) + traverse_inline_nodes(nodes) end - ## - # Use ignore in your subclass to ignore the content of a node. - # - # ## - # # We don't support raw nodes in ToNoRaw - # - # alias accept_raw ignore + # Called when processing bold word nodes while traversing inline nodes from handle_inline. + # +word+ may need proper escaping. - def ignore *node + def handle_BOLD_WORD(word) + handle_PLAIN_TEXT(word) end - ## - # Are we currently inside tt tags? + # Called when processing emphasis word nodes while traversing inline nodes from handle_inline. + # +word+ may need proper escaping. - def in_tt? - @in_tt > 0 + def handle_EM_WORD(word) + handle_PLAIN_TEXT(word) end - def tt_tag?(attr_mask, reverse = false) - each_attr_tag(attr_mask, reverse) do |tag| - return true if tt? tag - end - false + # Called when processing tt nodes while traversing inline nodes from handle_inline. + # +code+ may need proper escaping. + + def handle_TT(code) + handle_PLAIN_TEXT(code) end - ## - # Turns on tags for +item+ on +res+ + # Called when processing strike nodes while traversing inline nodes from handle_inline. + # Traverse the children nodes and dispatch to the appropriate handlers. - def on_tags(res, item) - each_attr_tag(item.turn_on) do |tag| - res << annotate(tag.on) - @in_tt += 1 if tt? tag - end + def handle_STRIKE(nodes) + traverse_inline_nodes(nodes) end - ## - # Turns off tags for +item+ on +res+ + # Called when processing tidylink nodes while traversing inline nodes from handle_inline. + # +label_part+ is an array of strings or nodes representing the link label. + # +url+ is the link URL. + # Traverse the label_part nodes and dispatch to the appropriate handlers. - def off_tags(res, item) - each_attr_tag(item.turn_off, true) do |tag| - @in_tt -= 1 if tt? tag - res << annotate(tag.off) - end + def handle_TIDYLINK(label_part, url) + traverse_inline_nodes(label_part) end - def each_attr_tag(attr_mask, reverse = false) - return if attr_mask.zero? + # Parses inline +text+, traverse the resulting nodes, and calls the appropriate handler methods. + + def handle_inline(text) + nodes = RDoc::Markup::InlineParser.new(text).parse + traverse_inline_nodes(nodes) + end - @attr_tags.public_send(reverse ? :reverse_each : :each) do |tag| - if attr_mask & tag.bit != 0 then - yield tag + # Traverses +nodes+ and calls the appropriate handler methods + # Nodes formats are described in RDoc::Markup::InlineParser#parse + + def traverse_inline_nodes(nodes) + nodes.each do |node| + next handle_TEXT(node) if String === node + case node[:type] + when :TIDYLINK + handle_TIDYLINK(node[:children], node[:url]) + when :HARD_BREAK + handle_HARD_BREAK + when :BOLD + handle_BOLD(node[:children]) + when :BOLD_WORD + handle_BOLD_WORD(node[:children][0] || '') + when :EM + handle_EM(node[:children]) + when :EM_WORD + handle_EM_WORD(node[:children][0] || '') + when :TT + handle_TT(node[:children][0] || '') + when :STRIKE + handle_STRIKE(node[:children]) end end end + ## + # Converts a string to be fancier if desired + + def convert_string(string) + string + end + + ## + # Use ignore in your subclass to ignore the content of a node. + # + # ## + # # We don't support raw nodes in ToNoRaw + # + # alias accept_raw ignore + + def ignore *node + end + ## # Extracts and a scheme, url and an anchor id from +url+ and returns them. diff --git a/lib/rdoc/markup/heading.rb b/lib/rdoc/markup/heading.rb index 36f3603de4..3936d836c8 100644 --- a/lib/rdoc/markup/heading.rb +++ b/lib/rdoc/markup/heading.rb @@ -57,8 +57,8 @@ def self.to_html to_html = Markup::ToHtml.new nil - def to_html.handle_regexp_CROSSREF(target) - target.text.sub(/^\\/, '') + def to_html.handle_regexp_CROSSREF(text) + text.sub(/^\\/, '') end to_html diff --git a/lib/rdoc/markup/inline_parser.rb b/lib/rdoc/markup/inline_parser.rb new file mode 100644 index 0000000000..6bbd15e7e2 --- /dev/null +++ b/lib/rdoc/markup/inline_parser.rb @@ -0,0 +1,311 @@ +# frozen_string_literal: true + +require 'set' +require 'strscan' + +# Parses inline markup in RDoc text. +# This parser handles em, bold, strike, tt, hard break, and tidylink. +# Block-level constructs are handled in RDoc::Markup::Parser. + +class RDoc::Markup::InlineParser + + # TT, BOLD_WORD, EM_WORD: regexp-handling(example: crossref) is disabled + WORD_PAIRS = { + '*' => :BOLD_WORD, + '**' => :BOLD_WORD, + '_' => :EM_WORD, + '__' => :EM_WORD, + '+' => :TT, + '++' => :TT, + '`' => :TT, + '``' => :TT + } # :nodoc: + + # Other types: regexp-handling(example: crossref) is enabled + TAGS = { + 'em' => :EM, + 'i' => :EM, + 'b' => :BOLD, + 's' => :STRIKE, + 'del' => :STRIKE, + } # :nodoc: + + STANDALONE_TAGS = { 'br' => :HARD_BREAK } # :nodoc: + + CODEBLOCK_TAGS = %w[tt code] # :nodoc: + + TOKENS = { + **WORD_PAIRS.transform_values { [:word_pair, nil] }, + **TAGS.keys.to_h {|tag| ["<#{tag}>", [:open_tag, tag]] }, + **TAGS.keys.to_h {|tag| ["", [:close_tag, tag]] }, + **CODEBLOCK_TAGS.to_h {|tag| ["<#{tag}>", [:code_start, tag]] }, + **STANDALONE_TAGS.keys.to_h {|tag| ["<#{tag}>", [:standalone_tag, tag]] }, + '{' => [:tidylink_start, nil], + '}' => [:tidylink_mid, nil], + '\\' => [:escape, nil], + '[' => nil # To make `label[url]` scan as separate tokens + } # :nodoc: + + multi_char_tokens_regexp = Regexp.union(TOKENS.keys.select {|s| s.size > 1 }).source + token_starts_regexp = TOKENS.keys.map {|s| s[0] }.uniq.map {|s| Regexp.escape(s) }.join + + SCANNER_REGEXP = + /(?: + #{multi_char_tokens_regexp} + |[^#{token_starts_regexp}\sa-zA-Z0-9\.]+ # chunk of normal text + |\s+|[a-zA-Z0-9\.]+|. + )/x # :nodoc: + + # Characters that can be escaped with backslash. + ESCAPING_CHARS = '\\*_+`{}[]<>' # :nodoc: + + # Pattern to match code block content until
or
. + CODEBLOCK_REGEXPS = CODEBLOCK_TAGS.to_h {|name| [name, /((?:\\.|[^\\])*?)<\/#{name}>/] } # :nodoc: + + # Word contains alphanumeric and _./:[]- characters. + # Word may start with # and may end with any non-space character. (e.g. #eql?). + # Underscore delimiter have special rules. + WORD_REGEXPS = { + # Words including _, longest match. + # Example: `_::A_` `_-42_` `_A::B::C.foo_bar[baz]_` `_kwarg:_` + # Content must not include _ followed by non-alphanumeric character + # Example: `_host_:_port_` will be `_host_` + `:` + `_port_` + '_' => /#?([a-zA-Z0-9.\/:\[\]-]|_+[a-zA-Z0-9])+[^\s]?_(?=[^a-zA-Z0-9_]|\z)/, + # Words allowing _ but not allowing __ + '__' => /#?[a-zA-Z0-9.\/:\[\]-]*(_[a-zA-Z0-9.\/:\[\]-]+)*[^\s]?__(?=[^a-zA-Z0-9]|\z)/, + **%w[* ** + ++ ` ``].to_h do |s| + # normal words that can be used within +word+ or *word* + [s, /#?[a-zA-Z0-9_.\/:\[\]-]+[^\s]?#{Regexp.escape(s)}(?=[^a-zA-Z0-9]|\z)/] + end + } # :nodoc: + + def initialize(string) + @scanner = StringScanner.new(string) + @last_match = nil + @scanner_negative_cache = Set.new + @stack = [] + @delimiters = {} + end + + # Return the current parsing node on @stack. + + def current + @stack.last + end + + # Parse and return an array of nodes. + # Node format: + # { + # type: :EM | :BOLD | :BOLD_WORD | :EM_WORD | :TT | :STRIKE | :HARD_BREAK | :TIDYLINK, + # url: string # only for :TIDYLINK + # children: [string_or_node, ...] + # } + + def parse + stack_push(:root, nil) + while true + type, token, value = scan_token + close = nil + tidylink_url = nil + case type + when :node + current[:children] << value + invalidate_open_tidylinks if value[:type] == :TIDYLINK + when :eof + close = :root + when :tidylink_open + stack_push(:tidylink, token) + when :tidylink_close + close = :tidylink + if value + tidylink_url = value + else + # Tidylink closing brace without URL part. Treat opening and closing braces as normal text + # `{labelnodes}...` case. + current[:children] << token + end + when :invalidated_tidylink_close + # `{...{label}[url]...}` case. Nested tidylink invalidates outer one. The last `}` closes the invalidated tidylink. + current[:children] << token + close = :invalidated_tidylink + when :text + current[:children] << token + when :open + stack_push(value, token) + when :close + if @delimiters[value] + close = value + else + # closing tag without matching opening tag. Treat as normal text. + current[:children] << token + end + end + + next unless close + + while current[:delimiter] != close + children = current[:children] + open_token = current[:token] + stack_pop + current[:children] << open_token if open_token + current[:children].concat(children) + end + + token = current[:token] + children = compact_string(current[:children]) + stack_pop + + return children if close == :root + + if close == :tidylink || close == :invalidated_tidylink + if tidylink_url + current[:children] << { type: :TIDYLINK, children: children, url: tidylink_url } + invalidate_open_tidylinks + else + current[:children] << token + current[:children].concat(children) + end + else + current[:children] << { type: TAGS[close], children: children } + end + end + end + + private + + # When a valid tidylink node is encountered, invalidate all nested tidylinks. + + def invalidate_open_tidylinks + return unless @delimiters[:tidylink] + + @delimiters[:invalidated_tidylink] ||= [] + @delimiters[:tidylink].each do |idx| + @delimiters[:invalidated_tidylink] << idx + @stack[idx][:delimiter] = :invalidated_tidylink + end + @delimiters.delete(:tidylink) + end + + # Pop the top node off the stack when node is closed by a closing delimiter or an error. + + def stack_pop + delimiter = current[:delimiter] + @delimiters[delimiter].pop + @delimiters.delete(delimiter) if @delimiters[delimiter].empty? + @stack.pop + end + + # Push a new node onto the stack when encountering an opening delimiter. + + def stack_push(delimiter, token) + node = { delimiter: delimiter, token: token, children: [] } + (@delimiters[delimiter] ||= []) << @stack.size + @stack << node + end + + # Compacts adjacent strings in +nodes+ into a single string. + + def compact_string(nodes) + nodes.chunk {|e| String === e }.flat_map do |is_str, elems| + is_str ? elems.join : elems + end + end + + # Scan from StringScanner with +pattern+ + # If +negative_cache+ is true, caches scan failure result. scan(pattern, negative_cache: true) return nil when it is called again after a failure. + # Be careful to use +negative_cache+ with a pattern and position that does not match after previous failure. + + def strscan(pattern, negative_cache: false) + return if negative_cache && @scanner_negative_cache.include?(pattern) + + string = @scanner.scan(pattern) + @last_match = string if string + @scanner_negative_cache << pattern if !string && negative_cache + string + end + + # Scan and return the next token for parsing. + # Returns [token_type, token_string_or_nil, extra_info] + + def scan_token + last_match = @last_match + token = strscan(SCANNER_REGEXP) + type, name = TOKENS[token] + + case type + when :word_pair + # If the character before word pair delimiter is alphanumeric, do not treat as word pair. + word_pair = strscan(WORD_REGEXPS[token]) unless /[a-zA-Z0-9]\z/.match?(last_match) + + if word_pair.nil? + [:text, token, nil] + elsif token == '__' && word_pair.match?(/\A[a-zA-Z]+__\z/) + # Special exception: __FILE__, __LINE__, __send__ should be treated as normal text. + [:text, "#{token}#{word_pair}", nil] + else + [:node, nil, { type: WORD_PAIRS[token], children: [word_pair.delete_suffix(token)] }] + end + when :open_tag + [:open, token, name] + when :close_tag + [:close, token, name] + when :code_start + if (codeblock = strscan(CODEBLOCK_REGEXPS[name], negative_cache: true)) + # Need to unescape `\\` and `\<`. + # RDoc also unescapes backslash + word separators, but this is not really necessary. + content = codeblock.delete_suffix("").gsub(/\\(.)/) { '\\<*+_`'.include?($1) ? $1 : $& } + [:node, nil, { type: :TT, children: content.empty? ? [] : [content] }] + else + [:text, token, nil] + end + when :standalone_tag + [:node, nil, { type: STANDALONE_TAGS[name], children: [] }] + when :tidylink_start + [:tidylink_open, token, nil] + when :tidylink_mid + if @delimiters[:tidylink] + if (url = read_tidylink_url) + [:tidylink_close, nil, url] + else + [:tidylink_close, token, nil] + end + elsif @delimiters[:invalidated_tidylink] + [:invalidated_tidylink_close, token, nil] + else + [:text, token, nil] + end + when :escape + next_char = strscan(/./) + if next_char.nil? + # backslash at end of string + [:text, '\\', nil] + elsif next_char && ESCAPING_CHARS.include?(next_char) + # escaped character + [:text, next_char, nil] + else + # If next_char not an escaping character, it is treated as text token with backslash + next_char + # For example, backslash of `\Ruby` (suppressed crossref) remains. + [:text, "\\#{next_char}", nil] + end + else + if token.nil? + [:eof, nil, nil] + elsif token.match?(/\A[A-Za-z0-9]*\z/) && (url = read_tidylink_url) + # Simplified tidylink: label[url] + [:node, nil, { type: :TIDYLINK, children: [token], url: url }] + else + [:text, token, nil] + end + end + end + + # Read the URL part of a tidylink from the current position. + # Returns nil if no valid URL part is found. + # URL part is enclosed in square brackets and may contain escaped brackets. + # Example: [http://example.com/?q=\[\]] represents http://example.com/?q=[]. + + def read_tidylink_url + bracketed_url = strscan(/\[([^\s\[\]\\]|\\[\[\]\\])+\]/) + bracketed_url[1...-1].gsub(/\\(.)/, '\1') if bracketed_url + end +end diff --git a/lib/rdoc/markup/parser.rb b/lib/rdoc/markup/parser.rb index 5f696ddb3e..95d08b7ec4 100644 --- a/lib/rdoc/markup/parser.rb +++ b/lib/rdoc/markup/parser.rb @@ -11,7 +11,7 @@ # The parser only handles the block-level constructs Paragraph, List, # ListItem, Heading, Verbatim, BlankLine, Rule and BlockQuote. # Inline markup such as \+blah\+ is handled separately by -# RDoc::Markup::AttributeManager. +# RDoc::Markup::InlineParser. # # To see what markup the Parser implements read RDoc. To see how to use # RDoc markup to format text in your program read RDoc::Markup. diff --git a/lib/rdoc/markup/regexp_handling.rb b/lib/rdoc/markup/regexp_handling.rb deleted file mode 100644 index c471fe73c7..0000000000 --- a/lib/rdoc/markup/regexp_handling.rb +++ /dev/null @@ -1,40 +0,0 @@ -# frozen_string_literal: true -## -# Hold details of a regexp handling sequence - -class RDoc::Markup::RegexpHandling - - ## - # Regexp handling type - - attr_reader :type - - ## - # Regexp handling text - - attr_accessor :text - - ## - # Creates a new regexp handling sequence of +type+ with +text+ - - def initialize(type, text) - @type, @text = type, text - end - - ## - # Regexp handlings are equal when the have the same text and type - - def ==(o) - self.text == o.text && self.type == o.type - end - - def inspect # :nodoc: - "#" % [ - object_id, @type, text.dump] - end - - def to_s # :nodoc: - "RegexpHandling: type=#{type} text=#{text.dump}" - end - -end diff --git a/lib/rdoc/markup/to_ansi.rb b/lib/rdoc/markup/to_ansi.rb index 1f25638916..b3556a0744 100644 --- a/lib/rdoc/markup/to_ansi.rb +++ b/lib/rdoc/markup/to_ansi.rb @@ -19,10 +19,57 @@ def initialize(markup = nil) ## # Maps attributes to ANSI sequences - def init_tags - add_tag :BOLD, "\e[1m", "\e[m" - add_tag :TT, "\e[7m", "\e[m" - add_tag :EM, "\e[4m", "\e[m" + ANSI_STYLE_CODES_ON = { + BOLD: 1, + TT: 7, + EM: 4, + STRIKE: 9 + } + + ANSI_STYLE_CODES_OFF = { + BOLD: 22, + TT: 27, + EM: 24, + STRIKE: 29 + } + + # Apply the given attributes by emitting ANSI sequences. + # Emitting attribute changes are deferred until new text is added and applied in batch. + # This method computes the necessary ANSI codes to transition from the + # current set of applied attributes to the new set of +attributes+. + + def apply_attributes(attributes) + before = @applied_attributes + after = attributes.sort + return if before == after + + if after.empty? + emit_inline("\e[m") + elsif !before.empty? && before.size > (before & after).size + 1 + codes = after.map {|attr| ANSI_STYLE_CODES_ON[attr] }.compact + emit_inline("\e[#{[0, *codes].join(';')}m") + else + off_codes = (before - after).map {|attr| ANSI_STYLE_CODES_OFF[attr] }.compact + on_codes = (after - before).map {|attr| ANSI_STYLE_CODES_ON[attr] }.compact + emit_inline("\e[#{(off_codes + on_codes).join(';')}m") + end + @applied_attributes = attributes + end + + def add_text(text) + attrs = @attributes.keys + if @applied_attributes != attrs + apply_attributes(attrs) + end + emit_inline(text) + end + + def handle_inline(text) + @applied_attributes = [] + res = super + res << "\e[m" unless @applied_attributes.empty? + @applied_attributes = [] + res end ## diff --git a/lib/rdoc/markup/to_bs.rb b/lib/rdoc/markup/to_bs.rb index e6c8a48217..f047e30803 100644 --- a/lib/rdoc/markup/to_bs.rb +++ b/lib/rdoc/markup/to_bs.rb @@ -17,14 +17,29 @@ def initialize(markup = nil) @in_em = false end - ## - # Sets a flag that is picked up by #annotate to do the right thing in - # #convert_string + def handle_inline(text) + initial_style = [] + initial_style << :BOLD if @in_b + initial_style << :EM if @in_em + super(text, initial_style) + end - def init_tags - add_tag :BOLD, '+b', '-b' - add_tag :EM, '+_', '-_' - add_tag :TT, '', '' # we need in_tt information maintained + def add_text(text) + attrs = @attributes.keys + if attrs.include? :BOLD + styled = +'' + text.chars.each do |c| + styled << "#{c}\b#{c}" + end + text = styled + elsif attrs.include? :EM + styled = +'' + text.chars.each do |c| + styled << "_\b#{c}" + end + text = styled + end + emit_inline(text) end ## @@ -68,39 +83,4 @@ def accept_list_item_start(list_item) def calculate_text_width(text) text.gsub(/_\x08/, '').gsub(/\x08./, '').size end - - ## - # Turns on or off regexp handling for +convert_string+ - - def annotate(tag) - case tag - when '+b' then @in_b = true - when '-b' then @in_b = false - when '+_' then @in_em = true - when '-_' then @in_em = false - end - '' - end - - ## - # Calls convert_string on the result of convert_regexp_handling - - def convert_regexp_handling(target) - convert_string super - end - - ## - # Adds bold or underline mixed with backspaces - - def convert_string(string) - return string unless @in_b or @in_em - chars = if @in_b then - string.chars.map do |char| "#{char}\b#{char}" end - elsif @in_em then - string.chars.map do |char| "_\b#{char}" end - end - - chars.join - end - end diff --git a/lib/rdoc/markup/to_html.rb b/lib/rdoc/markup/to_html.rb index e496c8c74e..2924b89b94 100644 --- a/lib/rdoc/markup/to_html.rb +++ b/lib/rdoc/markup/to_html.rb @@ -51,11 +51,10 @@ def initialize(options, markup = nil) @in_list_entry = nil @list = nil @th = nil + @in_tidylink_label = false @hard_break = "
\n" init_regexp_handlings - - init_tags end # :section: Regexp Handling @@ -72,6 +71,10 @@ def init_regexp_handlings # external links @markup.add_regexp_handling(/(?:link:|https?:|mailto:|ftp:|irc:|www\.)#{URL_CHARACTERS_REGEXP_STR}+\w/, :HYPERLINK) + + # suppress crossref: \#method \::method \ClassName \method_with_underscores + @markup.add_regexp_handling(/\\(?:[#:A-Z]|[a-z]+_[a-z0-9])/, :SUPPRESSED_CROSSREF) + init_link_notation_regexp_handlings end @@ -80,7 +83,6 @@ def init_regexp_handlings def init_link_notation_regexp_handlings add_regexp_handling_RDOCLINK - add_regexp_handling_TIDYLINK end def handle_RDOCLINK(url) # :nodoc: @@ -88,6 +90,7 @@ def handle_RDOCLINK(url) # :nodoc: when /^rdoc-ref:/ CGI.escapeHTML($') when /^rdoc-label:/ + return CGI.escapeHTML(url) if in_tidylink_label? text = $' text = case text @@ -113,11 +116,127 @@ def handle_RDOCLINK(url) # :nodoc: end end - ## - # +target+ is a
+ def handle_PLAIN_TEXT(text) + emit_inline(convert_string(text)) + end + + def handle_REGEXP_HANDLING_TEXT(text) + emit_inline(text) + end + + def handle_BOLD(nodes) + emit_inline('') + super + emit_inline('') + end + + def handle_EM(nodes) + emit_inline('') + super + emit_inline('') + end + + def handle_BOLD_WORD(word) + emit_inline('') + super + emit_inline('') + end + + def handle_EM_WORD(word) + emit_inline('') + super + emit_inline('') + end + + def handle_TT(code) + emit_inline('') + super + emit_inline('') + end + + def handle_STRIKE(nodes) + emit_inline('') + super + emit_inline('') + end + + def handle_HARD_BREAK + emit_inline('
') + end + + def emit_inline(text) + @inline_output << text + end - def handle_regexp_HARD_BREAK(target) - '
' + # Returns true if we are processing inside a tidy link label. + + def in_tidylink_label? + @in_tidylink_label + end + + # Special handling for tidy link labels. + # When a tidy link is {rdoc-image:path/to/image.jpg:alt text}[http://example.com], + # label part is normally considered RDOCLINK rdoc-image:path/to/image.jpg:alt and a text " text" + # but RDoc's test code expects the whole label part to be treated as RDOCLINK only in tidy link label. + # When a tidy link is {^1}[url] or {*1}[url], the label part needs to drop leading * or ^. + # TODO: reconsider this workaround. + + def apply_tidylink_label_special_handling(label, url) + # ^1 *1 will be converted to just 1 in tidy link label. + return label[1..] if label.match?(/\A[*^]\d+\z/) + + # rdoc-image in label specially allows spaces in alt text. + return handle_RDOCLINK(label) if label.start_with?('rdoc-image:') + end + + def handle_TIDYLINK(label_part, url) + # When url is an image, ignore label part (maybe bug?) and just generate img tag. + if url.match?(/\Ahttps?:\/\/.+\.(png|gif|jpg|jpeg|bmp)\z/) + emit_inline("") + return + elsif url.match?(/\Ardoc-image:/) + emit_inline(handle_RDOCLINK(url)) + return + end + + if label_part.size == 1 && String === label_part[0] + raw_label = label_part[0] + + @in_tidylink_label = true + special = apply_tidylink_label_special_handling(raw_label, url) + @in_tidylink_label = false + + if special + tag = gen_url(CGI.escapeHTML(url), special) + unless tag.empty? + emit_inline(tag) + return + end + end + end + + tag = gen_url(CGI.escapeHTML(url), '') + open_tag, close_tag = tag.split(/(?=<\/a>)/, 2) + valid_tag = open_tag && close_tag + emit_inline(open_tag) if valid_tag + @in_tidylink_label = true + traverse_inline_nodes(label_part) + @in_tidylink_label = false + emit_inline(close_tag) if valid_tag + end + + def handle_inline(text) # :nodoc: + @inline_output = +'' + super + out = @inline_output + @inline_output = nil + out + end + + # Converts suppressed cross-reference +text+ to HTML by removing the leading backslash. + + def handle_regexp_SUPPRESSED_CROSSREF(text) + convert_string(text.delete_prefix('\\')) end ## @@ -132,9 +251,10 @@ def handle_regexp_HARD_BREAK(target) # link::: # Reference to a local file relative to the output directory. - def handle_regexp_HYPERLINK(target) - url = CGI.escapeHTML(target.text) + def handle_regexp_HYPERLINK(text) + return convert_string(text) if in_tidylink_label? + url = CGI.escapeHTML(text) gen_url url, url end @@ -147,27 +267,8 @@ def handle_regexp_HYPERLINK(target) # For the +rdoc-label+ scheme the footnote and label prefixes are stripped # when creating a link. All other contents will be linked verbatim. - def handle_regexp_RDOCLINK(target) - handle_RDOCLINK target.text - end - - ## - # This +target+ is a link where the label is different from the URL - # label[url] or {long label}[url] - - def handle_regexp_TIDYLINK(target) - text = target.text - - if tidy_link_capturing? - return finish_tidy_link(text) - end - - if text.start_with?('{') && !text.include?('}') - start_tidy_link text - return '' - end - - convert_complete_tidy_link(text) + def handle_regexp_RDOCLINK(text) + handle_RDOCLINK text end # :section: Visitor @@ -415,16 +516,6 @@ def html_list_name(list_type, open_tag) tags[open_tag ? 0 : 1] end - ## - # Maps attributes to HTML tags - - def init_tags - add_tag :BOLD, "", "" - add_tag :TT, "", "" - add_tag :EM, "", "" - add_tag :STRIKE, "", "" - end - ## # Returns the HTML tag for +list_type+, possible using a label from # +list_item+ @@ -474,141 +565,10 @@ def parseable?(text) # Converts +item+ to HTML using RDoc::Text#to_html def to_html(item) - super convert_flow @am.flow item - end - - private - - def convert_flow(flow_items) - res = [] - - flow_items.each do |item| - case item - when String - append_flow_fragment res, convert_string(item) - when RDoc::Markup::AttrChanger - off_tags res, item - on_tags res, item - when RDoc::Markup::RegexpHandling - append_flow_fragment res, convert_regexp_handling(item) - else - raise "Unknown flow element: #{item.inspect}" - end - end - - res.join - end - - def append_flow_fragment(res, fragment) - return if fragment.nil? || fragment.empty? - - emit_tidy_link_fragment(res, fragment) - end - - def append_to_tidy_label(fragment) - @tidy_link_buffer << fragment - end - - ## - # Matches an entire tidy link with a braced label "{label}[url]". - # - # Capture 1: label contents. - # Capture 2: URL text. - # Capture 3: trailing content. - TIDY_LINK_WITH_BRACES = /\A\{(.*?)\}\[(.*?)\](.*)\z/ - - ## - # Matches the tail of a braced tidy link when the opening brace was - # consumed earlier while accumulating the label text. - # - # Capture 1: remaining label content. - # Capture 2: URL text. - # Capture 3: trailing content. - TIDY_LINK_WITH_BRACES_TAIL = /\A(.*?)\}\[(.*?)\](.*)\z/ - - ## - # Matches a tidy link with a single-word label "label[url]". - # - # Capture 1: the single-word label (no whitespace). - # Capture 2: URL text between the brackets. - TIDY_LINK_SINGLE_WORD = /\A(\S+)\[(.*?)\](.*)\z/ - - def convert_complete_tidy_link(text) - return text unless - text =~ TIDY_LINK_WITH_BRACES or text =~ TIDY_LINK_SINGLE_WORD - - label = $1 - url = CGI.escapeHTML($2) - - label_html = if /^rdoc-image:/ =~ label - handle_RDOCLINK(label) - else - render_tidy_link_label(label) - end - - gen_url url, label_html - end - - def emit_tidy_link_fragment(res, fragment) - if tidy_link_capturing? - append_to_tidy_label fragment - else - res << fragment - end - end - - def finish_tidy_link(text) - label_tail, url, trailing = extract_tidy_link_parts(text) - append_to_tidy_label CGI.escapeHTML(label_tail) unless label_tail.empty? - - return '' unless url - - label_html = @tidy_link_buffer - @tidy_link_buffer = nil - link = gen_url(url, label_html) - - return link if trailing.empty? - - link + CGI.escapeHTML(trailing) - end - - def extract_tidy_link_parts(text) - if text =~ TIDY_LINK_WITH_BRACES - [$1, CGI.escapeHTML($2), $3] - elsif text =~ TIDY_LINK_WITH_BRACES_TAIL - [$1, CGI.escapeHTML($2), $3] - elsif text =~ TIDY_LINK_SINGLE_WORD - [$1, CGI.escapeHTML($2), $3] - else - [text, nil, ''] - end - end - - def on_tags(res, item) - each_attr_tag(item.turn_on) do |tag| - emit_tidy_link_fragment(res, annotate(tag.on)) - @in_tt += 1 if tt? tag - end - end - - def off_tags(res, item) - each_attr_tag(item.turn_off, true) do |tag| - emit_tidy_link_fragment(res, annotate(tag.off)) - @in_tt -= 1 if tt? tag - end - end - - def start_tidy_link(text) - @tidy_link_buffer = String.new - append_to_tidy_label CGI.escapeHTML(text.delete_prefix('{')) - end - - def tidy_link_capturing? - !!@tidy_link_buffer - end - - def render_tidy_link_label(label) - RDoc::Markup::LinkLabelToHtml.render(label, @options, @from_path) + # Ideally, we should convert html characters at handle_PLAIN_TEXT or somewhere else, + # but we need to convert it here for now because to_html_characters converts pair of backticks to ’‘ and pair of double backticks to ”“. + # Known bugs: `...` in `def f(...); end` and `(c) in `` will be wrongly converted. + to_html_characters(handle_inline(item)) end end diff --git a/lib/rdoc/markup/to_html_crossref.rb b/lib/rdoc/markup/to_html_crossref.rb index ad600663bf..fad87a5802 100644 --- a/lib/rdoc/markup/to_html_crossref.rb +++ b/lib/rdoc/markup/to_html_crossref.rb @@ -50,8 +50,6 @@ def init_link_notation_regexp_handlings # will be processed as a tidylink first and will be broken. crossref_re = @options.hyperlink_all ? ALL_CROSSREF_REGEXP : CROSSREF_REGEXP @markup.add_regexp_handling crossref_re, :CROSSREF - - add_regexp_handling_TIDYLINK end ## @@ -80,9 +78,8 @@ def cross_reference(name, text = nil, code = true, rdoc_ref: false) # example, ToHtml is found, even without the RDoc::Markup:: prefix, # because we look for it in module Markup first. - def handle_regexp_CROSSREF(target) - name = target.text - + def handle_regexp_CROSSREF(name) + return convert_string(name) if in_tidylink_label? return name if @options.autolink_excluded_words&.include?(name) return name if name =~ /@[\w-]+\.[\w-]/ # labels that look like emails @@ -101,8 +98,8 @@ def handle_regexp_CROSSREF(target) # Handles rdoc-ref: scheme links and allows RDoc::Markup::ToHtml to # handle other schemes. - def handle_regexp_HYPERLINK(target) - url = target.text + def handle_regexp_HYPERLINK(url) + return convert_string(url) if in_tidylink_label? case url when /\Ardoc-ref:/ @@ -120,12 +117,14 @@ def handle_regexp_HYPERLINK(target) # All other contents are handled by # {the superclass}[rdoc-ref:RDoc::Markup::ToHtml#handle_regexp_RDOCLINK] - def handle_regexp_RDOCLINK(target) - url = target.text - + def handle_regexp_RDOCLINK(url) case url when /\Ardoc-ref:/ - cross_reference $', rdoc_ref: true + if in_tidylink_label? + convert_string(url) + else + cross_reference $', rdoc_ref: true + end else super end @@ -203,73 +202,30 @@ def link(name, text, code = true, rdoc_ref: false) end end - def convert_flow(flow_items, &block) - res = [] - - i = 0 - while i < flow_items.size - item = flow_items[i] - - case item - when RDoc::Markup::AttrChanger - if !tidy_link_capturing? && (text = convert_tt_crossref(flow_items, i)) - text = block.call(text, res) if block - append_flow_fragment res, text - i += 3 - next - end + def handle_TT(code) + emit_inline(tt_cross_reference(code) || "#{CGI.escapeHTML code}") + end - off_tags res, item - on_tags res, item - i += 1 - when String - text = convert_string(item) - text = block.call(text, res) if block - append_flow_fragment res, text - i += 1 - when RDoc::Markup::RegexpHandling - text = convert_regexp_handling(item) - text = block.call(text, res) if block - append_flow_fragment res, text - i += 1 - else - raise "Unknown flow element: #{item.inspect}" - end + # Applies additional special handling on top of the one defined in ToHtml. + # When a tidy link is {Foo}[rdoc-ref:Foo], the label part is surrounded by . + # TODO: reconsider this workaround. + def apply_tidylink_label_special_handling(label, url) + if url == "rdoc-ref:#{label}" && cross_reference(label).include?('') + "#{convert_string(label)}" + else + super end - - res.join('') end - private - - ## - # Detects ... spans that contain a single cross-reference candidate. - # When the candidate occupies the whole span (aside from trailing - # punctuation), the tt markup is replaced by the resolved cross-reference. - - def convert_tt_crossref(flow_items, index) - opener = flow_items[index] - return unless tt_tag?(opener.turn_on) - - string = flow_items[index + 1] - closer = flow_items[index + 2] - - return unless String === string - return unless RDoc::Markup::AttrChanger === closer - return unless tt_tag?(closer.turn_off, true) + def tt_cross_reference(code) + return if in_tidylink_label? crossref_regexp = @options.hyperlink_all ? ALL_CROSSREF_REGEXP : CROSSREF_REGEXP - match = crossref_regexp.match(string) - return unless match - return unless match.begin(1).zero? - - trailing = match.post_match - # Only convert when the remainder is punctuation/whitespace so other tt text stays literal. - return unless trailing.match?(/\A[[:punct:]\s]*\z/) - - text = cross_reference(string) - return if text == string + match = crossref_regexp.match(code) + return unless match && match.begin(1).zero? + return unless match.post_match.match?(/\A[[:punct:]\s]*\z/) - text + ref = cross_reference(code) + ref if ref != code end end diff --git a/lib/rdoc/markup/to_html_snippet.rb b/lib/rdoc/markup/to_html_snippet.rb index a671cf0f1d..52cc4543f3 100644 --- a/lib/rdoc/markup/to_html_snippet.rb +++ b/lib/rdoc/markup/to_html_snippet.rb @@ -107,8 +107,8 @@ def accept_list_start(list) def accept_verbatim(verbatim) throw :done if @characters >= @character_limit input = verbatim.text.rstrip - - text = truncate input + text = truncate(input, @character_limit - @characters) + @characters += input.length text << ' ...' unless text == input super RDoc::Markup::Verbatim.new text @@ -128,16 +128,8 @@ def start_accepting ## # Removes escaping from the cross-references in +target+ - def handle_regexp_CROSSREF(target) - target.text.sub(/\A\\/, '') - end - - ## - # +target+ is a
- - def handle_regexp_HARD_BREAK(target) - @characters -= 4 - '
' + def handle_regexp_CROSSREF(text) + text.sub(/\A\\/, '') end ## @@ -212,74 +204,82 @@ def convert(content) end_accepting end - ## - # Converts flow items +flow+ + def handle_PLAIN_TEXT(text) # :nodoc: + return if inline_limit_reached? - def convert_flow(flow) - throw :done if @characters >= @character_limit + truncated = truncate(text, @inline_character_limit) + @inline_character_limit -= text.size + emit_inline(convert_string(truncated)) + end - res = [] - @mask = 0 - - flow.each do |item| - case item - when RDoc::Markup::AttrChanger then - off_tags res, item - on_tags res, item - when String then - text = convert_string item - res << truncate(text) - when RDoc::Markup::RegexpHandling then - text = convert_regexp_handling item - res << truncate(text) - else - raise "Unknown flow element: #{item.inspect}" - end - - if @characters >= @character_limit then - off_tags res, RDoc::Markup::AttrChanger.new(0, @mask) - break - end - end + def handle_REGEXP_HANDLING_TEXT(text) # :nodoc: + return if inline_limit_reached? - res << ' ...' if @characters >= @character_limit + # We can't truncate text including html tags. + # Just emit as is, and count all characters including html tag part. + emit_inline(text) + @inline_character_limit -= text.size + end - res.join + def handle_BOLD(nodes) + super unless inline_limit_reached? end - ## - # Maintains a bitmask to allow HTML elements to be closed properly. See - # RDoc::Markup::Formatter. + def handle_BOLD_WORD(word) + super unless inline_limit_reached? + end - def on_tags(res, item) - @mask ^= item.turn_on + def handle_EM(nodes) + super unless inline_limit_reached? + end - super + def handle_EM_WORD(word) + super unless inline_limit_reached? end - ## - # Maintains a bitmask to allow HTML elements to be closed properly. See - # RDoc::Markup::Formatter. + def handle_TT(code) + super unless inline_limit_reached? + end - def off_tags(res, item) - @mask ^= item.turn_off + def handle_STRIKE(nodes) + super unless inline_limit_reached? + end - super + def handle_HARD_BREAK + super unless inline_limit_reached? end - ## - # Truncates +text+ at the end of the first word after the character_limit. + def handle_TIDYLINK(label_part, url) + traverse_inline_nodes(label_part) unless inline_limit_reached? + end - def truncate(text) - length = text.length - characters = @characters - @characters += length + def inline_limit_reached? + @inline_character_limit <= 0 + end - return text if @characters < @character_limit + def handle_inline(text) + limit = @character_limit - @characters + return ['', 0] if limit <= 0 + @inline_character_limit = limit + res = super + res << ' ...' if @inline_character_limit <= 0 + @characters += limit - @inline_character_limit + res + end + + def to_html(item) + throw :done if @characters >= @character_limit + to_html_characters(handle_inline(item)) + end + + ## + # Truncates +text+ at the end of the first word after the limit. - remaining = @character_limit - characters + def truncate(text, limit) + return text if limit >= text.size + return '' if limit <= 0 - text =~ /\A(.{#{remaining},}?)(\s|$)/m # TODO word-break instead of \s? + text =~ /\A(.{#{limit},}?)(\s|$)/m # TODO word-break instead of \s? $1 end diff --git a/lib/rdoc/markup/to_label.rb b/lib/rdoc/markup/to_label.rb index 22ec07f9c1..4ab030bb70 100644 --- a/lib/rdoc/markup/to_label.rb +++ b/lib/rdoc/markup/to_label.rb @@ -18,20 +18,33 @@ def initialize(markup = nil) super nil, markup @markup.add_regexp_handling RDoc::CrossReference::CROSSREF_REGEXP, :CROSSREF - @markup.add_regexp_handling(/(((\{.*?\})|\b\S+?)\[\S+?\])/, :TIDYLINK) - add_tag :BOLD, '', '' - add_tag :TT, '', '' - add_tag :EM, '', '' + @res = [] + end + + def handle_PLAIN_TEXT(text) + @res << text + end + + def handle_REGEXP_HANDLING_TEXT(text) + @res << text + end + + def handle_TT(text) + @res << text + end + def extract_plaintext(text) @res = [] + handle_inline(text) + @res.join end ## # Converts +text+ to an HTML-safe label using GitHub-style anchor formatting. def convert(text) - label = convert_flow @am.flow text + label = extract_plaintext(text) RDoc::Text.to_anchor(label) end @@ -41,7 +54,7 @@ def convert(text) # Used for generating backward-compatible anchor aliases. def convert_legacy(text) - label = convert_flow @am.flow text + label = extract_plaintext(text) CGI.escape(label).gsub('%', '-').sub(/^-/, '') end @@ -50,23 +63,10 @@ def convert_legacy(text) # Converts the CROSSREF +target+ to plain text, removing the suppression # marker, if any - def handle_regexp_CROSSREF(target) - text = target.text - + def handle_regexp_CROSSREF(text) text.sub(/^\\/, '') end - ## - # Converts the TIDYLINK +target+ to just the text part - - def handle_regexp_TIDYLINK(target) - text = target.text - - return text unless text =~ /\{(.*?)\}\[(.*?)\]/ or text =~ /(\S+)\[(.*?)\]/ - - $1 - end - alias accept_blank_line ignore alias accept_block_quote ignore alias accept_heading ignore @@ -79,7 +79,6 @@ def handle_regexp_TIDYLINK(target) alias accept_rule ignore alias accept_verbatim ignore alias end_accepting ignore - alias handle_regexp_HARD_BREAK ignore alias start_accepting ignore end diff --git a/lib/rdoc/markup/to_markdown.rb b/lib/rdoc/markup/to_markdown.rb index ff3d1e6da8..4c19e460c4 100644 --- a/lib/rdoc/markup/to_markdown.rb +++ b/lib/rdoc/markup/to_markdown.rb @@ -20,27 +20,10 @@ def initialize(markup = nil) @headings[6] = ['###### ', ''] add_regexp_handling_RDOCLINK - add_regexp_handling_TIDYLINK @hard_break = " \n" end - ## - # Maps attributes to HTML sequences - - def init_tags - add_tag :BOLD, '**', '**' - add_tag :EM, '*', '*' - add_tag :TT, '`', '`' - end - - ## - # Adds a newline to the output - - def handle_regexp_HARD_BREAK(target) - " \n" - end - ## # Finishes consumption of `list` @@ -94,6 +77,65 @@ def accept_list_item_start(list_item) end end + def add_tag(tag, simple_tag, content) + if content.match?(/\A[\w\s]+\z/) + emit_inline("#{simple_tag}#{content}#{simple_tag}") + else + emit_inline("<#{tag}>#{content}") + end + end + + def handle_tag(nodes, simple_tag, tag) + if nodes.size == 1 && String === nodes[0] + content = apply_regexp_handling(nodes[0]).map do |text, converted| + converted ? text : convert_string(text) + end.join + add_tag(tag, simple_tag, content) + else + emit_inline("<#{tag}>") + traverse_inline_nodes(nodes) + emit_inline("") + end + end + + def handle_TIDYLINK(label_part, url) + if url =~ /^rdoc-label:foot/ then + emit_inline(handle_rdoc_link(url)) + else + emit_inline('[') + traverse_inline_nodes(label_part) + emit_inline("](#{url})") + end + end + + def handle_BOLD(nodes) + handle_tag(nodes, '**', 'strong') + end + + def handle_EM(nodes) + handle_tag(nodes, '*', 'em') + end + + def handle_BOLD_WORD(word) + add_tag('strong', '**', convert_string(word)) + end + + def handle_EM_WORD(word) + add_tag('em', '*', convert_string(word)) + end + + def handle_TT(text) + add_tag('code', '`', convert_string(text)) + end + + def handle_STRIKE(nodes) + handle_tag(nodes, '~~', 's') + end + + def handle_HARD_BREAK + emit_inline(" \n") + end + ## # Prepares the visitor for consuming `list` @@ -163,29 +205,11 @@ def handle_rdoc_link(url) end end - ## - # Converts the RDoc markup tidylink into a Markdown.style link. - - def handle_regexp_TIDYLINK(target) - text = target.text - - return text unless text =~ /\{(.*?)\}\[(.*?)\]/ or text =~ /(\S+)\[(.*?)\]/ - - label = $1 - url = $2 - - if url =~ /^rdoc-label:foot/ then - handle_rdoc_link url - else - gen_url url, label - end - end - ## # Converts the rdoc-...: links into a Markdown.style links. - def handle_regexp_RDOCLINK(target) - handle_rdoc_link target.text + def handle_regexp_RDOCLINK(text) + handle_rdoc_link text end end diff --git a/lib/rdoc/markup/to_rdoc.rb b/lib/rdoc/markup/to_rdoc.rb index 4185e8bea9..5eab835980 100644 --- a/lib/rdoc/markup/to_rdoc.rb +++ b/lib/rdoc/markup/to_rdoc.rb @@ -57,21 +57,11 @@ def initialize(markup = nil) @markup.add_regexp_handling(/\\\S/, :SUPPRESSED_CROSSREF) @width = 78 - init_tags @headings = DEFAULT_HEADINGS.dup @hard_break = "\n" end - ## - # Maps attributes to HTML sequences - - def init_tags - add_tag :BOLD, "", "" - add_tag :TT, "", "" - add_tag :EM, "", "" - end - ## # Adds +blank_line+ to the output @@ -282,12 +272,90 @@ def calculate_text_width(text) text.size end + def handle_PLAIN_TEXT(text) + add_text(text) + end + + def handle_REGEXP_HANDLING_TEXT(text) + add_text(text) + end + + def handle_BOLD(target) + on(:BOLD) + super + off(:BOLD) + end + + def handle_EM(target) + on(:EM) + super + off(:EM) + end + + def handle_BOLD_WORD(word) + on(:BOLD) + super + off(:BOLD) + end + + def handle_EM_WORD(word) + on(:EM) + super + off(:EM) + end + + def handle_TT(code) + on(:TT) + super + off(:TT) + end + + def handle_STRIKE(target) + on(:STRIKE) + super + off(:STRIKE) + end + + def handle_HARD_BREAK + add_text("\n") + end + + def handle_TIDYLINK(label_part, url) + super + add_text("( #{url} )") + end + + def handle_inline(text, initial_attributes = []) + @attributes = Hash.new(0) + initial_attributes.each { |attr| on(attr) } + out = @inline_output = +'' + super(text) + @inline_output = nil + out + end + + def on(attr) + @attributes[attr] += 1 + end + + def off(attr) + @attributes[attr] -= 1 + @attributes.delete(attr) if @attributes[attr] == 0 + end + + def add_text(text) + emit_inline(text) + end + + def emit_inline(text) + @inline_output << text + end + ## - # Applies attribute-specific markup to +text+ using RDoc::AttributeManager + # Applies attribute-specific markup to +text+ using RDoc::Markup::InlineParser def attributes(text) - flow = @am.flow text.dup - convert_flow flow + handle_inline(text) end ## @@ -300,17 +368,8 @@ def end_accepting ## # Removes preceding \\ from the suppressed crossref +target+ - def handle_regexp_SUPPRESSED_CROSSREF(target) - text = target.text - text = text.sub('\\', '') unless in_tt? - text - end - - ## - # Adds a newline to the output - - def handle_regexp_HARD_BREAK(target) - "\n" + def handle_regexp_SUPPRESSED_CROSSREF(text) + text.sub('\\', '') end ## diff --git a/lib/rdoc/markup/to_test.rb b/lib/rdoc/markup/to_test.rb index 3cf20f384b..f2b7febe95 100644 --- a/lib/rdoc/markup/to_test.rb +++ b/lib/rdoc/markup/to_test.rb @@ -18,8 +18,16 @@ def end_accepting @res end + def handle_PLAIN_TEXT(text) + @res << text + end + + def handle_REGEXP_HANDLING_TEXT(text) + @res << text + end + def accept_paragraph(paragraph) - @res << convert_flow(@am.flow(paragraph.text)) + handle_inline(paragraph.text) end def accept_raw(raw) diff --git a/lib/rdoc/markup/to_tt_only.rb b/lib/rdoc/markup/to_tt_only.rb index 7b59473aaf..19e3622f75 100644 --- a/lib/rdoc/markup/to_tt_only.rb +++ b/lib/rdoc/markup/to_tt_only.rb @@ -20,8 +20,6 @@ class RDoc::Markup::ToTtOnly < RDoc::Markup::Formatter def initialize(markup = nil) super nil, markup - - add_tag :TT, nil, nil end ## @@ -82,22 +80,18 @@ def do_nothing(markup_item) # Extracts tt sections from +text+ def tt_sections(text) - flow = @am.flow text.dup - - flow.each do |item| - case item - when String then - @res << item if in_tt? - when RDoc::Markup::AttrChanger then - off_tags res, item - on_tags res, item - when RDoc::Markup::RegexpHandling then - @res << convert_regexp_handling(item) if in_tt? # TODO can this happen? + parsed = RDoc::Markup::InlineParser.new(text).parse + traverse = -> node { + next if String === node + if node[:type] == :TT + res << nil + res << node[:children][0] || '' + res << nil else - raise "Unknown flow element: #{item.inspect}" + node[:children].each(&traverse) end - end - + } + parsed.each(&traverse) res end diff --git a/lib/rdoc/text.rb b/lib/rdoc/text.rb index 7f715ae861..94c84037c8 100644 --- a/lib/rdoc/text.rb +++ b/lib/rdoc/text.rb @@ -193,11 +193,15 @@ def strip_stars(text) text.gsub(/^\s+$/, empty) end + def to_html(text) + to_html_characters(text) + end + ## # Converts ampersand, dashes, ellipsis, quotes, copyright and registered # trademark symbols in +text+ to properly encoded characters. - def to_html(text) + def to_html_characters(text) html = (''.encode text.encoding).dup encoded = RDoc::Text::TO_HTML_CHARACTERS[text.encoding] @@ -210,15 +214,12 @@ def to_html(text) until s.eos? do case when s.scan(/<(tt|code)>.*?<\/\1>/) then # skip contents of tt - html << s.matched.gsub('\\\\', '\\') + html << s.matched when s.scan(/<(tt|code)>.*?/) then warn "mismatched <#{s[1]}> tag" # TODO signal file/line html << s.matched when s.scan(/<[^>]+\/?s*>/) then # skip HTML tags html << s.matched - when s.scan(/\\(\S)/) then # unhandled suppressed crossref - html << s[1] - after_word = nil when s.scan(/\.\.\.(\.?)/) then html << s[1] << encoded[:ellipsis] after_word = nil diff --git a/test/rdoc/markup/attribute_manager_test.rb b/test/rdoc/markup/attribute_manager_test.rb deleted file mode 100644 index 903966b8e2..0000000000 --- a/test/rdoc/markup/attribute_manager_test.rb +++ /dev/null @@ -1,474 +0,0 @@ -# frozen_string_literal: true -require_relative '../helper' - -class RDocMarkupAttributeManagerTest < RDoc::TestCase - - def setup - super - - @am = RDoc::Markup::AttributeManager.new - - @bold_on = @am.changed_attribute_by_name([], [:BOLD]) - @bold_off = @am.changed_attribute_by_name([:BOLD], []) - - @tt_on = @am.changed_attribute_by_name([], [:TT]) - @tt_off = @am.changed_attribute_by_name([:TT], []) - - @em_on = @am.changed_attribute_by_name([], [:EM]) - @em_off = @am.changed_attribute_by_name([:EM], []) - - @strike_on = @am.changed_attribute_by_name([], [:STRIKE]) - @strike_off = @am.changed_attribute_by_name([:STRIKE], []) - - @bold_em_on = @am.changed_attribute_by_name([], [:BOLD] | [:EM]) - @bold_em_off = @am.changed_attribute_by_name([:BOLD] | [:EM], []) - - @em_then_bold = @am.changed_attribute_by_name([:EM], [:EM] | [:BOLD]) - - @em_to_bold = @am.changed_attribute_by_name([:EM], [:BOLD]) - - @am.add_word_pair("{", "}", :WOMBAT) - @wombat_on = @am.changed_attribute_by_name([], [:WOMBAT]) - @wombat_off = @am.changed_attribute_by_name([:WOMBAT], []) - - @klass = RDoc::Markup::AttributeManager - @formatter = RDoc::Markup::Formatter.new @rdoc.options - @formatter.add_tag :BOLD, '', '' - @formatter.add_tag :EM, '', '' - @formatter.add_tag :TT, '', '' - end - - def crossref(text) - crossref_bitmap = @am.attributes.bitmap_for(:_REGEXP_HANDLING_) | - @am.attributes.bitmap_for(:CROSSREF) - - [ @am.changed_attribute_by_name([], [:CROSSREF, :_REGEXP_HANDLING_]), - RDoc::Markup::RegexpHandling.new(crossref_bitmap, text), - @am.changed_attribute_by_name([:CROSSREF, :_REGEXP_HANDLING_], []) - ] - end - - def test_adding - assert_equal(["cat ", @wombat_on, "and", @wombat_off, " dog" ], - @am.flow("cat {and} dog")) - #assert_equal(["cat {and} dog" ], @am.flow("cat \\{and} dog")) - end - - def test_add_html_tag - @am.add_html("Test", :TEST) - tags = @am.html_tags - assert_equal(8, tags.size) - assert(tags.has_key?("test")) - end - - def test_add_regexp_handling - @am.add_regexp_handling "WikiWord", :WIKIWORD - regexp_handlings = @am.regexp_handlings - - assert_equal 1, regexp_handlings.size - assert regexp_handlings.assoc "WikiWord" - end - - def test_add_word_pair - @am.add_word_pair '%', '&', 'percent and' - - assert @am.word_pair_map.include?(/(%)(\S+)(&)/) - assert @am.protectable.include?('%') - assert !@am.protectable.include?('&') - end - - def test_add_word_pair_angle - e = assert_raise ArgumentError do - @am.add_word_pair '<', '>', 'angles' - end - - assert_equal "Word flags may not start with '<'", e.message - end - - def test_add_word_pair_invalid - assert_raise ArgumentError do - @am.add_word_pair("<", "<", :TEST) - end - end - - def test_add_word_pair_map - @am.add_word_pair("x", "y", :TEST) - - word_pair_map = @am.word_pair_map - - assert_includes word_pair_map.keys.map { |r| r.source }, "(x)(\\S+)(y)" - end - - def test_add_word_pair_matching - @am.add_word_pair '^', '^', 'caret' - - assert @am.matching_word_pairs.include?('^') - assert @am.protectable.include?('^') - end - - def test_basic - assert_equal(["cat"], @am.flow("cat")) - - assert_equal(["cat ", @bold_on, "and", @bold_off, " dog"], - @am.flow("cat *and* dog")) - - assert_equal(["cat ", @bold_on, "AND", @bold_off, " dog"], - @am.flow("cat *AND* dog")) - - assert_equal(["cat ", @em_on, "And", @em_off, " dog"], - @am.flow("cat _And_ dog")) - - assert_equal(["cat *and dog*"], @am.flow("cat *and dog*")) - - assert_equal(["*cat and* dog"], @am.flow("*cat and* dog")) - - assert_equal(["cat *and ", @bold_on, "dog", @bold_off], - @am.flow("cat *and *dog*")) - - assert_equal(["cat ", @em_on, "and", @em_off, " dog"], - @am.flow("cat _and_ dog")) - - assert_equal(["cat_and_dog"], - @am.flow("cat_and_dog")) - - assert_equal(["cat ", @tt_on, "and", @tt_off, " dog"], - @am.flow("cat +and+ dog")) - - assert_equal(["cat ", @tt_on, "X::Y", @tt_off, " dog"], - @am.flow("cat +X::Y+ dog")) - - assert_equal(["cat ", @bold_on, "a_b_c", @bold_off, " dog"], - @am.flow("cat *a_b_c* dog")) - - assert_equal(["cat __ dog"], - @am.flow("cat __ dog")) - - assert_equal(["cat ", @em_on, "_", @em_off, " dog"], - @am.flow("cat ___ dog")) - - assert_equal(["cat and ", @em_on, "5", @em_off, " dogs"], - @am.flow("cat and _5_ dogs")) - - assert_equal([@tt_on, "__id__", @tt_off], @am.flow("+__id__+")) - end - - def test_bold - assert_equal [@bold_on, 'bold', @bold_off], - @am.flow("*bold*") - - assert_equal [@bold_on, 'Bold:', @bold_off], - @am.flow("*Bold:*") - - assert_equal [@bold_on, '\\bold', @bold_off], - @am.flow("*\\bold*") - end - - def test_bold_html_escaped - assert_equal ['cat dog'], @am.flow('cat \dog') - end - - def test_strike_html_escaped - assert_equal ['cat dog'], @am.flow('cat \dog') - assert_equal ['cat dog'], @am.flow('cat \dog') - end - - def test_html_like_strike - assert_equal ["cat ", @strike_on, "dog", @strike_off], - @am.flow("cat dog") - end - - def test_html_like_strike_del - assert_equal ["cat ", @strike_on, "dog", @strike_off], - @am.flow("cat dog") - end - - def test_combined - assert_equal(["cat ", @em_on, "and", @em_off, " ", @bold_on, "dog", @bold_off], - @am.flow("cat _and_ *dog*")) - - assert_equal(["cat ", @em_on, "a__nd", @em_off, " ", @bold_on, "dog", @bold_off], - @am.flow("cat _a__nd_ *dog*")) - end - - def test_convert_attrs - str = '+foo+'.dup - attrs = RDoc::Markup::AttrSpan.new str.length, @am.exclusive_bitmap - - @am.convert_attrs str, attrs, true - @am.convert_attrs str, attrs - - assert_equal "\000foo\000", str - - str = '+:foo:+'.dup - attrs = RDoc::Markup::AttrSpan.new str.length, @am.exclusive_bitmap - - @am.convert_attrs str, attrs, true - @am.convert_attrs str, attrs - - assert_equal "\000:foo:\000", str - - str = '+x-y+'.dup - attrs = RDoc::Markup::AttrSpan.new str.length, @am.exclusive_bitmap - - @am.convert_attrs str, attrs, true - @am.convert_attrs str, attrs - - assert_equal "\000x-y\000", str - end - - def test_convert_attrs_ignores_code - assert_equal 'foo __send__ bar', output('foo __send__ bar') - end - - def test_convert_attrs_ignores_bold_inside_code - assert_equal 'foo *bold* bar', output('foo *bold* bar') - end - - def test_convert_attrs_ignores_em_inside_code - assert_equal 'foo _em_ bar', output('foo _em_ bar') - end - - def test_convert_attrs_ignores_tt_inside_code - assert_equal 'foo +tt+ bar', output('foo +tt+ bar') - end - - def test_convert_attrs_ignores_bold_inside_tt - assert_equal 'foo *bold* bar', output('foo *bold* bar') - end - - def test_convert_attrs_ignores_em_inside_tt - assert_equal 'foo _em_ bar', output('foo _em_ bar') - end - - def test_convert_attrs_ignores_tt_inside_tt - assert_equal 'foo +tt+ bar', output('foo +tt+ bar') - end - - def test_backtick_basic - assert_equal(["cat ", @tt_on, "and", @tt_off, " dog"], - @am.flow("cat `and` dog")) - - assert_equal(["cat ", @tt_on, "X::Y", @tt_off, " dog"], - @am.flow("cat `X::Y` dog")) - end - - def test_backtick_output - assert_equal 'cat and dog', output('cat `and` dog') - assert_equal 'cat X::Y dog', output('cat `X::Y` dog') - end - - def test_convert_attrs_ignores_backtick_inside_code - assert_equal 'foo `text` bar', output('foo `text` bar') - end - - def test_convert_attrs_ignores_backtick_inside_tt - assert_equal 'foo `text` bar', output('foo `text` bar') - end - - def test_backtick_escaped - assert_equal ['`text`'], @am.flow('\`text`') - end - - def test_convert_attrs_ignores_del_inside_code - assert_equal 'foo strike bar', output('foo strike bar') - end - - def test_convert_attrs_ignores_del_inside_tt - assert_equal 'foo strike bar', output('foo strike bar') - end - - def test_convert_attrs_ignores_s_inside_code - assert_equal 'foo strike bar', output('foo strike bar') - end - - def test_convert_attrs_ignores_tt - assert_equal 'foo __send__ bar', output('foo __send__ bar') - end - - def test_convert_attrs_preserves_double - assert_equal 'foo.__send__ :bar', output('foo.__send__ :bar') - assert_equal 'use __FILE__ to', output('use __FILE__ to') - end - - def test_convert_attrs_does_not_ignore_after_tt - assert_equal 'the IF:key directive', output('the IF:_key_ directive') - end - - def test_escapes - assert_equal 'text', output('text') - assert_equal 'text', output('\\text') - assert_equal '', output('\\') - assert_equal '', output('\\') - assert_equal '\\', output('\\\\') - assert_equal 'text', output('*text*') - assert_equal '*text*', output('\\*text*') - assert_equal '\\', output('\\') - assert_equal '\\text', output('\\text') - assert_equal '\\\\text', output('\\\\text') - assert_equal 'text \\ text', output('text \\ text') - - assert_equal 'and \\s matches space', - output('and \\s matches space') - assert_equal 'use text for code', - output('use \\text for code') - assert_equal 'use text for code', - output('use \\text\\ for code') - assert_equal 'use text for code', - output('use \\\\text for code') - assert_equal 'use text for code', - output('use \\text for code') - assert_equal 'use +text+ for code', - output('use \\+text+ for code') - assert_equal 'use text for code', - output('use \\+text+ for code') - assert_equal 'illegal not changed', - output('illegal not changed') - assert_equal 'unhandled

tag

unchanged', - output('unhandled

tag

unchanged') - end - - def test_exclude_tag - assert_equal 'aaa[:symbol]', output('+aaa+[:symbol]') - assert_equal 'aaa[:symbol]', output('+aaa[:symbol]+') - assert_equal 'aaa[:symbol]', output('aaa[:symbol]') - assert_equal 'index', output('index') - end - - def test_exclude_tag_flow - assert_equal [@tt_on, "aaa", @tt_off, "[:symbol]"], - @am.flow("+aaa+[:symbol]") - assert_equal [@tt_on, "aaa[:symbol]", @tt_off], - @am.flow("+aaa[:symbol]+") - assert_equal ["aaa[:symbol]"], - @am.flow("aaa[:symbol]") - end - - def test_html_like_em_bold - assert_equal ["cat ", @em_on, "and ", @em_to_bold, "dog", @bold_off], - @am.flow("cat and dog") - end - - def test_html_like_em_bold_SGML - assert_equal ["cat ", @em_on, "and ", @em_to_bold, "dog", @bold_off], - @am.flow("cat and dog") - end - - def test_html_like_em_bold_nested_1 - assert_equal(["cat ", @bold_em_on, "and", @bold_em_off, " dog"], - @am.flow("cat and dog")) - end - - def test_html_like_em_bold_nested_2 - assert_equal ["cat ", @em_on, "and ", @em_then_bold, "dog", @bold_em_off], - @am.flow("cat and dog") - end - - def test_html_like_em_bold_nested_mixed_case - assert_equal ["cat ", @em_on, "and ", @em_then_bold, "dog", @bold_em_off], - @am.flow("cat and dog") - end - - def test_html_like_em_bold_mixed_case - assert_equal ["cat ", @em_on, "and", @em_off, " ", @bold_on, "dog", @bold_off], - @am.flow("cat and dog") - end - - def test_html_like_teletype - assert_equal ["cat ", @tt_on, "dog", @tt_off], - @am.flow("cat dog") - end - - def test_html_like_teletype_em_bold_SGML - assert_equal [@tt_on, "cat", @tt_off, " ", @em_on, "and ", @em_to_bold, "dog", @bold_off], - @am.flow("cat and dog") - end - - def test_initial_html - html_tags = @am.html_tags - assert html_tags.is_a?(Hash) - assert_equal(7, html_tags.size) - end - - def test_initial_word_pairs - word_pairs = @am.matching_word_pairs - assert word_pairs.is_a?(Hash) - assert_equal(4, word_pairs.size) - end - - def test_mask_protected_sequence - def @am.str() @str end - def @am.str=(str) @str = str end - - @am.str = 'foo'.dup - @am.mask_protected_sequences - - assert_equal "foo", @am.str - - @am.str = 'foo\\'.dup - @am.mask_protected_sequences - - assert_equal "foo<\x04/code>", @am.str, 'escaped close' - - @am.str = 'foo\\\\'.dup - @am.mask_protected_sequences - - assert_equal "foo\\", @am.str, 'escaped backslash' - end - - def test_protect - assert_equal(['cat \\ dog'], - @am.flow('cat \\ dog')) - - assert_equal(["cat dog"], - @am.flow("cat \\dog")) - - assert_equal(["cat ", @em_on, "and", @em_off, " dog"], - @am.flow("cat and \\dog")) - - assert_equal(["*word* or text"], - @am.flow("\\*word* or \\text")) - - assert_equal(["_cat_", @em_on, "dog", @em_off], - @am.flow("\\_cat_dog")) - end - - def test_lost_tag_for_the_second_time - str = "cat dog" - assert_equal(["cat ", @tt_on, "dog", @tt_off], - @am.flow(str)) - assert_equal(["cat ", @tt_on, "dog", @tt_off], - @am.flow(str)) - end - - def test_regexp_handling - @am.add_regexp_handling(RDoc::CrossReference::CROSSREF_REGEXP, :CROSSREF) - - # - # The apostrophes in "cats'" and "dogs'" suppress the flagging of these - # words as potential cross-references, which is necessary for the unit - # tests. Unfortunately, the markup engine right now does not actually - # check whether a cross-reference is valid before flagging it. - # - assert_equal(["cats'"], @am.flow("cats'")) - - assert_equal(["cats' ", crossref("#fred"), " dogs'"].flatten, - @am.flow("cats' #fred dogs'")) - - assert_equal([crossref("#fred"), " dogs'"].flatten, - @am.flow("#fred dogs'")) - - assert_equal(["cats' ", crossref("#fred")].flatten, @am.flow("cats' #fred")) - - assert_equal(["(", crossref("#fred"), ")"].flatten, @am.flow("(#fred)")) - end - - def test_tt_html - assert_equal [@tt_on, '"\n"', @tt_off], - @am.flow('"\n"') - end - - def output(str) - @formatter.convert_flow @am.flow str - end - -end diff --git a/test/rdoc/markup/attributes_test.rb b/test/rdoc/markup/attributes_test.rb deleted file mode 100644 index b46b7a0bf9..0000000000 --- a/test/rdoc/markup/attributes_test.rb +++ /dev/null @@ -1,39 +0,0 @@ -# frozen_string_literal: true -require_relative '../helper' - -class RDocMarkupAttributesTest < RDoc::TestCase - - def setup - super - - @as = RDoc::Markup::Attributes.new - end - - def test_bitmap_for - assert_equal 2, @as.bitmap_for('two') - assert_equal 2, @as.bitmap_for('two') - assert_equal 4, @as.bitmap_for('three') - end - - def test_as_string - @as.bitmap_for 'two' - @as.bitmap_for 'three' - - assert_equal 'none', @as.as_string(0) - assert_equal '_REGEXP_HANDLING_', @as.as_string(1) - assert_equal 'two', @as.as_string(2) - assert_equal '_REGEXP_HANDLING_,two', @as.as_string(3) - end - - def test_each_name_of - @as.bitmap_for 'two' - @as.bitmap_for 'three' - - assert_equal %w[], @as.each_name_of(0).to_a - assert_equal %w[], @as.each_name_of(1).to_a - assert_equal %w[two], @as.each_name_of(2).to_a - assert_equal %w[three], @as.each_name_of(4).to_a - assert_equal %w[two three], @as.each_name_of(6).to_a - end - -end diff --git a/test/rdoc/markup/formatter_test.rb b/test/rdoc/markup/formatter_test.rb index e17fa7725d..2ff7323622 100644 --- a/test/rdoc/markup/formatter_test.rb +++ b/test/rdoc/markup/formatter_test.rb @@ -7,20 +7,38 @@ class ToTest < RDoc::Markup::Formatter def initialize(markup) super nil, markup - - add_tag :TT, '', '' end def accept_paragraph(paragraph) @res += attributes(paragraph.text) end + def handle_PLAIN_TEXT(text) + @res << text + end + + def handle_REGEXP_HANDLING_TEXT(text) + @res << text + end + + def handle_TT(text) + @res << "#{text}" + end + + def handle_TIDYLINK(label_part, url) + @res << '{' + super + @res << '}[' + url + ']' + end + def attributes(text) - convert_flow @am.flow text.dup + @res = +"" + handle_inline(text) + @res end - def handle_regexp_CAPS(target) - "handled #{target.text}" + def handle_regexp_CAPS(text) + "handled #{text}" end def start_accepting @@ -39,14 +57,7 @@ def setup @markup = @RM.new @markup.add_regexp_handling(/[A-Z]+/, :CAPS) - @attribute_manager = @markup.attribute_manager - @attributes = @attribute_manager.attributes - @to = ToTest.new @markup - - @caps = @attributes.bitmap_for :CAPS - @regexp_handling = @attributes.bitmap_for :_REGEXP_HANDLING_ - @tt = @attributes.bitmap_for :TT end def test_class_gen_relative_url @@ -63,9 +74,7 @@ def gen(from, to) end def regexp_handling_names - @attribute_manager.regexp_handlings.map do |_, mask| - @attributes.as_string mask - end + @to.instance_variable_get(:@markup).regexp_handlings.map(&:last).map(&:to_s) end def test_add_regexp_handling_RDOCLINK @@ -73,43 +82,15 @@ def test_add_regexp_handling_RDOCLINK assert_includes regexp_handling_names, 'RDOCLINK' - def @to.handle_regexp_RDOCLINK(target) - "<#{target.text}>" - end - - document = doc(para('{foo}[rdoc-label:bar].')) - - formatted = document.accept @to - - assert_equal '{foo}[].', formatted - end - - def test_add_regexp_handling_TIDYLINK - @to.add_regexp_handling_TIDYLINK - - assert_includes regexp_handling_names, 'TIDYLINK' - - def @to.handle_regexp_TIDYLINK(target) - "<#{target.text}>" + def @to.handle_regexp_RDOCLINK(text) + "<#{text}>" end - document = doc(para('foo[rdoc-label:bar].')) - - formatted = document.accept @to - - assert_equal '.', formatted - - document = doc(para('{foo}[rdoc-label:bar].')) - - formatted = document.accept @to - - assert_equal '<{foo}[rdoc-label:bar]>.', formatted - - document = doc(para('{abc}: {foo}[rdoc-label:bar].')) + document = doc(para('{foo rdoc-label:bar baz}[url]')) formatted = document.accept @to - assert_equal '{abc}: <{foo}[rdoc-label:bar]>.', formatted + assert_equal '{foo baz}[url]', formatted end def test_parse_url diff --git a/test/rdoc/markup/inline_parser_test.rb b/test/rdoc/markup/inline_parser_test.rb new file mode 100644 index 0000000000..60f03bedd7 --- /dev/null +++ b/test/rdoc/markup/inline_parser_test.rb @@ -0,0 +1,269 @@ +# frozen_string_literal: true + +require_relative '../helper' +require 'rdoc/markup/inline_parser' + +class RDocMarkupInlineParserTest < RDoc::TestCase + def parse(text) + RDoc::Markup::InlineParser.new(text).parse + end + + def em_node(*children) + { type: :EM, children: children } + end + + def bold_node(*children) + { type: :BOLD, children: children } + end + + def bold_word(text) + { type: :BOLD_WORD, children: [text] } + end + + def em_word(text) + { type: :EM_WORD, children: [text] } + end + + def strike_node(*children) + { type: :STRIKE, children: children } + end + + def tt_node(*children) + { type: :TT, children: children } + end + + def tidylink_node(children, url) + { type: :TIDYLINK, children: children, url: url } + end + + def hard_break_node + { type: :HARD_BREAK, children: [] } + end + + def test_escape + # Escaping backslash are removed, other backslashes (suppressed crossref) remains + assert_equal(['\\', bold_node('\\Array'), bold_node('\\#to_s'), bold_node('\\::new')], parse('\\\\\\Array\\#to_s\\::new')) + assert_equal(['_a_ +a+ b \\n \\ABC'], parse('\\_a_ \\+a+ \\b\\ \\n \\ABC')) + assert_equal([bold_node('')], parse('\\')) + assert_equal([em_node('')], parse('\\')) + assert_equal(['a\\'], parse('a\\')) + assert_equal([tidylink_node([''], 'url')], parse('{\\\\}[url]')) + # Unescape \\ and \< in code blocks + assert_equal([tt_node('p(%(\\)+"\\a\\n")')], parse('p(%(\\\\\\\\)+"\\a\\n")')) + end + + def test_bold + assert_equal([bold_node()], parse('')) + assert_equal(['*a b*'], parse('*a b*')) + assert_equal(['x*a* *b*x'], parse('x*a* *b*x')) + assert_equal([bold_word('bold')], parse('*bold*')) + assert_equal([bold_word('bold')], parse('**bold**')) + assert_equal([bold_node('bo ld')], parse('bo ld')) + assert_equal( + ['a ', bold_word('A'), ' b ', bold_word('B'), ' c ', bold_node('C C'), ' d'], + parse('a *A* b **B** c C C d') + ) + assert_equal([bold_node('a', em_node('b'), bold_node('c'), 'd')], parse('abcd')) + end + + def test_em + assert_equal([em_node()], parse('')) + assert_equal(['_a b_'], parse('_a b_')) + assert_equal(['x_a_ _b_x'], parse('x_a_ _b_x')) + assert_equal([em_word('em')], parse('_em_')) + assert_equal([em_word('F1LE')], parse('__F1LE__')) + assert_equal(['_foo_bar_baz'], parse('_foo_bar_baz')) + + # _ inside _em_ + assert_equal([em_word('foo_bar')], parse('_foo_bar_')) + + # non-alphanumeric after _ + assert_equal([em_word('host'), ':', em_word('port')], parse('_host_:_port_')) + + # Special exception + assert_equal(['__send__'], parse('__send__')) + assert_equal(['__FILE__'], parse('__FILE__')) + + assert_equal([em_node('e m')], parse('e m')) + assert_equal([em_node('e m')], parse('e m')) + assert_equal([em_node('a', bold_node('b'), em_node('c'), 'd')], parse('abcd')) + end + + def test_method_like_words + assert_equal([bold_word('::Foo.bar-baz')], parse('*::Foo.bar-baz*')) + assert_equal([bold_word('#foo_bar=')], parse('*#foo_bar=*')) + assert_equal([bold_word('#foo_bar!')], parse('*#foo_bar!*')) + assert_equal([bold_word('#foo_bar?')], parse('*#foo_bar?*')) + + assert_equal([em_word('::Foo.bar-baz')], parse('_::Foo.bar-baz_')) + assert_equal([em_word('#foo_bar=')], parse('_#foo_bar=_')) + assert_equal([em_word('#foo_bar!')], parse('_#foo_bar!_')) + assert_equal([em_word('#foo_bar?')], parse('_#foo_bar?_')) + + assert_equal([em_word('::Foo.bar-baz')], parse('__::Foo.bar-baz__')) + assert_equal([em_word('#foo_bar=')], parse('__#foo_bar=__')) + assert_equal([em_word('#foo_bar!')], parse('__#foo_bar!__')) + assert_equal([em_word('#foo_bar?')], parse('__#foo_bar?__')) + + assert_equal([tt_node('::Foo.bar-baz')], parse('+::Foo.bar-baz+')) + assert_equal([tt_node('#foo_bar=')], parse('+#foo_bar=+')) + assert_equal([tt_node('#foo_bar!')], parse('+#foo_bar!+')) + assert_equal([tt_node('#foo_bar?')], parse('+#foo_bar?+')) + end + + def test_tt + assert_equal([tt_node()], parse('')) + assert_equal(['`a b`'], parse('`a b`')) + assert_equal(['x`a` `b`x'], parse('x`a` `b`x')) + assert_equal([tt_node('code')], parse('`code`')) + assert_equal([tt_node('code')], parse('+code+')) + assert_equal([tt_node('code')], parse('++code++')) + assert_equal([tt_node('code')], parse('``code``')) + assert_equal([tt_node('code(1 + 2)')], parse('code(1 + 2)')) + assert_equal([tt_node('code(1 + 2)')], parse('code(1 + 2)')) + + # Detect closing tag with escaping + assert_equal([tt_node('a
b\\')], parse('a\\b\\\\')) + assert_equal([tt_node('ab\\')], parse('a\\b\\\\
')) + + # Close with nearest non-escaped closing tag + assert_equal([tt_node('ab'), 'cd'], parse('a\\bcd')) + assert_equal([tt_node('ab'), 'cd'], parse('a\\bcd')) + end + + def test_strike + assert_equal([strike_node()], parse('')) + assert_equal([strike_node('strike ')], parse('strike ')) + assert_equal([strike_node('strike ')], parse('strike ')) + assert_equal([strike_node('a', bold_node('b'), strike_node('c'), 'd')], parse('abcd')) + end + + def test_hard_break + assert_equal([hard_break_node], parse('
')) + assert_equal(['a', hard_break_node, 'b'], parse('a
b')) + assert_equal([hard_break_node, hard_break_node], parse('

')) + assert_equal([em_node('a', hard_break_node, 'b'), hard_break_node, 'c'], parse('a
b

c')) + end + + def test_simplified_tidylink + # Empty url is not allowed + assert_equal(['label[]'], parse('label[]')) + assert_equal([tidylink_node(['label'], 'url')], parse('label[url]')) + assert_equal([tidylink_node(['label'], 'http://example.com/?q=+1+')], parse('label[http://example.com/?q=+1+]')) + end + + def test_tidylink + # Empty label is allowed, empty url is not allowed + assert_equal([tidylink_node([], 'url')], parse('{}[url]')) + assert_equal(['{label}[]'], parse('{label}[]')) + + assert_equal( + [tidylink_node(['label'], 'http://example.com/')], + parse('{label}[http://example.com/]') + ) + assert_equal( + [tidylink_node(['label'], 'brac[ke]]t\\'), '_esc[]aped'], + parse('{label}[brac\[ke\]\]t\\\\]_esc\[\]aped') + ) + assert_equal( + ['See ', tidylink_node(['this link'], 'http://example.com/'), ' for more info.'], + parse('See {this link}[http://example.com/] for more info.') + ) + assert_equal( + [tidylink_node(['Label with ', bold_word('bold'), ' text'], 'http://example.com/')], + parse('{Label with *bold* text}[http://example.com/]') + ) + assert_equal( + [bold_node('bold', tidylink_node(['link'], 'http://example.com/'))], + parse('bold{link}[http://example.com/]') + ) + assert_equal( + [tidylink_node(['link'], 'http://example.com/?q=+1+')], + parse('{link}[http://example.com/?q=+1+]') + ) + assert_equal( + [tidylink_node([tt_node('}[]{')], 'url')], + parse('{}[]{}[url]') + ) + # Non-tidylink braces and brackets inside tidylink label are allowed + assert_equal( + [tidylink_node(['[a]{b}{c}d'], 'url')], + parse('{[a]{b}{c}d}[url]') + ) + end + + def test_invalid_nested_tidylink + # Simplified tidylink invalidates open tidylinks + assert_equal( + [bold_node('{a ', tidylink_node(['b'], 'url'), '}[', bold_word('c'), ']')], + parse('{a b[url]}[*c*]') + ) + # Normal tidylink invalidates open tidylinks + assert_equal( + [bold_node('{a ', tidylink_node(['b'], 'url'), '}[', bold_word('c'), ']')], + parse('{a {b}[url]}[*c*]') + ) + # Tidylink invalidates all open tidylinks + assert_equal( + [bold_node('{label', em_node('{label{label', tidylink_node(['label'], 'url'), '}[a]}[b]'), '}[c]')], + parse('{label{label{label{label}[url]}[a]}[b]}[c]') + ) + # Valid tidylink inside invalidated tidylink + assert_equal( + [bold_node('{', tidylink_node(['label1'], 'url1'), ' ', tidylink_node(['label2'], 'url2'), '}[', bold_word('b'), ']')], + parse('{{label1}[url1] {label2}[url2]}[*b*]') + ) + # Invalidated tidylink accepts tag break through brackets + assert_equal( + ['{', tidylink_node(['label'], 'url'), '}[', bold_node(']')], + parse('{{label}[url]}[]') + ) + end + + def test_unclosed_error_case + # Treat as normal text + assert_equal(['*unclosed bold'], parse('*unclosed bold')) + assert_equal(['_unclosed em'], parse('_unclosed em')) + assert_equal(['`unclosed tt'], parse('`unclosed tt')) + assert_equal(['unclosed tag'], parse('unclosed tag')) + assert_equal(['unclosed code'], parse('unclosed code')) + assert_equal(['{unclosed tidylink'], parse('{unclosed tidylink')) + assert_equal(['{label}[url'], parse('{label}[url')) + assert_equal(['label[url'], parse('label[url')) + end + + def test_unknown_tag_as_normal_text + # Even if opening and closing tags are present, treat as normal text + assert_equal(['', strike_node('')], parse('')) + end + + def test_invalid_closing_error_case + # No opening tag, then treat it as normal text + assert_equal([bold_node('')], parse('')) + + # (strike closing) shouldn't close (also strike) + assert_equal([bold_node(strike_node(''))], parse('')) + + # Closing tag will close the last opened tag. Tag that has no corresponding open/close pair remains as normal text + assert_equal([em_node(''), ''], parse('')) + + # Tag that has corresponding open/close pair will be parsed normally + assert_equal([em_node('a', bold_node('b'), 'cd')], parse('abcd')) + + # Unclosed code tag content will be parsed as normal rdoc + assert_equal([em_node('', bold_node('b'))], parse('b')) + + # Tidylink closing brace will close the last opened tidylink + assert_equal([tidylink_node(['', bold_node('b')], 'url'), ''], parse('{b}[url]')) + + # Tag closing will invalidate tidylink + assert_equal([em_node('{a', bold_node('b')), 'c}[url]'], parse('{abc}[url]')) + + # Unclosed tidylink url will parsed as normal rdoc + assert_equal(['label[http://example.com/?q=', bold_node(), tt_node('1')], parse('label[http://example.com/?q=+1+')) + assert_equal(['{label}[http://example.com/?q=', bold_node(), tt_node('1')], parse('{label}[http://example.com/?q=+1+')) + + # Closing brace invalidates unclosed tags + assert_equal(['{', bold_node('foo'), '}}[bar]'], parse('{foo}}[bar]')) + end +end diff --git a/test/rdoc/markup/markup_test.rb b/test/rdoc/markup/markup_test.rb index 9945d83ef1..979d0a8c29 100644 --- a/test/rdoc/markup/markup_test.rb +++ b/test/rdoc/markup/markup_test.rb @@ -41,26 +41,6 @@ def test_convert assert_equal expected, out end - def test_convert_custom_markup - str = <<-STR -{stricken} - STR - - m = RDoc::Markup.new - m.add_word_pair '{', '}', :STRIKE - - tt = RDoc::Markup::ToTest.new nil, m - tt.add_tag :STRIKE, 'STRIKE ', ' STRIKE' - - out = m.convert str, tt - - expected = [ - "STRIKE stricken STRIKE", - ] - - assert_equal expected, out - end - def test_convert_document doc = RDoc::Markup::Parser.parse <<-STR now is diff --git a/test/rdoc/markup/to_ansi_test.rb b/test/rdoc/markup/to_ansi_test.rb index 19556a0f67..36a4a8bc4a 100644 --- a/test/rdoc/markup/to_ansi_test.rb +++ b/test/rdoc/markup/to_ansi_test.rb @@ -377,4 +377,10 @@ def test_convert_list_note assert_equal expected, @to.convert(note_list) end + def test_ansi_on_off + assert_equal "\e[1mab\e[m", @to.attributes('ab') + assert_equal "\e[1;4ma\e[0;9mb\e[m", @to.attributes('ab') + assert_equal "\e[1;4ma\e[24;9mb\e[m", @to.attributes('ab') + assert_equal "\e[1;4ma\e[22mb\e[m", @to.attributes('ab') + end end diff --git a/test/rdoc/markup/to_html_crossref_test.rb b/test/rdoc/markup/to_html_crossref_test.rb index cf61460297..ce69aa50db 100644 --- a/test/rdoc/markup/to_html_crossref_test.rb +++ b/test/rdoc/markup/to_html_crossref_test.rb @@ -243,10 +243,27 @@ def test_handle_regexp_CROSSREF_show_hash_false def test_handle_regexp_CROSSREF_with_arg_looks_like_TIDYLINK result = @to.convert 'C1.m[:sym]' - assert_equal para("
C1.m[:sym]"), result, + assert_equal para("C1.m[:sym]"), result, 'C1.m[:sym]' end + def test_suppress_link_inside_tidylink_label + result = @to.convert '{rdoc-ref:C1.m http://example.com C1}[url]' + assert_equal para('rdoc-ref:C1.m http://example.com C1'), result + end + + def test_crossref_disabled_in_word_pair + result = @to.convert 'C1 *C1* _C1_ C1' + crossref = 'C1' + assert_equal para("#{crossref} C1 C1 #{crossref}"), result + end + + def test_suppressed_crossref + result = @to.convert 'C1 \C1 \CC1' + crossref = 'C1' + assert_equal para("#{crossref} C1 CC1"), result + end + def test_handle_regexp_HYPERLINK_rdoc readme = @store.add_file 'README.txt' readme.parser = RDoc::Parser::Simple @@ -266,31 +283,31 @@ def test_handle_regexp_HYPERLINK_rdoc assert_equal 'README.txt', link end - def test_handle_regexp_TIDYLINK_rdoc + def test_handle_TIDYLINK_rdoc readme = @store.add_file 'README.txt' readme.parser = RDoc::Parser::Simple @to = RDoc::Markup::ToHtmlCrossref.new @options, 'C2.html', @c2 - link = @to.handle_regexp_TIDYLINK tidy 'C2::C3' + link = @to.to_html tidy 'C2::C3' assert_equal 'tidy', link - link = @to.handle_regexp_TIDYLINK tidy 'C4' + link = @to.to_html tidy 'C4' assert_equal 'tidy', link - link = @to.handle_regexp_TIDYLINK tidy 'C1#m' + link = @to.to_html tidy 'C1#m' assert_equal 'tidy', link - link = @to.handle_regexp_TIDYLINK tidy 'README.txt' + link = @to.to_html tidy 'README.txt' assert_equal 'tidy', link end def test_handle_regexp_TIDYLINK_label - link = @to.handle_regexp_TIDYLINK tidy 'C1#m@foo' + link = @to.to_html tidy 'C1#m@foo' assert_equal "tidy", link, 'C1#m@foo' @@ -341,19 +358,15 @@ def para(text) end def REGEXP_HANDLING(text) - @to.handle_regexp_CROSSREF regexp_handling text + @to.handle_regexp_CROSSREF text end def hyper(reference) - RDoc::Markup::RegexpHandling.new 0, "rdoc-ref:#{reference}" - end - - def regexp_handling(text) - RDoc::Markup::RegexpHandling.new 0, text + "rdoc-ref:#{reference}" end def tidy(reference) - RDoc::Markup::RegexpHandling.new 0, "{tidy}[rdoc-ref:#{reference}]" + "{tidy}[rdoc-ref:#{reference}]" end end diff --git a/test/rdoc/markup/to_html_snippet_test.rb b/test/rdoc/markup/to_html_snippet_test.rb index a9bc00eef3..ce9118a18c 100644 --- a/test/rdoc/markup/to_html_snippet_test.rb +++ b/test/rdoc/markup/to_html_snippet_test.rb @@ -651,7 +651,7 @@ def test_convert_TIDYLINK_rdoc_label end def test_handle_regexp_HYPERLINK_link - target = RDoc::Markup::RegexpHandling.new 0, 'link:README.txt' + target = 'link:README.txt' link = @to.handle_regexp_HYPERLINK target @@ -675,24 +675,6 @@ def test_list_verbatim_2 assert_equal 17, @to.characters end - def test_on_tags - on = RDoc::Markup::AttrChanger.new 2, 0 - - @to.on_tags [], on - - assert_equal 2, @to.mask - end - - def test_off_tags - on = RDoc::Markup::AttrChanger.new 2, 0 - off = RDoc::Markup::AttrChanger.new 0, 2 - - @to.on_tags [], on - @to.off_tags [], off - - assert_equal 0, @to.mask - end - def test_to_html assert_equal "

--\n", util_format("--") assert_equal 2, @to.characters diff --git a/test/rdoc/markup/to_html_test.rb b/test/rdoc/markup/to_html_test.rb index 3d2b010c4b..459bcb140e 100644 --- a/test/rdoc/markup/to_html_test.rb +++ b/test/rdoc/markup/to_html_test.rb @@ -907,7 +907,7 @@ def test_gen_url_rb_file end def test_handle_regexp_HYPERLINK_link - target = RDoc::Markup::RegexpHandling.new 0, 'link:README.txt' + target = 'link:README.txt' link = @to.handle_regexp_HYPERLINK target @@ -915,7 +915,7 @@ def test_handle_regexp_HYPERLINK_link end def test_handle_regexp_HYPERLINK_irc - target = RDoc::Markup::RegexpHandling.new 0, 'irc://irc.freenode.net/#ruby-lang' + target = 'irc://irc.freenode.net/#ruby-lang' link = @to.handle_regexp_HYPERLINK target @@ -998,7 +998,7 @@ def test_parseable_eh end def test_to_html - assert_equal "\n

--

\n", util_format("--") + assert_equal "\n

--\\\\

\n", util_format("--\\\\\\\\") end def util_format(text) @@ -1040,6 +1040,11 @@ def test_accept_table assert_include(res[%r<]*>C1>], 'C1') end + def test_suppressed_crossref_and_backslashes + result = @to.convert('\\1 \\n \\Ruby \\::new \\foo_bar \\') + assert_equal "\n

\\1 \\n Ruby ::new foo_bar \\

\n", result + end + def test_gen_url_markdown_anchor assert_equal 'link', @to.gen_url('#hello-world', 'link') end @@ -1049,6 +1054,29 @@ def test_convert_tidy_link_markdown_anchor assert_equal "\n

link

\n", result end + def test_convert_hyperlink_disabled_inside_tidylink + result = @to.convert '{See http://example.com}[README.txt] http://example.com' + assert_equal "\n

See http://example.com example.com

\n", result + end + + def test_convert_rdoc_image_inside_tidylink + result = @to.convert '{See rdoc-image:image.png:text}[url] rdoc-image:image.jpg:text' + assert_equal "\n

See \"text\" \"text\"

\n", result + + # When `label =~ regexp_handling == 0`, label is handled specially in RDoc::Markup::ToHTML#apply_tidylink_label_special_regexp_handling + result = @to.convert '{rdoc-image:image.png:text}[url] rdoc-image:image.jpg:text' + assert_equal "\n

\"text\" \"text\"

\n", result + end + + def test_convert_rdoc_label_disabled_inside_tidylink + result = @to.convert '{See rdoc-label:label}[url] rdoc-label:label' + assert_equal "\n

See rdoc-label:label label

\n", result + + # When `label =~ regexp_handling == 0`, label is handled specially in RDoc::Markup::ToHTML#apply_tidylink_label_special_regexp_handling + result = @to.convert '{rdoc-label:label}[url] rdoc-label:label' + assert_equal "\n

rdoc-label:label label

\n", result + end + def assert_escaped(unexpected, code) result = @to.convert(code) assert_not_include result, unexpected diff --git a/test/rdoc/markup/to_rdoc_test.rb b/test/rdoc/markup/to_rdoc_test.rb index 1ee6e0b26a..ea7326ec6f 100644 --- a/test/rdoc/markup/to_rdoc_test.rb +++ b/test/rdoc/markup/to_rdoc_test.rb @@ -2,6 +2,35 @@ require_relative '../helper' class RDocMarkupToRDocTest < RDoc::Markup::TextFormatterTestCase + class ToBoldEmTT < RDoc::Markup::ToRdoc + def handle_BOLD(nodes) + emit_inline('') + super + emit_inline('') + end + + def handle_EM(nodes) + emit_inline('') + super + emit_inline('') + end + + def handle_BOLD_WORD(word) + emit_inline('') + super + emit_inline('') + end + + def handle_EM_WORD(word) + emit_inline('') + super + emit_inline('') + end + + def handle_TT(text) + emit_inline("#{text}") + end + end add_visitor_tests add_text_tests @@ -9,7 +38,7 @@ class RDocMarkupToRDocTest < RDoc::Markup::TextFormatterTestCase def setup super - @to = RDoc::Markup::ToRdoc.new + @to = ToBoldEmTT.new end def accept_blank_line diff --git a/test/rdoc/rdoc_text_test.rb b/test/rdoc/rdoc_text_test.rb index b691afa0ca..03c4167ac5 100644 --- a/test/rdoc/rdoc_text_test.rb +++ b/test/rdoc/rdoc_text_test.rb @@ -493,7 +493,8 @@ def test_to_html_apostrophe_entity end def test_to_html_backslash - assert_equal 'S', to_html('\\S') + # Don't handle unescaped crossref. It should be handled in RDoc::Markup::ToHtml, not in RDoc::Text + assert_equal '\\S', to_html('\\S') end def test_to_html_br @@ -562,8 +563,9 @@ def test_to_html_registered_trademark end def test_to_html_tt_tag + # tt tag content is already escaped assert_equal 'hi\'s', to_html('hi\'s') - assert_equal 'hi\\\'s', to_html('hi\\\\\'s') + assert_equal 'hi\\\\\'s', to_html('hi\\\\\'s') end def test_to_html_tt_tag_mismatch