Class | CodeRay::Scanners::HTML |
In: |
lib/coderay/scanners/html.rb
|
Parent: | Scanner |
ATTR_NAME | = | /[\w.:-]+/ |
ATTR_VALUE_UNQUOTED | = | ATTR_NAME |
TAG_END | = | /\/?>/ |
HEX | = | /[0-9a-fA-F]/ |
ENTITY | = | / & (?: \w+ | \# (?: \d+ | x#{HEX}+ ) ) ; /ox |
PLAIN_STRING_CONTENT | = | { "'" => /[^&'>\n]+/, '"' => /[^&">\n]+/, } |
# File lib/coderay/scanners/html.rb, line 45
#
# Scans the rest of the input and appends HTML tokens to +tokens+.
#
# The scanner is a small state machine. The states are:
#
# :initial::                outside of any tag (text, comments, doctype,
#                           processing instructions, entities, tag starts)
# :attribute::              inside an opening tag, expecting an attribute
#                           name or the end of the tag
# :attribute_equal::        directly after an attribute name
# :attribute_value::        directly after the <tt>=</tt> sign
# :attribute_value_string:: inside a quoted attribute value
#
# Each token is pushed as a <tt>[text, kind]</tt> pair. Quoted attribute
# values are additionally wrapped in <tt>[:open, :string]</tt> and
# <tt>[:close, :string]</tt> marker pairs.
#
# tokens::  the collection the tokens are appended to (anything that
#           responds to <tt><<</tt>)
# options:: if <tt>options[:keep_state]</tt> is truthy, the final state
#           and the active string-content pattern are written back to
#           <tt>@state</tt> and <tt>@plain_string_content</tt>
#
# Returns +tokens+. Calls +raise_inspect+ when a state falls through
# without consuming input or when the state is unknown.
def scan_tokens(tokens, options)

  state = @state
  plain_string_content = @plain_string_content

  until eos?

    kind = nil
    match = nil

    # Whitespace is split off before the state dispatch, so it becomes a
    # :space token in every state - including inside quoted attribute
    # values. Since "\n" is whitespace, the "\n" alternative of the
    # /[\n>]/ rule further down can never be the one that matches.
    if scan(/\s+/m)
      kind = :space

    else

      case state

      when :initial
        if scan(/<!--.*?-->/m)
          kind = :comment
        elsif scan(/<!DOCTYPE.*?>/mi)
          # The "DOCTYPE" keyword is ASCII case-insensitive in HTML, so
          # "<!doctype html>" has to be accepted as well.
          kind = :doctype
        elsif scan(/<\?xml.*?\?>/m)
          kind = :preprocessor
        elsif scan(/<\?.*?\?>|<%.*?%>/m)
          kind = :comment
        elsif scan(/<\/[-\w_.:]*>/m)
          kind = :tag
        elsif match = scan(/<[-\w_.:]+>?/m)
          kind = :tag
          # No closing ">" consumed yet: attributes may follow.
          state = :attribute unless match[-1, 1] == '>'
        elsif scan(/[^<>&]+/)
          kind = :plain
        elsif scan(ENTITY)
          kind = :entity
        elsif scan(/[<>&]/)
          kind = :error
        else
          raise_inspect '[BUG] else-case reached with state %p' % [state], tokens
        end

      when :attribute
        if scan(TAG_END)
          kind = :tag
          state = :initial
        elsif scan(ATTR_NAME)
          kind = :attribute_name
          state = :attribute_equal
        else
          # Consume exactly one character so the loop always advances.
          kind = :error
          getch
        end

      when :attribute_equal
        if scan(/=/)
          kind = :operator
          state = :attribute_value
        elsif scan(ATTR_NAME)
          # Previous attribute had no value; this is the next attribute.
          kind = :attribute_name
        elsif scan(TAG_END)
          kind = :tag
          state = :initial
        elsif scan(/./)
          kind = :error
          state = :attribute
        end

      when :attribute_value
        if scan(ATTR_VALUE_UNQUOTED)
          kind = :attribute_value
          state = :attribute
        elsif match = scan(/["']/)
          tokens << [:open, :string]
          state = :attribute_value_string
          # Pick the content pattern that stops at this kind of quote.
          plain_string_content = PLAIN_STRING_CONTENT[match]
          kind = :delimiter
        elsif scan(TAG_END)
          kind = :tag
          state = :initial
        else
          kind = :error
          getch
        end

      when :attribute_value_string
        if scan(plain_string_content)
          kind = :content
        elsif scan(/['"]/)
          # Closing quote: both tokens are pushed here, so skip the
          # generic push at the bottom of the loop.
          tokens << [matched, :delimiter]
          tokens << [:close, :string]
          state = :attribute
          next
        elsif scan(ENTITY)
          kind = :entity
        elsif scan(/&/)
          # A lone "&" that does not start an entity.
          kind = :content
        elsif scan(/[\n>]/)
          # Unterminated string: close it and flag the character.
          tokens << [:close, :string]
          kind = :error
          state = :initial
        end

      else
        raise_inspect 'Unknown state: %p' % [state], tokens

      end

    end

    # Branches that did not capture the text themselves fall back to the
    # text of the most recent scan/getch.
    match ||= matched
    if $DEBUG && !kind
      raise_inspect 'Error token %p in line %d' %
        [[match, kind], line], tokens, state
    end
    raise_inspect 'Empty token', tokens unless match

    tokens << [match, kind]
  end

  if options[:keep_state]
    @state = state
    @plain_string_content = plain_string_content
  end

  tokens
end