# File lib/sup/util.rb, line 316
#
# Returns a 7-bit-safe rendering of the string: every byte with the high
# bit set is replaced by a literal backslash-x escape followed by the byte
# in hexadecimal; all other bytes are copied through unchanged.
#
# The result is tagged as UTF-8 when String#force_encoding is available.
def ascii
  escaped = ""
  each_byte do |byte|
    # Bytes 0..127 are plain ASCII; anything above has the high bit set.
    escaped << (byte < 128 ? byte.chr : "\\x#{byte.to_s(16)}")
  end
  escaped.force_encoding(Encoding::UTF_8) if escaped.respond_to?(:force_encoding)
  escaped
end
# File lib/sup/util.rb, line 187
#
# Converts a camelCase identifier to its lower-case, hyphen-separated form
# ("fooBar" => "foo-bar"). A hyphen is inserted only where a lower-case
# letter is immediately followed by an upper-case letter or a digit, so runs
# of capitals ("HTMLParser") are just downcased.
#
# Returns a new String; the receiver is not modified.
def camel_to_hyphy
  gsub(/([a-z])([A-Z0-9])/, '\1-\2').downcase
end
# File lib/sup/util.rb, line 307
#
# Sanity-checks the encoding of the string.
#
# Raises CheckError when the string reports an encoding other than UTF-8 or
# ASCII, or when its bytes are not valid for its encoding. Both tests are
# skipped on Rubies whose String lacks #encoding / #valid_encoding?.
# Returns nil when the string passes.
def check
  if respond_to?(:encoding) && encoding != Encoding::UTF_8 && encoding != Encoding::ASCII
    raise "unexpected encoding #{encoding}"
  end
  raise "invalid encoding" unless !respond_to?(:valid_encoding?) || valid_encoding?
rescue => err
  # Any StandardError raised above is reported as a CheckError carrying the
  # same message.
  raise CheckError.new(err.message)
end
# Nasty multibyte hack for Ruby 1.8: if the encoding is UTF-8, split the string into characters using the UTF-8 regex and count those; otherwise, use the byte length.
# File lib/sup/util.rb, line 179
#
# Returns the number of characters in the string.
#
# On Rubies older than 1.9.1, String#size counts bytes, so when the global
# $encoding is "UTF-8" or "utf8" the characters are counted by scanning with
# a UTF-8 regex instead. In every other case String#size is returned as is.
def display_length
  # NOTE: RUBY_VERSION is compared as a string here, i.e. lexicographically.
  if RUBY_VERSION < '1.9.1' && ($encoding == "UTF-8" || $encoding == "utf8")
    scan(/./u).size
  else
    size
  end
end
# File lib/sup/util.rb, line 294
#
# Iterates over the string line by line by forwarding the given block to
# String#each_line.
def each(&block)
  each_line(&block)
end
# File lib/sup/util.rb, line 191
#
# Returns an Array with the index of every occurrence of +x+ in the string,
# in ascending order. After each hit the search resumes one position later,
# so overlapping occurrences are all reported.
#
# +x+ is handed to String#index unchanged.
def find_all_positions(x)
  positions = []
  offset = 0
  until offset >= length
    hit = index(x, offset)
    break unless hit
    positions << hit
    offset = hit + 1
  end
  positions
end
# File lib/sup/util.rb, line 283
#
# Returns a copy of the string in which every tab is replaced by a space
# and every carriage return is removed. Other whitespace (including "\n")
# is left untouched.
def normalize_whitespace
  gsub(/\t/, " ").gsub(/\r/, "")
end
# File lib/sup/util.rb, line 288
#
# Returns the ordinal of the first character of the string as an Integer,
# or nil when the string is empty.
#
# String#[] with an integer index returns an Integer on Ruby 1.8 but a
# one-character String on 1.9 and later, so the String case is converted
# explicitly to keep the return type an Integer everywhere.
def ord
  first = self[0]
  # nil (empty string) and Integer (Ruby 1.8) are already the final answer.
  return first unless first.is_a?(String)
  # Deliberately not first.ord: this method may itself be String#ord.
  first.codepoints.first
end
# A very complicated regex found on the internet to split on commas, unless they occur within double quotes.
# File lib/sup/util.rb, line 205
#
# Splits the string on commas (plus any whitespace following them), except
# for commas that sit inside a pair of double quotes.
#
# The lookahead accepts a comma only if the rest of the string contains an
# even number of double quotes, i.e. the comma is not inside an open quote.
#
# Returns an Array of Strings; quotes are kept in the pieces.
def split_on_commas
  split(/,\s*(?=(?:[^"]*"[^"]*")*(?![^"]*"))/)
end
# OK, here we do it the hard way: we have to keep a remainder for the purposes of tab-completing full email addresses.
# File lib/sup/util.rb, line 211
#
# Splits the string on commas that are outside double quotes, using a small
# state machine instead of a regex so that an unfinished quoted section at
# the end can be handed back separately.
#
# States:
#   :outstring          - outside double quotes
#   :instring           - inside double quotes
#   :escaped_outstring  - just saw a backslash outside quotes
#   :escaped_instring   - just saw a backslash inside quotes
#
# Returns a two-element Array [pieces, remainder]:
#   pieces    - Array of the completed comma-separated pieces, with leading
#               and trailing whitespace removed
#   remainder - the text from the last split point to the end of the string
#               (leading whitespace removed) if the string ends inside an
#               open double quote, otherwise nil
def split_on_commas_with_remainder
  ret = []
  state = :outstring
  pos = 0
  region_start = 0

  # pos may reach length: that final pass (char == nil) flushes the last piece.
  while pos <= length
    newpos = case state
      # Right after a backslash, look at the very next character.
      when :escaped_instring, :escaped_outstring then pos
      # Otherwise jump straight to the next comma, quote or backslash.
      else index(/[,"\\]/, pos)
    end

    if newpos
      char = self[newpos]
    else
      char = nil
      newpos = length
    end

    # ?x literals match what String#[] returns on both Ruby 1.8 and 1.9+.
    case char
    when ?"
      state = case state
        when :outstring then :instring
        when :instring then :outstring
        when :escaped_instring then :instring
        when :escaped_outstring then :outstring
      end
    when ?,, nil
      state = case state
        when :outstring, :escaped_outstring then
          ret << self[region_start ... newpos].gsub(/^\s+|\s+$/, "")
          region_start = newpos + 1
          :outstring
        when :instring then :instring
        when :escaped_instring then :instring
      end
    when ?\\
      state = case state
        when :instring then :escaped_instring
        when :outstring then :escaped_outstring
        when :escaped_instring then :instring
        when :escaped_outstring then :outstring
      end
    else
      # An ordinary character consumes a pending escape. Without this the
      # escaped state would linger and swallow a later closing quote.
      state = case state
        when :escaped_instring then :instring
        when :escaped_outstring then :outstring
        else state
      end
    end
    pos = newpos + 1
  end

  remainder = case state
    when :instring
      self[region_start .. -1].gsub(/^\s+/, "")
    else
      nil
    end

  [ret, remainder]
end
# Takes a list of words and returns a set of symbols. Typically used in Sup for translating Ferret's representation of a list of labels (a string) into a set of label symbols.
# split_on is passed to String#split, so you can leave it nil to split on whitespace.
# File lib/sup/util.rb, line 304
#
# Splits the string into words and returns them as a Set of Symbols.
#
# split_on - separator handed to String#split; nil (the default) gives
#            String#split's default whitespace splitting.
#
# Each word is stripped of surrounding whitespace before being interned.
def to_set_of_symbols(split_on = nil)
  symbols = split(split_on).map do |word|
    word.strip.intern
  end
  Set.new(symbols)
end
# File lib/sup/util.rb, line 329
#
# Converts the string from +src_encoding+ to the encoding named by the
# global $encoding by delegating to Iconv.easy_decode.
#
# src_encoding - name of the encoding the string is currently in; defaults
#                to $encoding.
#
# Returns whatever Iconv.easy_decode returns.
def transcode(src_encoding = $encoding)
  target_encoding = $encoding
  Iconv.easy_decode(target_encoding, src_encoding, self)
end
# File lib/sup/util.rb, line 267
#
# Breaks the string into pieces of at most +len+ characters.
#
# Each piece is cut at the last whitespace character found within the first
# +len+ characters (that whitespace character is dropped). If there is no
# whitespace in that window, the piece is cut hard at +len+ characters.
#
# len - maximum piece length. A non-positive value cannot be honoured, so
#       the string is returned unwrapped instead of looping forever.
#
# Returns an Array of Strings.
def wrap(len)
  return [self] if len < 1

  ret = []
  s = self
  while s.length > len
    cut = s[0 ... len].rindex(/\s/)
    if cut
      ret << s[0 ... cut]
      s = s[(cut + 1) .. -1]
    else
      # No whitespace to break on: hard cut.
      ret << s[0 ... len]
      s = s[len .. -1]
    end
  end
  ret << s
end