Class Nokogiri::XML::SAX::ParserContext
In: lib/nokogiri/xml/sax/parser_context.rb
ext/nokogiri/xml_sax_parser.c
Parent: Object

include <html_sax_parser_context.h>

/* Ruby class object for Nokogiri::HTML::SAX::ParserContext; assigned in
 * init_html_sax_parser_context(). */
VALUE cNokogiriHtmlSaxParserContext ;

/*
 * Free function registered through Data_Wrap_Struct for the parser contexts
 * created by parse_memory() and parse_file().
 *
 * ctxt: the libxml2 parser context to release.
 */
static void deallocate(xmlParserCtxtPtr ctxt) {

  /* Report the pointer actually being freed; there is no `handler`
   * variable in this scope. */
  NOKOGIRI_DEBUG_START(ctxt);

  /* Detach the SAX handler before freeing the context: parse_with()
   * installs a handler that belongs to the Ruby parser object, so the
   * context must not release it. */
  ctxt->sax = NULL;

  htmlFreeParserCtxt(ctxt);

  NOKOGIRI_DEBUG_END(ctxt);

}

/*
 * Build a parser context over an in-memory HTML string.
 *
 * klass:    the Ruby class the returned object is wrapped as.
 * data:     the document text; must be a non-nil, non-empty String
 *           (or convertible to one).
 * encoding: optional encoding name; a nil/false value leaves the
 *           context's encoding untouched.
 *
 * Returns a Ruby object wrapping the new htmlParserCtxt, freed by
 * deallocate().
 *
 * Raises ArgumentError when data is nil, and RuntimeError when data is
 * empty, when the context cannot be created, or when libxml2 reports the
 * requested encoding as unsupported.
 */
static VALUE parse_memory(VALUE klass, VALUE data, VALUE encoding) {

  htmlParserCtxtPtr ctxt;

  if(NIL_P(data)) rb_raise(rb_eArgError, "data cannot be nil");

  /* Coerce to String before reading its length: RSTRING_LEN is only
   * valid on a String object. */
  StringValue(data);
  if(!(int)RSTRING_LEN(data))
    rb_raise(rb_eRuntimeError, "data cannot be empty");

  /* Coerce the encoding now, so a TypeError cannot be raised later while
   * an unwrapped (and therefore unowned) context is alive. */
  if(RTEST(encoding)) StringValue(encoding);

  ctxt = htmlCreateMemoryParserCtxt(
      StringValuePtr(data),
      (int)RSTRING_LEN(data)
  );

  if(NULL == ctxt)
    rb_raise(rb_eRuntimeError, "Could not create a parser context");

  if(RTEST(encoding)) {
    xmlCharEncoding enc = xmlParseCharEncoding(StringValuePtr(encoding));
    if(enc != XML_CHAR_ENCODING_ERROR) {
      xmlSwitchEncoding(ctxt, enc);
      if(ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
        /* rb_raise does not return, and the context is not yet wrapped
         * in a Ruby object, so release it here to avoid leaking it. */
        htmlFreeParserCtxt(ctxt);
        rb_raise(rb_eRuntimeError, "Unsupported encoding %s",
            StringValuePtr(encoding));
      }
    }
  }

  return Data_Wrap_Struct(klass, NULL, deallocate, ctxt);

}

/*
 * Build a parser context that reads HTML from a file.
 *
 * klass:    the Ruby class the returned object is wrapped as.
 * filename: path of the file to parse (String or convertible).
 * encoding: encoding name, or nil to pass no encoding (NULL) to libxml2,
 *           matching parse_memory()'s treatment of a nil encoding.
 *
 * Returns a Ruby object wrapping the new htmlParserCtxt, freed by
 * deallocate().
 *
 * Raises RuntimeError when libxml2 cannot create the context.
 */
static VALUE parse_file(VALUE klass, VALUE filename, VALUE encoding) {

  const char *enc_name = NULL;
  htmlParserCtxtPtr ctxt;

  /* Coerce the filename first so no Ruby conversion code runs after the
   * encoding's C pointer has been taken. */
  StringValue(filename);
  if(!NIL_P(encoding)) enc_name = StringValuePtr(encoding);

  ctxt = htmlCreateFileParserCtxt(StringValuePtr(filename), enc_name);

  /* Never wrap a NULL context: parse_with() dereferences it. */
  if(NULL == ctxt)
    rb_raise(rb_eRuntimeError, "Could not create a parser context for %s",
        StringValuePtr(filename));

  return Data_Wrap_Struct(klass, NULL, deallocate, ctxt);

}

/* rb_ensure body: run the HTML parser over the context passed as a VALUE. */
static VALUE parse_doc(VALUE ctxt_val) {

  htmlParseDocument((htmlParserCtxtPtr)ctxt_val);
  return Qnil;

}

/* rb_ensure cleanup: release per-parse state even if a callback raised. */
static VALUE parse_doc_finalize(VALUE ctxt_val) {

  htmlParserCtxtPtr ctxt = (htmlParserCtxtPtr)ctxt_val;

  if(NULL != ctxt->myDoc) {
    xmlFreeDoc(ctxt->myDoc);
    ctxt->myDoc = NULL; /* do not leave a dangling document pointer */
  }

  NOKOGIRI_SAX_TUPLE_DESTROY(ctxt->userData);
  ctxt->userData = NULL;

  /* The handler belongs to the Ruby parser object. Detach it so a later
   * parse_with() call does not xmlFree() memory this context never owned. */
  ctxt->sax = NULL;

  return Qnil;

}

/*
 * Parse the context's document, dispatching SAX events to sax_handler.
 *
 * self:        the wrapped parser context.
 * sax_handler: a Nokogiri::XML::SAX::Parser (or subclass) instance.
 *
 * Returns self.
 *
 * Raises ArgumentError when sax_handler is not a
 * Nokogiri::XML::SAX::Parser, and RuntimeError when the wrapped context
 * pointer is NULL. Exceptions raised during parsing propagate after the
 * per-parse state has been cleaned up.
 */
static VALUE parse_with(VALUE self, VALUE sax_handler) {

  htmlParserCtxtPtr ctxt;
  htmlSAXHandlerPtr sax;

  if(!rb_obj_is_kind_of(sax_handler, cNokogiriXmlSaxParser))
    rb_raise(rb_eArgError, "argument must be a Nokogiri::XML::SAX::Parser");

  Data_Get_Struct(self, htmlParserCtxt, ctxt);
  if(NULL == ctxt)
    rb_raise(rb_eRuntimeError, "parser context is not initialized");

  Data_Get_Struct(sax_handler, htmlSAXHandler, sax);

  // Free the sax handler since we'll assign our own
  if(ctxt->sax && ctxt->sax != (xmlSAXHandlerPtr)&xmlDefaultSAXHandler)
    xmlFree(ctxt->sax);

  ctxt->sax = sax;
  ctxt->userData = (void *)NOKOGIRI_SAX_TUPLE_NEW(ctxt, sax_handler);

  /* A Ruby exception raised inside a callback unwinds past
   * htmlParseDocument(); rb_ensure guarantees the cleanup still runs. */
  rb_ensure(parse_doc, (VALUE)ctxt, parse_doc_finalize, (VALUE)ctxt);

  return self;

}

/*
 * Define Nokogiri::HTML::SAX::ParserContext as a subclass of
 * Nokogiri::XML::SAX::ParserContext and attach its methods:
 * the class methods `memory` and `file` (two arguments each) and the
 * instance method `parse_with` (one argument).
 */
void init_html_sax_parser_context() {

  /* Look up (or create) the enclosing namespaces, outermost first. */
  VALUE mNokogiri = rb_define_module("Nokogiri");
  VALUE mXml      = rb_define_module_under(mNokogiri, "XML");
  VALUE mHtml     = rb_define_module_under(mNokogiri, "HTML");
  VALUE mXmlSax   = rb_define_module_under(mXml, "SAX");
  VALUE mHtmlSax  = rb_define_module_under(mHtml, "SAX");

  /* The XML context class is the parent of the HTML one. */
  VALUE cXmlContext =
    rb_define_class_under(mXmlSax, "ParserContext", rb_cObject);

  cNokogiriHtmlSaxParserContext =
    rb_define_class_under(mHtmlSax, "ParserContext", cXmlContext);

  rb_define_singleton_method(
      cNokogiriHtmlSaxParserContext, "memory", parse_memory, 2);
  rb_define_singleton_method(
      cNokogiriHtmlSaxParserContext, "file", parse_file, 2);

  rb_define_method(
      cNokogiriHtmlSaxParserContext, "parse_with", parse_with, 1);

}

Methods

Public Class methods

Parse the file given by filename

Parse io object with encoding

Parse the XML stored in memory in data

Public Instance methods

Use sax_handler and parse the current document

Should this parser replace entities? &amp; will be converted to '&' if set to true

Should this parser replace entities? &amp; will be converted to '&' if set to true

[Validate]