Skip to content

Commit 3d90c6d

Browse files
authored
Merge pull request #2169 from sparklemotion/2168-active-support-test-failure
fix(cruby): SAX and Push parser error handling in the presence of foreign handlers
2 parents ee69772 + bbf850c commit 3d90c6d

16 files changed

Lines changed: 129 additions & 11 deletions

CHANGELOG.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,13 @@ Nokogiri follows [Semantic Versioning](https://semver.org/), please see the [REA
44

55
---
66

7+
## v1.11.1 / unreleased
8+
9+
### Fixed
10+
11+
* [CRuby] If `libxml-ruby` is loaded before `nokogiri`, the SAX and Push parsers no longer call `libxml-ruby`'s handlers. Instead, they defensively override the libxml2 global handler before parsing. [[#2168](https://github.com/sparklemotion/nokogiri/issues/2168)]
12+
13+
714
## v1.11.0 / 2021-01-03
815

916
### Notes

ext/nokogiri/html_sax_parser_context.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,8 @@ parse_with(VALUE self, VALUE sax_handler)
9292
ctxt->sax = sax;
9393
ctxt->userData = (void *)NOKOGIRI_SAX_TUPLE_NEW(ctxt, sax_handler);
9494

95+
xmlSetStructuredErrorFunc(NULL, NULL);
96+
9597
rb_ensure(parse_doc, (VALUE)ctxt, parse_doc_finalize, (VALUE)ctxt);
9698

9799
return self;

ext/nokogiri/html_sax_push_parser.c

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,10 @@
99
static VALUE native_write(VALUE self, VALUE _chunk, VALUE _last_chunk)
1010
{
1111
xmlParserCtxtPtr ctx;
12-
const char * chunk = NULL;
13-
int size = 0;
14-
12+
const char * chunk = NULL;
13+
int size = 0;
14+
int status = 0;
15+
libxmlStructuredErrorHandlerState handler_state;
1516

1617
Data_Get_Struct(self, xmlParserCtxt, ctx);
1718

@@ -20,11 +21,16 @@ static VALUE native_write(VALUE self, VALUE _chunk, VALUE _last_chunk)
2021
size = (int)RSTRING_LEN(_chunk);
2122
}
2223

23-
if(htmlParseChunk(ctx, chunk, size, Qtrue == _last_chunk ? 1 : 0)) {
24-
if (!(ctx->options & XML_PARSE_RECOVER)) {
25-
xmlErrorPtr e = xmlCtxtGetLastError(ctx);
26-
Nokogiri_error_raise(NULL, e);
27-
}
24+
Nokogiri_structured_error_func_save_and_set(&handler_state, NULL, NULL);
25+
26+
status = htmlParseChunk(ctx, chunk, size, Qtrue == _last_chunk ? 1 : 0);
27+
28+
Nokogiri_structured_error_func_restore(&handler_state);
29+
30+
if ((status != 0) && !(ctx->options & XML_PARSE_RECOVER)) {
31+
// TODO: there appear to be no tests for this block
32+
xmlErrorPtr e = xmlCtxtGetLastError(ctx);
33+
Nokogiri_error_raise(NULL, e);
2834
}
2935

3036
return self;

ext/nokogiri/nokogiri.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
#include <nokogiri.h>
22

3+
void init_test_global_handlers(); /* in lieu of test_global_handlers.h */
4+
35
VALUE mNokogiri ;
46
VALUE mNokogiriXml ;
57
VALUE mNokogiriHtml ;
@@ -132,4 +134,5 @@ void Init_nokogiri()
132134
init_xml_relax_ng();
133135
init_nokogiri_io();
134136
init_xml_encoding_handler();
137+
init_test_global_handlers();
135138
}
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
#include <nokogiri.h>
2+
#include "libxml/xmlerror.h"
3+
4+
static VALUE foreign_error_handler_block = Qnil;
5+
6+
static void foreign_error_handler(void* user_data, xmlErrorPtr c_error)
7+
{
8+
rb_funcall(foreign_error_handler_block, rb_intern("call"), 0);
9+
}
10+
11+
/*
12+
* call-seq:
13+
* __foreign_error_handler { ... } -> nil
14+
*
15+
* Override libxml2's global error handlers to call the block. This method thus has very little
16+
* value except to test that Nokogiri is properly setting error handlers elsewhere in the code. See
17+
* test/helper.rb for how this is being used.
18+
*/
19+
static VALUE
20+
rb_foreign_error_handler(VALUE klass)
21+
{
22+
rb_need_block();
23+
foreign_error_handler_block = rb_block_proc();
24+
xmlSetStructuredErrorFunc(NULL, foreign_error_handler);
25+
return Qnil;
26+
}
27+
28+
/*
29+
* Document-module: Nokogiri::Test
30+
*
31+
* The Nokogiri::Test module should only be used for testing Nokogiri.
32+
* Do NOT use this outside of the Nokogiri test suite.
33+
*/
34+
void
35+
init_test_global_handlers()
36+
{
37+
VALUE mNokogiri = rb_define_module("Nokogiri");
38+
VALUE mNokogiriTest = rb_define_module_under(mNokogiri, "Test");
39+
40+
rb_define_singleton_method(mNokogiriTest, "__foreign_error_handler", rb_foreign_error_handler, 0);
41+
}

ext/nokogiri/xml_sax_parser_context.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,8 @@ parse_with(VALUE self, VALUE sax_handler)
120120
ctxt->sax = sax;
121121
ctxt->userData = (void *)NOKOGIRI_SAX_TUPLE_NEW(ctxt, sax_handler);
122122

123+
xmlSetStructuredErrorFunc(NULL, NULL);
124+
123125
rb_ensure(parse_doc, (VALUE)ctxt, parse_doc_finalize, (VALUE)ctxt);
124126

125127
return Qnil;

ext/nokogiri/xml_sax_push_parser.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,8 @@ static VALUE native_write(VALUE self, VALUE _chunk, VALUE _last_chunk)
3535
size = (int)RSTRING_LEN(_chunk);
3636
}
3737

38+
xmlSetStructuredErrorFunc(NULL, NULL);
39+
3840
if (xmlParseChunk(ctx, chunk, size, Qtrue == _last_chunk ? 1 : 0)) {
3941
if (!(ctx->options & XML_PARSE_RECOVER)) {
4042
xmlErrorPtr e = xmlCtxtGetLastError(ctx);

ext/nokogiri/xml_syntax_error.c

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,28 @@
11
#include <xml_syntax_error.h>
22

3+
void
4+
Nokogiri_structured_error_func_save(libxmlStructuredErrorHandlerState *handler_state)
5+
{
6+
/* this method is tightly coupled to the implementation of xmlSetStructuredErrorFunc */
7+
handler_state->user_data = xmlStructuredErrorContext;
8+
handler_state->handler = xmlStructuredError;
9+
}
10+
11+
void
12+
Nokogiri_structured_error_func_save_and_set(libxmlStructuredErrorHandlerState *handler_state,
13+
void *user_data,
14+
xmlStructuredErrorFunc handler)
15+
{
16+
Nokogiri_structured_error_func_save(handler_state);
17+
xmlSetStructuredErrorFunc(user_data, handler);
18+
}
19+
20+
void
21+
Nokogiri_structured_error_func_restore(libxmlStructuredErrorHandlerState *handler_state)
22+
{
23+
xmlSetStructuredErrorFunc(handler_state->user_data, handler_state->handler);
24+
}
25+
326
void Nokogiri_error_array_pusher(void * ctx, xmlErrorPtr error)
427
{
528
VALUE list = (VALUE)ctx;

ext/nokogiri/xml_syntax_error.h

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,23 @@
33

44
#include <nokogiri.h>
55

6+
typedef struct _libxmlStructuredErrorHandlerState {
7+
void *user_data;
8+
xmlStructuredErrorFunc handler;
9+
} libxmlStructuredErrorHandlerState ;
10+
611
void init_xml_syntax_error();
12+
13+
void Nokogiri_structured_error_func_save(libxmlStructuredErrorHandlerState *handler_state);
14+
void Nokogiri_structured_error_func_save_and_set(libxmlStructuredErrorHandlerState *handler_state,
15+
void *user_data,
16+
xmlStructuredErrorFunc handler);
17+
void Nokogiri_structured_error_func_restore(libxmlStructuredErrorHandlerState *handler_state);
18+
719
VALUE Nokogiri_wrap_xml_syntax_error(xmlErrorPtr error);
8-
void Nokogiri_error_array_pusher(void * ctx, xmlErrorPtr error);
9-
NORETURN(void Nokogiri_error_raise(void * ctx, xmlErrorPtr error));
20+
void Nokogiri_error_array_pusher(void *ctx, xmlErrorPtr error);
21+
NORETURN(void Nokogiri_error_raise(void *ctx, xmlErrorPtr error));
1022

1123
extern VALUE cNokogiriXmlSyntaxError;
12-
#endif
1324

25+
#endif /* NOKOGIRI_XML_SYNTAX_ERROR */

test/helper.rb

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,16 @@ class TestCase < MiniTest::Spec
5858
XSLT_FILE = File.join(ASSETS_DIR, 'staff.xslt')
5959
XPATH_FILE = File.join(ASSETS_DIR, 'slow-xpath.xml')
6060

61+
def setup
62+
@fake_error_handler_called = false
63+
Nokogiri::Test.__foreign_error_handler do
64+
@fake_error_handler_called = true
65+
end if Nokogiri.uses_libxml?
66+
end
67+
6168
def teardown
69+
refute(@fake_error_handler_called, "the fake error handler should never get called") if Nokogiri.uses_libxml?
70+
6271
if ENV['NOKOGIRI_GC']
6372
STDOUT.putc '!'
6473
if RUBY_PLATFORM =~ /java/
@@ -135,6 +144,11 @@ class Doc < XML::SAX::Document
135144
attr_reader :xmldecls
136145
attr_reader :processing_instructions
137146

147+
def initialize
148+
@errors = []
149+
super
150+
end
151+
138152
def xmldecl version, encoding, standalone
139153
@xmldecls = [version, encoding, standalone].compact
140154
super

0 commit comments

Comments
 (0)