forked from unixpickle/ANHTML
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathANHTMLDocument.m
More file actions
161 lines (133 loc) · 4.67 KB
/
ANHTMLDocument.m
File metadata and controls
161 lines (133 loc) · 4.67 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
//
// ANHTMLDocument.m
// ANHTML
//
// Created by Alex Nichol on 11/18/11.
// Copyright (c) 2011 __MyCompanyName__. All rights reserved.
//
#import "ANHTMLDocument.h"
// Private helpers used while building the element tree. They are declared in
// a class extension (rather than a named category) so the compiler verifies
// that each one is implemented in the main @implementation below.
@interface ANHTMLDocument ()

// Stores the parse flags, creates the nameless root element, and runs the parser.
- (void)beginParsing:(ANHTMLDocumentParseFlags)flags;

// Moves currentElement up to its parent, if it has one.
- (void)closeCurrentElement;

// Appends anElement to currentElement's children and makes it the new currentElement.
- (void)pushCurrentElement:(ANHTMLElement *)anElement;

// Returns YES when autoclosing is enabled in the parse flags and the tag is
// listed in the autoclose table with a strictness no greater than the one given.
- (BOOL)isAutocloseTag:(NSString *)tag circumstance:(ANHTMLAutocloseStrictness)strictness;

@end
@implementation ANHTMLDocument
/**
 * Convenience initializer: parses the given data with the
 * ANHTMLDocumentParseAutocloseTags flag.
 */
- (id)initWithDocumentData:(NSData *)documentData {
    return [self initWithDocumentData:documentData
                                flags:ANHTMLDocumentParseAutocloseTags];
}
/**
 * Creates a parser over documentData and immediately parses it into an
 * element tree using the supplied flags.
 *
 * @return The initialized document, or nil if the parser could not be created.
 */
- (id)initWithDocumentData:(NSData *)documentData flags:(ANHTMLDocumentParseFlags)flags {
    self = [super init];
    if (self == nil) {
        return self;
    }

    parser = [[ANHTMLParser alloc] initWithDocumentData:documentData];
    if (parser == nil) {
        return nil;
    }

    [self beginParsing:flags];
    return self;
}
/**
 * Creates a parser over documentString and immediately parses it into an
 * element tree using the supplied flags.
 *
 * @return The initialized document, or nil if the parser could not be created.
 */
- (id)initWithDocumentString:(NSString *)documentString flags:(ANHTMLDocumentParseFlags)flags {
    self = [super init];
    if (self == nil) {
        return self;
    }

    parser = [[ANHTMLParser alloc] initWithDocumentString:documentString];
    if (parser == nil) {
        return nil;
    }

    [self beginParsing:flags];
    return self;
}
/**
 * Accessor for the root of the parsed element tree (set up by -beginParsing:).
 */
- (ANHTMLElement *)rootElement {
    return self->rootElement;
}
#pragma mark - Parsing -
/**
 * Records the parse flags, installs a nameless root element as the current
 * element, and runs the parser with this document as its delegate.
 *
 * After parsing, if the nameless root ended up with exactly one child and
 * that child is an element, the child is detached and becomes the root.
 */
- (void)beginParsing:(ANHTMLDocumentParseFlags)flags {
    parseFlags = flags;

    ANHTMLAttributes * emptyAttributes = [[ANHTMLAttributes alloc] init];
    rootElement = [[ANHTMLElement alloc] initWithElementName:@"" attributes:emptyAttributes];
    currentElement = rootElement;

    [parser setDelegate:self];
    [parser parse];

    // The nameless root only exists for documents that do not have a single
    // element as their root. When the document does have exactly one
    // top-level element, that element replaces the nameless placeholder.
    if ([[rootElement children] count] != 1) {
        return;
    }
    ANHTMLNode * onlyChild = [[rootElement children] lastObject];
    if (![onlyChild isKindOfClass:[ANHTMLElement class]]) {
        return;
    }

    ANHTMLElement * promoted = (ANHTMLElement *)onlyChild;
    promoted.parentNode = nil;
    rootElement = promoted;
}
#pragma mark ANHTMLParserDelegate
/**
 * Parser callback for an opening tag. Builds a new element from the tag name
 * and attribute dictionary and attaches it under the current element.
 *
 * Before attaching, the current element is closed if it has a parent and is
 * an autoclose tag at ANHTMLAutocloseStrictnessChild. After attaching, the
 * new element is closed again straight away if its name is an autoclose tag
 * at ANHTMLAutocloseStrictnessImmediately.
 */
- (void)htmlParser:(ANHTMLParser *)parser didStartElement:(NSString *)name attributes:(NSDictionary *)attributes {
    // Only elements below the top of the tree are candidates for closing here.
    BOOL hasParent = (currentElement.parentNode != nil);
    if (hasParent &&
        [self isAutocloseTag:currentElement.elementName circumstance:ANHTMLAutocloseStrictnessChild]) {
        [self closeCurrentElement];
    }

    ANHTMLAttributes * wrappedAttributes = [[ANHTMLAttributes alloc] initWithAttributeDict:attributes];
    ANHTMLElement * opened = [[ANHTMLElement alloc] initWithElementName:name attributes:wrappedAttributes];
    [self pushCurrentElement:opened];

    if (![self isAutocloseTag:name circumstance:ANHTMLAutocloseStrictnessImmediately]) {
        return;
    }
    [self closeCurrentElement];
}
/**
 * Parser callback for a closing tag. Walks up from the current element to the
 * nearest open element whose name matches, then makes that element's parent
 * the current element (closing the match and everything nested inside it).
 *
 * The end tag is ignored when:
 *  - the name is an autoclose tag at ANHTMLAutocloseStrictnessAny,
 *  - no open element matches the name, or
 *  - the matching element has no parent. Moving to a nil parent would leave
 *    currentElement nil, and every node parsed afterwards would be dropped,
 *    so the current element is kept instead (the same guard that
 *    -closeCurrentElement applies).
 */
- (void)htmlParser:(ANHTMLParser *)parser didEndElement:(NSString *)name {
    if ([self isAutocloseTag:name circumstance:ANHTMLAutocloseStrictnessAny]) {
        return;
    }

    // Find the matching open element. Ideally this is currentElement itself,
    // but unclosed descendants may sit between it and the match.
    ANHTMLElement * match = currentElement;
    while (match && ![match compareName:name]) {
        match = (ANHTMLElement *)match.parentNode;
    }
    if (!match) {
        return; // unmatched end tag
    }

    ANHTMLElement * parent = (ANHTMLElement *)match.parentNode;
    if (!parent) {
        return; // never let currentElement become nil
    }
    currentElement = parent;
}
/**
 * Parser callback for a run of plain text. Wraps the text in a text node and
 * appends it to the current element's children.
 */
- (void)htmlParser:(ANHTMLParser *)parser didEncounterPlainText:(NSString *)text {
    ANHTMLTextNode * leaf = [[ANHTMLTextNode alloc] initWithNodeText:text];
    leaf.parentNode = currentElement;
    [[currentElement children] addObject:leaf];
}
/**
 * Parser callback that maps an escape code to a character.
 *
 * @return A space character for the code "nbsp"; 0 for every other code.
 */
- (unichar)htmlParser:(ANHTMLParser *)parser characterForEscapeCode:(NSString *)code {
    BOOL isNonBreakingSpace = [code isEqualToString:@"nbsp"];
    return isNonBreakingSpace ? ' ' : 0;
}
#pragma mark Element Tree
/**
 * Steps the current element up to its parent. Does nothing when the current
 * element has no parent.
 */
- (void)closeCurrentElement {
    ANHTMLElement * parent = (ANHTMLElement *)currentElement.parentNode;
    if (!parent) {
        return;
    }
    currentElement = parent;
}
/**
 * Attaches anElement as the last child of the current element and then makes
 * anElement the current element.
 */
- (void)pushCurrentElement:(ANHTMLElement *)anElement {
    ANHTMLElement * parent = currentElement;
    anElement.parentNode = parent;
    [[parent children] addObject:anElement];
    currentElement = anElement;
}
#pragma mark Autoclosing
/**
 * Decides whether a tag should be closed automatically.
 *
 * @param tag        The tag name; compared case-insensitively against the
 *                   autoclose table. A nil tag never matches.
 * @param strictness The circumstance being checked. A table entry applies
 *                   when its own strictness is less than or equal to this value.
 * @return YES if ANHTMLDocumentParseAutocloseTags is set in the parse flags
 *         and an applicable table entry matches the tag; otherwise NO.
 */
- (BOOL)isAutocloseTag:(NSString *)tag circumstance:(ANHTMLAutocloseStrictness)strictness {
    // The table is static so it is built once rather than on every call.
    // String literals are constant objects, so holding them unretained is safe.
    static const struct {
        __unsafe_unretained NSString * name;
        ANHTMLAutocloseStrictness strictness;
    } closeTags[] = {
        {@"br", ANHTMLAutocloseStrictnessImmediately},
        {@"hr", ANHTMLAutocloseStrictnessImmediately},
        {@"meta", ANHTMLAutocloseStrictnessImmediately},
        {@"img", ANHTMLAutocloseStrictnessImmediately},
        {@"link", ANHTMLAutocloseStrictnessChild}
    };
    // Derived from the table so adding or removing an entry cannot desynchronize it.
    const NSUInteger numCloseTags = sizeof(closeTags) / sizeof(closeTags[0]);

    // -caseInsensitiveCompare: must not be given nil.
    if (tag == nil) {
        return NO;
    }
    if ((parseFlags & ANHTMLDocumentParseAutocloseTags) == 0) {
        return NO;
    }

    for (NSUInteger i = 0; i < numCloseTags; i++) {
        if ([closeTags[i].name caseInsensitiveCompare:tag] != NSOrderedSame) {
            continue;
        }
        if (closeTags[i].strictness <= strictness) {
            return YES;
        }
    }
    return NO;
}
@end