added old data

This commit is contained in:
TinyAtoms
2020-08-01 19:26:11 -03:00
commit 276ef453dc
2923 changed files with 307078 additions and 0 deletions

2
project3/node_modules/htmlparser2/.gitattributes generated vendored Normal file
View File

@@ -0,0 +1,2 @@
# Auto detect text files and perform LF normalization
* text eol=lf

30
project3/node_modules/htmlparser2/.jscsrc generated vendored Normal file
View File

@@ -0,0 +1,30 @@
{
"requireCurlyBraces": ["do", "switch", "return", "try", "catch"],
"requireSpaceBeforeBinaryOperators": ["+", "-", "/", "*", "=", "==", "===", "!=", "!==", ">", "<", ">=", "<="],
"requireSpaceAfterBinaryOperators": ["+", "-", "/", "*", "=", "==", "===", "!=", "!==", ">", "<", ">=", "<="],
"requireSpaceAfterKeywords": ["else", "do", "switch", "return", "try"],
"disallowSpaceAfterKeywords": ["if", "catch", "for", "while"],
"disallowSpacesInFunctionExpression": { "beforeOpeningCurlyBrace": true },
"requireCapitalizedConstructors": true,
"requireCommaBeforeLineBreak": true,
"requireDotNotation": true,
"requireParenthesesAroundIIFE": true,
"disallowEmptyBlocks": true,
"disallowSpaceAfterPrefixUnaryOperators": ["!"],
"disallowSpaceBeforeBinaryOperators": [","],
"disallowSpaceAfterPrefixUnaryOperators": ["++", "--", "+", "-", "~", "!"],
"disallowSpaceBeforePostfixUnaryOperators": ["++", "--"],
"disallowKeywords": ["with"],
"disallowMultipleLineStrings": true,
"disallowTrailingWhitespace": true,
"validateIndentation": "\t",
"validateLineBreaks": "LF",
"validateQuoteMarks": "\"",
"safeContextKeyword": "_this"
}

8
project3/node_modules/htmlparser2/.travis.yml generated vendored Normal file
View File

@@ -0,0 +1,8 @@
language: node_js
node_js:
- 0.10
- 0.11
sudo: false
script: npm run coveralls

18
project3/node_modules/htmlparser2/LICENSE generated vendored Normal file
View File

@@ -0,0 +1,18 @@
Copyright 2010, 2011, Chris Winberry <chris@winberry.net>. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
IN THE SOFTWARE.

91
project3/node_modules/htmlparser2/README.md generated vendored Normal file
View File

@@ -0,0 +1,91 @@
# htmlparser2
[![NPM version](http://img.shields.io/npm/v/htmlparser2.svg?style=flat)](https://npmjs.org/package/htmlparser2)
[![Downloads](https://img.shields.io/npm/dm/htmlparser2.svg?style=flat)](https://npmjs.org/package/htmlparser2)
[![Build Status](http://img.shields.io/travis/fb55/htmlparser2/master.svg?style=flat)](http://travis-ci.org/fb55/htmlparser2)
[![Coverage](http://img.shields.io/coveralls/fb55/htmlparser2.svg?style=flat)](https://coveralls.io/r/fb55/htmlparser2)
A forgiving HTML/XML/RSS parser. The parser can handle streams and provides a callback interface.
## Installation
npm install htmlparser2
A live demo of htmlparser2 is available [here](http://demos.forbeslindesay.co.uk/htmlparser2/).
## Usage
```javascript
var htmlparser = require("htmlparser2");
var parser = new htmlparser.Parser({
onopentag: function(name, attribs){
if(name === "script" && attribs.type === "text/javascript"){
console.log("JS! Hooray!");
}
},
ontext: function(text){
console.log("-->", text);
},
onclosetag: function(tagname){
if(tagname === "script"){
console.log("That's it?!");
}
}
}, {decodeEntities: true});
parser.write("Xyz <script type='text/javascript'>var foo = '<<bar>>';</ script>");
parser.end();
```
Output (simplified):
```javascript
--> Xyz
JS! Hooray!
--> var foo = '<<bar>>';
That's it?!
```
## Documentation
Read more about the parser and its options in the [wiki](https://github.com/fb55/htmlparser2/wiki/Parser-options).
## Get a DOM
The `DomHandler` (known as `DefaultHandler` in the original `htmlparser` module) produces a DOM (document object model) that can be manipulated using the [`DomUtils`](https://github.com/fb55/DomUtils) helper.
The `DomHandler`, while still bundled with this module, was moved to its [own module](https://github.com/fb55/domhandler). Have a look at it for further information.
## Parsing RSS/RDF/Atom Feeds
```javascript
new htmlparser.FeedHandler(function(<error> error, <object> feed){
...
});
```
Note: While the provided feed handler works for most feeds, you might want to use [danmactough/node-feedparser](https://github.com/danmactough/node-feedparser), which is much better tested and actively maintained.
## Performance
After having some artificial benchmarks for some time, __@AndreasMadsen__ published his [`htmlparser-benchmark`](https://github.com/AndreasMadsen/htmlparser-benchmark), which benchmarks HTML parsers based on real-world websites.
At the time of writing, the latest versions of all supported parsers show the following performance characteristics on [Travis CI](https://travis-ci.org/AndreasMadsen/htmlparser-benchmark/builds/10805007) (please note that Travis doesn't guarantee equal conditions for all tests):
```
gumbo-parser : 34.9208 ms/file ± 21.4238
html-parser : 24.8224 ms/file ± 15.8703
html5 : 419.597 ms/file ± 264.265
htmlparser : 60.0722 ms/file ± 384.844
htmlparser2-dom: 12.0749 ms/file ± 6.49474
htmlparser2 : 7.49130 ms/file ± 5.74368
hubbub : 30.4980 ms/file ± 16.4682
libxmljs : 14.1338 ms/file ± 18.6541
parse5 : 22.0439 ms/file ± 15.3743
sax : 49.6513 ms/file ± 26.6032
```
## How does this module differ from [node-htmlparser](https://github.com/tautologistics/node-htmlparser)?
This is a fork of the `htmlparser` module. The main difference is that this is intended to be used only with node (it runs on other platforms using [browserify](https://github.com/substack/node-browserify)). `htmlparser2` was rewritten multiple times and, while it maintains an API that's compatible with `htmlparser` in most cases, the projects don't share any code anymore.
The parser now provides a callback interface close to [sax.js](https://github.com/isaacs/sax-js) (originally targeted at [readabilitySAX](https://github.com/fb55/readabilitysax)). As a result, old handlers won't work anymore.
The `DefaultHandler` and the `RssHandler` were renamed to clarify their purpose (to `DomHandler` and `FeedHandler`). The old names are still available when requiring `htmlparser2`, so your code should work as expected.

View File

@@ -0,0 +1,55 @@
module.exports = CollectingHandler;
/*
	Handler that records every event it receives in `this.events`
	and forwards it to the optional callbacks object `cbs`.
*/
function CollectingHandler(cbs){
	this._cbs = cbs ? cbs : {};
	this.events = [];
}
//Generates one `on<event>` method per entry of EVENTS. The value stored in
//EVENTS is the number of arguments (0, 1 or 2) the event carries.
var EVENTS = require("./").EVENTS;
Object.keys(EVENTS).forEach(function(eventName){
	var method = "on" + eventName;

	switch(EVENTS[eventName]){
		case 0:
			CollectingHandler.prototype[method] = function(){
				this.events.push([method]);
				if(this._cbs[method]) this._cbs[method]();
			};
			break;
		case 1:
			CollectingHandler.prototype[method] = function(a){
				this.events.push([method, a]);
				if(this._cbs[method]) this._cbs[method](a);
			};
			break;
		case 2:
			CollectingHandler.prototype[method] = function(a, b){
				this.events.push([method, a, b]);
				if(this._cbs[method]) this._cbs[method](a, b);
			};
			break;
		default:
			throw Error("wrong number of arguments");
	}
});
//Drops all events recorded so far, then forwards the reset
//to the wrapped callbacks (if they define `onreset`).
CollectingHandler.prototype.onreset = function(){
this.events = [];
if(this._cbs.onreset) this._cbs.onreset();
};
//Replays every recorded event on the wrapped callbacks, preceded by `onreset`.
//Each record has the shape [methodName, arg1?, arg2?].
CollectingHandler.prototype.restart = function(){
	if(this._cbs.onreset) this._cbs.onreset();

	for(var i = 0, len = this.events.length; i < len; i++){
		var record = this.events[i],
		    method = record[0];

		//events without a matching callback are skipped
		if(!this._cbs[method]) continue;

		switch(record.length){
			case 1:
				this._cbs[method]();
				break;
			case 2:
				this._cbs[method](record[1]);
				break;
			default:
				this._cbs[method](record[1], record[2]);
		}
	}
};

95
project3/node_modules/htmlparser2/lib/FeedHandler.js generated vendored Normal file
View File

@@ -0,0 +1,95 @@
var index = require("./index.js"),
DomHandler = index.DomHandler,
DomUtils = index.DomUtils;
//TODO: make this a streamable handler
/*
	Handler that turns a parsed RSS/RDF/Atom document into a plain feed
	object (see `onend`). The arguments are passed through unchanged to
	`init`, which is the DomHandler constructor function itself.
*/
function FeedHandler(callback, options){
this.init(callback, options);
}
require("util").inherits(FeedHandler, DomHandler);
//`init` re-uses the DomHandler constructor to set up the instance
FeedHandler.prototype.init = DomHandler;
//Returns all elements below `where` matching `what`.
//The third argument (`true`) sits in the position `fetch` names `recurse`.
function getElements(what, where){
return DomUtils.getElementsByTagName(what, where, true);
}
//Returns the first element of the result list, or `undefined` when the list is empty.
//NOTE(review): the trailing `1` presumably limits the search to one match -- confirm against DomUtils.
function getOneElement(what, where){
return DomUtils.getElementsByTagName(what, where, true, 1)[0];
}
//Returns the trimmed text of the element(s) found for `what` in `where`.
function fetch(what, where, recurse){
	var matches = DomUtils.getElementsByTagName(what, where, recurse, 1);
	var text = DomUtils.getText(matches);
	return text.trim();
}
//Sets `obj[prop]` to the text fetched for `what`, but only when that text is non-empty.
function addConditionally(obj, prop, what, where, recurse){
	var text = fetch(what, where, recurse);
	if(!text) return;
	obj[prop] = text;
}
//Tells whether a tag name is one of the supported feed root elements.
var isValidFeed = function(value){
	switch(value){
		case "rss":
		case "feed":
		case "rdf:RDF":
			return true;
		default:
			return false;
	}
};
/*
	Called when the document has ended. Looks for a feed root element
	(`rss`, `feed` or `rdf:RDF`) in `this.dom`, converts it into a plain
	feed object and replaces `this.dom` with that object. Finally the
	inherited `_handleCallback` is invoked -- with `null` on success, or
	with an Error when no feed root was found.

	Resulting object: `type` ("atom", "rss" or "rdf"), optional `id`,
	`title`, `link`, `description`, `updated` (Date), `author`, and
	`items` (array of `{id, title, link, description, pubDate}`, each
	property only present when a value was found).
*/
FeedHandler.prototype.onend = function(){
	var feed = {},
	    feedRoot = getOneElement(isValidFeed, this.dom),
	    tmp, childs;

	if(feedRoot){
		if(feedRoot.name === "feed"){
			//Atom
			childs = feedRoot.children;

			feed.type = "atom";
			addConditionally(feed, "id", "id", childs);
			addConditionally(feed, "title", "title", childs);
			if((tmp = getOneElement("link", childs)) && (tmp = tmp.attribs) && (tmp = tmp.href)) feed.link = tmp;
			addConditionally(feed, "description", "subtitle", childs);
			if((tmp = fetch("updated", childs))) feed.updated = new Date(tmp);
			addConditionally(feed, "author", "email", childs, true);

			feed.items = getElements("entry", childs).map(function(item){
				var entry = {}, tmp;

				item = item.children;

				addConditionally(entry, "id", "id", item);
				addConditionally(entry, "title", "title", item);
				if((tmp = getOneElement("link", item)) && (tmp = tmp.attribs) && (tmp = tmp.href)) entry.link = tmp;
				if((tmp = fetch("summary", item) || fetch("content", item))) entry.description = tmp;
				if((tmp = fetch("updated", item))) entry.pubDate = new Date(tmp);
				return entry;
			});
		} else {
			//RSS / RDF: the feed-level metadata lives inside <channel>.
			//A malformed document may lack that element; fall back to an
			//empty child list instead of throwing a TypeError.
			tmp = getOneElement("channel", feedRoot.children);
			childs = tmp ? tmp.children : [];

			//"rss" or "rdf"
			feed.type = feedRoot.name.substr(0, 3);
			feed.id = "";
			addConditionally(feed, "title", "title", childs);
			addConditionally(feed, "link", "link", childs);
			addConditionally(feed, "description", "description", childs);
			if((tmp = fetch("lastBuildDate", childs))) feed.updated = new Date(tmp);
			addConditionally(feed, "author", "managingEditor", childs, true);

			//items are searched from the root, not from <channel>
			feed.items = getElements("item", feedRoot.children).map(function(item){
				var entry = {}, tmp;

				item = item.children;

				addConditionally(entry, "id", "guid", item);
				addConditionally(entry, "title", "title", item);
				addConditionally(entry, "link", "link", item);
				addConditionally(entry, "description", "description", item);
				if((tmp = fetch("pubDate", item))) entry.pubDate = new Date(tmp);
				return entry;
			});
		}
	}
	this.dom = feed;
	DomHandler.prototype._handleCallback.call(
		this, feedRoot ? null : Error("couldn't find root of feed")
	);
};
module.exports = FeedHandler;

350
project3/node_modules/htmlparser2/lib/Parser.js generated vendored Normal file
View File

@@ -0,0 +1,350 @@
var Tokenizer = require("./Tokenizer.js");
/*
Options:
xmlMode: Disables the special behavior for script/style tags (false by default)
lowerCaseAttributeNames: call .toLowerCase for each attribute name (true if xmlMode is `false`)
lowerCaseTags: call .toLowerCase for each tag name (true if xmlMode is `false`)
*/
/*
Callbacks:
oncdataend,
oncdatastart,
onclosetag,
oncomment,
oncommentend,
onerror,
onopentag,
onprocessinginstruction,
onreset,
ontext
*/
/*
	Lookup tables used by `onopentagname`, queried with the `in` operator.
	All maps are created without a prototype (like `voidElements`), so
	tag names such as "constructor" or "toString" are not mistaken for
	members through properties inherited from `Object.prototype`.
*/

//form-related tags that get closed when certain other form elements are opened
var formTags = {
	__proto__: null,
	input: true,
	option: true,
	optgroup: true,
	select: true,
	button: true,
	datalist: true,
	textarea: true
};

//opening the tag named by a key implicitly closes the tags listed in its
//value while they are on top of the stack
var openImpliesClose = {
	__proto__: null,
	tr      : { __proto__: null, tr:true, th:true, td:true },
	th      : { __proto__: null, th:true },
	td      : { __proto__: null, thead:true, th:true, td:true },
	body    : { __proto__: null, head:true, link:true, script:true },
	li      : { __proto__: null, li:true },
	p       : { __proto__: null, p:true },
	h1      : { __proto__: null, p:true },
	h2      : { __proto__: null, p:true },
	h3      : { __proto__: null, p:true },
	h4      : { __proto__: null, p:true },
	h5      : { __proto__: null, p:true },
	h6      : { __proto__: null, p:true },
	select  : formTags,
	input   : formTags,
	output  : formTags,
	button  : formTags,
	datalist: formTags,
	textarea: formTags,
	option  : { __proto__: null, option:true },
	optgroup: { __proto__: null, optgroup:true }
};
//Elements that never get pushed onto the stack outside of xmlMode.
//The map has no prototype, so `name in voidElements` only matches the keys below.
var voidElements = {
	__proto__: null,

	//html
	area: true, base: true, basefont: true, br: true,
	col: true, command: true, embed: true, frame: true,
	hr: true, img: true, input: true, isindex: true,
	keygen: true, link: true, meta: true, param: true,
	source: true, track: true, wbr: true,

	//common self closing svg elements
	path: true, circle: true, ellipse: true,
	line: true, rect: true, use: true,
	stop: true, polyline: true, polygon: true
};
//matches the first whitespace character or slash (used by `_getInstructionName`)
var re_nameEnd = /\s|\//;

/*
	Creates a parser that feeds the callbacks in `cbs`.
	Tag and attribute names are lower-cased unless the `lowerCaseTags` /
	`lowerCaseAttributeNames` options say otherwise; when an option is
	absent, lower-casing is enabled exactly when `xmlMode` is falsy.
*/
function Parser(cbs, options){
	this._options = options || {};
	this._cbs = cbs || {};

	this._tagname = "";
	this._attribname = "";
	this._attribvalue = "";
	this._attribs = null;
	this._stack = [];

	this.startIndex = 0;
	this.endIndex = null;

	if("lowerCaseTags" in this._options){
		this._lowerCaseTagNames = !!this._options.lowerCaseTags;
	} else {
		this._lowerCaseTagNames = !this._options.xmlMode;
	}

	if("lowerCaseAttributeNames" in this._options){
		this._lowerCaseAttributeNames = !!this._options.lowerCaseAttributeNames;
	} else {
		this._lowerCaseAttributeNames = !this._options.xmlMode;
	}

	//the parser itself receives the tokenizer's events
	this._tokenizer = new Tokenizer(this._options, this);

	if(this._cbs.onparserinit) this._cbs.onparserinit(this);
}

require("util").inherits(Parser, require("events").EventEmitter);
//Moves `startIndex` / `endIndex` forward to cover the token being reported.
//`initialOffset` is subtracted from the tokenizer's section start for the first token only.
Parser.prototype._updatePosition = function(initialOffset){
	if(this.endIndex !== null){
		//subsequent tokens start right after the previous one
		this.startIndex = this.endIndex + 1;
	} else if(this._tokenizer._sectionStart <= initialOffset){
		this.startIndex = 0;
	} else {
		this.startIndex = this._tokenizer._sectionStart - initialOffset;
	}

	this.endIndex = this._tokenizer.getAbsoluteIndex();
};
//Tokenizer event handlers

//Text was found: update the position (end index is pulled back by one) and forward the data.
Parser.prototype.ontext = function(data){
	this._updatePosition(1);
	this.endIndex -= 1;

	if(this._cbs.ontext){
		this._cbs.ontext(data);
	}
};
/*
	The name of an opening tag was read. Outside of xmlMode, elements on
	top of the stack that are implicitly closed by this tag are closed
	first; non-void tags are then pushed onto the stack.
*/
Parser.prototype.onopentagname = function(name){
	if(this._lowerCaseTagNames) name = name.toLowerCase();

	this._tagname = name;

	if(!this._options.xmlMode && name in openImpliesClose){
		var top = this._stack[this._stack.length - 1];
		while(top in openImpliesClose[name]){
			this.onclosetag(top);
			top = this._stack[this._stack.length - 1];
		}
	}

	var tracked = this._options.xmlMode || !(name in voidElements);
	if(tracked) this._stack.push(name);

	if(this._cbs.onopentagname) this._cbs.onopentagname(name);
	//attributes are only collected when somebody listens for `onopentag`
	if(this._cbs.onopentag) this._attribs = {};
};
//The opening tag is complete: report it with its attributes and,
//for void elements outside of xmlMode, report the matching close right away.
Parser.prototype.onopentagend = function(){
	this._updatePosition(1);

	if(this._attribs){
		if(this._cbs.onopentag){
			this._cbs.onopentag(this._tagname, this._attribs);
		}
		this._attribs = null;
	}

	var closesImmediately = !this._options.xmlMode &&
		this._cbs.onclosetag &&
		this._tagname in voidElements;
	if(closesImmediately){
		this._cbs.onclosetag(this._tagname);
	}

	this._tagname = "";
};
/*
A closing tag with the given name was read.
- When the name is found on the stack, the matching element and everything
above it are closed.
- A stray `</p>` (and, with an empty stack or a void tag name, a stray
`</br>`) is turned into an element that is opened and closed immediately.
This only happens outside of xmlMode.
*/
Parser.prototype.onclosetag = function(name){
this._updatePosition(1);
if(this._lowerCaseTagNames){
name = name.toLowerCase();
}
//void elements are never on the stack outside of xmlMode
if(this._stack.length && (!(name in voidElements) || this._options.xmlMode)){
var pos = this._stack.lastIndexOf(name);
if(pos !== -1){
if(this._cbs.onclosetag){
//number of elements to close, counted from the top of the stack
pos = this._stack.length - pos;
while(pos--) this._cbs.onclosetag(this._stack.pop());
}
//nobody listens: just drop the matching element and everything above it
else this._stack.length = pos;
} else if(name === "p" && !this._options.xmlMode){
this.onopentagname(name);
this._closeCurrentTag();
}
} else if(!this._options.xmlMode && (name === "br" || name === "p")){
this.onopentagname(name);
this._closeCurrentTag();
}
};
//A tag ended with `/>`. It is only treated as self-closing in xmlMode
//or when the `recognizeSelfClosing` option is set; otherwise it is a plain opening tag.
Parser.prototype.onselfclosingtag = function(){
	var selfCloses = this._options.xmlMode || this._options.recognizeSelfClosing;

	if(!selfCloses){
		this.onopentagend();
		return;
	}

	this._closeCurrentTag();
};
//Finishes the tag that is currently being opened and closes it again.
Parser.prototype._closeCurrentTag = function(){
	//remember the name, `onopentagend` clears `_tagname`
	var name = this._tagname;

	this.onopentagend();

	//self-closing tags will be on the top of the stack
	//(cheaper check than in onclosetag)
	if(this._stack[this._stack.length - 1] !== name) return;

	if(this._cbs.onclosetag) this._cbs.onclosetag(name);
	this._stack.pop();
};
//Stores the name of the attribute being read, lower-cased when configured.
Parser.prototype.onattribname = function(name){
	this._attribname = this._lowerCaseAttributeNames ? name.toLowerCase() : name;
};
//Appends a chunk to the value of the attribute being read
//(the value is accumulated until `onattribend` consumes and clears it).
Parser.prototype.onattribdata = function(value){
this._attribvalue += value;
};
//The current attribute is complete: report it, store it in the attribute
//map (the first occurrence of a name wins) and clear the name/value buffers.
Parser.prototype.onattribend = function(){
	if(this._cbs.onattribute){
		this._cbs.onattribute(this._attribname, this._attribvalue);
	}

	var isFirstOccurrence = this._attribs &&
		!Object.prototype.hasOwnProperty.call(this._attribs, this._attribname);
	if(isFirstOccurrence){
		this._attribs[this._attribname] = this._attribvalue;
	}

	this._attribname = "";
	this._attribvalue = "";
};
//Returns the part of `value` before the first whitespace or slash
//(the whole value when there is none), lower-cased when tag names are.
Parser.prototype._getInstructionName = function(value){
	var end = value.search(re_nameEnd);
	var name = end < 0 ? value : value.substr(0, end);

	return this._lowerCaseTagNames ? name.toLowerCase() : name;
};
//Declarations (`<!...>`) are reported as processing instructions prefixed with "!".
Parser.prototype.ondeclaration = function(value){
	if(!this._cbs.onprocessinginstruction) return;

	var name = this._getInstructionName(value);
	this._cbs.onprocessinginstruction("!" + name, "!" + value);
};
//Processing instructions (`<?...>`) are reported with name and data prefixed with "?".
Parser.prototype.onprocessinginstruction = function(value){
	if(!this._cbs.onprocessinginstruction) return;

	var name = this._getInstructionName(value);
	this._cbs.onprocessinginstruction("?" + name, "?" + value);
};
//A comment was read: update the position (with an initial offset of 4),
//then report the comment and, directly afterwards, its end.
Parser.prototype.oncomment = function(value){
this._updatePosition(4);
if(this._cbs.oncomment) this._cbs.oncomment(value);
if(this._cbs.oncommentend) this._cbs.oncommentend();
};
//A CDATA section was read. It is reported as text wrapped in cdatastart/cdataend
//in xmlMode or with `recognizeCDATA`; otherwise it is reported as a comment.
Parser.prototype.oncdata = function(value){
	this._updatePosition(1);

	var asCdata = this._options.xmlMode || this._options.recognizeCDATA;
	if(!asCdata){
		this.oncomment("[CDATA[" + value + "]]");
		return;
	}

	if(this._cbs.oncdatastart) this._cbs.oncdatastart();
	if(this._cbs.ontext) this._cbs.ontext(value);
	if(this._cbs.oncdataend) this._cbs.oncdataend();
};
//Forwards an error to the `onerror` callback; without one the error is dropped.
Parser.prototype.onerror = function(err){
if(this._cbs.onerror) this._cbs.onerror(err);
};
//The document ended: report a close for every element still on the stack
//(innermost first, the stack itself is left untouched), then report the end.
Parser.prototype.onend = function(){
	if(this._cbs.onclosetag){
		var i = this._stack.length;
		while(i > 0){
			i -= 1;
			this._cbs.onclosetag(this._stack[i]);
		}
	}

	if(this._cbs.onend) this._cbs.onend();
};
//Resets the parser to a blank state, ready to parse a new HTML document.
//Calls `onreset` first and `onparserinit` last, like the constructor does for the latter.
Parser.prototype.reset = function(){
	if(this._cbs.onreset) this._cbs.onreset();
	this._tokenizer.reset();

	this._tagname = "";
	this._attribname = "";
	//a half-read attribute value must not leak into the next document
	this._attribvalue = "";
	this._attribs = null;
	this._stack = [];

	//same initial values as in the constructor, so `_updatePosition`
	//treats the next token as the first one again
	this.startIndex = 0;
	this.endIndex = null;

	if(this._cbs.onparserinit) this._cbs.onparserinit(this);
};
//Parses a complete HTML document and pushes it to the handler.
//The parser is reset first, then `data` is passed to `end` as the final chunk.
Parser.prototype.parseComplete = function(data){
this.reset();
this.end(data);
};
//The methods below delegate directly to the tokenizer.

//Feeds a chunk of data to the tokenizer
Parser.prototype.write = function(chunk){
this._tokenizer.write(chunk);
};
//Signals the end of the input; `chunk` is handed to the tokenizer's `end`
Parser.prototype.end = function(chunk){
this._tokenizer.end(chunk);
};
//Pauses the tokenizer
Parser.prototype.pause = function(){
this._tokenizer.pause();
};
//Resumes the tokenizer
Parser.prototype.resume = function(){
this._tokenizer.resume();
};
//alias for backwards compat
Parser.prototype.parseChunk = Parser.prototype.write;
Parser.prototype.done = Parser.prototype.end;
module.exports = Parser;

27
project3/node_modules/htmlparser2/lib/ProxyHandler.js generated vendored Normal file
View File

@@ -0,0 +1,27 @@
module.exports = ProxyHandler;
//Handler that forwards every event to the optional callbacks object `cbs`.
function ProxyHandler(cbs){
	this._cbs = cbs ? cbs : {};
}
//Generates one forwarding `on<event>` method per entry of EVENTS. The value
//stored in EVENTS is the number of arguments (0, 1 or 2) the event carries.
var EVENTS = require("./").EVENTS;
Object.keys(EVENTS).forEach(function(eventName){
	var method = "on" + eventName;

	switch(EVENTS[eventName]){
		case 0:
			ProxyHandler.prototype[method] = function(){
				if(this._cbs[method]) this._cbs[method]();
			};
			break;
		case 1:
			ProxyHandler.prototype[method] = function(a){
				if(this._cbs[method]) this._cbs[method](a);
			};
			break;
		case 2:
			ProxyHandler.prototype[method] = function(a, b){
				if(this._cbs[method]) this._cbs[method](a, b);
			};
			break;
		default:
			throw Error("wrong number of arguments");
	}
});

35
project3/node_modules/htmlparser2/lib/Stream.js generated vendored Normal file
View File

@@ -0,0 +1,35 @@
module.exports = Stream;
//NOTE: `Parser` is the export of ./WritableStream.js here, not ./Parser.js
var Parser = require("./WritableStream.js");
//Stream whose parser callbacks (a `Cbs` instance) re-emit every parser event on the stream itself.
function Stream(options){
Parser.call(this, new Cbs(this), options);
}
require("util").inherits(Stream, Parser);
Stream.prototype.readable = true;
//Callbacks object handed to the parser; `scope` is the object the
//generated `on<event>` methods call `emit` on.
function Cbs(scope){
this.scope = scope;
}
//Generates one `on<event>` method per entry of EVENTS that emits the event
//(named without the "on" prefix) on the scope. The value stored in EVENTS
//is the number of arguments (0, 1 or 2) the event carries.
var EVENTS = require("../").EVENTS;
Object.keys(EVENTS).forEach(function(name){
	var method = "on" + name;

	switch(EVENTS[name]){
		case 0:
			Cbs.prototype[method] = function(){
				this.scope.emit(name);
			};
			break;
		case 1:
			Cbs.prototype[method] = function(a){
				this.scope.emit(name, a);
			};
			break;
		case 2:
			Cbs.prototype[method] = function(a, b){
				this.scope.emit(name, a, b);
			};
			break;
		default:
			throw Error("wrong number of arguments!");
	}
});

906
project3/node_modules/htmlparser2/lib/Tokenizer.js generated vendored Normal file
View File

@@ -0,0 +1,906 @@
module.exports = Tokenizer;
//Dependencies and tokenizer state constants.
//`i` numbers the tokenizer states consecutively starting at 0 (TEXT),
//`j` numbers the "special tag" modes starting at 0 (SPECIAL_NONE).
//The trailing comments mostly name the character associated with a state.
var decodeCodePoint = require("entities/lib/decode_codepoint.js"),
entityMap = require("entities/maps/entities.json"),
legacyMap = require("entities/maps/legacy.json"),
xmlMap = require("entities/maps/xml.json"),
i = 0,
TEXT = i++,
BEFORE_TAG_NAME = i++, //after <
IN_TAG_NAME = i++,
IN_SELF_CLOSING_TAG = i++,
BEFORE_CLOSING_TAG_NAME = i++,
IN_CLOSING_TAG_NAME = i++,
AFTER_CLOSING_TAG_NAME = i++,
//attributes
BEFORE_ATTRIBUTE_NAME = i++,
IN_ATTRIBUTE_NAME = i++,
AFTER_ATTRIBUTE_NAME = i++,
BEFORE_ATTRIBUTE_VALUE = i++,
IN_ATTRIBUTE_VALUE_DQ = i++, // "
IN_ATTRIBUTE_VALUE_SQ = i++, // '
IN_ATTRIBUTE_VALUE_NQ = i++,
//declarations
BEFORE_DECLARATION = i++, // !
IN_DECLARATION = i++,
//processing instructions
IN_PROCESSING_INSTRUCTION = i++, // ?
//comments
BEFORE_COMMENT = i++,
IN_COMMENT = i++,
AFTER_COMMENT_1 = i++,
AFTER_COMMENT_2 = i++,
//cdata
BEFORE_CDATA_1 = i++, // [
BEFORE_CDATA_2 = i++, // C
BEFORE_CDATA_3 = i++, // D
BEFORE_CDATA_4 = i++, // A
BEFORE_CDATA_5 = i++, // T
BEFORE_CDATA_6 = i++, // A
IN_CDATA = i++, // [
AFTER_CDATA_1 = i++, // ]
AFTER_CDATA_2 = i++, // ]
//special tags
BEFORE_SPECIAL = i++, //S
BEFORE_SPECIAL_END = i++, //S
BEFORE_SCRIPT_1 = i++, //C
BEFORE_SCRIPT_2 = i++, //R
BEFORE_SCRIPT_3 = i++, //I
BEFORE_SCRIPT_4 = i++, //P
BEFORE_SCRIPT_5 = i++, //T
AFTER_SCRIPT_1 = i++, //C
AFTER_SCRIPT_2 = i++, //R
AFTER_SCRIPT_3 = i++, //I
AFTER_SCRIPT_4 = i++, //P
AFTER_SCRIPT_5 = i++, //T
BEFORE_STYLE_1 = i++, //T
BEFORE_STYLE_2 = i++, //Y
BEFORE_STYLE_3 = i++, //L
BEFORE_STYLE_4 = i++, //E
AFTER_STYLE_1 = i++, //T
AFTER_STYLE_2 = i++, //Y
AFTER_STYLE_3 = i++, //L
AFTER_STYLE_4 = i++, //E
//entities
BEFORE_ENTITY = i++, //&
BEFORE_NUMERIC_ENTITY = i++, //#
IN_NAMED_ENTITY = i++,
IN_NUMERIC_ENTITY = i++,
IN_HEX_ENTITY = i++, //X
//values of `_special`: which raw-text tag (if any) is currently open
j = 0,
SPECIAL_NONE = j++,
SPECIAL_SCRIPT = j++,
SPECIAL_STYLE = j++;
//Tells whether `c` is a space, line feed, tab, form feed or carriage return.
function whitespace(c){
	switch(c){
		case " ":
		case "\n":
		case "\t":
		case "\f":
		case "\r":
			return true;
		default:
			return false;
	}
}
//Creates a state handler that switches to SUCCESS when it sees `char`
//and does nothing for any other character.
function characterState(char, SUCCESS){
	return function(c){
		if(c !== char) return;
		this._state = SUCCESS;
	};
}
//Creates a state handler that switches to SUCCESS when it sees `upper`
//(in either case); otherwise it switches to FAILURE and steps the index
//back by one so the character is processed again.
function ifElseState(upper, SUCCESS, FAILURE){
	var lower = upper.toLowerCase();

	return function(c){
		//for characters without case, both comparisons are the same
		var matched = c === lower || c === upper;

		if(matched){
			this._state = SUCCESS;
			return;
		}

		this._state = FAILURE;
		this._index--;
	};
}
//Creates a state handler for one letter of a special tag name: on a match
//(in either case) it advances to NEXT_STATE, otherwise the tag is handled
//as a regular tag name.
function consumeSpecialNameChar(upper, NEXT_STATE){
	var lower = upper.toLowerCase();

	return function(c){
		var matched = c === lower || c === upper;

		if(matched){
			this._state = NEXT_STATE;
			return;
		}

		this._state = IN_TAG_NAME;
		this._index--; //consume the token again
	};
}
/*
	Creates a tokenizer that reports its findings to `cbs`.
	Options read here: `xmlMode` and `decodeEntities` (both coerced to booleans).
*/
function Tokenizer(options, cbs){
	this._state = TEXT;
	this._buffer = "";
	this._sectionStart = 0;
	this._index = 0;
	this._bufferOffset = 0; //chars removed from _buffer
	this._baseState = TEXT;
	this._special = SPECIAL_NONE;
	this._cbs = cbs;
	this._running = true;
	this._ended = false;
	this._xmlMode = Boolean(options && options.xmlMode);
	this._decodeEntities = Boolean(options && options.decodeEntities);
}
//Text state: a "<" starts a tag; an "&" starts an entity when entities are
//decoded and no special tag is open. Pending text is emitted first.
Tokenizer.prototype._stateText = function(c){
	var startsTag = c === "<";
	var startsEntity = !startsTag &&
		this._decodeEntities &&
		this._special === SPECIAL_NONE &&
		c === "&";

	if(!startsTag && !startsEntity) return;

	if(this._index > this._sectionStart){
		this._cbs.ontext(this._getSection());
	}

	if(startsTag){
		this._state = BEFORE_TAG_NAME;
	} else {
		this._baseState = TEXT;
		this._state = BEFORE_ENTITY;
	}
	this._sectionStart = this._index;
};
//State right after "<": decides what kind of token follows.
Tokenizer.prototype._stateBeforeTagName = function(c){
	if(c === "/"){
		this._state = BEFORE_CLOSING_TAG_NAME;
		return;
	}
	//not a tag after all (also: only closing tags are recognized inside special tags)
	if(c === ">" || this._special !== SPECIAL_NONE || whitespace(c)){
		this._state = TEXT;
		return;
	}
	if(c === "!"){
		this._state = BEFORE_DECLARATION;
		this._sectionStart = this._index + 1;
		return;
	}
	if(c === "?"){
		this._state = IN_PROCESSING_INSTRUCTION;
		this._sectionStart = this._index + 1;
		return;
	}
	if(c === "<"){
		//the previous "<" was text; stay in this state for the new one
		this._cbs.ontext(this._getSection());
		this._sectionStart = this._index;
		return;
	}

	//outside of xmlMode, names starting with "s" may be script or style
	var maybeSpecial = !this._xmlMode && (c === "s" || c === "S");
	this._state = maybeSpecial ? BEFORE_SPECIAL : IN_TAG_NAME;
	this._sectionStart = this._index;
};
//Inside an opening tag's name: "/", ">" or whitespace ends the name.
Tokenizer.prototype._stateInTagName = function(c){
	var endsName = c === "/" || c === ">" || whitespace(c);
	if(!endsName) return;

	this._emitToken("onopentagname");
	this._state = BEFORE_ATTRIBUTE_NAME;
	this._index--; //the terminating character is handled by the next state
};
//State after "</": skips whitespace, then starts reading the closing tag's name.
//Inside a special tag, only names starting with "s" are considered.
Tokenizer.prototype._stateBeforeCloseingTagName = function(c){
	if(whitespace(c)) return;

	if(c === ">"){
		this._state = TEXT;
		return;
	}

	if(this._special === SPECIAL_NONE){
		this._state = IN_CLOSING_TAG_NAME;
		this._sectionStart = this._index;
		return;
	}

	if(c === "s" || c === "S"){
		this._state = BEFORE_SPECIAL_END;
	} else {
		this._state = TEXT;
		this._index--;
	}
};
//Inside a closing tag's name: ">" or whitespace ends the name.
Tokenizer.prototype._stateInCloseingTagName = function(c){
	var endsName = c === ">" || whitespace(c);
	if(!endsName) return;

	this._emitToken("onclosetag");
	this._state = AFTER_CLOSING_TAG_NAME;
	this._index--; //the terminating character is handled by the next state
};
//After a closing tag's name: everything up to ">" is skipped.
Tokenizer.prototype._stateAfterCloseingTagName = function(c){
	if(c !== ">") return;

	this._state = TEXT;
	this._sectionStart = this._index + 1;
};
//Between attributes: ">" ends the tag, "/" may start a self-closing end,
//whitespace is skipped and anything else starts an attribute name.
Tokenizer.prototype._stateBeforeAttributeName = function(c){
	if(c === ">"){
		this._cbs.onopentagend();
		this._state = TEXT;
		this._sectionStart = this._index + 1;
		return;
	}
	if(c === "/"){
		this._state = IN_SELF_CLOSING_TAG;
		return;
	}
	if(whitespace(c)) return;

	this._state = IN_ATTRIBUTE_NAME;
	this._sectionStart = this._index;
};
//After "/" inside a tag: ">" completes a self-closing tag, whitespace is
//skipped and anything else is handled as the start of an attribute.
Tokenizer.prototype._stateInSelfClosingTag = function(c){
	if(c === ">"){
		this._cbs.onselfclosingtag();
		this._state = TEXT;
		this._sectionStart = this._index + 1;
		return;
	}
	if(whitespace(c)) return;

	this._state = BEFORE_ATTRIBUTE_NAME;
	this._index--;
};
//Inside an attribute name: "=", "/", ">" or whitespace ends the name.
Tokenizer.prototype._stateInAttributeName = function(c){
	var endsName = c === "=" || c === "/" || c === ">" || whitespace(c);
	if(!endsName) return;

	this._cbs.onattribname(this._getSection());
	this._sectionStart = -1;
	this._state = AFTER_ATTRIBUTE_NAME;
	this._index--; //the terminating character is handled by the next state
};
//After an attribute name: "=" introduces a value; anything except
//whitespace ends the (value-less) attribute.
Tokenizer.prototype._stateAfterAttributeName = function(c){
	if(c === "="){
		this._state = BEFORE_ATTRIBUTE_VALUE;
		return;
	}
	if(c === "/" || c === ">"){
		this._cbs.onattribend();
		this._state = BEFORE_ATTRIBUTE_NAME;
		this._index--;
		return;
	}
	if(whitespace(c)) return;

	//the next attribute starts right here
	this._cbs.onattribend();
	this._state = IN_ATTRIBUTE_NAME;
	this._sectionStart = this._index;
};
//After "=": picks the double-quoted, single-quoted or unquoted value state;
//leading whitespace is skipped.
Tokenizer.prototype._stateBeforeAttributeValue = function(c){
	switch(c){
		case "\"":
			this._state = IN_ATTRIBUTE_VALUE_DQ;
			this._sectionStart = this._index + 1;
			break;
		case "'":
			this._state = IN_ATTRIBUTE_VALUE_SQ;
			this._sectionStart = this._index + 1;
			break;
		default:
			if(!whitespace(c)){
				this._state = IN_ATTRIBUTE_VALUE_NQ;
				this._sectionStart = this._index;
				this._index--; //reconsume token
			}
	}
};
//Inside a double-quoted attribute value: the closing quote ends the
//attribute; "&" starts an entity when entities are decoded.
Tokenizer.prototype._stateInAttributeValueDoubleQuotes = function(c){
	if(c === "\""){
		this._emitToken("onattribdata");
		this._cbs.onattribend();
		this._state = BEFORE_ATTRIBUTE_NAME;
		return;
	}
	if(!this._decodeEntities || c !== "&") return;

	this._emitToken("onattribdata");
	this._baseState = this._state; //state to come back to after the entity
	this._state = BEFORE_ENTITY;
	this._sectionStart = this._index;
};
//Inside a single-quoted attribute value: the closing quote ends the
//attribute; "&" starts an entity when entities are decoded.
Tokenizer.prototype._stateInAttributeValueSingleQuotes = function(c){
	if(c === "'"){
		this._emitToken("onattribdata");
		this._cbs.onattribend();
		this._state = BEFORE_ATTRIBUTE_NAME;
		return;
	}
	if(!this._decodeEntities || c !== "&") return;

	this._emitToken("onattribdata");
	this._baseState = this._state; //state to come back to after the entity
	this._state = BEFORE_ENTITY;
	this._sectionStart = this._index;
};
//Inside an unquoted attribute value: whitespace or ">" ends the
//attribute; "&" starts an entity when entities are decoded.
Tokenizer.prototype._stateInAttributeValueNoQuotes = function(c){
	if(whitespace(c) || c === ">"){
		this._emitToken("onattribdata");
		this._cbs.onattribend();
		this._state = BEFORE_ATTRIBUTE_NAME;
		this._index--; //the terminating character is handled by the next state
		return;
	}
	if(!this._decodeEntities || c !== "&") return;

	this._emitToken("onattribdata");
	this._baseState = this._state; //state to come back to after the entity
	this._state = BEFORE_ENTITY;
	this._sectionStart = this._index;
};
//State after "<!": "[" may start a CDATA section, "-" may start a comment,
//everything else is a declaration.
Tokenizer.prototype._stateBeforeDeclaration = function(c){
	if(c === "["){
		this._state = BEFORE_CDATA_1;
	} else if(c === "-"){
		this._state = BEFORE_COMMENT;
	} else {
		this._state = IN_DECLARATION;
	}
};
//Inside a declaration: ">" ends it and reports the current section.
Tokenizer.prototype._stateInDeclaration = function(c){
	if(c !== ">") return;

	this._cbs.ondeclaration(this._getSection());
	this._state = TEXT;
	this._sectionStart = this._index + 1;
};
//Inside a processing instruction: ">" ends it and reports the current section.
Tokenizer.prototype._stateInProcessingInstruction = function(c){
	if(c !== ">") return;

	this._cbs.onprocessinginstruction(this._getSection());
	this._state = TEXT;
	this._sectionStart = this._index + 1;
};
//State after "<!-": a second "-" starts a comment, anything else makes it a declaration.
Tokenizer.prototype._stateBeforeComment = function(c){
	if(c !== "-"){
		this._state = IN_DECLARATION;
		return;
	}

	this._state = IN_COMMENT;
	this._sectionStart = this._index + 1;
};
//Inside a comment: a "-" may be the start of the closing "-->"
Tokenizer.prototype._stateInComment = function(c){
if(c === "-") this._state = AFTER_COMMENT_1;
};
//After one "-" inside a comment: a second "-" moves on, anything else goes back to the comment.
Tokenizer.prototype._stateAfterComment1 = function(c){
	this._state = c === "-" ? AFTER_COMMENT_2 : IN_COMMENT;
};
//After "--" inside a comment: ">" ends the comment, a further "-" keeps
//this state (`--->`), anything else goes back to the comment.
Tokenizer.prototype._stateAfterComment2 = function(c){
	if(c === "-") return;

	if(c !== ">"){
		this._state = IN_COMMENT;
		return;
	}

	//remove 2 trailing chars
	this._cbs.oncomment(this._buffer.substring(this._sectionStart, this._index - 2));
	this._state = TEXT;
	this._sectionStart = this._index + 1;
};
//States matching the letters of "CDATA" after "<![", one letter per state and
//in either case; on a mismatch the token is handled as a declaration instead.
Tokenizer.prototype._stateBeforeCdata1 = ifElseState("C", BEFORE_CDATA_2, IN_DECLARATION);
Tokenizer.prototype._stateBeforeCdata2 = ifElseState("D", BEFORE_CDATA_3, IN_DECLARATION);
Tokenizer.prototype._stateBeforeCdata3 = ifElseState("A", BEFORE_CDATA_4, IN_DECLARATION);
Tokenizer.prototype._stateBeforeCdata4 = ifElseState("T", BEFORE_CDATA_5, IN_DECLARATION);
Tokenizer.prototype._stateBeforeCdata5 = ifElseState("A", BEFORE_CDATA_6, IN_DECLARATION);
//After "<![CDATA": a "[" starts the CDATA content, anything else makes it a declaration.
Tokenizer.prototype._stateBeforeCdata6 = function(c){
	if(c !== "["){
		this._state = IN_DECLARATION;
		this._index--;
		return;
	}

	this._state = IN_CDATA;
	this._sectionStart = this._index + 1;
};
//Inside a CDATA section: a "]" may be the start of the closing "]]>"
Tokenizer.prototype._stateInCdata = function(c){
if(c === "]") this._state = AFTER_CDATA_1;
};
//After one "]": a second "]" moves on to AFTER_CDATA_2; for any other
//character this handler leaves the state unchanged
Tokenizer.prototype._stateAfterCdata1 = characterState("]", AFTER_CDATA_2);
//After "]]" inside a CDATA section: ">" ends the section, a further "]"
//keeps this state (`]]]>`), anything else goes back to the section.
Tokenizer.prototype._stateAfterCdata2 = function(c){
	if(c === "]") return;

	if(c !== ">"){
		this._state = IN_CDATA;
		return;
	}

	//remove 2 trailing chars
	this._cbs.oncdata(this._buffer.substring(this._sectionStart, this._index - 2));
	this._state = TEXT;
	this._sectionStart = this._index + 1;
};
//BEFORE_SPECIAL: decides whether a tag name might become `script` (next
//letter `c`) or `style` (next letter `t`); otherwise it is an ordinary tag name.
Tokenizer.prototype._stateBeforeSpecial = function(c){
	switch (c){
		case "c":
		case "C":
			this._state = BEFORE_SCRIPT_1;
			break;
		case "t":
		case "T":
			this._state = BEFORE_STYLE_1;
			break;
		default:
			this._state = IN_TAG_NAME;
			this._index--; //consume the token again
	}
};
//BEFORE_SPECIAL_END: while inside a special element, checks whether a
//closing tag continues with the letter expected for that element
//(`c` for script, `t` for style). Everything else is plain text.
Tokenizer.prototype._stateBeforeSpecialEnd = function(c){
	var next = TEXT;

	if(this._special === SPECIAL_SCRIPT && (c === "c" || c === "C")){
		next = AFTER_SCRIPT_1;
	} else if(this._special === SPECIAL_STYLE && (c === "t" || c === "T")){
		next = AFTER_STYLE_1;
	}

	this._state = next;
};
//BEFORE_SCRIPT_1..4: one state per letter of `ript` in an opening `script` tag name.
//NOTE(review): `consumeSpecialNameChar` is defined earlier in this file (not visible
//here); presumably a mismatch falls back to ordinary tag-name handling - confirm.
Tokenizer.prototype._stateBeforeScript1 = consumeSpecialNameChar("R", BEFORE_SCRIPT_2);
Tokenizer.prototype._stateBeforeScript2 = consumeSpecialNameChar("I", BEFORE_SCRIPT_3);
Tokenizer.prototype._stateBeforeScript3 = consumeSpecialNameChar("P", BEFORE_SCRIPT_4);
Tokenizer.prototype._stateBeforeScript4 = consumeSpecialNameChar("T", BEFORE_SCRIPT_5);
//BEFORE_SCRIPT_5: reached after the last letter of `script`. Only when the
//name really ends here (`/`, `>` or whitespace) is the tokenizer marked as
//being inside a script element. The character itself always goes back to
//IN_TAG_NAME.
Tokenizer.prototype._stateBeforeScript5 = function(c){
	var nameEnds = c === "/" || c === ">" || whitespace(c);

	if(nameEnds) this._special = SPECIAL_SCRIPT;

	this._state = IN_TAG_NAME;
	this._index--; //consume the token again
};
//AFTER_SCRIPT_1..4: one state per letter of `ript` in a closing `script` tag;
//the fallback target of every step is TEXT.
//NOTE(review): the exact matching rules live in `ifElseState`, defined earlier
//in this file (not visible here) - confirm there.
Tokenizer.prototype._stateAfterScript1 = ifElseState("R", AFTER_SCRIPT_2, TEXT);
Tokenizer.prototype._stateAfterScript2 = ifElseState("I", AFTER_SCRIPT_3, TEXT);
Tokenizer.prototype._stateAfterScript3 = ifElseState("P", AFTER_SCRIPT_4, TEXT);
Tokenizer.prototype._stateAfterScript4 = ifElseState("T", AFTER_SCRIPT_5, TEXT);
//AFTER_SCRIPT_5: the closing tag name is complete only if `>` or whitespace
//follows. In that case the special mode ends and the regular closing-tag
//state takes over; otherwise this was just text.
Tokenizer.prototype._stateAfterScript5 = function(c){
	if(c !== ">" && !whitespace(c)){
		this._state = TEXT;
		return;
	}

	this._special = SPECIAL_NONE;
	this._state = IN_CLOSING_TAG_NAME;
	//move the section start back six characters (the length of `script`)
	this._sectionStart = this._index - 6;
	this._index--; //reconsume the token
};
//BEFORE_STYLE_1..3: one state per letter of `yle` in an opening `style` tag name.
//NOTE(review): `consumeSpecialNameChar` is defined earlier in this file (not visible
//here); presumably a mismatch falls back to ordinary tag-name handling - confirm.
Tokenizer.prototype._stateBeforeStyle1 = consumeSpecialNameChar("Y", BEFORE_STYLE_2);
Tokenizer.prototype._stateBeforeStyle2 = consumeSpecialNameChar("L", BEFORE_STYLE_3);
Tokenizer.prototype._stateBeforeStyle3 = consumeSpecialNameChar("E", BEFORE_STYLE_4);
//BEFORE_STYLE_4: reached after the last letter of `style`. Only when the
//name really ends here (`/`, `>` or whitespace) is the tokenizer marked as
//being inside a style element. The character itself always goes back to
//IN_TAG_NAME.
Tokenizer.prototype._stateBeforeStyle4 = function(c){
	var nameEnds = c === "/" || c === ">" || whitespace(c);

	if(nameEnds) this._special = SPECIAL_STYLE;

	this._state = IN_TAG_NAME;
	this._index--; //consume the token again
};
//AFTER_STYLE_1..3: one state per letter of `yle` in a closing `style` tag;
//the fallback target of every step is TEXT.
//NOTE(review): the exact matching rules live in `ifElseState`, defined earlier
//in this file (not visible here) - confirm there.
Tokenizer.prototype._stateAfterStyle1 = ifElseState("Y", AFTER_STYLE_2, TEXT);
Tokenizer.prototype._stateAfterStyle2 = ifElseState("L", AFTER_STYLE_3, TEXT);
Tokenizer.prototype._stateAfterStyle3 = ifElseState("E", AFTER_STYLE_4, TEXT);
//AFTER_STYLE_4: the closing tag name is complete only if `>` or whitespace
//follows. In that case the special mode ends and the regular closing-tag
//state takes over; otherwise this was just text.
Tokenizer.prototype._stateAfterStyle4 = function(c){
	if(c !== ">" && !whitespace(c)){
		this._state = TEXT;
		return;
	}

	this._special = SPECIAL_NONE;
	this._state = IN_CLOSING_TAG_NAME;
	//move the section start back five characters (the length of `style`)
	this._sectionStart = this._index - 5;
	this._index--; //reconsume the token
};
//Entity dispatch: `#` selects the numeric-entity path, anything else the named one;
//on the numeric path `X` selects hexadecimal, anything else decimal.
//NOTE(review): how the non-matching character is re-processed and whether the match is
//case-insensitive is decided by `ifElseState` (defined earlier, not visible here) - confirm.
Tokenizer.prototype._stateBeforeEntity = ifElseState("#", BEFORE_NUMERIC_ENTITY, IN_NAMED_ENTITY);
Tokenizer.prototype._stateBeforeNumericEntity = ifElseState("X", IN_HEX_ENTITY, IN_NUMERIC_ENTITY);
//Strict lookup for named entities (used for entities terminated with a semicolon).
//The name is everything between the character after _sectionStart and _index.
//On a hit in the active map (xmlMap in XML mode, entityMap otherwise) the
//replacement is emitted and the section restarts behind the current character.
Tokenizer.prototype._parseNamedEntityStrict = function(){
	//offset = 1
	var nameStart = this._sectionStart + 1;
	if(!(nameStart < this._index)) return; //empty name

	var name = this._buffer.substring(nameStart, this._index);
	var table = this._xmlMode ? xmlMap : entityMap;
	if(!table.hasOwnProperty(name)) return; //unknown: leave the section untouched

	this._emitPartial(table[name]);
	this._sectionStart = this._index + 1;
};
//Parses legacy entities (without trailing semicolon).
//Tries the longest candidate first and shortens it one character at a time;
//the first name found in legacyMap is emitted and skipped.
Tokenizer.prototype._parseLegacyEntity = function(){
	var start = this._sectionStart + 1;

	//legacy entity names are between 2 and 6 characters long
	for(var len = Math.min(this._index - start, 6); len >= 2; len--){
		var candidate = this._buffer.substr(start, len);
		if(legacyMap.hasOwnProperty(candidate)){
			this._emitPartial(legacyMap[candidate]);
			//skip the `&` plus the matched name
			this._sectionStart += len + 1;
			return;
		}
	}
};
//IN_NAMED_ENTITY: collects the characters of an entity name.
// - `;` ends the entity: strict lookup first; outside XML mode the legacy
//   lookup is tried if the strict one did not consume the section.
// - any other non-alphanumeric character ends it without a semicolon. That
//   character is processed again by the base state.
Tokenizer.prototype._stateInNamedEntity = function(c){
	if(c === ";"){
		this._parseNamedEntityStrict();
		if(this._sectionStart + 1 < this._index && !this._xmlMode){
			this._parseLegacyEntity();
		}
		this._state = this._baseState;
		return;
	}

	var endsName = (c < "a" || c > "z") && (c < "A" || c > "Z") && (c < "0" || c > "9");
	if(!endsName) return;

	//XML mode requires the semicolon, and an empty name is never an entity
	if(!this._xmlMode && this._sectionStart + 1 !== this._index){
		if(this._baseState === TEXT){
			this._parseLegacyEntity();
		} else if(c !== "="){
			//outside of text, a name followed by `=` is left undecoded
			this._parseNamedEntityStrict();
		}
	}

	this._state = this._baseState;
	this._index--;
};
//Decodes the digits of a numeric entity and emits the resulting character.
//`offset` is the length of the entity prefix counted from _sectionStart
//(the callers in this file pass 2 with base 10 and 3 with base 16),
//`base` is the radix handed to parseInt. Always returns to the base state.
Tokenizer.prototype._decodeNumericEntity = function(offset, base){
	var digitsStart = this._sectionStart + offset;

	if(digitsStart === this._index){
		//no digits at all: nothing is emitted, the section start moves back by one
		this._sectionStart--;
	} else {
		//parse entity
		var digits = this._buffer.substring(digitsStart, this._index);
		var codePoint = parseInt(digits, base);
		this._emitPartial(decodeCodePoint(codePoint));
		this._sectionStart = this._index;
	}

	this._state = this._baseState;
};
//IN_NUMERIC_ENTITY (decimal, prefix `&#`): digits are collected silently.
// - `;` decodes the entity and skips the semicolon.
// - any other non-digit ends the entity; outside XML mode it is decoded
//   anyway. The character is processed again by the base state.
Tokenizer.prototype._stateInNumericEntity = function(c){
	if(c === ";"){
		this._decodeNumericEntity(2, 10);
		this._sectionStart++;
		return;
	}

	if(c < "0" || c > "9"){
		if(this._xmlMode){
			this._state = this._baseState;
		} else {
			this._decodeNumericEntity(2, 10);
		}
		this._index--;
	}
};
//IN_HEX_ENTITY (prefix `&#x`): hex digits are collected silently.
// - `;` decodes the entity and skips the semicolon.
// - any other non-hex character ends the entity; outside XML mode it is
//   decoded anyway. The character is processed again by the base state.
Tokenizer.prototype._stateInHexEntity = function(c){
	if(c === ";"){
		this._decodeNumericEntity(3, 16);
		this._sectionStart++;
		return;
	}

	if((c < "a" || c > "f") && (c < "A" || c > "F") && (c < "0" || c > "9")){
		if(this._xmlMode){
			this._state = this._baseState;
		} else {
			this._decodeNumericEntity(3, 16);
		}
		this._index--;
	}
};
//Called at the end of every _parse() run. Drops the part of the buffer that
//is no longer needed and rebases _index and _sectionStart onto what is kept.
//_bufferOffset accumulates the number of characters dropped so far, so that
//getAbsoluteIndex() (_bufferOffset + _index) keeps counting from the start
//of the input. It therefore has to be advanced *before* _index is reset.
Tokenizer.prototype._cleanup = function (){
	if(this._sectionStart < 0){
		//no section is open: drop everything that was already consumed.
		//After a normal run this empties the buffer; if the tokenizer was
		//paused mid-buffer, the unparsed rest is kept for resume().
		this._buffer = this._buffer.substr(this._index);
		this._bufferOffset += this._index;
		this._index = 0;
	} else if(this._running){
		if(this._state === TEXT){
			//flush the pending text, nothing of it has to be kept
			if(this._sectionStart !== this._index){
				this._cbs.ontext(this._buffer.substr(this._sectionStart));
			}
			this._buffer = "";
			this._bufferOffset += this._index;
			this._index = 0;
		} else if(this._sectionStart === this._index){
			//the section just started
			this._buffer = "";
			this._bufferOffset += this._index;
			this._index = 0;
		} else {
			//remove everything unnecessary: keep only the open section
			this._buffer = this._buffer.substr(this._sectionStart);
			this._index -= this._sectionStart;
			this._bufferOffset += this._sectionStart;
		}

		this._sectionStart = 0;
	}
	//else: paused with an open section, the buffer is left untouched
};
//TODO make events conditional
//Appends `chunk` to the internal buffer and parses as far as possible.
//Writing after end() is reported through `onerror`; the chunk is still
//appended and parsed afterwards.
Tokenizer.prototype.write = function(chunk){
	if(this._ended){
		this._cbs.onerror(Error(".write() after done!"));
	}

	this._buffer = this._buffer + chunk;
	this._parse();
};
//Main loop: hands the buffered input to the handler of the current state,
//one character at a time, until the buffer is exhausted or the tokenizer is
//paused. Afterwards the buffer is tidied up by _cleanup().
Tokenizer.prototype._parse = function(){
	while(this._index < this._buffer.length && this._running){
		var c = this._buffer.charAt(this._index);

		switch (this._state){
			case TEXT: this._stateText(c); break;
			case BEFORE_TAG_NAME: this._stateBeforeTagName(c); break;
			case IN_TAG_NAME: this._stateInTagName(c); break;
			case BEFORE_CLOSING_TAG_NAME: this._stateBeforeCloseingTagName(c); break;
			case IN_CLOSING_TAG_NAME: this._stateInCloseingTagName(c); break;
			case AFTER_CLOSING_TAG_NAME: this._stateAfterCloseingTagName(c); break;
			case IN_SELF_CLOSING_TAG: this._stateInSelfClosingTag(c); break;

			/*
			*	attributes
			*/
			case BEFORE_ATTRIBUTE_NAME: this._stateBeforeAttributeName(c); break;
			case IN_ATTRIBUTE_NAME: this._stateInAttributeName(c); break;
			case AFTER_ATTRIBUTE_NAME: this._stateAfterAttributeName(c); break;
			case BEFORE_ATTRIBUTE_VALUE: this._stateBeforeAttributeValue(c); break;
			case IN_ATTRIBUTE_VALUE_DQ: this._stateInAttributeValueDoubleQuotes(c); break;
			case IN_ATTRIBUTE_VALUE_SQ: this._stateInAttributeValueSingleQuotes(c); break;
			case IN_ATTRIBUTE_VALUE_NQ: this._stateInAttributeValueNoQuotes(c); break;

			/*
			*	declarations
			*/
			case BEFORE_DECLARATION: this._stateBeforeDeclaration(c); break;
			case IN_DECLARATION: this._stateInDeclaration(c); break;

			/*
			*	processing instructions
			*/
			case IN_PROCESSING_INSTRUCTION: this._stateInProcessingInstruction(c); break;

			/*
			*	comments
			*/
			case BEFORE_COMMENT: this._stateBeforeComment(c); break;
			case IN_COMMENT: this._stateInComment(c); break;
			case AFTER_COMMENT_1: this._stateAfterComment1(c); break;
			case AFTER_COMMENT_2: this._stateAfterComment2(c); break;

			/*
			*	cdata
			*/
			case BEFORE_CDATA_1: this._stateBeforeCdata1(c); break;
			case BEFORE_CDATA_2: this._stateBeforeCdata2(c); break;
			case BEFORE_CDATA_3: this._stateBeforeCdata3(c); break;
			case BEFORE_CDATA_4: this._stateBeforeCdata4(c); break;
			case BEFORE_CDATA_5: this._stateBeforeCdata5(c); break;
			case BEFORE_CDATA_6: this._stateBeforeCdata6(c); break;
			case IN_CDATA: this._stateInCdata(c); break;
			case AFTER_CDATA_1: this._stateAfterCdata1(c); break;
			case AFTER_CDATA_2: this._stateAfterCdata2(c); break;

			/*
			*	special tags
			*/
			case BEFORE_SPECIAL: this._stateBeforeSpecial(c); break;
			case BEFORE_SPECIAL_END: this._stateBeforeSpecialEnd(c); break;

			/*
			*	script
			*/
			case BEFORE_SCRIPT_1: this._stateBeforeScript1(c); break;
			case BEFORE_SCRIPT_2: this._stateBeforeScript2(c); break;
			case BEFORE_SCRIPT_3: this._stateBeforeScript3(c); break;
			case BEFORE_SCRIPT_4: this._stateBeforeScript4(c); break;
			case BEFORE_SCRIPT_5: this._stateBeforeScript5(c); break;

			case AFTER_SCRIPT_1: this._stateAfterScript1(c); break;
			case AFTER_SCRIPT_2: this._stateAfterScript2(c); break;
			case AFTER_SCRIPT_3: this._stateAfterScript3(c); break;
			case AFTER_SCRIPT_4: this._stateAfterScript4(c); break;
			case AFTER_SCRIPT_5: this._stateAfterScript5(c); break;

			/*
			*	style
			*/
			case BEFORE_STYLE_1: this._stateBeforeStyle1(c); break;
			case BEFORE_STYLE_2: this._stateBeforeStyle2(c); break;
			case BEFORE_STYLE_3: this._stateBeforeStyle3(c); break;
			case BEFORE_STYLE_4: this._stateBeforeStyle4(c); break;

			case AFTER_STYLE_1: this._stateAfterStyle1(c); break;
			case AFTER_STYLE_2: this._stateAfterStyle2(c); break;
			case AFTER_STYLE_3: this._stateAfterStyle3(c); break;
			case AFTER_STYLE_4: this._stateAfterStyle4(c); break;

			/*
			*	entities
			*/
			case BEFORE_ENTITY: this._stateBeforeEntity(c); break;
			case BEFORE_NUMERIC_ENTITY: this._stateBeforeNumericEntity(c); break;
			case IN_NAMED_ENTITY: this._stateInNamedEntity(c); break;
			case IN_NUMERIC_ENTITY: this._stateInNumericEntity(c); break;
			case IN_HEX_ENTITY: this._stateInHexEntity(c); break;

			default:
				this._cbs.onerror(Error("unknown _state"), this._state);
		}

		//handlers that want a character to be processed again decrement _index themselves
		this._index++;
	}

	this._cleanup();
};
//Pauses the tokenizer: clears the _running flag, which _parse() checks before
//every character, so processing stops after the character currently being handled.
Tokenizer.prototype.pause = function(){
this._running = false;
};
//Continues after pause(): parses whatever is still buffered and, if end()
//was called in the meantime, finishes the document.
Tokenizer.prototype.resume = function(){
	this._running = true;

	var hasBufferedInput = this._index < this._buffer.length;
	if(hasBufferedInput) this._parse();

	if(this._ended) this._finish();
};
//Signals the end of the input. An optional last `chunk` is written first.
//Calling end() twice is reported through `onerror`. While paused, finishing
//is deferred until resume().
Tokenizer.prototype.end = function(chunk){
	if(this._ended){
		this._cbs.onerror(Error(".end() after done!"));
	}

	if(chunk){
		this.write(chunk);
	}

	this._ended = true;

	if(this._running){
		this._finish();
	}
};
//Finishes the document: emits whatever is left in an unfinished section in
//a reasonable way, then fires `onend`.
Tokenizer.prototype._finish = function(){
	var hasTrailingData = this._sectionStart < this._index;
	if(hasTrailingData) this._handleTrailingData();

	this._cbs.onend();
};
//Emits the data of a section that was still open when the input ended:
// - unfinished CDATA sections and comments are emitted as such,
// - unfinished entities (outside XML mode) are decoded; if something is left
//   afterwards it is handled again in the base state,
// - data inside an unfinished tag is dropped,
// - everything else is emitted as text.
Tokenizer.prototype._handleTrailingData = function(){
	var state = this._state,
	    data = this._buffer.substr(this._sectionStart);

	if(state === IN_CDATA || state === AFTER_CDATA_1 || state === AFTER_CDATA_2){
		this._cbs.oncdata(data);
		return;
	}

	if(state === IN_COMMENT || state === AFTER_COMMENT_1 || state === AFTER_COMMENT_2){
		this._cbs.oncomment(data);
		return;
	}

	var inEntity = state === IN_NAMED_ENTITY || state === IN_NUMERIC_ENTITY || state === IN_HEX_ENTITY;
	if(inEntity && !this._xmlMode){
		if(state === IN_NAMED_ENTITY){
			this._parseLegacyEntity();
		} else if(state === IN_NUMERIC_ENTITY){
			this._decodeNumericEntity(2, 10);
		} else {
			this._decodeNumericEntity(3, 16);
		}

		if(this._sectionStart < this._index){
			this._state = this._baseState;
			this._handleTrailingData();
		}
		return;
	}

	var insideTag =
		state === IN_TAG_NAME ||
		state === BEFORE_ATTRIBUTE_NAME ||
		state === BEFORE_ATTRIBUTE_VALUE ||
		state === AFTER_ATTRIBUTE_NAME ||
		state === IN_ATTRIBUTE_NAME ||
		state === IN_ATTRIBUTE_VALUE_SQ ||
		state === IN_ATTRIBUTE_VALUE_DQ ||
		state === IN_ATTRIBUTE_VALUE_NQ ||
		state === IN_CLOSING_TAG_NAME;

	if(!insideTag){
		this._cbs.ontext(data);
	}
	//else, ignore remaining data
	//TODO add a way to remove current tag
};
//Resets the tokenizer by re-running the constructor on this instance,
//carrying over the current xmlMode/decodeEntities settings and the callbacks.
Tokenizer.prototype.reset = function(){
Tokenizer.call(this, {xmlMode: this._xmlMode, decodeEntities: this._decodeEntities}, this._cbs);
};
//Position of the tokenizer relative to the start of the whole input:
//the characters already dropped from the buffer plus the index inside it.
Tokenizer.prototype.getAbsoluteIndex = function(){
	var dropped = this._bufferOffset;
	return dropped + this._index;
};
//Returns the text of the current section: the buffer from _sectionStart up
//to, but not including, the current index.
Tokenizer.prototype._getSection = function(){
	var from = this._sectionStart,
	    to = this._index;
	return this._buffer.substring(from, to);
};
//Passes the current section to the callback called `name` and marks the
//section as closed (_sectionStart = -1).
Tokenizer.prototype._emitToken = function(name){
	var section = this._getSection();
	this._cbs[name](section);
	this._sectionStart = -1;
};
//Emits a decoded value (e.g. an entity replacement) to the callback that
//matches the base state: text while in TEXT, attribute data otherwise.
Tokenizer.prototype._emitPartial = function(value){
	if(this._baseState === TEXT){
		this._cbs.ontext(value);
		return;
	}

	this._cbs.onattribdata(value); //TODO implement the new event
};

View File

@@ -0,0 +1,21 @@
module.exports = Stream;
var Parser = require("./Parser.js"),
WritableStream = require("stream").Writable || require("readable-stream").Writable;
//Writable stream front-end for the parser: everything written to the stream
//is forwarded to a Parser created from `cbs` and `options`.
function Stream(cbs, options){
	var parser = new Parser(cbs, options);
	this._parser = parser;

	//decodeStrings: false - strings are passed to _write as they are
	WritableStream.call(this, {decodeStrings: false});

	//end the parser once the stream has finished
	this.once("finish", function(){
		parser.end();
	});
}
require("util").inherits(Stream, WritableStream);
//Writable implementation hook: forwards every chunk to the parser and
//signals completion right away.
//It is defined on Stream.prototype on purpose. `WritableStream` here is the
//shared stream.Writable class, so assigning to its prototype would change
//the behaviour of every other Writable in the process.
Stream.prototype._write = function(chunk, encoding, cb){
	this._parser.write(chunk);
	cb();
};

68
project3/node_modules/htmlparser2/lib/index.js generated vendored Normal file
View File

@@ -0,0 +1,68 @@
var Parser = require("./Parser.js"),
DomHandler = require("domhandler");
//Replaces the property `name` on module.exports with the plain value `value`
//and returns that value. The lazy getters below call this on first access:
//the accessor is deleted first, so later reads hit the stored value directly.
function defineProp(name, value){
delete module.exports[name];
module.exports[name] = value;
return value;
}
//Public API of the package.
//Parser, Tokenizer, ElementType and DomHandler are loaded eagerly; the getters
//require their module on first access and then replace themselves (see defineProp).
module.exports = {
Parser: Parser,
Tokenizer: require("./Tokenizer.js"),
ElementType: require("domelementtype"),
DomHandler: DomHandler,
get FeedHandler(){
return defineProp("FeedHandler", require("./FeedHandler.js"));
},
get Stream(){
return defineProp("Stream", require("./Stream.js"));
},
get WritableStream(){
return defineProp("WritableStream", require("./WritableStream.js"));
},
get ProxyHandler(){
return defineProp("ProxyHandler", require("./ProxyHandler.js"));
},
get DomUtils(){
return defineProp("DomUtils", require("domutils"));
},
get CollectingHandler(){
return defineProp("CollectingHandler", require("./CollectingHandler.js"));
},
// For legacy support
DefaultHandler: DomHandler,
//alias of FeedHandler, resolved through the lazy FeedHandler getter above
get RssHandler(){
return defineProp("RssHandler", this.FeedHandler);
},
//helper methods
//parseDOM: parses `data` in one go and returns the `dom` collected by a DomHandler
parseDOM: function(data, options){
var handler = new DomHandler(options);
new Parser(handler, options).end(data);
return handler.dom;
},
//parseFeed: same as parseDOM, but with a FeedHandler; returns its `dom` property
parseFeed: function(feed, options){
var handler = new module.exports.FeedHandler(options);
new Parser(handler, options).end(feed);
return handler.dom;
},
//createDomStream: returns a Parser wired to a new DomHandler; the caller feeds it data
createDomStream: function(cb, options, elementCb){
var handler = new DomHandler(cb, options, elementCb);
return new Parser(handler, options);
},
// List of all events that the parser emits
EVENTS: { /* Format: eventname: number of arguments */
attribute: 2,
cdatastart: 0,
cdataend: 0,
text: 1,
processinginstruction: 2,
comment: 1,
commentend: 0,
closetag: 1,
opentag: 2,
opentagname: 1,
error: 1,
end: 0
}
};

100
project3/node_modules/htmlparser2/package.json generated vendored Normal file
View File

@@ -0,0 +1,100 @@
{
"_from": "htmlparser2@3.8.x",
"_id": "htmlparser2@3.8.3",
"_inBundle": false,
"_integrity": "sha1-mWwosZFRaovoZQGn15dX5ccMEGg=",
"_location": "/htmlparser2",
"_phantomChildren": {},
"_requested": {
"type": "range",
"registry": true,
"raw": "htmlparser2@3.8.x",
"name": "htmlparser2",
"escapedName": "htmlparser2",
"rawSpec": "3.8.x",
"saveSpec": null,
"fetchSpec": "3.8.x"
},
"_requiredBy": [
"/jshint"
],
"_resolved": "https://registry.npmjs.org/htmlparser2/-/htmlparser2-3.8.3.tgz",
"_shasum": "996c28b191516a8be86501a7d79757e5c70c1068",
"_spec": "htmlparser2@3.8.x",
"_where": "/home/massiveatoms/Desktop/cs142/project3/node_modules/jshint",
"author": {
"name": "Felix Boehm",
"email": "me@feedic.com"
},
"browser": {
"readable-stream": false
},
"bugs": {
"url": "http://github.com/fb55/htmlparser2/issues"
},
"bundleDependencies": false,
"dependencies": {
"domelementtype": "1",
"domhandler": "2.3",
"domutils": "1.5",
"entities": "1.0",
"readable-stream": "1.1"
},
"deprecated": false,
"description": "Fast & forgiving HTML/XML/RSS parser",
"devDependencies": {
"coveralls": "*",
"istanbul": "*",
"jscs": "1.5.8",
"jshint": "2",
"mocha": "1",
"mocha-lcov-reporter": "*"
},
"directories": {
"lib": "lib/"
},
"homepage": "https://github.com/fb55/htmlparser2#readme",
"jshintConfig": {
"eqeqeq": true,
"freeze": true,
"latedef": "nofunc",
"noarg": true,
"nonbsp": true,
"quotmark": "double",
"undef": true,
"unused": true,
"trailing": true,
"eqnull": true,
"proto": true,
"smarttabs": true,
"node": true,
"globals": {
"describe": true,
"it": true
}
},
"keywords": [
"html",
"parser",
"streams",
"xml",
"dom",
"rss",
"feed",
"atom"
],
"license": "MIT",
"main": "lib/index.js",
"name": "htmlparser2",
"repository": {
"type": "git",
"url": "git://github.com/fb55/htmlparser2.git"
},
"scripts": {
"coveralls": "npm run lint && npm run lcov && (cat coverage/lcov.info | coveralls || exit 0)",
"lcov": "istanbul cover _mocha --report lcovonly -- -R spec",
"lint": "jshint lib test && jscs lib test",
"test": "mocha && npm run lint"
},
"version": "3.8.3"
}

9
project3/node_modules/htmlparser2/test/01-events.js generated vendored Normal file
View File

@@ -0,0 +1,9 @@
var helper = require("./test-helper.js");
//Runs every "Events" fixture: the fixture's html is written to a parser
//(configured with the fixture's parser options) and the collected events are
//handed to the test callback.
helper.mochaTest("Events", __dirname, function(test, cb){
	var collector = helper.getEventCollector(cb);

	helper.writeToParser(collector, test.options.parser, test.html);
});

23
project3/node_modules/htmlparser2/test/02-stream.js generated vendored Normal file
View File

@@ -0,0 +1,23 @@
var helper = require("./test-helper.js"),
Stream = require("..").WritableStream,
fs = require("fs"),
path = require("path");
//Runs every "Stream" fixture twice: first the document is piped through the
//stream from a read stream, then, once that run has reported its events,
//the whole file is written to a fresh stream in a single end() call.
helper.mochaTest("Stream", __dirname, function(test, cb){
	var filePath = path.join(__dirname, "Documents", test.file);

	//second run: the complete file in one chunk
	function runSingleChunk(){
		var handler = helper.getEventCollector(cb),
		    stream = new Stream(handler, test.options);

		fs.readFile(filePath, function(err, data){
			if(err) throw err;
			else stream.end(data);
		});
	}

	//first run: piped from the file system
	var source = fs.createReadStream(filePath);
	var piped = new Stream(
		helper.getEventCollector(function(err, events){
			cb(err, events);
			runSingleChunk();
		}),
		test.options
	);

	source.pipe(piped).on("error", cb);
});
});

19
project3/node_modules/htmlparser2/test/03-feed.js generated vendored Normal file
View File

@@ -0,0 +1,19 @@
//Runs tests for feeds
var helper = require("./test-helper.js"),
FeedHandler = require("..").RssHandler,
fs = require("fs"),
path = require("path");
//Runs every "Feeds" fixture: the document is read from disk and parsed in
//XML mode with a FeedHandler, which reports its result to the test callback.
helper.mochaTest("Feeds", __dirname, function(test, cb){
	fs.readFile(
		path.join(__dirname, "Documents", test.file),
		function(err, file){
			//report a failed read to the test instead of crashing on
			//`file.toString()` with an unrelated TypeError
			if(err) return cb(err);

			helper.writeToParser(
				new FeedHandler(cb),
				{ xmlMode: true },
				file.toString()
			);
		}
	);
});

View File

@@ -0,0 +1,25 @@
<?xml version="1.0" encoding="utf-8"?>
<!-- http://en.wikipedia.org/wiki/Atom_%28standard%29 -->
<feed xmlns="http://www.w3.org/2005/Atom">
<title>Example Feed</title>
<subtitle>A subtitle.</subtitle>
<link href="http://example.org/feed/" rel="self" />
<link href="http://example.org/" />
<id>urn:uuid:60a76c80-d399-11d9-b91C-0003939e0af6</id>
<updated>2003-12-13T18:30:02Z</updated>
<author>
<name>John Doe</name>
<email>johndoe@example.com</email>
</author>
<entry>
<title>Atom-Powered Robots Run Amok</title>
<link href="http://example.org/2003/12/13/atom03" />
<link rel="alternate" type="text/html" href="http://example.org/2003/12/13/atom03.html"/>
<link rel="edit" href="http://example.org/2003/12/13/atom03/edit"/>
<id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>
<updated>2003-12-13T18:30:02Z</updated>
<content type="html"><p>Some content.</p></content>
</entry>
</feed>

View File

@@ -0,0 +1,16 @@
<!doctype html>
<html>
<head>
<title>Attributes test</title>
</head>
<body>
<!-- Normal attributes -->
<button id="test0" class="value0" title="value1">class="value0" title="value1"</button>
<!-- Attributes with no quotes or value -->
<button id="test1" class=value2 disabled>class=value2 disabled</button>
<!-- Attributes with no space between them. No valid, but accepted by the browser -->
<button id="test2" class="value4"title="value5">class="value4"title="value5"</button>
</body>
</html>

View File

@@ -0,0 +1 @@
<!DOCTYPE html><html><title>The Title</title><body>Hello world</body></html>

View File

@@ -0,0 +1,63 @@
<?xml version="1.0" encoding="UTF-8"?>
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns="http://purl.org/rss/1.0/" xmlns:ev="http://purl.org/rss/1.0/modules/event/" xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:syn="http://purl.org/rss/1.0/modules/syndication/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:admin="http://webns.net/mvcb/">
<channel rdf:about="http://sfbay.craigslist.org/ccc/">
<title>craigslist | all community in SF bay area</title>
<link>http://sfbay.craigslist.org/ccc/</link>
<description/>
<dc:language>en-us</dc:language>
<dc:rights>Copyright 2011 craigslist, inc.</dc:rights>
<dc:publisher>webmaster@craigslist.org</dc:publisher>
<dc:creator>webmaster@craigslist.org</dc:creator>
<dc:source>http://sfbay.craigslist.org/ccc//</dc:source>
<dc:title>craigslist | all community in SF bay area</dc:title>
<dc:type>Collection</dc:type>
<syn:updateBase>2011-11-04T09:39:10-07:00</syn:updateBase>
<syn:updateFrequency>4</syn:updateFrequency>
<syn:updatePeriod>hourly</syn:updatePeriod>
<items>
<rdf:Seq>
<rdf:li rdf:resource="http://sfbay.craigslist.org/sby/muc/2681301534.html"/>
</rdf:Seq>
</items>
</channel>
<item rdf:about="http://sfbay.craigslist.org/sby/muc/2681301534.html">
<title><![CDATA[ Music Equipment Repair and Consignment ]]></title>
<link>
http://sfbay.craigslist.org/sby/muc/2681301534.html
</link>
<description><![CDATA[
San Jose Rock Shop offers musical instrument repair and consignment! (408) 215-2065<br> <br> We are pleased to announce our NEW LOCATION: 1199 N 5th st. San Jose, ca 95112. Please call ahead, by appointment only.<br> <br> Recently featured by Metro Newspaper in their 2011 Best of the Silicon Valley edition see it online here:<br> <a href="http://www.metroactive.com/best-of-silicon-valley/2011/music-nightlife/editor-picks.html" rel="nofollow">http://www.metroactive.com/best-of-silicon-valley/2011/music-nightlife/editor-picks.html</a><br> <br> Guitar Set up (acoustic and electronic) $40!<!-- END CLTAGS -->
]]></description>
<dc:date>2011-11-04T09:35:17-07:00</dc:date>
<dc:language>en-us</dc:language>
<dc:rights>Copyright 2011 craigslist, inc.</dc:rights>
<dc:source>
http://sfbay.craigslist.org/sby/muc/2681301534.html
</dc:source>
<dc:title><![CDATA[ Music Equipment Repair and Consignment ]]></dc:title>
<dc:type>text</dc:type>
<dcterms:issued>2011-11-04T09:35:17-07:00</dcterms:issued>
</item>
<item rdf:about="http://sfbay.craigslist.org/eby/rid/2685010755.html">
<title><![CDATA[
Ride Offered - Oakland/BART to LA/SFV - TODAY 3PM 11/04 (oakland north / temescal)
]]></title>
<link>
http://sfbay.craigslist.org/eby/rid/2685010755.html
</link>
<description><![CDATA[
Im offering a lift for up to two people from Oakland (or near any BART station in the East Bay/580/880 Corridor, or San Jose/Morgan Hill, Gilroy) to the San Fernando Valley / Los Angeles area. Specifically, Im leaving from Oakland between 2:30 and 3:00pm (this is flexible, but if I leave too late my girlfriend will kill me), and heading to Woodland Hills via the 580, I-5, 405, and 101.<!-- END CLTAGS -->
]]></description>
<dc:date>2011-11-04T09:34:54-07:00</dc:date>
<dc:language>en-us</dc:language>
<dc:rights>Copyright 2011 craigslist, inc.</dc:rights>
<dc:source>
http://sfbay.craigslist.org/eby/rid/2685010755.html
</dc:source>
<dc:title><![CDATA[
Ride Offered - Oakland/BART to LA/SFV - TODAY 3PM 11/04 (oakland north / temescal)
]]></dc:title>
<dc:type>text</dc:type>
<dcterms:issued>2011-11-04T09:34:54-07:00</dcterms:issued>
</item>
</rdf:RDF>

View File

@@ -0,0 +1,48 @@
<?xml version="1.0"?>
<!-- http://cyber.law.harvard.edu/rss/examples/rss2sample.xml -->
<rss version="2.0">
<channel>
<title>Liftoff News</title>
<link>http://liftoff.msfc.nasa.gov/</link>
<description>Liftoff to Space Exploration.</description>
<language>en-us</language>
<pubDate>Tue, 10 Jun 2003 04:00:00 GMT</pubDate>
<lastBuildDate>Tue, 10 Jun 2003 09:41:01 GMT</lastBuildDate>
<docs>http://blogs.law.harvard.edu/tech/rss</docs>
<generator>Weblog Editor 2.0</generator>
<managingEditor>editor@example.com</managingEditor>
<webMaster>webmaster@example.com</webMaster>
<item>
<title>Star City</title>
<link>http://liftoff.msfc.nasa.gov/news/2003/news-starcity.asp</link>
<description>How do Americans get ready to work with Russians aboard the International Space Station? They take a crash course in culture, language and protocol at Russia's &lt;a href="http://howe.iki.rssi.ru/GCTC/gctc_e.htm"&gt;Star City&lt;/a&gt;.</description>
<pubDate>Tue, 03 Jun 2003 09:39:21 GMT</pubDate>
<guid>http://liftoff.msfc.nasa.gov/2003/06/03.html#item573</guid>
</item>
<item>
<description>Sky watchers in Europe, Asia, and parts of Alaska and Canada will experience a &lt;a href="http://science.nasa.gov/headlines/y2003/30may_solareclipse.htm"&gt;partial eclipse of the Sun&lt;/a&gt; on Saturday, May 31st.</description>
<pubDate>Fri, 30 May 2003 11:06:42 GMT</pubDate>
<guid>http://liftoff.msfc.nasa.gov/2003/05/30.html#item572</guid>
</item>
<item>
<title>The Engine That Does More</title>
<link>http://liftoff.msfc.nasa.gov/news/2003/news-VASIMR.asp</link>
<description>Before man travels to Mars, NASA hopes to design new engines that will let us fly through the Solar System more quickly. The proposed VASIMR engine would do that.</description>
<pubDate>Tue, 27 May 2003 08:37:32 GMT</pubDate>
<guid>http://liftoff.msfc.nasa.gov/2003/05/27.html#item571</guid>
</item>
<item>
<title>Astronauts' Dirty Laundry</title>
<link>http://liftoff.msfc.nasa.gov/news/2003/news-laundry.asp</link>
<description>Compared to earlier spacecraft, the International Space Station has many luxuries, but laundry facilities are not one of them. Instead, astronauts have other options.</description>
<pubDate>Tue, 20 May 2003 08:56:02 GMT</pubDate>
<guid>http://liftoff.msfc.nasa.gov/2003/05/20.html#item570</guid>
</item>
</channel>
</rss>

View File

@@ -0,0 +1,44 @@
{
"name": "simple",
"options": {
"handler": {},
"parser": {}
},
"html": "<h1 class=test>adsf</h1>",
"expected": [
{
"event": "opentagname",
"data": [
"h1"
]
},
{
"event": "attribute",
"data": [
"class",
"test"
]
},
{
"event": "opentag",
"data": [
"h1",
{
"class": "test"
}
]
},
{
"event": "text",
"data": [
"adsf"
]
},
{
"event": "closetag",
"data": [
"h1"
]
}
]
}

View File

@@ -0,0 +1,63 @@
{
"name": "Template script tags",
"options": {
"handler": {},
"parser": {}
},
"html": "<p><script type=\"text/template\"><h1>Heading1</h1></script></p>",
"expected": [
{
"event": "opentagname",
"data": [
"p"
]
},
{
"event": "opentag",
"data": [
"p",
{}
]
},
{
"event": "opentagname",
"data": [
"script"
]
},
{
"event": "attribute",
"data": [
"type",
"text/template"
]
},
{
"event": "opentag",
"data": [
"script",
{
"type": "text/template"
}
]
},
{
"event": "text",
"data": [
"<h1>Heading1</h1>"
]
},
{
"event": "closetag",
"data": [
"script"
]
},
{
"event": "closetag",
"data": [
"p"
]
}
]
}

View File

@@ -0,0 +1,46 @@
{
"name": "Lowercase tags",
"options": {
"handler": {},
"parser": {
"lowerCaseTags": true
}
},
"html": "<H1 class=test>adsf</H1>",
"expected": [
{
"event": "opentagname",
"data": [
"h1"
]
},
{
"event": "attribute",
"data": [
"class",
"test"
]
},
{
"event": "opentag",
"data": [
"h1",
{
"class": "test"
}
]
},
{
"event": "text",
"data": [
"adsf"
]
},
{
"event": "closetag",
"data": [
"h1"
]
}
]
}

View File

@@ -0,0 +1,50 @@
{
"name": "CDATA",
"options": {
"handler": {},
"parser": {"xmlMode": true}
},
"html": "<tag><![CDATA[ asdf ><asdf></adsf><> fo]]></tag><![CD>",
"expected": [
{
"event": "opentagname",
"data": [
"tag"
]
},
{
"event": "opentag",
"data": [
"tag",
{}
]
},
{
"event": "cdatastart",
"data": []
},
{
"event": "text",
"data": [
" asdf ><asdf></adsf><> fo"
]
},
{
"event": "cdataend",
"data": []
},
{
"event": "closetag",
"data": [
"tag"
]
},
{
"event": "processinginstruction",
"data": [
"![CD",
"![CD"
]
}
]
}

View File

@@ -0,0 +1,35 @@
{
"name": "CDATA (inside special)",
"options": {
"handler": {},
"parser": {}
},
"html": "<script>/*<![CDATA[*/ asdf ><asdf></adsf><> fo/*]]>*/</script>",
"expected": [
{
"event": "opentagname",
"data": [
"script"
]
},
{
"event": "opentag",
"data": [
"script",
{}
]
},
{
"event": "text",
"data": [
"/*<![CDATA[*/ asdf ><asdf></adsf><> fo/*]]>*/"
]
},
{
"event": "closetag",
"data": [
"script"
]
}
]
}

View File

@@ -0,0 +1,16 @@
{
"name": "leading lt",
"options": {
"handler": {},
"parser": {}
},
"html": ">a>",
"expected": [
{
"event": "text",
"data": [
">a>"
]
}
]
}

View File

@@ -0,0 +1,67 @@
{
"name": "Self-closing tags",
"options": {
"handler": {
},
"parser": {
}
},
"html": "<a href=http://test.com/>Foo</a><hr / >",
"expected": [
{
"event": "opentagname",
"data": [
"a"
]
},
{
"event": "attribute",
"data": [
"href",
"http://test.com/"
]
},
{
"event": "opentag",
"data": [
"a",
{
"href": "http://test.com/"
}
]
},
{
"event": "text",
"data": [
"Foo"
]
},
{
"event": "closetag",
"data": [
"a"
]
},
{
"event": "opentagname",
"data": [
"hr"
]
},
{
"event": "opentag",
"data": [
"hr",
{}
]
},
{
"event": "closetag",
"data": [
"hr"
]
}
]
}

View File

@@ -0,0 +1,71 @@
{
"name": "Implicit close tags",
"options": {},
"html": "<ol><li class=test><div><table style=width:100%><tr><th>TH<td colspan=2><h3>Heading</h3><tr><td><div>Div</div><td><div>Div2</div></table></div><li><div><h3>Heading 2</h3></div></li></ol><p>Para<h4>Heading 4</h4>",
"expected": [
{ "event": "opentagname", "data": [ "ol" ] },
{ "event": "opentag", "data": [ "ol", {} ] },
{ "event": "opentagname", "data": [ "li" ] },
{ "event": "attribute", "data": [ "class", "test" ] },
{ "event": "opentag", "data": [ "li", { "class": "test" } ] },
{ "event": "opentagname", "data": [ "div" ] },
{ "event": "opentag", "data": [ "div", {} ] },
{ "event": "opentagname", "data": [ "table" ] },
{ "event": "attribute", "data": [ "style", "width:100%" ] },
{ "event": "opentag", "data": [ "table", { "style": "width:100%" } ] },
{ "event": "opentagname", "data": [ "tr" ] },
{ "event": "opentag", "data": [ "tr", {} ] },
{ "event": "opentagname", "data": [ "th" ] },
{ "event": "opentag", "data": [ "th", {} ] },
{ "event": "text", "data": [ "TH" ] },
{ "event": "closetag", "data": [ "th" ] },
{ "event": "opentagname", "data": [ "td" ] },
{ "event": "attribute", "data": [ "colspan", "2" ] },
{ "event": "opentag", "data": [ "td", { "colspan": "2" } ] },
{ "event": "opentagname", "data": [ "h3" ] },
{ "event": "opentag", "data": [ "h3", {} ] },
{ "event": "text", "data": [ "Heading" ] },
{ "event": "closetag", "data": [ "h3" ] },
{ "event": "closetag", "data": [ "td" ] },
{ "event": "closetag", "data": [ "tr" ] },
{ "event": "opentagname", "data": [ "tr" ] },
{ "event": "opentag", "data": [ "tr", {} ] },
{ "event": "opentagname", "data": [ "td" ] },
{ "event": "opentag", "data": [ "td", {} ] },
{ "event": "opentagname", "data": [ "div" ] },
{ "event": "opentag", "data": [ "div", {} ] },
{ "event": "text", "data": [ "Div" ] },
{ "event": "closetag", "data": [ "div" ] },
{ "event": "closetag", "data": [ "td" ] },
{ "event": "opentagname", "data": [ "td" ] },
{ "event": "opentag", "data": [ "td", {} ] },
{ "event": "opentagname", "data": [ "div" ] },
{ "event": "opentag", "data": [ "div", {} ] },
{ "event": "text", "data": [ "Div2" ] },
{ "event": "closetag", "data": [ "div" ] },
{ "event": "closetag", "data": [ "td" ] },
{ "event": "closetag", "data": [ "tr" ] },
{ "event": "closetag", "data": [ "table" ] },
{ "event": "closetag", "data": [ "div" ] },
{ "event": "closetag", "data": [ "li" ] },
{ "event": "opentagname", "data": [ "li" ] },
{ "event": "opentag", "data": [ "li", {} ] },
{ "event": "opentagname", "data": [ "div" ] },
{ "event": "opentag", "data": [ "div", {} ] },
{ "event": "opentagname", "data": [ "h3" ] },
{ "event": "opentag", "data": [ "h3", {} ] },
{ "event": "text", "data": [ "Heading 2" ] },
{ "event": "closetag", "data": [ "h3" ] },
{ "event": "closetag", "data": [ "div" ] },
{ "event": "closetag", "data": [ "li" ] },
{ "event": "closetag", "data": [ "ol" ] },
{ "event": "opentagname", "data": [ "p" ] },
{ "event": "opentag", "data": [ "p", {} ] },
{ "event": "text", "data": [ "Para" ] },
{ "event": "closetag", "data": [ "p" ] },
{ "event": "opentagname", "data": [ "h4" ] },
{ "event": "opentag", "data": [ "h4", {} ] },
{ "event": "text", "data": [ "Heading 4" ] },
{ "event": "closetag", "data": [ "h4" ] }
]
}

View File

@@ -0,0 +1,68 @@
{
"name": "attributes (no white space, no value, no quotes)",
"options": {
"handler": {},
"parser": {}
},
"html": "<button class=\"test0\"title=\"test1\" disabled value=test2>adsf</button>",
"expected": [
{
"event": "opentagname",
"data": [
"button"
]
},
{
"event": "attribute",
"data": [
"class",
"test0"
]
},
{
"event": "attribute",
"data": [
"title",
"test1"
]
},
{
"event": "attribute",
"data": [
"disabled",
""
]
},
{
"event": "attribute",
"data": [
"value",
"test2"
]
},
{
"event": "opentag",
"data": [
"button",
{
"class": "test0",
"title": "test1",
"disabled": "",
"value": "test2"
}
]
},
{
"event": "text",
"data": [
"adsf"
]
},
{
"event": "closetag",
"data": [
"button"
]
}
]
}

View File

@@ -0,0 +1,52 @@
{
"name": "crazy attribute",
"options": {
"handler": {},
"parser": {}
},
"html": "<p < = '' FAIL>stuff</p><a",
"expected": [
{
"event": "opentagname",
"data": [
"p"
]
},
{
"event": "attribute",
"data": [
"<",
""
]
},
{
"event": "attribute",
"data": [
"fail",
""
]
},
{
"event": "opentag",
"data": [
"p",
{
"<": "",
"fail": ""
}
]
},
{
"event": "text",
"data": [
"stuff"
]
},
{
"event": "closetag",
"data": [
"p"
]
}
]
}

View File

@@ -0,0 +1,54 @@
{
"name": "Scripts creating other scripts",
"options": {
"handler": {},
"parser": {}
},
"html": "<p><script>var str = '<script></'+'script>';</script></p>",
"expected": [
{
"event": "opentagname",
"data": [
"p"
]
},
{
"event": "opentag",
"data": [
"p",
{}
]
},
{
"event": "opentagname",
"data": [
"script"
]
},
{
"event": "opentag",
"data": [
"script",
{}
]
},
{
"event": "text",
"data": [
"var str = '<script></'+'script>';"
]
},
{
"event": "closetag",
"data": [
"script"
]
},
{
"event": "closetag",
"data": [
"p"
]
}
]
}

View File

@@ -0,0 +1,20 @@
{
"name": "Long comment ending",
"options": {
"handler": {},
"parser": {}
},
"html": "<meta id='before'><!-- text ---><meta id='after'>",
"expected": [
{ "event": "opentagname", "data": [ "meta" ] },
{ "event": "attribute", "data": [ "id", "before" ] },
{ "event": "opentag", "data": [ "meta", {"id": "before"} ] },
{ "event": "closetag", "data": [ "meta" ] },
{ "event": "comment", "data": [ " text -" ] },
{ "event": "commentend", "data": [] },
{ "event": "opentagname", "data": [ "meta" ] },
{ "event": "attribute", "data": [ "id", "after" ] },
{ "event": "opentag", "data": [ "meta", {"id": "after"} ] },
{ "event": "closetag", "data": [ "meta" ] }
]
}

View File

@@ -0,0 +1,22 @@
{
"name": "Long CDATA ending",
"options": {
"handler": {},
"parser": {"xmlMode": true}
},
"html": "<before /><tag><![CDATA[ text ]]]></tag><after />",
"expected": [
{ "event": "opentagname", "data": [ "before" ] },
{ "event": "opentag", "data": [ "before", {} ] },
{ "event": "closetag", "data": [ "before" ] },
{ "event": "opentagname", "data": [ "tag" ] },
{ "event": "opentag", "data": [ "tag", {} ] },
{ "event": "cdatastart", "data": [] },
{ "event": "text", "data": [ " text ]" ] },
{ "event": "cdataend", "data": [] },
{ "event": "closetag", "data": [ "tag" ] },
{ "event": "opentagname", "data": [ "after" ] },
{ "event": "opentag", "data": [ "after", {} ] },
{ "event": "closetag", "data": [ "after" ] }
]
}

View File

@@ -0,0 +1,27 @@
{
"name": "Implicit open p and br tags",
"options": {
"handler": {},
"parser": {}
},
"html": "<div>Hallo</p>World</br></ignore></div></p></br>",
"expected": [
{ "event": "opentagname", "data": [ "div" ] },
{ "event": "opentag", "data": [ "div", {} ] },
{ "event": "text", "data": [ "Hallo" ] },
{ "event": "opentagname", "data": [ "p" ] },
{ "event": "opentag", "data": [ "p", {} ] },
{ "event": "closetag", "data": [ "p" ] },
{ "event": "text", "data": [ "World" ] },
{ "event": "opentagname", "data": [ "br" ] },
{ "event": "opentag", "data": [ "br", {} ] },
{ "event": "closetag", "data": [ "br" ] },
{ "event": "closetag", "data": [ "div" ] },
{ "event": "opentagname", "data": [ "p" ] },
{ "event": "opentag", "data": [ "p", {} ] },
{ "event": "closetag", "data": [ "p" ] },
{ "event": "opentagname", "data": [ "br" ] },
{ "event": "opentag", "data": [ "br", {} ] },
{ "event": "closetag", "data": [ "br" ] }
]
}

View File

@@ -0,0 +1,16 @@
{
"name": "lt followed by whitespace",
"options": {
"handler": {},
"parser": {}
},
"html": "a < b",
"expected": [
{
"event": "text",
"data": [
"a < b"
]
}
]
}

View File

@@ -0,0 +1,45 @@
{
"name": "double attribute",
"options": {
"handler": {},
"parser": {}
},
"html": "<h1 class=test class=boo></h1>",
"expected": [
{
"event": "opentagname",
"data": [
"h1"
]
},
{
"event": "attribute",
"data": [
"class",
"test"
]
},
{
"event": "attribute",
"data": [
"class",
"boo"
]
},
{
"event": "opentag",
"data": [
"h1",
{
"class": "test"
}
]
},
{
"event": "closetag",
"data": [
"h1"
]
}
]
}

View File

@@ -0,0 +1,16 @@
{
"name": "numeric entities",
"options": {
"handler": {},
"parser": {"decodeEntities": true}
},
"html": "&#x61;&#x62&#99;&#100&#x66g&#x;&#x68",
"expected": [
{
"event": "text",
"data": [
"abcdfg&#x;h"
]
}
]
}

View File

@@ -0,0 +1,16 @@
{
"name": "legacy entities",
"options": {
"handler": {},
"parser": {"decodeEntities": true}
},
"html": "&AMPel&iacutee&ampeer;s&lter",
"expected": [
{
"event": "text",
"data": [
"&el\u00EDe&eer;s<er"
]
}
]
}

View File

@@ -0,0 +1,16 @@
{
"name": "named entities",
"options": {
"handler": {},
"parser": {"decodeEntities": true}
},
"html": "&amp;el&lt;er&CounterClockwiseContourIntegral;foo&bar",
"expected": [
{
"event": "text",
"data": [
"&el<er\u2233foo&bar"
]
}
]
}

View File

@@ -0,0 +1,16 @@
{
"name": "xml entities",
"options": {
"handler": {},
"parser": {"decodeEntities": true, "xmlMode": true}
},
"html": "&amp;&gt;&amp&lt;&uuml;&#x61;&#x62&#99;&#100&#101",
"expected": [
{
"event": "text",
"data": [
"&>&amp<&uuml;a&#x62c&#100&#101"
]
}
]
}

View File

@@ -0,0 +1,38 @@
{
"name": "entity in attribute",
"options": {
"handler": {},
"parser": {"decodeEntities": true}
},
"html": "<a href='http://example.com/p&#x61;ge?param=value&param2&param3=&lt;val&; & &'>",
"expected": [
{
"event": "opentagname",
"data": [
"a"
]
},
{
"event": "attribute",
"data": [
"href",
"http://example.com/page?param=value&param2&param3=<val&; & &"
]
},
{
"event": "opentag",
"data": [
"a",
{
"href": "http://example.com/page?param=value&param2&param3=<val&; & &"
}
]
},
{
"event": "closetag",
"data": [
"a"
]
}
]
}

View File

@@ -0,0 +1,41 @@
{
"name": "double brackets",
"options": {
"handler": {},
"parser": {}
},
"html": "<<princess-purpose>>testing</princess-purpose>",
"expected": [
{
"event": "text",
"data": [
"<"
]
},
{
"event": "opentagname",
"data": [
"princess-purpose"
]
},
{
"event": "opentag",
"data": [
"princess-purpose",
{}
]
},
{
"event": "text",
"data": [
">testing"
]
},
{
"event": "closetag",
"data": [
"princess-purpose"
]
}
]
}

View File

@@ -0,0 +1,16 @@
{
"name": "legacy entities",
"options": {
"handler": {},
"parser": {"decodeEntities": true}
},
"html": "M&M",
"expected": [
{
"event": "text",
"data": [
"M&M"
]
}
]
}

View File

@@ -0,0 +1,133 @@
{
"name": "Special special tags",
"options": {},
"html": "<sCriPT></scripter</soo</sCript><STyLE></styler</STylE><sCiPt><stylee><scriptee><soo>",
"expected": [
{
"event": "opentagname",
"data": [
"script"
]
},
{
"event": "opentag",
"data": [
"script",
{}
]
},
{
"event": "text",
"data": [
"</scripter</soo"
]
},
{
"event": "closetag",
"data": [
"script"
]
},
{
"event": "opentagname",
"data": [
"style"
]
},
{
"event": "opentag",
"data": [
"style",
{}
]
},
{
"event": "text",
"data": [
"</styler"
]
},
{
"event": "closetag",
"data": [
"style"
]
},
{
"event": "opentagname",
"data": [
"scipt"
]
},
{
"event": "opentag",
"data": [
"scipt",
{}
]
},
{
"event": "opentagname",
"data": [
"stylee"
]
},
{
"event": "opentag",
"data": [
"stylee",
{}
]
},
{
"event": "opentagname",
"data": [
"scriptee"
]
},
{
"event": "opentag",
"data": [
"scriptee",
{}
]
},
{
"event": "opentagname",
"data": [
"soo"
]
},
{
"event": "opentag",
"data": [
"soo",
{}
]
},
{
"event": "closetag",
"data": [
"soo"
]
},
{
"event": "closetag",
"data": [
"scriptee"
]
},
{
"event": "closetag",
"data": [
"stylee"
]
},
{
"event": "closetag",
"data": [
"scipt"
]
}
]
}

View File

@@ -0,0 +1,13 @@
{
"name": "Empty tag name",
"options": {},
"html": "< ></ >",
"expected": [
{
"event": "text",
"data": [
"< ></ >"
]
}
]
}

View File

@@ -0,0 +1,35 @@
{
"name": "Not quite closed",
"options": {},
"html": "<foo /bar></foo bar>",
"expected": [
{
"event": "opentagname",
"data": [
"foo"
]
},
{
"event": "attribute",
"data": [
"bar",
""
]
},
{
"event": "opentag",
"data": [
"foo",
{
"bar": ""
}
]
},
{
"event": "closetag",
"data": [
"foo"
]
}
]
}

View File

@@ -0,0 +1,62 @@
{
"name": "Entities in attributes",
"options": {
"handler": {},
"parser": {"decodeEntities": true}
},
"html": "<foo bar=&amp; baz=\"&amp;\" boo='&amp;' noo=>",
"expected": [
{
"event": "opentagname",
"data": [
"foo"
]
},
{
"event": "attribute",
"data": [
"bar",
"&"
]
},
{
"event": "attribute",
"data": [
"baz",
"&"
]
},
{
"event": "attribute",
"data": [
"boo",
"&"
]
},
{
"event": "attribute",
"data": [
"noo",
""
]
},
{
"event": "opentag",
"data": [
"foo",
{
"bar": "&",
"baz": "&",
"boo": "&",
"noo": ""
}
]
},
{
"event": "closetag",
"data": [
"foo"
]
}
]
}

View File

@@ -0,0 +1,9 @@
{
"name": "CDATA in HTML",
"options": {},
"html": "<![CDATA[ foo ]]>",
"expected": [
{ "event": "comment", "data": [ "[CDATA[ foo ]]" ] },
{ "event": "commentend", "data": [] }
]
}

View File

@@ -0,0 +1,18 @@
{
"name": "Comment edge-cases",
"options": {},
"html": "<!-foo><!-- --- --><!--foo",
"expected": [
{
"event": "processinginstruction",
"data": [
"!-foo",
"!-foo"
]
},
{ "event": "comment", "data": [ " --- " ] },
{ "event": "commentend", "data": [] },
{ "event": "comment", "data": [ "foo" ] },
{ "event": "commentend", "data": [] }
]
}

View File

@@ -0,0 +1,22 @@
{
"name": "CDATA edge-cases",
"options": {
"parser": {"recognizeCDATA": true}
},
"html": "<![CDATA><![CDATA[[]]sdaf]]><![CDATA[foo",
"expected": [
{
"event": "processinginstruction",
"data": [
"![cdata",
"![CDATA"
]
},
{ "event": "cdatastart", "data": [] },
{ "event": "text", "data": [ "[]]sdaf" ] },
{ "event": "cdataend", "data": [] },
{ "event": "cdatastart", "data": [] },
{ "event": "text", "data": [ "foo" ] },
{ "event": "cdataend", "data": [] }
]
}

View File

@@ -0,0 +1,9 @@
{
"name": "Comment false ending",
"options": {},
"html": "<!-- a-b-> -->",
"expected": [
{ "event": "comment", "data": [ " a-b-> " ] },
{ "event": "commentend", "data": [] }
]
}

34
project3/node_modules/htmlparser2/test/Feeds/01-rss.js generated vendored Normal file
View File

@@ -0,0 +1,34 @@
//Feed fixture: the object a feed handler is expected to produce for the
//document named by `exports.file`.
// - name:     label for this fixture
// - file:     name of the input document (presumably resolved against a
//             documents directory by the test runner -- TODO confirm)
// - expected: feed-level fields plus one entry per feed item
exports.name = "RSS (2.0)";
exports.file = "/RSS_Example.xml";
exports.expected = {
type: "rss",
id: "",
title: "Liftoff News",
link: "http://liftoff.msfc.nasa.gov/",
description: "Liftoff to Space Exploration.",
updated: new Date("Tue, 10 Jun 2003 09:41:01 GMT"),
author: "editor@example.com",
items: [{
id: "http://liftoff.msfc.nasa.gov/2003/06/03.html#item573",
title: "Star City",
link: "http://liftoff.msfc.nasa.gov/news/2003/news-starcity.asp",
description: "How do Americans get ready to work with Russians aboard the International Space Station? They take a crash course in culture, language and protocol at Russia's &lt;a href=\"http://howe.iki.rssi.ru/GCTC/gctc_e.htm\"&gt;Star City&lt;/a&gt;.",
pubDate: new Date("Tue, 03 Jun 2003 09:39:21 GMT")
}, {
//NOTE(review): this item has no title or link -- presumably the source
//feed item lacks them; confirm against RSS_Example.xml
id: "http://liftoff.msfc.nasa.gov/2003/05/30.html#item572",
description: "Sky watchers in Europe, Asia, and parts of Alaska and Canada will experience a &lt;a href=\"http://science.nasa.gov/headlines/y2003/30may_solareclipse.htm\"&gt;partial eclipse of the Sun&lt;/a&gt; on Saturday, May 31st.",
pubDate: new Date("Fri, 30 May 2003 11:06:42 GMT")
}, {
id: "http://liftoff.msfc.nasa.gov/2003/05/27.html#item571",
title: "The Engine That Does More",
link: "http://liftoff.msfc.nasa.gov/news/2003/news-VASIMR.asp",
description: "Before man travels to Mars, NASA hopes to design new engines that will let us fly through the Solar System more quickly. The proposed VASIMR engine would do that.",
pubDate: new Date("Tue, 27 May 2003 08:37:32 GMT")
}, {
id: "http://liftoff.msfc.nasa.gov/2003/05/20.html#item570",
title: "Astronauts' Dirty Laundry",
link: "http://liftoff.msfc.nasa.gov/news/2003/news-laundry.asp",
description: "Compared to earlier spacecraft, the International Space Station has many luxuries, but laundry facilities are not one of them. Instead, astronauts have other options.",
pubDate: new Date("Tue, 20 May 2003 08:56:02 GMT")
}]
};

View File

@@ -0,0 +1,18 @@
//Feed fixture: the object a feed handler is expected to produce for the
//document named by `exports.file`.
// - name:     label for this fixture
// - file:     name of the input document (presumably resolved against a
//             documents directory by the test runner -- TODO confirm)
// - expected: feed-level fields plus one entry per feed item
exports.name = "Atom (1.0)";
exports.file = "/Atom_Example.xml";
exports.expected = {
type: "atom",
id: "urn:uuid:60a76c80-d399-11d9-b91C-0003939e0af6",
title: "Example Feed",
link: "http://example.org/feed/",
description: "A subtitle.",
updated: new Date("2003-12-13T18:30:02Z"),
author: "johndoe@example.com",
items: [{
id: "urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a",
title: "Atom-Powered Robots Run Amok",
link: "http://example.org/2003/12/13/atom03",
description: "Some content.",
pubDate: new Date("2003-12-13T18:30:02Z")
}]
};

20
project3/node_modules/htmlparser2/test/Feeds/03-rdf.js generated vendored Normal file
View File

@@ -0,0 +1,20 @@
//Feed fixture: the object a feed handler is expected to produce for the
//document named by `exports.file`.
// - name:     label for this fixture
// - file:     name of the input document (presumably resolved against a
//             documents directory by the test runner -- TODO confirm)
// - expected: feed-level fields plus one entry per feed item; unlike the
//             RSS and Atom fixtures, no description/updated/author fields
//             and no per-item id/pubDate are listed here
exports.name = "RDF test";
exports.file = "/RDF_Example.xml";
exports.expected = {
"type": "rdf",
"id": "",
"title": "craigslist | all community in SF bay area",
"link": "http://sfbay.craigslist.org/ccc/",
"items": [
{
"title": "Music Equipment Repair and Consignment",
"link": "http://sfbay.craigslist.org/sby/muc/2681301534.html",
"description": "San Jose Rock Shop offers musical instrument repair and consignment! (408) 215-2065<br> <br> We are pleased to announce our NEW LOCATION: 1199 N 5th st. San Jose, ca 95112. Please call ahead, by appointment only.<br> <br> Recently featured by Metro Newspaper in their 2011 Best of the Silicon Valley edition see it online here:<br> <a href=\"http://www.metroactive.com/best-of-silicon-valley/2011/music-nightlife/editor-picks.html\" rel=\"nofollow\">http://www.metroactive.com/best-of-silicon-valley/2011/music-nightlife/editor-picks.html</a><br> <br> Guitar Set up (acoustic and electronic) $40!<!-- END CLTAGS -->"
},
{
"title": "Ride Offered - Oakland/BART to LA/SFV - TODAY 3PM 11/04 (oakland north / temescal)",
"link": "http://sfbay.craigslist.org/eby/rid/2685010755.html",
"description": "Im offering a lift for up to two people from Oakland (or near any BART station in the East Bay/580/880 Corridor, or San Jose/Morgan Hill, Gilroy) to the San Fernando Valley / Los Angeles area. Specifically, Im leaving from Oakland between 2:30 and 3:00pm (this is flexible, but if I leave too late my girlfriend will kill me), and heading to Woodland Hills via the 580, I-5, 405, and 101.<!-- END CLTAGS -->"
}
]
};

View File

@@ -0,0 +1,83 @@
{
"name": "Basic html",
"options": {},
"file": "Basic.html",
"expected": [
{
"event": "processinginstruction",
"data": [
"!doctype",
"!DOCTYPE html"
]
},
{
"event": "opentagname",
"data": [
"html"
]
},
{
"event": "opentag",
"data": [
"html",
{}
]
},
{
"event": "opentagname",
"data": [
"title"
]
},
{
"event": "opentag",
"data": [
"title",
{}
]
},
{
"event": "text",
"data": [
"The Title"
]
},
{
"event": "closetag",
"data": [
"title"
]
},
{
"event": "opentagname",
"data": [
"body"
]
},
{
"event": "opentag",
"data": [
"body",
{}
]
},
{
"event": "text",
"data": [
"Hello world"
]
},
{
"event": "closetag",
"data": [
"body"
]
},
{
"event": "closetag",
"data": [
"html"
]
}
]
}

1093
project3/node_modules/htmlparser2/test/Stream/02-RSS.json generated vendored Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,678 @@
{
"name": "Atom feed",
"options": {"xmlMode": true},
"file": "Atom_Example.xml",
"expected": [
{
"event": "processinginstruction",
"data": [
"?xml",
"?xml version=\"1.0\" encoding=\"utf-8\"?"
]
},
{
"event": "text",
"data": [
"\n"
]
},
{
"event": "comment",
"data": [
" http://en.wikipedia.org/wiki/Atom_%28standard%29 "
]
},
{
"event": "commentend",
"data": []
},
{
"event": "text",
"data": [
"\n"
]
},
{
"event": "opentagname",
"data": [
"feed"
]
},
{
"event": "attribute",
"data": [
"xmlns",
"http://www.w3.org/2005/Atom"
]
},
{
"event": "opentag",
"data": [
"feed",
{
"xmlns": "http://www.w3.org/2005/Atom"
}
]
},
{
"event": "text",
"data": [
"\n\t"
]
},
{
"event": "opentagname",
"data": [
"title"
]
},
{
"event": "opentag",
"data": [
"title",
{}
]
},
{
"event": "text",
"data": [
"Example Feed"
]
},
{
"event": "closetag",
"data": [
"title"
]
},
{
"event": "text",
"data": [
"\n\t"
]
},
{
"event": "opentagname",
"data": [
"subtitle"
]
},
{
"event": "opentag",
"data": [
"subtitle",
{}
]
},
{
"event": "text",
"data": [
"A subtitle."
]
},
{
"event": "closetag",
"data": [
"subtitle"
]
},
{
"event": "text",
"data": [
"\n\t"
]
},
{
"event": "opentagname",
"data": [
"link"
]
},
{
"event": "attribute",
"data": [
"href",
"http://example.org/feed/"
]
},
{
"event": "attribute",
"data": [
"rel",
"self"
]
},
{
"event": "opentag",
"data": [
"link",
{
"href": "http://example.org/feed/",
"rel": "self"
}
]
},
{
"event": "closetag",
"data": [
"link"
]
},
{
"event": "text",
"data": [
"\n\t"
]
},
{
"event": "opentagname",
"data": [
"link"
]
},
{
"event": "attribute",
"data": [
"href",
"http://example.org/"
]
},
{
"event": "opentag",
"data": [
"link",
{
"href": "http://example.org/"
}
]
},
{
"event": "closetag",
"data": [
"link"
]
},
{
"event": "text",
"data": [
"\n\t"
]
},
{
"event": "opentagname",
"data": [
"id"
]
},
{
"event": "opentag",
"data": [
"id",
{}
]
},
{
"event": "text",
"data": [
"urn:uuid:60a76c80-d399-11d9-b91C-0003939e0af6"
]
},
{
"event": "closetag",
"data": [
"id"
]
},
{
"event": "text",
"data": [
"\n\t"
]
},
{
"event": "opentagname",
"data": [
"updated"
]
},
{
"event": "opentag",
"data": [
"updated",
{}
]
},
{
"event": "text",
"data": [
"2003-12-13T18:30:02Z"
]
},
{
"event": "closetag",
"data": [
"updated"
]
},
{
"event": "text",
"data": [
"\n\t"
]
},
{
"event": "opentagname",
"data": [
"author"
]
},
{
"event": "opentag",
"data": [
"author",
{}
]
},
{
"event": "text",
"data": [
"\n\t\t"
]
},
{
"event": "opentagname",
"data": [
"name"
]
},
{
"event": "opentag",
"data": [
"name",
{}
]
},
{
"event": "text",
"data": [
"John Doe"
]
},
{
"event": "closetag",
"data": [
"name"
]
},
{
"event": "text",
"data": [
"\n\t\t"
]
},
{
"event": "opentagname",
"data": [
"email"
]
},
{
"event": "opentag",
"data": [
"email",
{}
]
},
{
"event": "text",
"data": [
"johndoe@example.com"
]
},
{
"event": "closetag",
"data": [
"email"
]
},
{
"event": "text",
"data": [
"\n\t"
]
},
{
"event": "closetag",
"data": [
"author"
]
},
{
"event": "text",
"data": [
"\n\n\t"
]
},
{
"event": "opentagname",
"data": [
"entry"
]
},
{
"event": "opentag",
"data": [
"entry",
{}
]
},
{
"event": "text",
"data": [
"\n\t\t"
]
},
{
"event": "opentagname",
"data": [
"title"
]
},
{
"event": "opentag",
"data": [
"title",
{}
]
},
{
"event": "text",
"data": [
"Atom-Powered Robots Run Amok"
]
},
{
"event": "closetag",
"data": [
"title"
]
},
{
"event": "text",
"data": [
"\n\t\t"
]
},
{
"event": "opentagname",
"data": [
"link"
]
},
{
"event": "attribute",
"data": [
"href",
"http://example.org/2003/12/13/atom03"
]
},
{
"event": "opentag",
"data": [
"link",
{
"href": "http://example.org/2003/12/13/atom03"
}
]
},
{
"event": "closetag",
"data": [
"link"
]
},
{
"event": "text",
"data": [
"\n\t\t"
]
},
{
"event": "opentagname",
"data": [
"link"
]
},
{
"event": "attribute",
"data": [
"rel",
"alternate"
]
},
{
"event": "attribute",
"data": [
"type",
"text/html"
]
},
{
"event": "attribute",
"data": [
"href",
"http://example.org/2003/12/13/atom03.html"
]
},
{
"event": "opentag",
"data": [
"link",
{
"rel": "alternate",
"type": "text/html",
"href": "http://example.org/2003/12/13/atom03.html"
}
]
},
{
"event": "closetag",
"data": [
"link"
]
},
{
"event": "text",
"data": [
"\n\t\t"
]
},
{
"event": "opentagname",
"data": [
"link"
]
},
{
"event": "attribute",
"data": [
"rel",
"edit"
]
},
{
"event": "attribute",
"data": [
"href",
"http://example.org/2003/12/13/atom03/edit"
]
},
{
"event": "opentag",
"data": [
"link",
{
"rel": "edit",
"href": "http://example.org/2003/12/13/atom03/edit"
}
]
},
{
"event": "closetag",
"data": [
"link"
]
},
{
"event": "text",
"data": [
"\n\t\t"
]
},
{
"event": "opentagname",
"data": [
"id"
]
},
{
"event": "opentag",
"data": [
"id",
{}
]
},
{
"event": "text",
"data": [
"urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a"
]
},
{
"event": "closetag",
"data": [
"id"
]
},
{
"event": "text",
"data": [
"\n\t\t"
]
},
{
"event": "opentagname",
"data": [
"updated"
]
},
{
"event": "opentag",
"data": [
"updated",
{}
]
},
{
"event": "text",
"data": [
"2003-12-13T18:30:02Z"
]
},
{
"event": "closetag",
"data": [
"updated"
]
},
{
"event": "text",
"data": [
"\n\t\t"
]
},
{
"event": "opentagname",
"data": [
"content"
]
},
{
"event": "attribute",
"data": [
"type",
"html"
]
},
{
"event": "opentag",
"data": [
"content",
{
"type": "html"
}
]
},
{
"event": "opentagname",
"data": [
"p"
]
},
{
"event": "opentag",
"data": [
"p",
{}
]
},
{
"event": "text",
"data": [
"Some content."
]
},
{
"event": "closetag",
"data": [
"p"
]
},
{
"event": "closetag",
"data": [
"content"
]
},
{
"event": "text",
"data": [
"\n\t"
]
},
{
"event": "closetag",
"data": [
"entry"
]
},
{
"event": "text",
"data": [
"\n\n"
]
},
{
"event": "closetag",
"data": [
"feed"
]
},
{
"event": "text",
"data": [
"\n"
]
}
]
}

1399
project3/node_modules/htmlparser2/test/Stream/04-RDF.json generated vendored Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,354 @@
{
"name": "Attributes",
"options": {},
"file": "Attributes.html",
"expected": [
{
"event": "processinginstruction",
"data": [
"!doctype",
"!doctype html"
]
},
{
"event": "text",
"data": [
"\n"
]
},
{
"event": "opentagname",
"data": [
"html"
]
},
{
"event": "opentag",
"data": [
"html",
{}
]
},
{
"event": "text",
"data": [
"\n"
]
},
{
"event": "opentagname",
"data": [
"head"
]
},
{
"event": "opentag",
"data": [
"head",
{}
]
},
{
"event": "text",
"data": [
"\n\t"
]
},
{
"event": "opentagname",
"data": [
"title"
]
},
{
"event": "opentag",
"data": [
"title",
{}
]
},
{
"event": "text",
"data": [
"Attributes test"
]
},
{
"event": "closetag",
"data": [
"title"
]
},
{
"event": "text",
"data": [
"\n"
]
},
{
"event": "closetag",
"data": [
"head"
]
},
{
"event": "text",
"data": [
"\n"
]
},
{
"event": "opentagname",
"data": [
"body"
]
},
{
"event": "opentag",
"data": [
"body",
{}
]
},
{
"event": "text",
"data": [
"\n\t"
]
},
{
"event": "comment",
"data": [
" Normal attributes "
]
},
{
"event": "commentend",
"data": []
},
{
"event": "text",
"data": [
"\n\t"
]
},
{
"event": "opentagname",
"data": [
"button"
]
},
{
"event": "attribute",
"data": [
"id",
"test0"
]
},
{
"event": "attribute",
"data": [
"class",
"value0"
]
},
{
"event": "attribute",
"data": [
"title",
"value1"
]
},
{
"event": "opentag",
"data": [
"button",
{
"id": "test0",
"class": "value0",
"title": "value1"
}
]
},
{
"event": "text",
"data": [
"class=\"value0\" title=\"value1\""
]
},
{
"event": "closetag",
"data": [
"button"
]
},
{
"event": "text",
"data": [
"\n\n\t"
]
},
{
"event": "comment",
"data": [
" Attributes with no quotes or value "
]
},
{
"event": "commentend",
"data": []
},
{
"event": "text",
"data": [
"\n\t"
]
},
{
"event": "opentagname",
"data": [
"button"
]
},
{
"event": "attribute",
"data": [
"id",
"test1"
]
},
{
"event": "attribute",
"data": [
"class",
"value2"
]
},
{
"event": "attribute",
"data": [
"disabled",
""
]
},
{
"event": "opentag",
"data": [
"button",
{
"id": "test1",
"class": "value2",
"disabled": ""
}
]
},
{
"event": "text",
"data": [
"class=value2 disabled"
]
},
{
"event": "closetag",
"data": [
"button"
]
},
{
"event": "text",
"data": [
"\n\n\t"
]
},
{
"event": "comment",
"data": [
" Attributes with no space between them. No valid, but accepted by the browser "
]
},
{
"event": "commentend",
"data": []
},
{
"event": "text",
"data": [
"\n\t"
]
},
{
"event": "opentagname",
"data": [
"button"
]
},
{
"event": "attribute",
"data": [
"id",
"test2"
]
},
{
"event": "attribute",
"data": [
"class",
"value4"
]
},
{
"event": "attribute",
"data": [
"title",
"value5"
]
},
{
"event": "opentag",
"data": [
"button",
{
"id": "test2",
"class": "value4",
"title": "value5"
}
]
},
{
"event": "text",
"data": [
"class=\"value4\"title=\"value5\""
]
},
{
"event": "closetag",
"data": [
"button"
]
},
{
"event": "text",
"data": [
"\n"
]
},
{
"event": "closetag",
"data": [
"body"
]
},
{
"event": "text",
"data": [
"\n"
]
},
{
"event": "closetag",
"data": [
"html"
]
}
]
}

75
project3/node_modules/htmlparser2/test/api.js generated vendored Normal file
View File

@@ -0,0 +1,75 @@
var htmlparser2 = require(".."),
assert = require("assert");
//Mocha suite covering the public surface of the package entry point:
//module exports, a Parser driven without a handler, and position tracking.
describe("API", function(){
//Each export must be the very same object as the module required directly.
//Every assertion is made twice; the second is labelled as a cache check
//(presumably the exports are lazily loaded getters -- TODO confirm).
it("should load all modules", function(){
var Stream = require("../lib/Stream.js");
assert.strictEqual(htmlparser2.Stream, Stream, "should load module");
assert.strictEqual(htmlparser2.Stream, Stream, "should load it again (cache)");
var ProxyHandler = require("../lib/ProxyHandler.js");
assert.strictEqual(htmlparser2.ProxyHandler, ProxyHandler, "should load module");
assert.strictEqual(htmlparser2.ProxyHandler, ProxyHandler, "should load it again (cache)");
});
//Drives a Parser constructed with `null` as its handler through a mix of
//markup; the test passes as long as none of these calls throws and the
//assertions below hold.
it("should work without callbacks", function(){
var p = new htmlparser2.Parser(null, {xmlMode: true, lowerCaseAttributeNames: true});
p.end("<a foo><bar></a><!-- --><![CDATA[]]]><?foo?><!bar><boo/>boohay");
//writing and ending again after end(), with no onerror callback installed
p.write("foo");
//check for an error
p.end();
//with an onerror callback installed, both write() and end() on the
//already-ended parser are asserted to invoke it
var err = false;
p._cbs.onerror = function(){ err = true; };
p.write("foo");
assert(err);
err = false;
p.end();
assert(err);
p.reset();
//remove method
//the onopentag callback is set to null while the tag is still open
p._cbs.onopentag = function(){};
p.write("<a foo");
p._cbs.onopentag = null;
p.write(">");
//pause/resume
var processed = false;
p._cbs.ontext = function(t){
assert.equal(t, "foo");
processed = true;
};
//text written while paused must not reach ontext until resume()
p.pause();
p.write("foo");
assert(!processed);
p.resume();
assert(processed);
//a pause/resume pair with nothing written in between must not emit text
processed = false;
p.pause();
assert(!processed);
p.resume();
assert(!processed);
//end(data) while paused is likewise held back until resume()
p.pause();
p.end("foo");
assert(!processed);
p.resume();
assert(processed);
});
//startIndex is asserted to be 0 after the leading text has been written and
//3 after the following tag, i.e. the offset at which "<bar>" begins
it("should update the position", function(){
var p = new htmlparser2.Parser(null);
p.write("foo");
assert.equal(p.startIndex, 0);
p.write("<bar>");
assert.equal(p.startIndex, 3);
});
});

83
project3/node_modules/htmlparser2/test/test-helper.js generated vendored Normal file
View File

@@ -0,0 +1,83 @@
var htmlparser2 = require(".."),
fs = require("fs"),
path = require("path"),
assert = require("assert"),
Parser = htmlparser2.Parser,
CollectingHandler = htmlparser2.CollectingHandler;
exports.writeToParser = function(handler, options, data){
var parser = new Parser(handler, options);
//first, try to run the test via chunks
for(var i = 0; i < data.length; i++){
parser.write(data.charAt(i));
}
parser.end();
//then parse everything
parser.parseComplete(data);
};
//returns a tree structure
exports.getEventCollector = function(cb){
var handler = new CollectingHandler({onerror: cb, onend: onend});
return handler;
function onend(){
cb(null, handler.events.reduce(eventReducer, []));
}
};
//Reducer that folds one recorded handler call into the list of
//`{event, data}` objects the fixtures are compared against.
// events - accumulator; mutated in place and returned
// arr    - one recorded call: [callbackName, arg1, arg2, ...]
//"onerror" and "onend" entries are dropped, a text event that directly
//follows another text event is appended to it, and everything else is
//pushed with the leading "on" removed from the callback name.
function eventReducer(events, arr){
	var type = arr[0];

	//error/end notifications are not part of the compared output
	if(type === "onerror" || type === "onend") return events;

	var followsText = type === "ontext" &&
		events.length > 0 &&
		events[events.length - 1].event === "text";

	if(followsText){
		//merge adjacent text so chunked and unchunked runs compare equal
		events[events.length - 1].data[0] += arr[1];
	} else {
		events.push({
			event: type.slice(2),
			data: arr.slice(1)
		});
	}

	return events;
}
//Builds the node-style callback `(err, actual)` used to check one fixture.
// expected - the value every invocation's `actual` must deep-equal
// done     - invoked on the second and every later successful invocation;
//            the first successful invocation only records that it happened
//Throws if `err` is set, or if `actual` does not match; in the latter case
//the thrown error's `expected`/`actual` are replaced with indented JSON.
function getCallback(expected, done){
	var passes = 0;

	return function(err, actual){
		assert.ifError(err);

		try {
			assert.deepEqual(expected, actual, "didn't get expected output");
		} catch(e){
			//attach pretty-printed values for the reporter
			e.expected = JSON.stringify(expected, null, 2);
			e.actual = JSON.stringify(actual, null, 2);
			throw e;
		}

		passes += 1;
		if(passes > 1) done();
	};
}
exports.mochaTest = function(name, root, test){
describe(name, readDir);
function readDir(){
var dir = path.join(root, name);
fs
.readdirSync(dir)
.filter(RegExp.prototype.test, /^[^\._]/) //ignore all files with a leading dot or underscore
.map(function(name){
return path.join(dir, name);
})
.map(require)
.forEach(runTest);
}
function runTest(file){
it(file.name, function(done){
test(file, getCallback(file.expected, done));
});
}
};