REBOL [
	Title: "XML/DOM"
	Date: 15-Dec-2008
	Author: "Christopher Ross-Gill"
	Type: 'module
	Exports: [load-xml]
	Version: 0.1.2
	File: %xml-dom.r
	Purpose: {A rudimentary in-memory XML interpreter and interface.}
	Notes: {
		Features: utilizes REBOL datatypes to represent XML structure;
		DOM methods for extraction; self-contained and works with /Base;
		prettified block structure.

		Caveat: destructive - discards whitespace and comments; does not
		preserve empty tags vs. matching tags with no content; NOT an
		implementation of W3 DOM, only a loosely inspired subset.

		ToDo: saving.
	}
	Library: [
		Level: 'intermediate
		Platform: 'all
		Type: [module dialect function]
		Domain: [html markup parse text web xml]
		License: 'cc-by-sa
	]
	Usage: [
		test: {Text }
		load-xml test
		doc: load-xml/dom test
		doc/get-by-tag 
		c: doc/get-by-id "d"
		c/text
		doc/tree//
	]
]

load-xml: use [
	xml! doc make-node
	space word decode entity text name attribute element header content
][
	xml!: context [
		name: value: tree: branch: position: none

		flatten: does [""]

		get-by-tag: func [tag /local result rule mk][
			result: copy []
			parse tree rule: [
				some [
					opt [mk: tag skip (append result make-node mk) :mk]
					skip [into rule | skip]
				]
			] result
		]

		get-by-id: func [id /local result rule mk][
			parse tree rule: [
				some [
					  mk: tag! into [thru /id id to end] (result: make-node mk) end skip
					| skip [into rule | skip]
				]
			] result
		]

		text: has [result][
			case/all [
				string? value [result: value]
				block? value [
					result: all [
						parse value [any [refinement! skip] # set result string!]
						result
					]
				]
				string? result [trim/auto copy result]
			]
		]

		get: func [name [refinement! tag!] /local result mk][
			if parse tree [
				tag! into [
					any [
						  mk: name [block! (result: make-node mk) | set result skip] to end
						| [refinement! | tag! | issue!] skip
					]
				]
			][result]
		]

		sibling: func [/before /after][
			case [
				all [after find [tag! issue!] type?/word position/3] [
					make-node skip position 2
				]
 				all [before find [tag! issue!] type?/word position/-2] [
					make-node skip position -2
				]
			]
		]

		parent: "Need position stack"

		children: has [result mk][
			result: copy []
			parse case [
				block? value [value] string? value [reduce [# value]] none? value [[]]
			][
				any [refinement! skip]
				any [mk: [tag! | issue!] skip (append result make-node mk)]
			]
			result
		]

		clone: does [make-node tree]

		append-child: func [name data /local at][
			case [
				none? position/2 [value: tree/2: position/2: copy []]
				string? position/2 [
					new-line value: tree/2: position/2: compose [# (position/2)] true
				]
			]

			either refinement? name [
				parse position/2 [any [refinement! skip] at:]
			][at: tail position/2]

			insert at reduce [name data]
			new-line at true
		]

		append-text: func [text][
			case [
				none? position/2 [value: tree/2: position/2: text]
				string? position/2 [append position/2 text]
				# = pick tail position/2 -2 [append last position/2 text]
				block? position/2 [append-child # text]
			]
		]

		append-attr: func [name value][
			append-child to-refinement name value
		]
	]

	doc: make xml! [
		branch: make block! 10
		document: true
		new: does [clear branch tree: position: reduce ['document none]]

		open-tag: func [tag][
			insert/only branch position
			tree: position: append-child to-tag tag none
		]

		close-tag: func [tag][
			tag: to-tag tag
			while [tag <> position/1][
				probe reform ["No End Tag:" position/1]
				if empty? branch [make error! "End tag error!"]
				take branch
			]
			tree: position: take branch
		]
	]

	make-node: func [here /base][
		make either base [doc][xml!][
			position: here
			name: here/1
			value: here/2
			tree: reduce [name value]
		]
	]

	space: use [space][
		space: charset "^-^/^M "
		[some space]
	]

	word: use [w1 w+][
		w1: #[bitset! 64#{AAAAAAAAAAD+//+H/v//B/////////////////////8=}]
		w+: #[bitset! 64#{AAAAAABg/wP+//+H/v//B/////////////////////8=}]
		[w1 any w+]
	]

	decode: use [nm hx rf mk ex ns entity to-utf-char][
		nm: #[bitset! 64#{AAAAAAAA/wMAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=}]
		hx: #[bitset! 64#{AAAAAAAA/wN+AAAAfgAAAAAAAAAAAAAAAAAAAAAAAAA=}]
		ns: ["lt" 60 "gt" 62 "amp" 38 "quot" 34 "apos" 39]

		to-utf-char: use [os fc en][
			os: [0 192 224 240 248 252]
			fc: [1 64 4096 262144 16777216 1073741824]
			en: [127 2047 65535 2097151 67108863 2147483647]

			func [int [integer!] /local char][
				repeat ln 6 [
					if int <= en/:ln [
						char: reduce [os/:ln + to integer! (int / fc/:ln)]
						repeat ps ln - 1 [
							insert next char (to integer! int / fc/:ps) // 64 + 128
						]
						break
					]
				]

				to-string to-binary char
			]
		]

		entity: [
			mk: #"&" [
				  copy rf word ";" (rf: any [select ns rf 63])
				| #"#" [
					  #"x" copy rf 2 4 hx ";" (rf: to-integer to-issue rf)
					| copy rf 2 5 nm ";" (rf: to-integer rf)
				]
			] ex: (mk: change/part mk to-utf-char rf ex) :mk
		]

		func [text [string!]][
			if parse/all text [any [to "&" [entity | skip]] to end][text]
		]
	]

	entity: use [nm hx][
		nm: charset "0123456789"
		hx: charset "0123456789abcdefABCDEF"
		[#"&" [word | #"#" [1 5 nm | #"x" 1 4 hx]] ";" | #"&"]
	]

	text: use [char value][
		char: complement charset "^-^/^M &<"
		[
			copy value [
				opt space [char | entity]
				any [char | entity | space]
			] (doc/append-text decode value)
		]
	]

	name: [word opt [":" word]]

	attribute: use [q1 q2 attr value][
		q1: complement charset {"&<}
		q2: complement charset {&'<}
		[	space copy attr name opt space "=" opt space [
				; lone ampersand is 'loose' not 'strict'
				  {"} copy value any [q1 | entity | "&"] {"}
				| {'} copy value any [q2 | entity | "&"] {'}
			] (doc/append-attr attr decode any [value ""])
		]
	]

	element: use [tag value][
		[	#"<" [
				copy tag name (doc/open-tag tag) any attribute opt space [
					  "/>" (doc/close-tag tag)
					| #">" content ""
				]
				| #"!" [
					  "--" copy value to "-->" 3 skip ; (doc/append-child #comment value)
					| "[CDATA[" copy value to "]]>" 3 skip (doc/append-text value)
				]
			]
		]
	]

	header: [
		any [
			  space 
			| "<" ["?xml" thru "?>" | "!" ["--" thru "-->" | thru ">"] | "?" thru "?>"]
		]
	]

	content: [any [text | element | space]]

	load-xml: func [document /dom /local root][
		if any [file? document url? document][document: read document]
		root: doc/new
		parse/all/case document [header element to end]
		doc/tree: any [root/document []]
		doc/value: doc/tree/2
		either dom [make-node/base doc/tree][doc/tree]
	]
]