REBOL [
  Title: "Attribute Values Extractor"
  File: %ave.r
  Author: [ "HY" ]
  Date: 9-Jan-2006
  Purpose: "Extract attribute values from (HTML) tags"
  Library: [
    level: 'intermediate
    domain: [http web]
    license: none
    Platform: 'all
    Tested-under: none
    Type: 'function
    Support: none
  ]

  Comment: {
    Here are two functions: ex-att1 and ex-att2. The first has some
    limitations, and will return the wrong text for the ACTION attribute
    on the example tag herein. The second is usually a tiny bit faster
    than the first, but in some cases much slower (as in the default
    benchmark test further down in this document). But the ex-att2 function
    RETURNS THE RIGHT ATTRIBUTE VALUE.
  }

]




; ex-att1 -- quick attribute lookup by splitting the tag text.
;
;   str   tag text, e.g. {<form action='go'>}
;   attr  attribute name to look up
;
; Returns the trimmed text of the field that follows ATTR, or "" when ATTR
; is not present.
;
; Limitation: the tag is split on every one of the characters < > space = '
; so a value that itself contains one of them is cut short.  The lookup is
; also a flat SELECT over all fields, so a field that merely equals ATTR
; (for instance an attribute value) is matched as well.
ex-att1: func [str attr /local a b] [
  ; Split into fields on the tag delimiters.
  a: parse str "<> ='"
  ; Adjacent delimiters produce empty fields; drop them so that a name is
  ; directly followed by its value.
  while [b: find a ""] [remove b]
  ; SELECT yields NONE for a missing attribute.  Converting that with
  ; TO-STRING would hand back the text "none", so answer "" instead.
  b: select a attr
  either b [trim to-string b] [""]
]



; ex-att2 -- grammar-based attribute lookup.
;
;   str   tag text; everything up to the first whitespace (the tag name) is
;         skipped by THRU-WS before attributes are read
;   attr  attribute name to look up
;
; Returns the trimmed attribute value with one pair of surrounding double or
; single quotes removed, or "" when the attribute is absent, has no value, or
; the tag contains no whitespace at all.
;
; Relies on the file-level rules THRU-WS, WS, EQ, ATTR-NAME, ATTR-VALUE and
; ATTR-ONLY, on the characters DBQ and SGQ, and on TRIM-QUOTES.
ex-att2: func [str attr /local a v q attr-value-pair] [
  ; Normalise the wanted name once, on a copy, so the caller's string is not
  ; modified and the work is not repeated for every attribute in the tag.
  attr: trim copy attr

  ; This rule has to be built inside the function because its action
  ; compares against the ATTR argument.  A, V and Q are locals so that a
  ; call no longer overwrites global words of the same name.
  attr-value-pair: [
    copy a attr-name eq copy v attr-value
    (
      ; A tag that ends right after "=" yields no value text at all;
      ; treat that as an empty value instead of failing on FIRST.
      v: any [v copy ""]
      if not empty? v [
        q: first v
        ; Strip the quotes only when the value both starts and ends with
        ; the same quote character.
        if all [any [q = dbq  q = sgq]  q = last v] [v: trim-quotes v]
      ]
      ; RETURN leaves the function from inside PARSE on the first match.
      if attr = trim a [return trim v]
    )
    any ws
  ]

  parse/all str [thru-ws any [attr-only | attr-value-pair] to end]
  ; Reached only when no attribute matched.
  ""
]





; ---------------------------------------------------------------------------
; Parse vocabulary used by ex-att2.
; ---------------------------------------------------------------------------

; Whitespace is space, tab or newline.
ws-char: charset " ^-^/"
non-ws: complement ws-char
ws: ws-char

; Advance past / up to the NEAREST whitespace character of any kind.
; Trying "thru space | thru tab | thru newline" in turn would let a space far
; ahead win over a tab or newline that comes first, skipping attributes (or
; swallowing them into a value).  Both rules still fail when no whitespace
; remains in the input.
thru-ws: [any non-ws ws-char]
; WS-MARK remembers the position in front of the whitespace so that the
; rule can confirm it is there without consuming it.
to-ws: [any non-ws ws-mark: ws-char :ws-mark]

not-eq: complement charset "="
not-eq-or-ws: complement charset reduce [newline tab #" " #"="]
dbq: to-char {"}
sgq: #"'"

; "=" with optional whitespace on either side.
eq: [ any ws #"=" any ws ]
; A value is: a double-quoted run, a single-quoted run (either may be left
; unterminated, in which case it extends to the end), or bare text up to the
; next whitespace / end of input.
attr-value: [ [ dbq [ thru dbq | to end ] ] | [ sgq [ thru sgq | to end ] ] | [ to-ws any ws | to end ] ]
attr-name:  [ some not-eq-or-ws ]
; An attribute without a value: a name that is NOT followed by "=".
; NOT-EQ consumes one character to prove that; X is then moved back by one
; and the input position reset to it, so the character is not lost.
attr-only: [copy a attr-name any ws [not-eq x: (x: back x) :x | end] ]


; trim-quotes -- return STR without its first and last character.
;
; The caller is expected to have checked that those two characters are a
; matching pair of quotes.  The result is a fresh string; STR itself is left
; untouched (reversing it in place to reach the last character would leave
; the caller's string scrambled).  Input shorter than two characters yields
; an empty string.
trim-quotes: func [str] [
  if 2 > length? str [return copy ""]
  copy/part next str back tail str
]








; ---------------------------------------------------------------------------
; Benchmark: time both extractors on the same tag, then stop at the console.
; ---------------------------------------------------------------------------

; NOTE(review): the original example tag literal is missing from this copy of
; the script (TAG: had no value after it, which makes the script fail at this
; line).  The tag below is a stand-in -- replace it with the intended one.
; Its ACTION value contains "=", one of the characters ex-att1 splits on.
tag: {<form name="search" method="post" action='/cgi-bin/search.r?lang=en'>}

print tt: now/precise
loop 5110000 [ ex-att1 tag "action" ]
print join "ex-att1 used " difference now/precise tt

print "^/===^/"

print tt: now/precise
loop 5110000 [ ex-att2 tag "action" ]
print join "ex-att2 used " difference now/precise tt

halt