/
# Finds URL-like text that begins with http:, https:, ftp:, ftps: or www.
# Group 1 holds the URL candidate. Group 13 holds optional trailing characters
# that are still consumed by the overall match but sit outside group 1.
# Groups 6 to 12 live inside a repeated group, so after a match they hold the
# values from the last repetition only.
# The pattern contains no line anchors, so the m flag does not alter matching.
(                 # 1: start. the URL candidate: header plus repeated segments
(                 # 2: look for header...
(                 # 3: start
(ht|f)tps?\:        # 4: "ht" or "f", then "tp", an optional "s" and a colon
)|                # 3: end.
(www)             # 5: the literal text www, without a trailing dot
)                 # 2: end. done with header.
(?:               # start repeating group, not captured
([.\/:?=&-]+)     # 6: one or more of: dot, slash, colon, question mark, equals, ampersand, hyphen
(                 # 7: start. choose one:
(((\s?)([a-z0-9]+) # 8, 9: start. 10: one optional whitespace char. 11: lower-case letters and digits
)|                 # 9: end. or
(\w+)             # 12: all word chars, but no spaces
)                 # 8: end.
)                 # 7: end.
)+                # end repeating group, one or more segments required
)                 # end capture group 1
(\/?\.?\s?[A-Z]?)  # 13: optional slash, dot, whitespace char, upper-case letter, in that order. every part is optional, so this may be empty
/
gmx