import Foundation
// Pattern for Pandoc-style citation keys: a literal `@` followed by a key that
// starts with a word character, continues with word characters or the internal
// punctuation `:.#$%&-+?<>~/`, and ends with at least one word character.
// The key (without the `@`) is captured as group 1, named `citekey`.
//
// NSRegularExpression uses ICU syntax, where a named group is written
// `(?<name>...)`. The Python-style `(?P<name>...)` spelling is not valid ICU
// syntax and makes the initializer throw, so it must not be used here.
let pattern = ##"@(?<citekey>\w[\w:.#$%&\-+?<>~/]*\w+)"##
// `try!` is acceptable because the pattern is a compile-time constant: a failure
// here is a programming error in the literal above, not a runtime condition.
let regex = try! NSRegularExpression(pattern: pattern, options: .anchorsMatchLines)
// Sample Markdown document that the regex is run against. It contains citekeys
// the pattern is expected to match in full ("Valid citekeys") and others it
// only matches partially ("Invalid citekeys").
// The `###` raw-string delimiters mean the text is taken literally: no escape
// sequences or interpolation are processed inside it.
let testString = ###"""
The [Pandoc manual](https://pandoc.org/MANUAL.html#citations) defines the following syntax for citation keys:
> The citation key must begin with a letter, digit, or `_`, and may contain alphanumerics, `_`, and internal punctuation characters (`:.#$%&-+?<>~/`). Here are some examples:
## Valid citekeys
Manubot supports citations like `@source:identifier`, where `source` is one of the options described below. The citekeys in this section are valid according to the Pandoc syntax.
1. DOI (Digital Object Identifier), cite like `@doi:10.15363/thinklab.4`.
Shortened versions of DOIs can be created at [shortdoi.org](http://shortdoi.org/).
shortDOIs begin with `10/` rather than `10.` and can also be cited.
For example, Manubot will expand `@doi:10/993` to the DOI above.
We suggest using shortDOIs to cite DOIs containing forbidden characters, such as `(` or `)`.
2. PubMed Central ID, cite like `@pmcid:PMC4497619`.
3. PubMed ID, cite like `@pmid:26158728`.
4. _arXiv_ ID, cite like `@arxiv:1508.06576v2`.
5. ISBN (International Standard Book Number), cite like `@isbn:9781339919881`.
6. URL / webpage, cite like `@url:https://nyti.ms/1QUgAt1`.
URL citations can be helpful if the above methods return incorrect metadata.
For example, `@doi:10.1038/ng.3834` [incorrectly handles](https://github.com/manubot/manubot/issues/158) the consortium name resulting in a blank author, while `@url:https://doi.org/10.1038/ng.3834` succeeds.
Similarly, `@url:https://doi.org/10.1101/142760` is a [workaround](https://github.com/manubot/manubot/issues/16) to set the journal name of bioRxiv preprints to _bioRxiv_.
7. Wikidata Items, cite like `@wikidata:Q50051684`.
Note that anyone can edit or add records on [Wikidata](https://www.wikidata.org), so users are encouraged to contribute metadata for hard-to-cite works to Wikidata as an alternative to using a `raw` citation.
8. For references that do not have any of the persistent identifiers above, use a raw citation like `@raw:old-manuscript`.
Metadata for raw citations must be provided manually.
Cite multiple items at once like:
```md
Here is a sentence with several citations [@doi:10.15363/thinklab.4; @pmid:26158728; @arxiv:1508.06576; @isbn:9780394603988].
```
More information at https://github.com/manubot/rootstock/blob/master/USAGE.md#citations
## Invalid citekeys
Citekeys in this section would be nice to support, but notice that they do not completely match the regex:
Citekey with parentheses @doi:10.1016/S0022-2836(05)80360-2
Citekey with closing slash @https://www.google.com/
Citekey with equal sign @https://openreview.net/forum?id=HkwoSDPgg
See https://github.com/jgm/pandoc/issues/6026 for discussion on a more flexible markdown syntax for citation keys.
"""###
// NSRegularExpression works in UTF-16 offsets, so the search range is sized by
// the UTF-16 length of the input rather than its Character count.
let stringRange = NSRange(location: 0, length: testString.utf16.count)
let matches = regex.matches(in: testString, range: stringRange)

// For every match, collect the text of each capture group (index 1 and up;
// index 0 is the whole match and is skipped). Groups that did not participate
// are reported as {NSNotFound, 0} and are left out. Matches that end up with
// no captured text are dropped entirely.
var result: [[String]] = []
result = matches.compactMap { hit -> [String]? in
    let captured = (1 ..< hit.numberOfRanges)
        .map { hit.range(at: $0) }
        .filter { $0.location != NSNotFound || $0.length != 0 }
        .map { (testString as NSString).substring(with: $0) }
    return captured.isEmpty ? nil : captured
}
print(result)
// Please keep in mind that these code samples are automatically generated and are not guaranteed to work. If you find any syntax errors, feel free to submit a bug report. For a full regex reference for Swift 5.2, please visit: https://developer.apple.com/documentation/foundation/nsregularexpression