// Matches one entry of a numbered file listing of the form
//   <number>. <path>
//   ```
//   <contents>
//   ```
// Capture groups:
//   1 / file_number   - the digits before the dot
//   2 / filepath      - lazily matched text after the number, up to a newline
//   3 / file_contents - the fenced block, including both ``` fences
//   4 (unnamed)       - the text between the two fences
// Flags: g = find every entry, m = `$` also matches at line ends,
//        s = `.` also matches newlines (needed for multi-line contents).
// JavaScript spells named groups `(?<name>...)`; the Python/PCRE spelling
// `(?P<name>...)` is an invalid group and throws a SyntaxError at parse time.
const regex = /(?<file_number>\d+)\.\s+(?<filepath>.*?)\n(?<file_contents>```(.*?)```)(?=\d+\.\s+|$)/gms;
// Alternative syntax using RegExp constructor
// const regex = new RegExp('(?<file_number>\\d+)\\.\\s+(?<filepath>.*?)\\n(?<file_contents>```(.*?)```)(?=\\d+\\.\\s+|$)', 'gms')
// Sample input for the regex: a numbered list of file paths ("1. nextflow.config",
// "2. main.nf", ...), each followed by a triple-backtick fenced block holding that
// file's contents, and closed by a free-text note. Backticks and `$` are
// backslash-escaped so the template literal keeps them as literal characters
// instead of ending the string or starting an interpolation.
const str = `1. nextflow.config
\`\`\`
params {
inputDir = "data/raw"
outputDir = "results"
}
process {
executor = "local"
memory = "8 GB"
cpus = 4
}
\`\`\`
2. main.nf
\`\`\`
#!/usr/bin/env nextflow
params.inputDir = "./data/raw"
params.outputDir = "./results"
include { preprocess, qualityControl, differentialExpression } from "./scripts"
workflow {
input:
path(inputDir) from params.inputDir
output:
path(outputDir) into results
preprocess_output, quality_control_output, differential_expression_output = script {
preprocess(inputDir)
}
quality_control_output = script {
qualityControl(preprocess_output)
}
differential_expression_output = script {
differentialExpression(preprocess_output)
}
}
\`\`\`
3. scripts/preprocess.sh
\`\`\`
#!/bin/bash
inputDir=\$1
# Trim reads using Trimmomatic
trimmomatic PE \${inputDir}/sample_R1.fastq.gz \${inputDir}/sample_R2.fastq.gz \${inputDir}/sample_R1.trimmed.fastq.gz \${inputDir}/sample_R1.unpaired.fastq.gz \${inputDir}/sample_R2.trimmed.fastq.gz \${inputDir}/sample_R2.unpaired.fastq.gz ILLUMINACLIP:adapters.fa:2:30:10 LEADING:3 TRAILING:3 SLIDINGWINDOW:4:15 MINLEN:36
# Align trimmed reads using HISAT2
hisat2 -x genome_index -1 \${inputDir}/sample_R1.trimmed.fastq.gz -2 \${inputDir}/sample_R2.trimmed.fastq.gz -S \${inputDir}/sample.sam
# Convert SAM to BAM
samtools view -bS \${inputDir}/sample.sam > \${inputDir}/sample.bam
# Sort BAM file
samtools sort \${inputDir}/sample.bam -o \${inputDir}/sample.sorted.bam
# Index sorted BAM file
samtools index \${inputDir}/sample.sorted.bam
# Remove intermediate files
rm \${inputDir}/sample.sam \${inputDir}/sample.bam
\`\`\`
4. scripts/quality_control.sh
\`\`\`
#!/bin/bash
inputDir=\$1
# Run FastQC on trimmed reads
fastqc \${inputDir}/sample_R1.trimmed.fastq.gz \${inputDir}/sample_R2.trimmed.fastq.gz -o \${inputDir}
# Move FastQC output to results directory
mv \${inputDir}/*.html \${inputDir}/*.zip \${inputDir}/../results/logs
\`\`\`
5. scripts/differential_expression.sh
\`\`\`
#!/bin/bash
inputDir=\$1
# Count reads using featureCounts
featureCounts -T 4 -a annotation.gtf -o \${inputDir}/counts.txt \${inputDir}/sample.sorted.bam
# Perform differential expression analysis using DESeq2
Rscript differential_expression.R \${inputDir}/counts.txt \${inputDir}/../results/differential_expression_results.txt
\`\`\`
Note: The contents of the \`data/\` and \`results/\` directories are not provided as they would depend on the specific input data and expected output structure.
`;
// Walk every match of `regex` in `str` and print each element of the result.
// If `regex` is shared at module scope and may already have been used, rewind
// it first with `regex.lastIndex = 0`.
let m;
for (m = regex.exec(str); m !== null; m = regex.exec(str)) {
  // A zero-width match leaves `lastIndex` at the match position; advance it by
  // hand so the next `exec` call cannot return the same empty match forever.
  if (regex.lastIndex === m.index) {
    regex.lastIndex += 1;
  }
  // `m` is the array returned by `exec`; print each element with its index.
  for (const [groupIndex, match] of m.entries()) {
    console.log(`Found match, group ${groupIndex}: ${match}`);
  }
}
// Please keep in mind that these code samples are automatically generated and are not guaranteed to work. If you find any syntax errors, feel free to submit a bug report. For a full regex reference for JavaScript, please visit: https://developer.mozilla.org/en/docs/Web/JavaScript/Guide/Regular_Expressions