import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Demonstrates extracting numbered file entries from a block of text.
 *
 * <p>Each entry is expected to look like a number, a dot, whitespace, a file
 * path, a newline, and then the file contents wrapped in triple backticks.
 * For every entry found, the full match and each capturing group are printed
 * to standard output.
 */
public class Example {

    /**
     * Regular expression for a single numbered file entry.
     *
     * <p>Capturing groups, in order:
     * <ol>
     *   <li>{@code fileNumber} - the digits before the dot</li>
     *   <li>{@code filepath} - the text between the number and the end of that line</li>
     *   <li>{@code fileContents} - the fenced block including both backtick fences</li>
     *   <li>(unnamed) - the text between the backtick fences</li>
     * </ol>
     *
     * <p>Java spells named groups {@code (?<name>...)} and only allows ASCII
     * letters and digits in the name. The Python spelling {@code (?P<name>...)}
     * and names containing underscores are rejected by {@link Pattern#compile}
     * with a {@code PatternSyntaxException}, hence the camelCase names here.
     */
    static final String REGEX =
            "(?<fileNumber>\\d+)\\.\\s+(?<filepath>.*?)\\n(?<fileContents>```(.*?)```)(?=\\d+\\.\\s+|$)";

    /**
     * Compiled form of {@link #REGEX}.
     *
     * <p>{@code DOTALL} lets {@code .} cross line terminators so the fenced
     * block can span several lines; {@code MULTILINE} lets {@code $} match at
     * the end of the line holding the closing fence.
     */
    static final Pattern FILE_ENTRY = Pattern.compile(REGEX, Pattern.MULTILINE | Pattern.DOTALL);

    /**
     * Runs {@link #FILE_ENTRY} over a fixed sample text and prints every match
     * together with its numbered capturing groups.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        final String string = "1. nextflow.config\n"
                + "```\n"
                + "params {\n"
                + " inputDir = \"data/raw\"\n"
                + " outputDir = \"results\"\n"
                + "}\n\n"
                + "process {\n"
                + " executor = \"local\"\n"
                + " memory = \"8 GB\"\n"
                + " cpus = 4\n"
                + "}\n"
                + "```\n\n"
                + "2. main.nf\n"
                + "```\n"
                + "#!/usr/bin/env nextflow\n\n"
                + "params.inputDir = \"./data/raw\"\n"
                + "params.outputDir = \"./results\"\n\n"
                + "include { preprocess, qualityControl, differentialExpression } from \"./scripts\"\n\n"
                + "workflow {\n"
                + " input:\n"
                + " path(inputDir) from params.inputDir\n\n"
                + " output:\n"
                + " path(outputDir) into results\n\n"
                + " preprocess_output, quality_control_output, differential_expression_output = script {\n"
                + " preprocess(inputDir)\n"
                + " } \n\n"
                + " quality_control_output = script {\n"
                + " qualityControl(preprocess_output)\n"
                + " }\n\n"
                + " differential_expression_output = script {\n"
                + " differentialExpression(preprocess_output)\n"
                + " }\n"
                + "}\n"
                + "```\n\n"
                + "3. scripts/preprocess.sh\n"
                + "```\n"
                + "#!/bin/bash\n\n"
                + "inputDir=$1\n\n"
                + "# Trim reads using Trimmomatic\n"
                + "trimmomatic PE ${inputDir}/sample_R1.fastq.gz ${inputDir}/sample_R2.fastq.gz ${inputDir}/sample_R1.trimmed.fastq.gz ${inputDir}/sample_R1.unpaired.fastq.gz ${inputDir}/sample_R2.trimmed.fastq.gz ${inputDir}/sample_R2.unpaired.fastq.gz ILLUMINACLIP:adapters.fa:2:30:10 LEADING:3 TRAILING:3 SLIDINGWINDOW:4:15 MINLEN:36\n\n"
                + "# Align trimmed reads using HISAT2\n"
                + "hisat2 -x genome_index -1 ${inputDir}/sample_R1.trimmed.fastq.gz -2 ${inputDir}/sample_R2.trimmed.fastq.gz -S ${inputDir}/sample.sam\n\n"
                + "# Convert SAM to BAM\n"
                + "samtools view -bS ${inputDir}/sample.sam > ${inputDir}/sample.bam\n\n"
                + "# Sort BAM file\n"
                + "samtools sort ${inputDir}/sample.bam -o ${inputDir}/sample.sorted.bam\n\n"
                + "# Index sorted BAM file\n"
                + "samtools index ${inputDir}/sample.sorted.bam\n\n"
                + "# Remove intermediate files\n"
                + "rm ${inputDir}/sample.sam ${inputDir}/sample.bam\n"
                + "```\n\n"
                + "4. scripts/quality_control.sh\n"
                + "```\n"
                + "#!/bin/bash\n\n"
                + "inputDir=$1\n\n"
                + "# Run FastQC on trimmed reads\n"
                + "fastqc ${inputDir}/sample_R1.trimmed.fastq.gz ${inputDir}/sample_R2.trimmed.fastq.gz -o ${inputDir}\n\n"
                + "# Move FastQC output to results directory\n"
                + "mv ${inputDir}/*.html ${inputDir}/*.zip ${inputDir}/../results/logs\n"
                + "```\n\n"
                + "5. scripts/differential_expression.sh\n"
                + "```\n"
                + "#!/bin/bash\n\n"
                + "inputDir=$1\n\n"
                + "# Count reads using featureCounts\n"
                + "featureCounts -T 4 -a annotation.gtf -o ${inputDir}/counts.txt ${inputDir}/sample.sorted.bam\n\n"
                + "# Perform differential expression analysis using DESeq2\n"
                + "Rscript differential_expression.R ${inputDir}/counts.txt ${inputDir}/../results/differential_expression_results.txt\n"
                + "```\n\n"
                + "Note: The contents of the `data/` and `results/` directories are not provided as they would depend on the specific input data and expected output structure.\n";

        final Matcher matcher = FILE_ENTRY.matcher(string);
        while (matcher.find()) {
            System.out.println("Full match: " + matcher.group(0));
            // Named groups are numbered too, so this loop covers all of them
            // plus the unnamed inner-contents group.
            for (int i = 1; i <= matcher.groupCount(); i++) {
                System.out.println("Group " + i + ": " + matcher.group(i));
            }
        }
    }
}
// Please keep in mind that these code samples are automatically generated and are not guaranteed to work. If you find any syntax errors, feel free to submit a bug report. For a full regex reference for Java, please visit: https://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html