sylvainhalle · sylvainhalle · Oct 28, 2023 · Oct 25, 2023 · Oct 26, 2023 · Oct 26, 2023
diff --git a/Readme.md b/Readme.md
@@ -289,6 +289,10 @@ file and give you no advice.
 
 TeXtidote also automatically follows sub-files that are embedded from a main document using `\input{filename}` and `\include{filename}` (braces are mandatory). Any such *non-commented* instruction will add the corresponding filename to the running queue. If you want to *exclude* an `\input` from being processed, you must surround the line with `ignore begin`/`end` comments (see below, *Helping TeXtidote*).
 
+TeXtidote takes into account the `%!TEX root = ...` directive when following sub-files. You can also use the `--root` argument to specify the root document separately.
+However, if your use case requires more complex handling of sub-files, you can use one of the many available tools to pre-process your project. Some
+suggestions are listed [here](https://tex.stackexchange.com/questions/21838/replace-inputfilex-by-the-content-of-filex-automatically/377404).
+
 ### Removing markup
 
 You can also use TeXtidote just to remove the markup from your original LaTeX

diff --git a/Source/Core/src/ca/uqac/lif/textidote/Main.java b/Source/Core/src/ca/uqac/lif/textidote/Main.java
@@ -203,6 +203,7 @@ public static int mainLoop(String[] args, InputStream in, PrintStream out, Print
 		cli_parser.addArgument(new Argument().withLongName("ci").withDescription("Ignores the return code for CI usage"));
 		cli_parser.addArgument(new Argument().withLongName("encoding").withArgument("x").withDescription("Read files using encoding x"));
 		cli_parser.addArgument(new Argument().withLongName("single-file").withDescription("Don't read sub-files if any"));
+		cli_parser.addArgument(new Argument().withLongName("root").withArgument("file").withDescription("Manually set the root of the LaTeX document"));
 
 		// Check if we are using textidote in a CI tool
 		boolean usingCI = false;
@@ -406,7 +407,8 @@ else if (type.compareToIgnoreCase("txt") == 0)
 					if (input_type == Linter.Language.LATEX || (filename.compareTo("--") == 0 && input_type == Linter.Language.UNSPECIFIED) || filename.endsWith(".tex"))
 					{
 						// LaTeX file
-						LatexCleaner latex_cleaner = new LatexCleaner();
+						String root_dir = calculateRootDir(filename, map.getOptionValue("root"));
+						LatexCleaner latex_cleaner = new LatexCleaner(root_dir);
 						latex_cleaner.setIgnoreBeforeDocument(!read_all);
 						latex_cleaner.ignoreEnvironments(env_blacklist);
 						latex_cleaner.ignoreMacros(mac_blacklist);
@@ -633,7 +635,8 @@ else if (output_method.compareToIgnoreCase("json") == 0)
 				}
 				else
 				{
-					LatexCleaner latex_cleaner = new LatexCleaner();
+					String root_dir = calculateRootDir(top_level_filename, map.getOptionValue("root"));
+					LatexCleaner latex_cleaner = new LatexCleaner(root_dir);
 					if (cmd_filenames.contains(filename))
 					{
 						latex_cleaner.setIgnoreBeforeDocument(!read_all);
@@ -683,7 +686,7 @@ else if (output_method.compareToIgnoreCase("json") == 0)
 				int added = 0;
 				if (!single_file)
 				{
-					added = addInnerFilesToQueue(c_cleaner.getInnerFiles(), processed_filenames, filename_queue, top_level_filename);
+					added = addInnerFilesToQueue(c_cleaner.getInnerFiles(), processed_filenames, filename_queue);
 				}
 				if (added > 0 && cmd_filenames.size() > 1)
 				{
@@ -920,6 +923,29 @@ protected static void printMap(PrintStream ps, Map<Range,Range> map)
 		}
 		return out;
 	}
+
+	/**
+	 * Calculates the location of the root dir, using the root if it is
+	 * provided; otherwise the location of the current file is used.
+	 * @param current_filename The name of the file currently being processed
+	 * @param root The file of the root document; may be <code>null</code>
+	 * @return The parent folder of the selected file, or "" if it has none
+	 */
+	protected static String calculateRootDir(String current_filename, /*@ nullable @*/ String root)
+	{
+		if (root == null){
+			root = current_filename;
+		}
+		File f = new File(root);
+		String root_dir = f.getParent();
+		if (root_dir == null)
+		{
+			// No parent component: this happens if the filename is "--" or
+			// the file is in the current folder
+			root_dir = "";
+		}
+		return root_dir;
+	}
 
 	/**
 	 * Adds filenames found in the <code>input</code> statements of the current
@@ -931,33 +957,17 @@ protected static void printMap(PrintStream ps, Map<Range,Range> map)
 	 * @param file_queue The queue of filenames waiting to be processed.
 	 * This object is modified by the current method (new filenames can be
 	 * added to it).
-	 * @param current_filename The name of the file currently being processed
 	 * @return The number of new files added to the queue
 	 */
 	protected static int addInnerFilesToQueue(List<String> inner_files, Set<String> processed_filenames,
-			Queue<String> file_queue, String current_filename)
+			Queue<String> file_queue)
 	{
 		int added = 0;
-		File f = new File(current_filename);
-		String parent_path = f.getParent();
-		if (parent_path == null)
-		{
-			// This happens if the filename is "--" or the file is in
-			// the current folder
-			parent_path = "";
-		}
-		else
-		{
-			if (!parent_path.endsWith(PATH_SEP))
-			{
-				parent_path += PATH_SEP;
-			}
-		}
 		for (String filename : inner_files)
 		{
 			if (!processed_filenames.contains(filename))
 			{
-				file_queue.add(parent_path + filename);
+				file_queue.add(filename);
 				added++;
 			}
 		}

diff --git a/Source/Core/src/ca/uqac/lif/textidote/cleaning/latex/LatexCleaner.java b/Source/Core/src/ca/uqac/lif/textidote/cleaning/latex/LatexCleaner.java
@@ -22,6 +22,8 @@
 import ca.uqac.lif.textidote.as.AnnotatedString.Line;
 import ca.uqac.lif.textidote.cleaning.TextCleaner;
 import ca.uqac.lif.textidote.cleaning.TextCleanerException;
+import java.nio.file.Path;
+import java.nio.file.Paths;
 
 import java.util.*;
 import java.util.regex.Matcher;
@@ -52,6 +54,11 @@ public class LatexCleaner extends TextCleaner
 	 */
 	/*@ non_null @*/ protected final Set<String> m_macrosToIgnore = new HashSet<String>();
 
+	/**
+	 * The path of the root dir
+	 */
+	protected final Path m_rootDir;
+
 	/**
 	 * A list of <em>non-commented</em> <code>input</code> and <code>include</code>
 	 * declarations found in the file to be cleaned.
@@ -64,6 +71,31 @@ public class LatexCleaner extends TextCleaner
 	 */
 	protected static final transient Pattern m_includePattern = Pattern.compile("^.*\\\\(input|include)\\s*\\{(.*?)\\}.*$");
 
+	/**
+	 * A regex pattern matching the root directive.
+	 */
+	protected static final transient Pattern m_rootPattern = Pattern.compile("^%!TEX\\s+root\\s*=\\s*(.*)$");
+
+	/**
+	 * Creates a new instance of the cleaner with a given root folder.
+	 * @param root_dir Folder used as the base when resolving sub-file paths
+	 */
+	public LatexCleaner(/*@ non_null @*/ String root_dir)
+	{
+		super();
+		m_rootDir = Paths.get(root_dir);
+	}
+
+	/**
+	 * Creates a new instance of the cleaner without an explicit root folder.
+	 */
+	public LatexCleaner()
+	{
+		super();
+		// No root given: assume the root dir is the working directory (empty path)
+		m_rootDir = Paths.get("");
+	}
+
 	/**
 	 * Adds a new environment name to remove when cleaning up
 	 * @param e_name The name of the environment
@@ -114,10 +146,15 @@ public LatexCleaner ignoreMacros(/*@ non_null @*/ Collection<String> m_names)
 		// Reset list of inner files every time we clean
 		m_innerFiles.clear();
 		AnnotatedString new_as = new AnnotatedString(as);
+		Path root = m_rootDir;
+		String root_directive = parseRoot(new_as);
+		if(root_directive != null){
+			root = root.resolve(Paths.get(root_directive)).getParent();
+		}
 		new_as = cleanComments(new_as);
 		new_as = removeEnvironments(new_as);
 		new_as = removeMacros(new_as);
-		fetchIncludes(new_as);
+		fetchIncludes(new_as, root);
 		//new_as = removeAllMarkup(new_as);
 		new_as = removeMarkup(new_as);
 		//new_as = simplifySpaces(new_as);
@@ -517,13 +554,32 @@ public LatexCleaner setIgnoreBeforeDocument(boolean b)
 		return this;
 	}
 
+	/**
+	 * Extracts the location of the root document specified by the
+	 * <code>!TEX root</code> directive, if present on the first line.
+	 * @param as The contents of the tex file.
+	 * @return The trimmed directive value, or <code>null</code> if there is none.
+	 */
+	/*@ nullable @*/ protected String parseRoot(/*@ non_null @*/ AnnotatedString as)
+	{
+		if(as.lineCount()>0){
+			// Only the first line is inspected for the !TEX root directive
+			Matcher mat = m_rootPattern.matcher(as.getLine(0).toString());
+			if(mat.find()){
+				return mat.group(1).trim();
+			}
+		}
+		return null;
+	}
+
 	/**
 	 * Populates a list of <em>non-commented</em> <code>input</code> and
 	 * <code>include</code> declarations found in the file to be cleaned.
 	 * @param as The contents of the file (where environments and
 	 * comments have already been removed).
+	 * @param root Root location
 	 */
-	protected void fetchIncludes(/*@ non_null @*/ AnnotatedString as)
+	protected void fetchIncludes(/*@ non_null @*/ AnnotatedString as, /*@ non_null @*/ Path root)
 	{
 		for (Line l : as.getLines())
 		{
@@ -536,7 +592,8 @@ protected void fetchIncludes(/*@ non_null @*/ AnnotatedString as)
 				{
 					filename += ".tex";
 				}
-				m_innerFiles.add(filename);
+				Path filepath = root.resolve(Paths.get(filename));
+				m_innerFiles.add(filepath.toString());
 			}
 		}
 	}

diff --git a/Source/CoreTest/src/ca/uqac/lif/textidote/MainTest.java b/Source/CoreTest/src/ca/uqac/lif/textidote/MainTest.java
@@ -304,6 +304,35 @@ public void testNoBreakOnHTMLWithDummyReplacement() throws IOException
 	}
 
 	@Test
+	public void testIncludeWithRoot() throws IOException
+	{
+		ByteArrayOutputStream baos = new ByteArrayOutputStream();
+		PrintStream out = new PrintStream(baos);
+		int ret_code = Main.mainLoop(new String[] {"--read-all", "--output", "html", "rules/data/childs/child-section.tex"}, null, out, new NullPrintStream(), MainTest.class);
+		String output = new String(baos.toByteArray());
+		assertNotNull(output);
+		assertEquals(0, ret_code);
+		assertFalse(output.trim().isEmpty());
+		// The text of the child file and of the sibling it includes must both be present
+		assertTrue(output.indexOf("child section")!=-1);
+		assertTrue(output.indexOf("child sibling section")!=-1);
+	}
+
+	@Test
+	public void testIncludeWithRootAsArgument() throws IOException
+	{
+		ByteArrayOutputStream baos = new ByteArrayOutputStream();
+		PrintStream out = new PrintStream(baos);
+		int ret_code = Main.mainLoop(new String[] {"--read-all", "--output", "html", "--root", "rules/data/root.tex", "rules/data/childs/child-section-no-root.tex"}, null, out, new NullPrintStream(), MainTest.class);
+		String output = new String(baos.toByteArray());
+		assertNotNull(output);
+		assertEquals(0, ret_code);
+		assertFalse(output.trim().isEmpty());
+		// Check that the desired sections are present
+		assertTrue(output.indexOf("child section")!=-1);
+		assertTrue(output.indexOf("child sibling section")!=-1);
+  }
+
 	public void testBeamerFile() throws IOException
 	{
 		InputStream in = MainTest.class.getResourceAsStream("rules/data/beamer.tex");

diff --git a/Source/CoreTest/src/ca/uqac/lif/textidote/rules/data/childs/child-section-no-root.tex b/Source/CoreTest/src/ca/uqac/lif/textidote/rules/data/childs/child-section-no-root.tex
@@ -0,0 +1,2 @@
+This is the child section.
+\include{childs/child-sibling}
diff --git a/Source/CoreTest/src/ca/uqac/lif/textidote/rules/data/childs/child-section.tex b/Source/CoreTest/src/ca/uqac/lif/textidote/rules/data/childs/child-section.tex
@@ -0,0 +1,4 @@
+%!TEX root = ../root.tex
+
+This is the child section.
+\include{childs/child-sibling}
diff --git a/Source/CoreTest/src/ca/uqac/lif/textidote/rules/data/childs/child-sibling.tex b/Source/CoreTest/src/ca/uqac/lif/textidote/rules/data/childs/child-sibling.tex
@@ -0,0 +1,3 @@
+%!TEX root = ../root.tex
+
+This is the child sibling section.
diff --git a/Source/CoreTest/src/ca/uqac/lif/textidote/rules/data/root.tex b/Source/CoreTest/src/ca/uqac/lif/textidote/rules/data/root.tex
@@ -0,0 +1,5 @@
+\begin{document}
+\section{Introduction}
+
+\include{childs/child-section}
+\end{document}
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		This is the child section.
		\include{childs/child-sibling}
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,3 @@
		%!TEX root = ../root.tex

		This is the child sibling section.