Skip to content

Commit 1386716

Browse files
committed
ruff_python_formatting: add doctest state handling
This commit adds a small but central component to code snippet formatting in docstrings: it specifically implements the state transitions needed to recognize and collect code snippets from doctests. This means looking for PS1 and PS2 prompts and extracting the code portion of each line. This also introduces a "code example add action" which we will use in a subsequent commit to control the higher level docstring line printer.
1 parent 0d4f1c1 commit 1386716

File tree

1 file changed

+168
-0
lines changed
  • crates/ruff_python_formatter/src/expression

1 file changed

+168
-0
lines changed

crates/ruff_python_formatter/src/expression/string.rs

Lines changed: 168 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1071,6 +1071,83 @@ impl<'src> DocstringLine<'src> {
10711071
}
10721072
}
10731073
}
1074+
1075+
/// A single code example extracted from a docstring.
1076+
///
1077+
/// This represents an intermediate state from when the code example was first
1078+
/// found all the way up until the point at which the code example has finished
1079+
/// and is reformatted.
1080+
///
1081+
/// Its default state is "empty." That is, no code example is currently
1082+
/// being collected.
1083+
#[derive(Debug, Default)]
1084+
struct CodeExample<'src> {
1085+
/// The kind of code example being collected, or `None` if no code example
1086+
/// has been observed.
1087+
kind: Option<CodeExampleKind>,
1088+
/// The lines that have been seen so far that make up the code example.
1089+
lines: Vec<CodeExampleLine<'src>>,
1090+
}
1091+
1092+
impl<'src> CodeExample<'src> {
1093+
/// Attempt to add an original line from a docstring to this code example.
1094+
///
1095+
/// Based on the line and the internal state of whether a code example is
1096+
/// currently being collected or not, this will return an "action" for
1097+
/// the caller to perform. The typical case is a "print" action, which
1098+
/// instructs the caller to just print the line as though it were not part
1099+
/// of a code snippet.
1100+
fn add(&mut self, original: DocstringLine<'src>) -> CodeExampleAddAction<'src> {
1101+
match self.kind.take() {
1102+
// There's no existing code example being built, so we look for
1103+
// the start of one or otherwise tell the caller we couldn't find
1104+
// anything.
1105+
None => match self.add_start(original) {
1106+
None => CodeExampleAddAction::Kept,
1107+
Some(original) => CodeExampleAddAction::Print { original },
1108+
},
1109+
Some(CodeExampleKind::Doctest(doctest)) => {
1110+
if let Some(code) = doctest_find_ps2_prompt(&doctest.indent, &original.line) {
1111+
let code = code.to_string();
1112+
self.lines.push(CodeExampleLine { original, code });
1113+
// Stay with the doctest kind while we accumulate all
1114+
// PS2 prompts.
1115+
self.kind = Some(CodeExampleKind::Doctest(doctest));
1116+
return CodeExampleAddAction::Kept;
1117+
}
1118+
let code = std::mem::take(&mut self.lines);
1119+
let original = self.add_start(original);
1120+
CodeExampleAddAction::Format {
1121+
code,
1122+
kind: CodeExampleKind::Doctest(doctest),
1123+
original,
1124+
}
1125+
}
1126+
}
1127+
}
1128+
1129+
/// Looks for the start of a code example. If one was found, then the given
1130+
/// line is kept and added as part of the code example. Otherwise, the line
1131+
/// is returned unchanged and no code example was found.
1132+
///
1133+
/// # Panics
1134+
///
1135+
/// This panics when the existing code-example is any non-None value. That
1136+
/// is, this routine assumes that there is no ongoing code example being
1137+
/// collected and looks for the beginning of another code example.
1138+
fn add_start(&mut self, original: DocstringLine<'src>) -> Option<DocstringLine<'src>> {
1139+
assert_eq!(None, self.kind, "expected no existing code example");
1140+
if let Some((indent, code)) = doctest_find_ps1_prompt(&original.line) {
1141+
let indent = indent.to_string();
1142+
let code = code.to_string();
1143+
self.lines.push(CodeExampleLine { original, code });
1144+
self.kind = Some(CodeExampleKind::Doctest(CodeExampleDoctest { indent }));
1145+
return None;
1146+
}
1147+
Some(original)
1148+
}
1149+
}
1150+
10741151
/// The kind of code example observed in a docstring.
10751152
#[derive(Clone, Debug, Eq, PartialEq)]
10761153
enum CodeExampleKind {
@@ -1117,6 +1194,97 @@ struct CodeExampleLine<'src> {
11171194
/// The code extracted from the line.
11181195
code: String,
11191196
}
1197+
1198+
/// An action that a caller should perform after attempting to add a line from
1199+
/// a docstring to a code example.
1200+
///
1201+
/// Callers are expected to add every line from a docstring to a code example,
1202+
/// and the state of the code example (and the line itself) will determine
1203+
/// how the caller should react.
1204+
#[derive(Debug)]
1205+
enum CodeExampleAddAction<'src> {
1206+
/// The line added was ignored by `CodeExample` and the caller should print
1207+
/// it to the formatter as-is.
1208+
///
1209+
/// This is the common case. That is, most lines in most docstrings are not
1210+
/// part of a code example.
1211+
Print { original: DocstringLine<'src> },
1212+
/// The line added was kept by `CodeExample` as part of a new or existing
1213+
/// code example.
1214+
///
1215+
/// When this occurs, callers should not try to format the line and instead
1216+
/// move on to the next line.
1217+
Kept,
1218+
/// The line added indicated that the code example is finished and should
1219+
/// be formatted and printed. The line added is not treated as part of
1220+
/// the code example. If the line added indicated the start of another
1221+
/// code example, then it won't be returned to the caller here. Otherwise,
1222+
/// callers should pass it through to the formatter as-is.
1223+
Format {
1224+
/// The kind of code example that was found.
1225+
kind: CodeExampleKind,
1226+
/// The Python code that should be formatted, indented and printed.
1227+
///
1228+
/// This is guaranteed to be non-empty.
1229+
code: Vec<CodeExampleLine<'src>>,
1230+
/// When set, the line is considered not part of any code example
1231+
/// and should be handled as if the `Print` action were returned.
1232+
/// Otherwise, if there is no line, then either one does not exist
1233+
/// or it is part of another code example and should be treated as a
1234+
/// `Kept` action.
1235+
original: Option<DocstringLine<'src>>,
1236+
},
1237+
}
1238+
1239+
/// Looks for a valid doctest PS1 prompt in the line given.
1240+
///
1241+
/// If one was found, then the indentation prior to the prompt is returned
1242+
/// along with the code portion of the line.
1243+
fn doctest_find_ps1_prompt(line: &str) -> Option<(&str, &str)> {
1244+
let trim_start = line.trim_start();
1245+
// Prompts must be followed by an ASCII space character[1].
1246+
//
1247+
// [1]: https://github.com/python/cpython/blob/0ff6368519ed7542ad8b443de01108690102420a/Lib/doctest.py#L809-L812
1248+
let code = trim_start.strip_prefix(">>> ")?;
1249+
let indent_len = line
1250+
.len()
1251+
.checked_sub(trim_start.len())
1252+
.expect("suffix is <= original");
1253+
let indent = &line[..indent_len];
1254+
Some((indent, code))
1255+
}
1256+
1257+
/// Looks for a valid doctest PS2 prompt in the line given.
1258+
///
1259+
/// If one is found, then the code portion of the line following the PS2 prompt
1260+
/// is returned.
1261+
///
1262+
/// Callers must provide a string containing the original indentation of the
1263+
/// PS1 prompt that started the doctest containing the potential PS2 prompt
1264+
/// in the line given. If the line contains a PS2 prompt, its indentation must
1265+
/// match the indentation used for the corresponding PS1 prompt (otherwise
1266+
/// `None` will be returned).
1267+
fn doctest_find_ps2_prompt<'src>(ps1_indent: &str, line: &'src str) -> Option<&'src str> {
1268+
let (ps2_indent, ps2_after) = line.split_once("...")?;
1269+
// PS2 prompts must have the same indentation as their
1270+
// corresponding PS1 prompt.[1] While the 'doctest' Python
1271+
// module will error in this case, we just treat this line as a
1272+
// non-doctest line.
1273+
//
1274+
// [1]: https://github.com/python/cpython/blob/0ff6368519ed7542ad8b443de01108690102420a/Lib/doctest.py#L733
1275+
if ps1_indent != ps2_indent {
1276+
return None;
1277+
}
1278+
// PS2 prompts must be followed by an ASCII space character unless
1279+
// it's an otherwise empty line[1].
1280+
//
1281+
// [1]: https://github.com/python/cpython/blob/0ff6368519ed7542ad8b443de01108690102420a/Lib/doctest.py#L809-L812
1282+
match ps2_after.strip_prefix(' ') {
1283+
None if ps2_after.is_empty() => Some(""),
1284+
None => None,
1285+
Some(code) => Some(code),
1286+
}
1287+
}
11201288
}
11211289

11221290
/// If the last line of the docstring is `content" """` or `content\ """`, we need a chaperone space

0 commit comments

Comments
 (0)