1 # SPDX-License-Identifier: GPL-2.0-only
3 # Copyright (C) 2023 EfficiOS Inc.
5 # pyright: strict, reportTypeCommentUsage=false
import re
from typing import TextIO
11 # One part of a moultipart document.
13 # For example, for this part of which the header is at line 37:
15 # --- Another Oscar Wilde quote
16 # I can resist everything except temptation.
18 # The corresponding `Part` object is:
#     Part('Another Oscar Wilde quote',
#          'I can resist everything except temptation.\n',
#          38)
def __init__(self, header_info: str, content: str, first_content_line_no: int):
    # Builds a part from its header information `header_info`, its
    # text content `content`, and the number `first_content_line_no`
    # of its first content line.
    #
    # The three values are kept as is in private attributes; nothing
    # is validated or transformed here.
    self._first_content_line_no = first_content_line_no
    self._content = content
    self._header_info = header_info
def header_info(self):
    # Header information of this part, exactly as stored in
    # `_header_info`.
    return self._header_info
37 # Number of the first line, relative to the beginning of the
38 # containing moultipart document, of the content of this part.
def first_content_line_no(self):
    # Plain read accessor: returns the stored line number without
    # any adjustment.
    return self._first_content_line_no
44 return "Part({}, {}, {})".format(
45 repr(self
.header_info
), repr(self
.content
), self
.first_content_line_no
def _try_parse_header(line: str):
    # Tries to parse `line` as a moultipart header line.
    #
    # Returns the custom information of the header line with its
    # surrounding whitespace removed (an empty string when there's
    # none), or `None` if `line` isn't a header line.
    m = re.match(r"---(\s*| .+)$", line)

    if m is None:
        # Not a header line: `re.match()` returns `None` in that case,
        # so there's no group to read.
        return None

    return m.group(1).strip()
58 # Parses the moultipart document file `in_file` and returns its parts
59 # (list of `Part` objects).
61 # A moultipart document is a sequence of parts.
# A moultipart part is:
# 1. A header line, that is, in this order:
#
#    a) Exactly `---` at the beginning of the line.
#    b) Zero or more spaces (at least one if custom information
#       follows).
#    c) Optional: custom information until the end of the line.
71 # 2. Zero or more lines of text which aren't header lines.
# For example, consider the following moultipart document:
#
#     --- Victoria
#     Parenteau
#     ---
#     Taillon
#     --- This part is empty
#     --- Josianne
#     Gervais
#
# Then this function would return the following part objects:
#
#     [
#         Part('Victoria', 'Parenteau\n', 2),
#         Part('', 'Taillon\n', 4),
#         Part('This part is empty', '', 6),
#         Part('Josianne', 'Gervais\n', 7),
#     ]
92 # Raises `RuntimeError` on any parsing error.
def parse(in_file: TextIO):
    # Parses the moultipart document `in_file` and returns the list of
    # its parts (`Part` objects), in document order.
    #
    # Raises `RuntimeError` if the first line of `in_file` isn't a
    # header line, which includes the case where `in_file` is empty.

    # Read the first header.
    #
    # The empty string default makes an empty document fail the header
    # check below instead of leaking `StopIteration` to the caller.
    first_line = next(in_file, "")
    cur_part_header_info = _try_parse_header(first_line)

    if cur_part_header_info is None:
        raise RuntimeError(
            "Expecting header line starting with `---`, got `{}`".format(
                first_line.rstrip("\n")
            )
        )

    parts = []  # type: list[Part]
    cur_content_lines = []  # type: list[str]

    # The first header is at line 1, so its content starts at line 2.
    cur_first_content_line_no = 2

    # Line 1 is already consumed: remaining lines are numbered from 2.
    for line_no, line in enumerate(in_file, start=2):
        maybe_part_header_info = _try_parse_header(line)

        if maybe_part_header_info is None:
            # Accumulate content lines (line terminators included)
            cur_content_lines.append(line)
            continue

        # New header: the current part is complete
        parts.append(
            Part(
                cur_part_header_info,
                "".join(cur_content_lines),
                cur_first_content_line_no,
            )
        )
        cur_content_lines = []
        cur_part_header_info = maybe_part_header_info
        cur_first_content_line_no = line_no + 1

    # Last part (always exists because a first header was found)
    parts.append(
        Part(
            cur_part_header_info,
            "".join(cur_content_lines),
            cur_first_content_line_no,
        )
    )

    return parts
if __name__ == "__main__":
    # Script mode: parses the moultipart document of which the path is
    # the first command-line argument and pretty-prints the resulting
    # parts.
    #
    # `pprint` and `sys` are only needed here, hence the local imports.
    import pprint
    import sys

    with open(sys.argv[1]) as f:
        pprint.pprint(parse(f))