_site/cover/scran_character_complete.COVER.html

1 %% Copyright (c) 2023 Peter Morgan <peter.james.morgan@gmail.com>
2 %%
3 %% Licensed under the Apache License, Version 2.0 (the "License");
4 %% you may not use this file except in compliance with the License.
5 %% You may obtain a copy of the License at
6 %%
7 %% http://www.apache.org/licenses/LICENSE-2.0
8 %%
9 %% Unless required by applicable law or agreed to in writing, software
10 %% distributed under the License is distributed on an "AS IS" BASIS,
11 %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 %% See the License for the specific language governing permissions and
13 %% limitations under the License.
14
15 %% @doc Parser combinators that deal with unicode input.
16
17 -module(scran_character_complete).
18
19
20 -feature(maybe_expr, enable).
21
22
23 -export([alpha0/0]).
24 -export([alpha1/0]).
25 -export([alphanumeric0/0]).
26 -export([alphanumeric1/0]).
27 -export([digit0/0]).
28 -export([digit1/0]).
29 -export([hex_digit0/0]).
30 -export([hex_digit1/0]).
31 -export([multispace0/0]).
32 -export([multispace1/0]).
33 -export([none_of/1]).
34 -export([one_of/1]).
35 -export([re/1]).
36 -export([re_no_case/1]).
37 -export([tag/1]).
38 -export([tag_no_case/1]).
39 -export([take/1]).
40 -include_lib("kernel/include/logger.hrl").
41
42
%% @doc Take a number of characters from the input.
%%
%% The returned parser yields `{Remaining, Taken}' where `Taken' is the
%% first `N' grapheme clusters of the input, or `nomatch' when the
%% input holds fewer than `N' grapheme clusters.

-spec take(pos_integer()) -> scran:parser().

take(N) ->
    fun
        (Input) ->
            ?LOG_DEBUG(#{n => N, input => Input}),
            %% Slice first and measure only the slice: this avoids
            %% walking the whole of the remaining input just to learn
            %% whether at least N grapheme clusters are available.
            Taken = string:slice(Input, 0, N),
            case string:length(Taken) =:= N of
                true ->
                    {string:slice(Input, N), Taken};

                false ->
                    nomatch
            end
    end.
59
60
%% @doc Build a parser from a case sensitive regular expression.
%% The expression is compiled with the `anchored' option only.

-spec re(iodata()) -> scran:parser().

re(Regex) ->
    Anchored = [anchored],
    re(Regex, Anchored).
68
69
%% @doc Build a parser from a case insensitive regular expression.
%% The expression is compiled with the `anchored' and `caseless'
%% options.

-spec re_no_case(iodata()) -> scran:parser().

re_no_case(Regex) ->
    AnchoredCaseless = [anchored, caseless],
    re(Regex, AnchoredCaseless).
77
78
%% Options accepted when compiling the regular expression; they are
%% handed to re:compile/2 unchanged.
-type compile_option() :: unicode | anchored | caseless | dollar_endonly
                        | dotall | extended | firstline | multiline
                        | no_auto_capture | dupnames | ungreedy
                        | {newline, nl_spec()}
                        | bsr_anycrlf | bsr_unicode
                        | no_start_optimize | ucp | never_utf.

-type nl_spec() :: cr | crlf | lf | anycrlf | any.

%% Compile Regex once with CompileOptions and return a parser that
%% runs the compiled pattern against its input.
%%
%% The parser yields `{Remaining, Matched}' for the whole-pattern
%% match, or `nomatch' when re:run/2 does not match.
%%
%% re:run/2 reports the match as a byte `{Offset, Length}' pair, so
%% the input is split on byte positions. Splitting with string:slice/3
%% would count grapheme clusters instead, which goes wrong for
%% multi-byte UTF-8 characters and for "\r\n" (one grapheme cluster,
%% two bytes).

-spec re(iodata(), [compile_option()]) -> scran:parser().

re(Regex, CompileOptions) ->
    {ok, MP} = re:compile(Regex, CompileOptions),
    Unicode = lists:member(unicode, CompileOptions),
    fun
        (Input) ->
            ?LOG_DEBUG(#{regex => Regex, input => Input}),
            case re:run(Input, MP) of
                {match, [{Offset, Length} | _]} ->
                    split_match(Input, Offset, Length, Unicode);

                nomatch ->
                    nomatch
            end
    end.


%% Split Input around the Length bytes found at byte Offset, returning
%% `{Remaining, Matched}'. Binary input yields binaries. List input is
%% encoded the same way re:run/2 sees it (UTF-8 when the `unicode'
%% option is in effect, raw bytes otherwise), split, and then returned
%% as flat lists.

split_match(Input, Offset, Length, _Unicode) when is_binary(Input) ->
    <<_:Offset/binary, Matched:Length/binary, Remaining/binary>> = Input,
    {Remaining, Matched};

split_match(Input, Offset, Length, true) ->
    {Remaining, Matched} = split_match(
                             unicode:characters_to_binary(Input),
                             Offset,
                             Length,
                             true),
    {unicode:characters_to_list(Remaining),
     unicode:characters_to_list(Matched)};

split_match(Input, Offset, Length, false) ->
    {Remaining, Matched} = split_match(
                             iolist_to_binary(Input),
                             Offset,
                             Length,
                             false),
    {binary_to_list(Remaining), binary_to_list(Matched)}.
101
102
%% @doc Return the matching case sensitive character data.
%%
%% The returned parser yields `{Remaining, Matched}' when the input
%% starts with `Tag', otherwise `nomatch'. When the input is a binary
%% and `Tag' is a list, the matched tag is returned as a UTF-8 encoded
%% binary.

-spec tag(unicode:chardata()) -> scran:parser().

tag(Tag) ->
    fun
        (Input) ->
            ?LOG_DEBUG(#{tag => Tag, input => Input}),
            case string:prefix(Input, Tag) of
                nomatch ->
                    nomatch;

                Remainder when is_binary(Input),
                               is_list(Tag) ->
                    %% list_to_binary/1 only accepts bytes: it raises
                    %% badarg for code points above 255 and produces
                    %% Latin-1 rather than UTF-8 for 128..255.
                    {Remainder, unicode:characters_to_binary(Tag)};

                Remainder ->
                    {Remainder, Tag}
            end
    end.
123
124
%% @doc Return the matching case insensitive character data.
%%
%% The returned parser yields `{Remaining, Matched}' when the leading
%% grapheme clusters of the input equal `Tag' after both are
%% lowercased, otherwise `nomatch'. `Matched' is taken from the input,
%% so it keeps the input's own case.

-spec tag_no_case(unicode:chardata()) -> scran:parser().

tag_no_case(Tag) ->
    %% Both values depend only on Tag, so compute them once when the
    %% parser is built rather than on every invocation.
    LowerTag = string:lowercase(Tag),
    TagLength = string:length(Tag),
    fun
        (Input) ->
            ?LOG_DEBUG(#{tag => Tag, input => Input}),
            %% Lowercase only the candidate prefix; lowercasing the
            %% whole remaining input on every call is wasted work.
            Candidate = string:slice(Input, 0, TagLength),
            case string:equal(string:lowercase(Candidate), LowerTag) of
                true ->
                    {string:slice(Input, TagLength), Candidate};

                false ->
                    nomatch
            end
    end.
145
146
%% @doc Return one of the matching characters.
%%
%% The returned parser yields `{Remaining, Character}' when the first
%% grapheme cluster of the input is found in `Choice'. It yields
%% `nomatch' for empty input or when the cluster is not found.

-spec one_of([unicode:chardata()]) -> scran:parser().

one_of(Choice) ->
    fun
        (Input) ->
            ?LOG_DEBUG(#{choice => Choice, input => Input}),
            case string:is_empty(Input) of
                true ->
                    nomatch;

                false ->
                    Candidate = string:slice(Input, 0, 1),
                    case string:find(Choice, Candidate) of
                        nomatch ->
                            nomatch;

                        _Found ->
                            {string:slice(Input, 1), Candidate}
                    end
            end
    end.
166
167
%% @doc Return the first character of the input if it is none of the
%% supplied characters.
%%
%% The returned parser yields `{Remaining, Character}' when the first
%% grapheme cluster of the input is not found in `Choice'. It yields
%% `nomatch' for empty input or when the cluster is found.

-spec none_of([unicode:chardata()]) -> scran:parser().

none_of(Choice) ->
    fun
        (Input) ->
            ?LOG_DEBUG(#{choice => Choice, input => Input}),
            case string:is_empty(Input) of
                true ->
                    nomatch;

                false ->
                    Candidate = string:slice(Input, 0, 1),
                    case string:find(Choice, Candidate) of
                        nomatch ->
                            {string:slice(Input, 1), Candidate};

                        _Found ->
                            nomatch
                    end
            end
    end.
187
188
%% @doc Recognizes zero or more lowercase and uppercase ASCII
%% alphabetic characters: a-z, A-Z.

-spec alpha0() -> scran:parser().

alpha0() ->
    fun
        (Input) ->
            Parser = zero_or_more(alpha()),
            Parser(Input)
    end.
199
200
%% @doc Recognizes one or more lowercase and uppercase ASCII
%% alphabetic characters: a-z, A-Z.

-spec alpha1() -> scran:parser().

alpha1() ->
    fun
        (Input) ->
            Parser = at_least_one(alpha()),
            Parser(Input)
    end.
211
212
%% @doc Recognizes zero or more ASCII numerical and alphabetic
%% characters: 0-9, a-z, A-Z.

-spec alphanumeric0() -> scran:parser().

alphanumeric0() ->
    fun
        (Input) ->
            Parser = zero_or_more(alphanumeric()),
            Parser(Input)
    end.
223
224
%% @doc Recognizes one or more ASCII numerical and alphabetic
%% characters: 0-9, a-z, A-Z.

-spec alphanumeric1() -> scran:parser().

alphanumeric1() ->
    fun
        (Input) ->
            Parser = at_least_one(alphanumeric()),
            Parser(Input)
    end.
235
236
%% @doc Recognizes zero or more ASCII numerical characters: 0-9.

-spec digit0() -> scran:parser().

digit0() ->
    fun
        (Input) ->
            Parser = zero_or_more(numeric()),
            Parser(Input)
    end.
246
247
%% @doc Recognizes one or more ASCII numerical characters: 0-9.

-spec digit1() -> scran:parser().

digit1() ->
    fun
        (Input) ->
            Parser = at_least_one(numeric()),
            Parser(Input)
    end.
257
258
%% @doc Recognizes zero or more of the characters returned by
%% whitespace/0: spaces, tabs, carriage returns and line feeds.

-spec multispace0() -> scran:parser().

multispace0() ->
    fun
        (Input) ->
            Parser = zero_or_more(whitespace()),
            Parser(Input)
    end.
269
270
%% @doc Recognizes zero or more ASCII hexadecimal numerical
%% characters: 0-9, A-F, a-f.

-spec hex_digit0() -> scran:parser().

hex_digit0() ->
    fun
        (Input) ->
            Parser = zero_or_more(hex()),
            Parser(Input)
    end.
281
282
%% @doc Recognizes one or more ASCII hexadecimal numerical
%% characters: 0-9, A-F, a-f.

-spec hex_digit1() -> scran:parser().

hex_digit1() ->
    fun
        (Input) ->
            Parser = at_least_one(hex()),
            Parser(Input)
    end.
293
294
%% @doc Recognizes one or more of the characters returned by
%% whitespace/0: spaces, tabs, carriage returns and line feeds.

-spec multispace1() -> scran:parser().

multispace1() ->
    fun
        (Input) ->
            Parser = at_least_one(whitespace()),
            Parser(Input)
    end.
305
306
%% Build a parser that behaves like zero_or_more(Characters) but
%% yields `nomatch' when nothing was matched.
at_least_one(Characters) ->
    fun
        (Input) ->
            Parser = zero_or_more(Characters),
            {_Remaining, Matched} = Result = Parser(Input),
            case string:length(Matched) of
                0 ->
                    nomatch;

                _AtLeastOne ->
                    Result
            end
    end.
319
%% Build a parser that takes the longest leading run of Characters
%% from the input. string:take/2 returns `{Leading, Trailing}'; the
%% pair is flipped so the result is `{Remaining, Matched}'.
zero_or_more(Characters) ->
    fun
        (Input) ->
            LeadingAndTrailing = string:take(Input, Characters),
            flip(LeadingAndTrailing)
    end.
325
326
%% Return Tuple with its elements in reverse order.
flip(Tuple) ->
    Indexes = lists:seq(tuple_size(Tuple), 1, -1),
    list_to_tuple([element(Index, Tuple) || Index <- Indexes]).
329
330
%% The ASCII letters: a-z followed by A-Z.
alpha() ->
    Lowercase = lists:seq($a, $z),
    Uppercase = lists:seq($A, $Z),
    lists:append(Lowercase, Uppercase).
333
334
%% The ASCII decimal digits: 0-9.
numeric() ->
    [Digit || Digit <- lists:seq($0, $9)].
337
338
%% The ASCII letters followed by the ASCII decimal digits.
alphanumeric() ->
    lists:append(alpha(), numeric()).
341
%% The ASCII hexadecimal digits: 0-9, then a-f, then A-F.
hex() ->
    lists:append([numeric(), lists:seq($a, $f), lists:seq($A, $F)]).
344
%% The grapheme clusters treated as whitespace: space, tab, line feed,
%% carriage return, and the CRLF pair.
%%
%% string:take/2 compares whole grapheme clusters, and "\r\n" is a
%% single cluster ([$\r, $\n]). It is therefore listed explicitly,
%% because listing $\r and $\n on their own does not match it.
whitespace() ->
    [$\s, $\t, $\n, $\r, [$\r, $\n]].
Line Hits Source