commiting:
[people/mcb30/busybox.git] / editors / sed.c
1 /* vi: set sw=4 ts=4: */
2 /*
3  * sed.c - very minimalist version of sed
4  *
5  * Copyright (C) 1999,2000,2001 by Lineo, inc. and Mark Whitley
6  * Copyright (C) 1999,2000,2001 by Mark Whitley <markw@codepoet.org>
7  * Copyright (C) 2002  Matt Kraai
8  * Copyright (C) 2003 by Glenn McGrath <bug1@iinet.net.au>
9  * Copyright (C) 2003,2004 by Rob Landley <rob@landley.net>
10  *
11  * This program is free software; you can redistribute it and/or modify
12  * it under the terms of the GNU General Public License as published by
13  * the Free Software Foundation; either version 2 of the License, or
14  * (at your option) any later version.
15  *
16  * This program is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19  * General Public License for more details.
20  *
21  * You should have received a copy of the GNU General Public License
22  * along with this program; if not, write to the Free Software
23  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24  *
25  */
26
27 /* Code overview.
28
29   Files are laid out to avoid unnecessary function declarations.  So for
30   example, every function add_cmd calls occurs before add_cmd in this file.
31
32   add_cmd() is called on each line of sed command text (from a file or from
33   the command line).  It calls get_address() and parse_cmd_args().  The
34   resulting sed_cmd_t structures are appended to a linked list
35   (sed_cmd_head/sed_cmd_tail).
36
37   add_input_file() adds a FILE * to the list of input files.  We need to
38   know them all ahead of time to find the last line for the $ match.
39
40   process_files() does actual sedding, reading data lines from each input FILE *
41   (which could be stdin) and applying the sed command list (sed_cmd_head) to
42   each of the resulting lines.
43
44   sed_main() is where external code calls into this, with a command line.
45 */
46
47
48 /*
49         Supported features and commands in this version of sed:
50
51          - comments ('#')
52          - address matching: num|/matchstr/[,num|/matchstr/|$]command
53          - commands: (p)rint, (d)elete, (s)ubstitute (with g & I flags)
54          - edit commands: (a)ppend, (i)nsert, (c)hange
55          - file commands: (r)ead
56          - backreferences in substitution expressions (\1, \2...\9)
57          - grouped commands: {cmd1;cmd2}
58          - transliteration (y/source-chars/dest-chars/)
59          - pattern space hold space storing / swapping (g, h, x)
60          - labels / branching (: label, b, t, T)
61
62          (Note: Specifying an address (range) to match is *optional*; commands
63          default to the whole pattern space if no specific address match was
64          requested.)
65
66         Unsupported features:
67
68          - most GNU extensions
69          - and more.
70
71         Todo:
72
73          - Create a wrapper around regex to make libc's regex conform with sed
74          - Fix bugs
75
76
77         Reference http://www.opengroup.org/onlinepubs/007904975/utilities/sed.html
78 */
79
80 #include <stdio.h>
81 #include <unistd.h>             /* for getopt() */
82 #include <regex.h>
83 #include <string.h>             /* for strdup() */
84 #include <errno.h>
85 #include <ctype.h>              /* for isspace() */
86 #include <stdlib.h>
87 #include "busybox.h"
88
/*
 * One parsed sed command: its optional address (or address range), the
 * command character, and the per-command data attached by the parser.
 * Commands are chained into a singly linked list through `next`.
 */
typedef struct sed_cmd_s {
	/* Address storage */
	regex_t *beg_match;	/* start address regex: sed -e '/match/cmd' */
	regex_t *end_match;	/* end address regex: sed -e '/match/,/end_match/cmd' */
	regex_t *sub_match;	/* pattern of 's/sub_match/string/' */
	int beg_line;		/* 'sed 1p'; 0 if no line given, -1 for last line ($) */
	int end_line;		/* 'sed 1,3p'; 0 if no end line, -1 for last line ($) */

	FILE *file;		/* stream opened for a (w) command, NULL if none */
	char *string;		/* text argument: replacement, edit text, file name,
				 * branch label or transliteration table */

	unsigned short which_match;	/* (s) which occurrence to replace, 0 for all */

	/* One-bit flags, kept adjacent so they can share storage */
	unsigned int invert:1;		/* a '!' followed the address */
	unsigned int in_match:1;	/* next line is still inside the matched range */
	unsigned int no_newline:1;	/* last line written to `file` had no '\n' */
	unsigned int sub_p:1;		/* (s) 'p' flag was given */

	/* General fields */
	char cmd;			/* the command character itself */
	struct sed_cmd_s *next;		/* following command, NULL at end of list */
} sed_cmd_t;
115
116 /* globals */
117 /* options */
118 static int be_quiet, in_place, regex_type;
119 static FILE *nonstdout;
120 static char *outname,*hold_space;
121
122 /* List of input files */
123 static int input_file_count,current_input_file;
124 static FILE **input_file_list;
125
126 static const char bad_format_in_subst[] =
127         "bad format in substitution expression";
128 static const char *const semicolon_whitespace = "; \n\r\t\v";
129
130 static regmatch_t regmatch[10];
131 static regex_t *previous_regex_ptr;
132
133 /* linked list of sed commands */
134 static sed_cmd_t sed_cmd_head;
135 static sed_cmd_t *sed_cmd_tail = &sed_cmd_head;
136
137 /* Linked list of append lines */
138 struct append_list {
139         char *string;
140         struct append_list *next;
141 };
142 static struct append_list *append_head=NULL, *append_tail=NULL;
143
#ifdef CONFIG_FEATURE_CLEAN_UP
/*
 * Release everything this file allocated: queued append lines, the whole
 * command list (with its compiled regexes, strings and output files), the
 * hold space, and the input file list together with any input streams
 * that have not been closed yet.  All list heads are reset so that no
 * dangling pointer is left behind.
 */
static void free_and_close_stuff(void)
{
	sed_cmd_t *sed_cmd = sed_cmd_head.next;

	/* Drop append lines that were queued but never flushed. */
	while (append_head) {
		struct append_list *next_append = append_head->next;

		free(append_head->string);
		free(append_head);
		append_head = next_append;
	}
	append_tail = NULL;

	while (sed_cmd) {
		sed_cmd_t *sed_cmd_next = sed_cmd->next;

		if (sed_cmd->file)
			bb_xprint_and_close_file(sed_cmd->file);

		/* regfree() releases the compiled pattern, free() the regex_t itself */
		if (sed_cmd->beg_match) {
			regfree(sed_cmd->beg_match);
			free(sed_cmd->beg_match);
		}
		if (sed_cmd->end_match) {
			regfree(sed_cmd->end_match);
			free(sed_cmd->end_match);
		}
		if (sed_cmd->sub_match) {
			regfree(sed_cmd->sub_match);
			free(sed_cmd->sub_match);
		}
		free(sed_cmd->string);
		free(sed_cmd);
		sed_cmd = sed_cmd_next;
	}
	/* The list is gone: point head and tail back at the empty state. */
	sed_cmd_head.next = NULL;
	sed_cmd_tail = &sed_cmd_head;

	/* free(NULL) is a no-op, so no guard is needed. */
	free(hold_space);
	hold_space = NULL;

	/* Close the inputs that were never read to EOF, then the list itself. */
	while (current_input_file < input_file_count)
		fclose(input_file_list[current_input_file++]);
	free(input_file_list);
	input_file_list = NULL;
	input_file_count = current_input_file = 0;
}
#endif
185
186 /* If something bad happens during -i operation, delete temp file */
187
188 static void cleanup_outname(void)
189 {
190   if(outname) unlink(outname);
191 }
192
193 /* strdup, replacing "\n" with '\n', and "\delimiter" with 'delimiter' */
194
/*
 * Copy the first `len` characters of `string` to `dest`, resolving
 * backslash escapes, and NUL-terminate the result.
 *
 *  - If `to` is non-zero, the two-character sequence backslash + `from` is
 *    replaced by the single character `to`; every other escape sequence is
 *    copied through unchanged (backslash included).
 *  - If `to` is zero, every backslash is dropped and the character after
 *    it is copied literally.
 *
 * The output never exceeds `len` characters plus the terminator, and is
 * never written ahead of the read position, so `dest` may equal `string`.
 *
 * A lone backslash in the very last position has nothing to escape: the
 * scan stops there instead of looking at string[len].  It is kept when
 * `to` is non-zero and dropped when `to` is zero.
 */
static void parse_escapes(char *dest, const char *string, int len, char from, char to)
{
	int i = 0;

	while (i < len) {
		if (string[i] == '\\') {
			if (i + 1 >= len) {
				/* Trailing backslash: never read or copy string[len]. */
				if (to) *(dest++) = '\\';
				break;
			}
			if (!to || string[i + 1] == from) {
				*(dest++) = to ? to : string[i + 1];
				i += 2;
				continue;
			}
			/* Unrelated escape: keep the backslash, the escaped
			 * character is copied just below. */
			*(dest++) = string[i++];
		}
		*(dest++) = string[i++];
	}
	*dest = 0;
}
211
/*
 * Return a newly allocated copy of the first `len` characters of `string`
 * in which each "\n" escape has been turned into a real newline.
 * The caller owns the returned buffer.
 */
static char *copy_parsing_slashn(const char *string, int len)
{
	char *copy;

	copy = xmalloc(len + 1);
	parse_escapes(copy, string, len, 'n', '\n');

	return copy;
}
219
220
221 /*
222  * index_of_next_unescaped_regexp_delim - walks left to right through a string
223  * beginning at a specified index and returns the index of the next regular
224  * expression delimiter (typically a forward slash ('/')) not preceded by
225  * a backslash ('\').
226  */
/*
 * Scan `str` from its start and return the index of the first occurrence
 * of `delimiter` that is neither backslash-escaped nor inside a bracket
 * expression ([...]).  Returns -1 if the end of the string is reached.
 *
 * Inside brackets a ']' that comes directly after the '[' (or after "[^")
 * is an ordinary member and does not close the expression; backslashes are
 * not treated as escapes there.
 */
static int index_of_next_unescaped_regexp_delim(const char delimiter,
	const char *str)
{
	int open_bracket = -1;		/* index of the pending '[', or -1 */
	int after_backslash = 0;	/* previous character was an escaping '\' */
	int pos;

	for (pos = 0; str[pos] != '\0'; pos++) {
		const char c = str[pos];

		if (open_bracket != -1) {
			int literal_close;

			if (c != ']')
				continue;
			literal_close = (open_bracket == pos - 1)
				|| (open_bracket == pos - 2 && str[pos - 1] == '^');
			if (!literal_close)
				open_bracket = -1;
			continue;
		}

		if (after_backslash) {
			after_backslash = 0;
			continue;
		}
		if (c == '\\') {
			after_backslash = 1;
			continue;
		}
		if (c == '[') {
			open_bracket = pos;
			continue;
		}
		if (c == delimiter)
			return pos;
	}

	/* ran off the end of the string without finding the delimiter */
	return -1;
}
253
254 /*
255  *  Returns the index of the third delimiter
256  */
257 static int parse_regex_delim(const char *cmdstr, char **match, char **replace)
258 {
259         const char *cmdstr_ptr = cmdstr;
260         char delimiter;
261         int idx = 0;
262
263         /* verify that the 's' or 'y' is followed by something.  That something
264          * (typically a 'slash') is now our regexp delimiter... */
265         if (*cmdstr == '\0') bb_error_msg_and_die(bad_format_in_subst);
266         delimiter = *(cmdstr_ptr++);
267
268         /* save the match string */
269         idx = index_of_next_unescaped_regexp_delim(delimiter, cmdstr_ptr);
270         if (idx == -1) {
271                 bb_error_msg_and_die(bad_format_in_subst);
272         }
273         *match = copy_parsing_slashn(cmdstr_ptr, idx);
274
275         /* save the replacement string */
276         cmdstr_ptr += idx + 1;
277         idx = index_of_next_unescaped_regexp_delim(delimiter, cmdstr_ptr);
278         if (idx == -1) {
279                 bb_error_msg_and_die(bad_format_in_subst);
280         }
281         *replace = copy_parsing_slashn(cmdstr_ptr, idx);
282
283         return ((cmdstr_ptr - cmdstr) + idx);
284 }
285
286 /*
287  * returns the index in the string just past where the address ends.
288  */
289 static int get_address(char *my_str, int *linenum, regex_t ** regex)
290 {
291         char *pos = my_str;
292
293         if (isdigit(*my_str)) {
294                 *linenum = strtol(my_str, &pos, 10);
295                 /* endstr shouldnt ever equal NULL */
296         } else if (*my_str == '$') {
297                 *linenum = -1;
298                 pos++;
299         } else if (*my_str == '/' || *my_str == '\\') {
300                 int next;
301                 char delimiter;
302                 char *temp;
303
304                 if (*my_str == '\\') delimiter = *(++pos);
305                 else delimiter = '/';
306                 next = index_of_next_unescaped_regexp_delim(delimiter, ++pos);
307                 if (next == -1)
308                         bb_error_msg_and_die("unterminated match expression");
309
310                 temp=copy_parsing_slashn(pos,next);
311                 *regex = (regex_t *) xmalloc(sizeof(regex_t));
312                 xregcomp(*regex, temp, regex_type|REG_NEWLINE);
313                 free(temp);
314                 /* Move position to next character after last delimiter */
315                 pos+=(next+1);
316         }
317         return pos - my_str;
318 }
319
320 /* Grab a filename.  Whitespace at start is skipped, then goes to EOL. */
321 static int parse_file_cmd(sed_cmd_t * sed_cmd, const char *filecmdstr, char **retval)
322 {
323         int start = 0, idx, hack=0;
324
325         /* Skip whitespace, then grab filename to end of line */
326         while (isspace(filecmdstr[start])) start++;
327         idx=start;
328         while(filecmdstr[idx] && filecmdstr[idx]!='\n') idx++;
329         /* If lines glued together, put backslash back. */
330         if(filecmdstr[idx]=='\n') hack=1;
331         if(idx==start) bb_error_msg_and_die("Empty filename");
332         *retval = bb_xstrndup(filecmdstr+start, idx-start+hack+1);
333         if(hack) *(idx+*retval)='\\';
334
335         return idx;
336 }
337
338 static int parse_subst_cmd(sed_cmd_t * const sed_cmd, char *substr)
339 {
340         int cflags = regex_type;
341         char *match;
342         int idx = 0;
343
344         /*
345          * A substitution command should look something like this:
346          *    s/match/replace/ #gIpw
347          *    ||     |        |||
348          *    mandatory       optional
349          */
350         idx = parse_regex_delim(substr, &match, &sed_cmd->string);
351
352         /* determine the number of back references in the match string */
353         /* Note: we compute this here rather than in the do_subst_command()
354          * function to save processor time, at the expense of a little more memory
355          * (4 bits) per sed_cmd */
356
357         /* process the flags */
358
359         sed_cmd->which_match=1;
360         while (substr[++idx]) {
361                 /* Parse match number */
362                 if(isdigit(substr[idx])) {
363                         if(match[0]!='^') {
364                                 /* Match 0 treated as all, multiple matches we take the last one. */
365                                 char *pos=substr+idx;
366                                 sed_cmd->which_match=(unsigned short)strtol(substr+idx,&pos,10);
367                                 idx=pos-substr;
368                         }
369                         continue;
370                 }
371                 /* Skip spaces */
372                 if(isspace(substr[idx])) continue;
373
374                 switch (substr[idx]) {
375                         /* Replace all occurrences */
376                         case 'g':
377                                 if (match[0] != '^') sed_cmd->which_match = 0;
378                                 break;
379                         /* Print pattern space */
380                         case 'p':
381                                 sed_cmd->sub_p = 1;
382                                 break;
383                         case 'w':
384                         {
385                                 char *temp;
386                                 idx+=parse_file_cmd(sed_cmd,substr+idx,&temp);
387
388                                 break;
389                         }
390                         /* Ignore case (gnu exension) */
391                         case 'I':
392                                 cflags |= REG_ICASE;
393                                 break;
394                         case ';':
395                         case '}':
396                                 goto out;
397                         default:
398                                 bb_error_msg_and_die("bad option in substitution expression");
399                 }
400         }
401 out:
402         /* compile the match string into a regex */
403         if (*match != '\0') {
404                 /* If match is empty, we use last regex used at runtime */
405                 sed_cmd->sub_match = (regex_t *) xmalloc(sizeof(regex_t));
406                 xregcomp(sed_cmd->sub_match, match, cflags);
407         }
408         free(match);
409
410         return idx;
411 }
412
413 /*
414  *  Process the commands arguments
415  */
416 static char *parse_cmd_args(sed_cmd_t *sed_cmd, char *cmdstr)
417 {
418         /* handle (s)ubstitution command */
419         if (sed_cmd->cmd == 's') cmdstr += parse_subst_cmd(sed_cmd, cmdstr);
420         /* handle edit cmds: (a)ppend, (i)nsert, and (c)hange */
421         else if (strchr("aic", sed_cmd->cmd)) {
422                 if ((sed_cmd->end_line || sed_cmd->end_match) && sed_cmd->cmd != 'c')
423                         bb_error_msg_and_die
424                                 ("only a beginning address can be specified for edit commands");
425                 for(;;) {
426                         if(*cmdstr=='\n' || *cmdstr=='\\') {
427                                 cmdstr++;
428                                 break;
429                         } else if(isspace(*cmdstr)) cmdstr++;
430                         else break;
431                 }
432                 sed_cmd->string = bb_xstrdup(cmdstr);
433                 parse_escapes(sed_cmd->string,sed_cmd->string,strlen(cmdstr),0,0);
434                 cmdstr += strlen(cmdstr);
435         /* handle file cmds: (r)ead */
436         } else if(strchr("rw", sed_cmd->cmd)) {
437                 if (sed_cmd->end_line || sed_cmd->end_match)
438                         bb_error_msg_and_die("Command only uses one address");
439                 cmdstr += parse_file_cmd(sed_cmd, cmdstr, &sed_cmd->string);
440                 if(sed_cmd->cmd=='w')
441                         sed_cmd->file=bb_xfopen(sed_cmd->string,"w");
442         /* handle branch commands */
443         } else if (strchr(":btT", sed_cmd->cmd)) {
444                 int length;
445
446                 while(isspace(*cmdstr)) cmdstr++;
447                 length = strcspn(cmdstr, semicolon_whitespace);
448                 if (length) {
449                         sed_cmd->string = strndup(cmdstr, length);
450                         cmdstr += length;
451                 }
452         }
453         /* translation command */
454         else if (sed_cmd->cmd == 'y') {
455                 char *match, *replace;
456                 int i=cmdstr[0];
457
458                 cmdstr+=parse_regex_delim(cmdstr, &match, &replace)+1;
459                 /* \n already parsed, but \delimiter needs unescaping. */
460                 parse_escapes(match,match,strlen(match),i,i);
461                 parse_escapes(replace,replace,strlen(replace),i,i);
462
463                 sed_cmd->string = xcalloc(1, (strlen(match) + 1) * 2);
464                 for (i = 0; match[i] && replace[i]; i++) {
465                         sed_cmd->string[i * 2] = match[i];
466                         sed_cmd->string[(i * 2) + 1] = replace[i];
467                 }
468                 free(match);
469                 free(replace);
470         }
471         /* if it wasnt a single-letter command that takes no arguments
472          * then it must be an invalid command.
473          */
474         else if (strchr("dDgGhHlnNpPqx={}", sed_cmd->cmd) == 0) {
475                 bb_error_msg_and_die("Unsupported command %c", sed_cmd->cmd);
476         }
477
478         /* give back whatever's left over */
479         return (cmdstr);
480 }
481
482
483 /* Parse address+command sets, skipping comment lines. */
484
485 static void add_cmd(char *cmdstr)
486 {
487         static char *add_cmd_line=NULL;
488         sed_cmd_t *sed_cmd;
489         int temp;
490
491         /* Append this line to any unfinished line from last time. */
492         if(add_cmd_line) {
493                 int lastlen=strlen(add_cmd_line);
494                 char *tmp=xmalloc(lastlen+strlen(cmdstr)+2);
495
496                 memcpy(tmp,add_cmd_line,lastlen);
497                 tmp[lastlen]='\n';
498                 strcpy(tmp+lastlen+1,cmdstr);
499                 free(add_cmd_line);
500                 cmdstr=add_cmd_line=tmp;
501         } else add_cmd_line=NULL;
502
503         /* If this line ends with backslash, request next line. */
504         temp=strlen(cmdstr);
505         if(temp && cmdstr[temp-1]=='\\') {
506                 if(!add_cmd_line) add_cmd_line=strdup(cmdstr);
507                 add_cmd_line[temp-1]=0;
508                 return;
509         }
510
511         /* Loop parsing all commands in this line. */
512         while(*cmdstr) {
513                 /* Skip leading whitespace and semicolons */
514                 cmdstr += strspn(cmdstr, semicolon_whitespace);
515
516                 /* If no more commands, exit. */
517                 if(!*cmdstr) break;
518
519                 /* if this is a comment, jump past it and keep going */
520                 if (*cmdstr == '#') {
521                         /* "#n" is the same as using -n on the command line */
522                         if (cmdstr[1] == 'n') be_quiet++;
523                         if(!(cmdstr=strpbrk(cmdstr, "\n\r"))) break;
524                         continue;
525                 }
526
527                 /* parse the command
528                  * format is: [addr][,addr][!]cmd
529                  *            |----||-----||-|
530                  *            part1 part2  part3
531                  */
532
533                 sed_cmd = xcalloc(1, sizeof(sed_cmd_t));
534
535                 /* first part (if present) is an address: either a '$', a number or a /regex/ */
536                 cmdstr += get_address(cmdstr, &sed_cmd->beg_line, &sed_cmd->beg_match);
537
538                 /* second part (if present) will begin with a comma */
539                 if (*cmdstr == ',') {
540                         int idx;
541
542                         cmdstr++;
543                         idx = get_address(cmdstr, &sed_cmd->end_line, &sed_cmd->end_match);
544                         if (!idx) bb_error_msg_and_die("get_address: no address found in string\n");
545                         cmdstr += idx;
546                 }
547
548                 /* skip whitespace before the command */
549                 while (isspace(*cmdstr)) cmdstr++;
550
551                 /* Check for inversion flag */
552                 if (*cmdstr == '!') {
553                         sed_cmd->invert = 1;
554                         cmdstr++;
555
556                         /* skip whitespace before the command */
557                         while (isspace(*cmdstr)) cmdstr++;
558                 }
559
560                 /* last part (mandatory) will be a command */
561                 if (!*cmdstr) bb_error_msg_and_die("missing command");
562                 sed_cmd->cmd = *(cmdstr++);
563                 cmdstr = parse_cmd_args(sed_cmd, cmdstr);
564
565                 /* Add the command to the command array */
566                 sed_cmd_tail->next = sed_cmd;
567                 sed_cmd_tail = sed_cmd_tail->next;
568         }
569
570         /* If we glued multiple lines together, free the memory. */
571         if(add_cmd_line) {
572                 free(add_cmd_line);
573                 add_cmd_line=NULL;
574         }
575 }
576
577 /* Append to a string, reallocating memory as necessary. */
578
/* Growable output buffer that pipe_putc() appends to. */
static struct pipeline {
	char *buf;	/* storage for the string being built */
	int idx;	/* number of bytes used so far */
	int len;	/* number of bytes allocated */
} pipeline;

/* The buffer grows by this many bytes whenever it is full. */
#define PIPE_GROW 64

/* Append one character to the pipeline buffer, enlarging it when full. */
static void pipe_putc(char c)
{
	if (pipeline.idx == pipeline.len) {
		int bigger = pipeline.len + PIPE_GROW;

		pipeline.buf = xrealloc(pipeline.buf, bigger);
		pipeline.len = bigger;
	}
	pipeline.buf[pipeline.idx] = c;
	pipeline.idx++;
}
595
596 static void do_subst_w_backrefs(const char *line, const char *replace)
597 {
598         int i,j;
599
600         /* go through the replacement string */
601         for (i = 0; replace[i]; i++) {
602                 /* if we find a backreference (\1, \2, etc.) print the backref'ed * text */
603                 if (replace[i] == '\\' && replace[i+1]>'0' && replace[i+1]<='9') {
604                         int backref=replace[++i]-'0';
605
606                         /* print out the text held in regmatch[backref] */
607                         if(regmatch[backref].rm_so != -1)
608                                 for (j = regmatch[backref].rm_so; j < regmatch[backref].rm_eo; j++)
609                                         pipe_putc(line[j]);
610                 }
611
612                 /* if we find a backslash escaped character, print the character */
613                 else if (replace[i] == '\\') pipe_putc(replace[++i]);
614
615                 /* if we find an unescaped '&' print out the whole matched text. */
616                 else if (replace[i] == '&')
617                         for (j = regmatch[0].rm_so; j < regmatch[0].rm_eo; j++)
618                                 pipe_putc(line[j]);
619                 /* Otherwise just output the character. */
620                 else pipe_putc(replace[i]);
621         }
622 }
623
624 static int do_subst_command(sed_cmd_t * sed_cmd, char **line)
625 {
626         char *oldline = *line;
627         int altered = 0;
628         int match_count=0;
629         regex_t *current_regex;
630
631         /* Handle empty regex. */
632         if (sed_cmd->sub_match == NULL) {
633                 current_regex = previous_regex_ptr;
634                 if(!current_regex)
635                         bb_error_msg_and_die("No previous regexp.");
636         } else previous_regex_ptr = current_regex = sed_cmd->sub_match;
637
638         /* Find the first match */
639         if(REG_NOMATCH==regexec(current_regex, oldline, 10, regmatch, 0))
640                 return 0;
641
642         /* Initialize temporary output buffer. */
643         pipeline.buf=xmalloc(PIPE_GROW);
644         pipeline.len=PIPE_GROW;
645         pipeline.idx=0;
646
647         /* Now loop through, substituting for matches */
648         do {
649                 int i;
650
651                 /* Work around bug in glibc regexec, demonstrated by:
652                    echo " a.b" | busybox sed 's [^ .]* x g'
653                    The match_count check is so not to break
654                    echo "hi" | busybox sed 's/^/!/g' */
655                 if(!regmatch[0].rm_so && !regmatch[0].rm_eo && match_count) {
656                         pipe_putc(*(oldline++));
657                         continue;
658                 }
659
660                 match_count++;
661
662                 /* If we aren't interested in this match, output old line to
663                    end of match and continue */
664                 if(sed_cmd->which_match && sed_cmd->which_match!=match_count) {
665                         for(i=0;i<regmatch[0].rm_eo;i++)
666                                 pipe_putc(oldline[i]);
667                         continue;
668                 }
669
670                 /* print everything before the match */
671                 for (i = 0; i < regmatch[0].rm_so; i++) pipe_putc(oldline[i]);
672
673                 /* then print the substitution string */
674                 do_subst_w_backrefs(oldline, sed_cmd->string);
675
676                 /* advance past the match */
677                 oldline += regmatch[0].rm_eo;
678                 /* flag that something has changed */
679                 altered++;
680
681                 /* if we're not doing this globally, get out now */
682                 if (sed_cmd->which_match) break;
683         } while (*oldline && (regexec(current_regex, oldline, 10, regmatch, 0) != REG_NOMATCH));
684
685         /* Copy rest of string into output pipeline */
686
687         while(*oldline) pipe_putc(*(oldline++));
688         pipe_putc(0);
689
690         free(*line);
691         *line = pipeline.buf;
692         return altered;
693 }
694
695 /* Set command pointer to point to this label.  (Does not handle null label.) */
696 static sed_cmd_t *branch_to(const char *label)
697 {
698         sed_cmd_t *sed_cmd;
699
700         for (sed_cmd = sed_cmd_head.next; sed_cmd; sed_cmd = sed_cmd->next) {
701                 if ((sed_cmd->cmd == ':') && (sed_cmd->string) && (strcmp(sed_cmd->string, label) == 0)) {
702                         return (sed_cmd);
703                 }
704         }
705         bb_error_msg_and_die("Can't find label for jump to `%s'", label);
706 }
707
708 /* Append copy of string to append buffer */
709 static void append(char *s)
710 {
711         struct append_list *temp=calloc(1,sizeof(struct append_list));
712
713         if(append_head)
714                 append_tail=(append_tail->next=temp);
715         else append_head=append_tail=temp;
716         temp->string=strdup(s);
717 }
718
719 static void flush_append(void)
720 {
721         /* Output appended lines. */
722         while(append_head) {
723                 fprintf(nonstdout,"%s\n",append_head->string);
724                 append_tail=append_head->next;
725                 free(append_head->string);
726                 free(append_head);
727                 append_head=append_tail;
728         }
729         append_head=append_tail=NULL;
730 }
731
732 static void add_input_file(FILE *file)
733 {
734         input_file_list=xrealloc(input_file_list,(input_file_count+1)*sizeof(FILE *));
735         input_file_list[input_file_count++]=file;
736 }
737
738 /* Get next line of input from input_file_list, flushing append buffer and
739  * noting if we ran out of files without a newline on the last line we read.
740  */
741 static char *get_next_line(int *no_newline)
742 {
743         char *temp=NULL;
744         int len;
745
746         flush_append();
747         while(current_input_file<input_file_count) {
748                 temp=bb_get_line_from_file(input_file_list[current_input_file]);
749                 if(temp) {
750                         len=strlen(temp);
751                         *no_newline=!(len && temp[len-1]=='\n');
752                         if(!*no_newline) temp[len-1]=0;
753                         break;
754                 } else fclose(input_file_list[current_input_file++]);
755         }
756
757         return temp;
758 }
759
760 /* Output line of text.  missing_newline means the last line output did not
761    end with a newline.  no_newline means this line does not end with a
762    newline. */
763
/*
 * Write `s` to `file`.  A newline is written first when `missing_newline`
 * is set, and a newline is written after `s` unless `no_newline` is set.
 * Returns `no_newline`, which the caller stores as the next
 * `missing_newline`.  On a stream error, prints a message to stderr and
 * exits with status 4.
 */
static int puts_maybe_newline(char *s, FILE *file, int missing_newline, int no_newline)
{
	if (missing_newline)
		fputc('\n', file);
	fputs(s, file);
	if (no_newline == 0)
		fputc('\n', file);

	if (!ferror(file))
		return no_newline;

	fprintf(stderr, "Write failed.\n");
	exit(4);  /* It's what gnu sed exits with... */
}

/* Print to nonstdout, carrying the newline state in the caller's
 * local variable `missing_newline`. */
#define sed_puts(s,n) missing_newline=puts_maybe_newline(s,nonstdout,missing_newline,n)
779
780 static void process_files(void)
781 {
782         char *pattern_space, *next_line;
783         int linenum = 0, missing_newline=0;
784         int no_newline,next_no_newline=0;
785
786         next_line = get_next_line(&next_no_newline);
787
788         /* go through every line in each file */
789         for(;;) {
790                 sed_cmd_t *sed_cmd;
791                 int substituted=0;
792
793                 /* Advance to next line.  Stop if out of lines. */
794                 if(!(pattern_space=next_line)) break;
795                 no_newline=next_no_newline;
796
797                 /* Read one line in advance so we can act on the last line, the '$' address */
798                 next_line = get_next_line(&next_no_newline);
799                 linenum++;
800 restart:
801                 /* for every line, go through all the commands */
802                 for (sed_cmd = sed_cmd_head.next; sed_cmd; sed_cmd = sed_cmd->next) {
803                         int old_matched, matched;
804
805                         old_matched = sed_cmd->in_match;
806
807                         /* Determine if this command matches this line: */
808
809                         /* Are we continuing a previous multi-line match? */
810
811                         sed_cmd->in_match = sed_cmd->in_match
812
813                         /* Or is no range necessary? */
814                                 || (!sed_cmd->beg_line && !sed_cmd->end_line
815                                         && !sed_cmd->beg_match && !sed_cmd->end_match)
816
817                         /* Or did we match the start of a numerical range? */
818                                 || (sed_cmd->beg_line > 0 && (sed_cmd->beg_line == linenum))
819
820                         /* Or does this line match our begin address regex? */
821                                 || (sed_cmd->beg_match &&
822                                     !regexec(sed_cmd->beg_match, pattern_space, 0, NULL, 0))
823
824                         /* Or did we match last line of input? */
825                                 || (sed_cmd->beg_line == -1 && next_line == NULL);
826
827                         /* Snapshot the value */
828
829                         matched = sed_cmd->in_match;
830
831                         /* Is this line the end of the current match? */
832
833                         if(matched) {
834                                 sed_cmd->in_match = !(
835                                         /* has the ending line come, or is this a single address command? */
836                                         (sed_cmd->end_line ?
837                                                 sed_cmd->end_line==-1 ?
838                                                         !next_line
839                                                         : sed_cmd->end_line<=linenum
840                                                 : !sed_cmd->end_match)
841                                         /* or does this line matches our last address regex */
842                                         || (sed_cmd->end_match && old_matched && (regexec(sed_cmd->end_match, pattern_space, 0, NULL, 0) == 0))
843                                 );
844                         }
845
846                         /* Skip blocks of commands we didn't match. */
847                         if (sed_cmd->cmd == '{') {
848                                 if(sed_cmd->invert ? matched : !matched)
849                                         while(sed_cmd && sed_cmd->cmd!='}') sed_cmd=sed_cmd->next;
850                                 if(!sed_cmd) bb_error_msg_and_die("Unterminated {");
851                                 continue;
852                         }
853
854                         /* Okay, so did this line match? */
855                         if (sed_cmd->invert ? !matched : matched) {
856                                 /* Update last used regex in case a blank substitute BRE is found */
857                                 if (sed_cmd->beg_match) {
858                                         previous_regex_ptr = sed_cmd->beg_match;
859                                 }
860
861                                 /* actual sedding */
862                                 switch (sed_cmd->cmd) {
863
864                                         /* Print line number */
865                                         case '=':
866                                                 fprintf(nonstdout,"%d\n", linenum);
867                                                 break;
868
869                                         /* Write the current pattern space up to the first newline */
870                                         case 'P':
871                                         {
872                                                 char *tmp = strchr(pattern_space, '\n');
873
874                                                 if (tmp) {
875                                                         *tmp = '\0';
876                                                         sed_puts(pattern_space,1);
877                                                         *tmp = '\n';
878                                                         break;
879                                                 }
880                                                 /* Fall Through */
881                                         }
882
883                                         /* Write the current pattern space to output */
884                                         case 'p':
885                                                 sed_puts(pattern_space,no_newline);
886                                                 break;
887                                         /* Delete up through first newline */
888                                         case 'D':
889                                         {
890                                                 char *tmp = strchr(pattern_space,'\n');
891
892                                                 if(tmp) {
893                                                         tmp=bb_xstrdup(tmp+1);
894                                                         free(pattern_space);
895                                                         pattern_space=tmp;
896                                                         goto restart;
897                                                 }
898                                         }
899                                         /* discard this line. */
900                                         case 'd':
901                                                 goto discard_line;
902
903                                         /* Substitute with regex */
904                                         case 's':
905                                                 if(do_subst_command(sed_cmd, &pattern_space)) {
906                                                         substituted|=1;
907
908                                                         /* handle p option */
909                                                         if(sed_cmd->sub_p)
910                                                                 sed_puts(pattern_space,no_newline);
911                                                         /* handle w option */
912                                                         if(sed_cmd->file)
913                                                                 sed_cmd->no_newline=puts_maybe_newline(pattern_space, sed_cmd->file, sed_cmd->no_newline, no_newline);
914
915                                                 }
916                                                 break;
917
918                                         /* Append line to linked list to be printed later */
919                                         case 'a':
920                                         {
921                                                 append(sed_cmd->string);
922                                                 break;
923                                         }
924
925                                         /* Insert text before this line */
926                                         case 'i':
927                                                 sed_puts(sed_cmd->string,1);
928                                                 break;
929
930                                         /* Cut and paste text (replace) */
931                                         case 'c':
932                                                 /* Only triggers on last line of a matching range. */
933                                                 if (!sed_cmd->in_match) sed_puts(sed_cmd->string,0);
934                                                 goto discard_line;
935
936                                         /* Read file, append contents to output */
937                                         case 'r':
938                                         {
939                                                 FILE *outfile;
940
941                                                 outfile = fopen(sed_cmd->string, "r");
942                                                 if (outfile) {
943                                                         char *line;
944
945                                                         while ((line = bb_get_chomped_line_from_file(outfile))
946                                                                         != NULL)
947                                                                 append(line);
948                                                         bb_xprint_and_close_file(outfile);
949                                                 }
950
951                                                 break;
952                                         }
953
954                                         /* Write pattern space to file. */
955                                         case 'w':
956                                                 sed_cmd->no_newline=puts_maybe_newline(pattern_space,sed_cmd->file, sed_cmd->no_newline,no_newline);
957                                                 break;
958
959                                         /* Read next line from input */
960                                         case 'n':
961                                                 if (!be_quiet)
962                                                         sed_puts(pattern_space,no_newline);
963                                                 if (next_line) {
964                                                         free(pattern_space);
965                                                         pattern_space = next_line;
966                                                         no_newline=next_no_newline;
967                                                         next_line = get_next_line(&next_no_newline);
968                                                         linenum++;
969                                                         break;
970                                                 }
971                                                 /* fall through */
972
973                                         /* Quit.  End of script, end of input. */
974                                         case 'q':
975                                                 /* Exit the outer while loop */
976                                                 free(next_line);
977                                                 next_line = NULL;
978                                                 goto discard_commands;
979
980                                         /* Append the next line to the current line */
981                                         case 'N':
982                                         {
983                                                 /* If no next line, jump to end of script and exit. */
984                                                 if (next_line == NULL) {
985                                                         /* Jump to end of script and exit */
986                                                         free(next_line);
987                                                         next_line = NULL;
988                                                         goto discard_line;
989                                                 /* append next_line, read new next_line. */
990                                                 } else {
991                                                         int len=strlen(pattern_space);
992
993                                                         pattern_space = realloc(pattern_space, len + strlen(next_line) + 2);
994                                                         pattern_space[len]='\n';
995                                                         strcpy(pattern_space+len+1, next_line);
996                                                         no_newline=next_no_newline;
997                                                         next_line = get_next_line(&next_no_newline);
998                                                         linenum++;
999                                                 }
1000                                                 break;
1001                                         }
1002
1003                                         /* Test/branch if substitution occurred */
1004                                         case 't':
1005                                                 if(!substituted) break;
1006                                                 substituted=0;
1007                                                 /* Fall through */
1008                                         /* Test/branch if substitution didn't occur */
1009                                         case 'T':
1010                                                 if (substituted) break;
1011                                                 /* Fall through */
1012                                         /* Branch to label */
1013                                         case 'b':
1014                                                 if (!sed_cmd->string) goto discard_commands;
1015                                                 else sed_cmd = branch_to(sed_cmd->string);
1016                                                 break;
1017                                         /* Transliterate characters */
1018                                         case 'y':
1019                                         {
1020                                                 int i;
1021
1022                                                 for (i = 0; pattern_space[i]; i++) {
1023                                                         int j;
1024
1025                                                         for (j = 0; sed_cmd->string[j]; j += 2) {
1026                                                                 if (pattern_space[i] == sed_cmd->string[j]) {
1027                                                                         pattern_space[i] = sed_cmd->string[j + 1];
1028                                                                 }
1029                                                         }
1030                                                 }
1031
1032                                                 break;
1033                                         }
1034                                         case 'g':       /* Replace pattern space with hold space */
1035                                                 free(pattern_space);
1036                                                 pattern_space = strdup(hold_space ? hold_space : "");
1037                                                 break;
1038                                         case 'G':       /* Append newline and hold space to pattern space */
1039                                         {
1040                                                 int pattern_space_size = 2;
1041                                                 int hold_space_size = 0;
1042
1043                                                 if (pattern_space)
1044                                                         pattern_space_size += strlen(pattern_space);
1045                                                 if (hold_space) hold_space_size = strlen(hold_space);
1046                                                 pattern_space = xrealloc(pattern_space, pattern_space_size + hold_space_size);
1047                                                 if (pattern_space_size == 2) pattern_space[0]=0;
1048                                                 strcat(pattern_space, "\n");
1049                                                 if (hold_space) strcat(pattern_space, hold_space);
1050                                                 no_newline=0;
1051
1052                                                 break;
1053                                         }
1054                                         case 'h':       /* Replace hold space with pattern space */
1055                                                 free(hold_space);
1056                                                 hold_space = strdup(pattern_space);
1057                                                 break;
1058                                         case 'H':       /* Append newline and pattern space to hold space */
1059                                         {
1060                                                 int hold_space_size = 2;
1061                                                 int pattern_space_size = 0;
1062
1063                                                 if (hold_space) hold_space_size += strlen(hold_space);
1064                                                 if (pattern_space)
1065                                                         pattern_space_size = strlen(pattern_space);
1066                                                 hold_space = xrealloc(hold_space,
1067                                                                                         hold_space_size + pattern_space_size);
1068
1069                                                 if (hold_space_size == 2) hold_space[0]=0;
1070                                                 strcat(hold_space, "\n");
1071                                                 if (pattern_space) strcat(hold_space, pattern_space);
1072
1073                                                 break;
1074                                         }
1075                                         case 'x': /* Exchange hold and pattern space */
1076                                         {
1077                                                 char *tmp = pattern_space;
1078                                                 pattern_space = hold_space;
1079                                                 no_newline=0;
1080                                                 hold_space = tmp;
1081                                                 break;
1082                                         }
1083                                 }
1084                         }
1085                 }
1086
1087                 /*
1088                  * exit point from sedding...
1089                  */
1090 discard_commands:
1091                 /* we will print the line unless we were told to be quiet ('-n')
1092                    or if the line was suppressed (ala 'd'elete) */
1093                 if (!be_quiet) sed_puts(pattern_space,no_newline);
1094
1095                 /* Delete and such jump here. */
1096 discard_line:
1097                 flush_append();
1098                 free(pattern_space);
1099         }
1100 }
1101
/* A single command-line script argument may contain embedded newlines.
   Each newline-separated piece is handed to add_cmd() as its own command
   line.  The argument itself is not modified; a private copy is split. */

static void add_cmd_block(char *cmdstr)
{
	char *copy = bb_xstrdup(cmdstr);
	char *piece = copy;

	for (;;) {
		int len = strcspn(piece, "\n");
		int is_last = (piece[len] == '\0');

		/* Cut the copy at the newline so add_cmd() sees only this piece. */
		if (!is_last)
			piece[len] = '\0';
		add_cmd(piece);
		if (is_last)
			break;
		piece += len + 1;
	}

	free(copy);
}
1119
1120 extern int sed_main(int argc, char **argv)
1121 {
1122         int status = EXIT_SUCCESS, opt, getpat = 1;
1123
1124 #ifdef CONFIG_FEATURE_CLEAN_UP
1125         /* destroy command strings on exit */
1126         if (atexit(free_and_close_stuff) == -1)
1127                 bb_perror_msg_and_die("atexit");
1128 #endif
1129
1130 #define LIE_TO_AUTOCONF
1131 #ifdef LIE_TO_AUTOCONF
1132         if(argc==2 && !strcmp(argv[1],"--version")) {
1133                 printf("This is not GNU sed version 4.0\n");
1134                 exit(0);
1135         }
1136 #endif
1137
1138         /* do normal option parsing */
1139         while ((opt = getopt(argc, argv, "irne:f:")) > 0) {
1140                 switch (opt) {
1141                 case 'i':
1142                         in_place++;
1143                         atexit(cleanup_outname);
1144                         break;
1145                 case 'r':
1146                         regex_type|=REG_EXTENDED;
1147                         break;
1148                 case 'n':
1149                         be_quiet++;
1150                         break;
1151                 case 'e':
1152                         add_cmd_block(optarg);
1153                         getpat=0;
1154                         break;
1155                 case 'f':
1156                 {
1157                         FILE *cmdfile;
1158                         char *line;
1159
1160                         cmdfile = bb_xfopen(optarg, "r");
1161
1162                         while ((line = bb_get_chomped_line_from_file(cmdfile))
1163                                  != NULL) {
1164                                 add_cmd(line);
1165                                 getpat=0;
1166                                 free(line);
1167                         }
1168                         bb_xprint_and_close_file(cmdfile);
1169
1170                         break;
1171                 }
1172                 default:
1173                         bb_show_usage();
1174                 }
1175         }
1176
1177         /* if we didn't get a pattern from -e or -f, use argv[optind] */
1178         if(getpat) {
1179                 if (argv[optind] == NULL)
1180                         bb_show_usage();
1181                 else
1182                         add_cmd_block(argv[optind++]);
1183         }
1184         /* Flush any unfinished commands. */
1185         add_cmd("");
1186
1187         /* By default, we write to stdout */
1188         nonstdout=stdout;
1189
1190         /* argv[(optind)..(argc-1)] should be names of file to process. If no
1191          * files were specified or '-' was specified, take input from stdin.
1192          * Otherwise, we process all the files specified. */
1193         if (argv[optind] == NULL) {
1194                 if(in_place) bb_error_msg_and_die("Filename required for -i");
1195                 add_input_file(stdin);
1196                 process_files();
1197         } else {
1198                 int i;
1199                 FILE *file;
1200
1201                 for (i = optind; i < argc; i++) {
1202                         if(!strcmp(argv[i], "-") && !in_place) {
1203                                 add_input_file(stdin);
1204                                 process_files();
1205                         } else {
1206                                 file = bb_wfopen(argv[i], "r");
1207                                 if (file) {
1208                                         if(in_place) {
1209                                                 struct stat statbuf;
1210                                                 int nonstdoutfd;
1211                                                 
1212                                                 outname=bb_xstrndup(argv[i],strlen(argv[i])+6);
1213                                                 strcat(outname,"XXXXXX");
1214                                                 if(-1==(nonstdoutfd=mkstemp(outname)))
1215                                                         bb_error_msg_and_die("no temp file");
1216                                                 nonstdout=fdopen(nonstdoutfd,"w");
1217                                                 /* Set permissions of output file */
1218                                                 fstat(fileno(file),&statbuf);
1219                                                 fchmod(nonstdoutfd,statbuf.st_mode);
1220                                                 add_input_file(file);
1221                                                 process_files();
1222                                                 fclose(nonstdout);
1223                                                 nonstdout=stdout;
1224                                                 unlink(argv[i]);
1225                                                 rename(outname,argv[i]);
1226                                                 free(outname);
1227                                                 outname=0;
1228                                         } else add_input_file(file);
1229                                 } else {
1230                                         status = EXIT_FAILURE;
1231                                 }
1232                         }
1233                 }
1234                 if(input_file_count>current_input_file) process_files();
1235         }
1236
1237         return status;
1238 }