Home
       ical: improve and simplify line parsing - ics2txt - convert icalendar .ics file to plain text
  HTML git clone git://bitreich.org/ics2txt git://enlrupgkhuxnvlhsf6lc3fziv5h2hhfrinws65d7roiv6bfj7d652fid.onion/ics2txt
   DIR Log
   DIR Files
   DIR Refs
   DIR Tags
   DIR README
       ---
   DIR commit 92a5d0067b717710eb607c0465a8a60d4b4c8655
   DIR parent d10df705caaa2ca4e3229af6d5ec76e0f0d301da
  HTML Author: Josuah Demangeon <me@josuah.net>
       Date:   Wed, 16 Jun 2021 23:13:22 +0200
       
       ical: improve and simplify line parsing
       
       Diffstat:
         M Makefile                            |       2 +-
         M bin/ics2tsv                         |       2 +-
         M ical.c                              |     152 +++++++++++++++++++------------
         M ical.h                              |      25 ++++++++++++++++++-------
         M ics2tree.c                          |       8 ++++----
         A ics2tsv.c                           |      99 +++++++++++++++++++++++++++++++
         M tcal.5                              |      11 +++++------
       
       7 files changed, 220 insertions(+), 79 deletions(-)
       ---
   DIR diff --git a/Makefile b/Makefile
       @@ -10,7 +10,7 @@ MANPREFIX = ${PREFIX}/man
        SRC = ical.c base64.c util.c
        HDR = ical.h base64.h util.h
        OBJ = ${SRC:.c=.o}
       -BIN = ics2tree
       +BIN = ics2tree ics2tsv
        MAN1 = ics2txt.1
        MAN5 = tcal.5
        
   DIR diff --git a/bin/ics2tsv b/bin/ics2tsv
       @@ -126,7 +126,7 @@ sub("^ ", "") {
                        next
        
                if (content["name"] == "TZID") {
       -                ical_set_tzid(content["value"])
       +                ical_set_tz(content["value"])
                } else if (DT[content["name"]]) {
                        vevent[content["name"]] = ical_to_epoch(content, params)
                } else {
   DIR diff --git a/ical.c b/ical.c
       @@ -11,14 +11,20 @@
        #include "util.h"
        #include "base64.h"
        
       -#define Xstrlcpy(d, s) (strlcpy((d), (s), sizeof(d)) < sizeof(d))
       -#define Xstrlcat(d, s) (strlcat((d), (s), sizeof(d)) < sizeof(d))
       -
       -/* helpers: common utilities to call within the p->fn() callbacks as
       - * well as in the code below */
       +char *ical_block_name[ICAL_BLOCK_OTHER + 1] = {
       +        [ICAL_BLOCK_VEVENT]        = "VEVENT",
       +        [ICAL_BLOCK_VTODO]        = "VTODO",
       +        [ICAL_BLOCK_VJOURNAL]        = "VJOURNAL",
       +        [ICAL_BLOCK_VFREEBUSY]        = "VFREEBUSY",
       +        [ICAL_BLOCK_VALARM]        = "VALARM",
       +        [ICAL_BLOCK_OTHER]        = NULL,
       +};
       +
        +/* value helpers: common utilities to call within the p->fn()
       + * callbacks as well as in the code below */
        
        int
       -ical_error(IcalParser *p, char const *msg)
       +ical_err(IcalParser *p, char *msg)
        {
                p->errmsg = msg;
                return -1;
       @@ -36,7 +42,7 @@ ical_get_value(IcalParser *p, char *s, size_t *len)
                *len = strlen(s);
                if (p->base64)
                        if (base64_decode(s, len, s, len) < 0)
       -                        return ical_error(p, "invalid base64 data");
       +                        return ical_err(p, "invalid base64 data");
                return 0;
        }
        
       @@ -55,7 +61,7 @@ ical_get_time(IcalParser *p, char *s, time_t *t)
                /* date */
                for (int i = 0; i < 8; i++)
                        if (!isdigit(s[i]))
       -                        return ical_error(p, "invalid date format");
       +                        return ical_err(p, "invalid date format");
                tm.tm_year = N(0,1000) + N(1,100) + N(2,10) + N(3,1) - 1900;
                tm.tm_mon = N(4,10) + N(5,1) - 1;
                tm.tm_mday = N(6,10) + N(7,1);
       @@ -66,7 +72,7 @@ ical_get_time(IcalParser *p, char *s, time_t *t)
                        s++;
                        for (int i = 0; i < 6; i++)
                                if (!isdigit(s[i]))
       -                                return ical_error(p, "invalid time format");
       +                                return ical_err(p, "invalid time format");
                        tm.tm_hour = N(0,10) + N(1,1);
                        tm.tm_min = N(2,10) + N(3,1);
                        tm.tm_sec = N(4,10) + N(5,1);
       @@ -74,8 +80,10 @@ ical_get_time(IcalParser *p, char *s, time_t *t)
                                tzid = "UTC";
                }
        
       +#undef N
       +
                if ((*t = tztime(&tm, tzid)) == (time_t)-1)
       -                return ical_error(p, "could not convert time");
       +                return ical_err(p, "could not convert time");
        
                return 0;
        }
       @@ -84,21 +92,21 @@ ical_get_time(IcalParser *p, char *s, time_t *t)
         * processing time zones definition or prepare base64 decoding, and
         * permit to only have parsing code left to parsing functions */
        
       -int
       +static int
        hook_entry_name(IcalParser *p, char *name)
        {
                (void)p; (void)name;
                return 0;
        }
        
       -int
       +static int
        hook_param_name(IcalParser *p, char *name)
        {
                (void)p; (void)name;
                return 0;
        }
        
       -int
       +static int
        hook_param_value(IcalParser *p, char *name, char *value)
        {
                if (strcasecmp(name, "ENCODING") == 0)
       @@ -110,38 +118,53 @@ hook_param_value(IcalParser *p, char *name, char *value)
                return 0;
        }
        
       -int
       +static int
        hook_entry_value(IcalParser *p, char *name, char *value)
        {
                if (strcasecmp(name, "TZID") == 0)
       -                if (!Xstrlcpy(p->current->tzid, value))
       -                        return ical_error(p, "TZID: name too large");
       +                if (strlcpy(p->current->tzid, value, sizeof p->current->tzid) >=
       +                    sizeof p->current->tzid)
       +                        return ical_err(p, "TZID: name too large");
        
                p->tzid = NULL;
        
                return 0;
        }
        
       -int
       +static int
        hook_block_begin(IcalParser *p, char *name)
        {
                p->current++;
                memset(p->current, 0, sizeof(*p->current));
                if (ical_get_level(p) >= ICAL_STACK_SIZE)
       -                return ical_error(p, "max recurion reached");
       -        if (!Xstrlcpy(p->current->name, name))
       -                return ical_error(p, "value too large");
       +                return ical_err(p, "max recurion reached");
       +        if (strlcpy(p->current->name, name, sizeof p->current->name) >=
       +            sizeof p->current->name)
       +                return ical_err(p, "value too large");
       +
       +        for (int i = 0; ical_block_name[i] != NULL; i++) {
       +                if (strcasecmp(ical_block_name[i], name) == 0) {
       +                        if (p->block != ICAL_BLOCK_OTHER)
       +                                return ical_err(p, "BEGIN:V* in BEGIN:V*");
       +                        p->block = i;
       +                }
       +        }
       +
                return 0;
        }
        
       -int
       +static int
        hook_block_end(IcalParser *p, char *name)
        {
                if (strcasecmp(p->current->name, name) != 0)
       -                return ical_error(p, "mismatching BEGIN: and END:");
       +                return ical_err(p, "mismatching BEGIN: and END:");
                p->current--;
                if (p->current < p->stack)
       -                return ical_error(p, "more END: than BEGIN:");
       +                return ical_err(p, "more END: than BEGIN:");
       +
       +        if (ical_block_name[p->block] != NULL &&
       +            strcasecmp(ical_block_name[p->block], name) == 0)
       +                p->block = ICAL_BLOCK_OTHER;
                return 0;
        }
        
       @@ -162,7 +185,7 @@ ical_parse_value(IcalParser *p, char **sp, char *name)
                        while (!iscntrl(*s) && *s != '"')
                                s++;
                        if (*s != '"')
       -                        return ical_error(p, "missing '\"'");
       +                        return ical_err(p, "missing '\"'");
                        *s++ = '\0';
                } else {
                        val = s;
       @@ -188,7 +211,7 @@ ical_parse_param(IcalParser *p, char **sp)
                do {
                        for (name = s; isalnum(*s) || *s == '-'; s++);
                        if (s == name || (*s != '='))
       -                        return ical_error(p, "invalid parameter name");
       +                        return ical_err(p, "invalid parameter name");
                        *s++ = '\0';
                        if ((err = hook_param_name(p, name)) != 0 ||
                            (err = CALL(p, fn_param_name, name)) != 0)
       @@ -208,9 +231,12 @@ ical_parse_contentline(IcalParser *p, char *s)
                int err;
                char c, *name, *sep;
        
       +        if (*s == '\0')
       +                return 0;
       +
                for (name = s; isalnum(*s) || *s == '-'; s++);
                if (s == name || (*s != ';' && *s != ':'))
       -                return ical_error(p, "invalid entry name");
       +                return ical_err(p, "invalid property name");
                c = *s, *s = '\0';
                if (strcasecmp(name, "BEGIN") != 0 && strcasecmp(name, "END") != 0)
                        if ((err = hook_entry_name(p, name)) != 0 ||
       @@ -227,7 +253,7 @@ ical_parse_contentline(IcalParser *p, char *s)
                }
        
                if (*s != ':')
       -                return ical_error(p, "expected ':' delimiter");
       +                return ical_err(p, "expected ':' delimiter");
                s++;
        
                *sep = '\0';
       @@ -247,47 +273,53 @@ ical_parse_contentline(IcalParser *p, char *s)
                return 0;
        }
        
       +static ssize_t
       +ical_getline(char **contentline, char **line, size_t *sz, FILE *fp)
       +{
       +        size_t num = 0;
       +        int c;
       +
       +        if ((*contentline = realloc(*contentline, 1)) == NULL)
       +                return -1;
       +        **contentline = '\0';
       +
       +        do {
       +                if (getline(line, sz, fp) <= 0)
       +                        goto end;
       +                num++;
       +                strchomp(*line);
       +
       +                if (strappend(contentline, *line) < 0)
       +                        return -1;
       +                if ((c = fgetc(fp)) == EOF)
       +                        goto end;
       +        } while (c == ' ');
       +        ungetc(c, fp);
       +        assert(!ferror(fp));
       +end:
       +        return ferror(fp) ? -1 : num;
       +}
       +
        int
        ical_parse(IcalParser *p, FILE *fp)
        {
       -        char *ln = NULL, *contentline = NULL;
       +        char *line = NULL, *contentline = NULL;
                size_t sz = 0;
       -        int err, c;
       +        ssize_t l;
       +        int err;
        
                p->current = p->stack;
       +        p->linenum = 0;
       +        p->block = ICAL_BLOCK_OTHER;
        
       -        while (!feof(fp)) {
       -                if ((contentline = realloc(contentline, 1)) == NULL)
       -                        return ical_error(p, strerror(errno));
       -                *contentline = '\0';
       -
       -                do {
       -                        do {
       -                                p->linenum++;
       -                                if (getline(&ln, &sz, fp) <= 0) {
       -                                        if (ferror(fp))
       -                                                return ical_error(p, strerror(errno));
       -                                        goto end;
       -                                }
       -                                strchomp(ln);
       -                        } while (*ln == '\0');
       -
       -                        if (strappend(&contentline, ln) < 0)
       -                                return ical_error(p, strerror(errno));
       -                        if ((c = fgetc(fp)) == EOF) {
       -                                if (ferror(fp))
       -                                        return ical_error(p, strerror(errno));
       -                                goto done;
       -                        }
       -                } while (c == ' ');
       -                ungetc(c, fp);
       -done:
       -                assert(!ferror(fp));
       -                if ((err = ical_parse_contentline(p, contentline)) != 0)
       +        do {
       +                if ((l = ical_getline(&contentline, &line, &sz, fp)) < 0) {
       +                        err = ical_err(p, "readling line");
                                break;
       -        }
       -end:
       +                }
       +                p->linenum += l;
       +        } while        (l > 0 && (err = ical_parse_contentline(p, contentline)) == 0);
                free(contentline);
       -        free(ln);
       +        free(line);
                return err;
        }
   DIR diff --git a/ical.h b/ical.h
       @@ -9,6 +9,15 @@
        typedef struct IcalParser IcalParser;
        typedef struct IcalStack IcalStack;
        
       +typedef enum {
       +        ICAL_BLOCK_VEVENT,
       +        ICAL_BLOCK_VTODO,
       +        ICAL_BLOCK_VJOURNAL,
       +        ICAL_BLOCK_VFREEBUSY,
       +        ICAL_BLOCK_VALARM,
       +        ICAL_BLOCK_OTHER,
       +} IcalBlock;
       +
        struct IcalStack {
                char         name[32];
                char         tzid[32];
       @@ -25,17 +34,19 @@ struct IcalParser {
                /* if returning non-zero then halt the parser */
        
                int         base64;
       -        char const *errmsg;
       +        char        *errmsg;
                size_t         linenum;
                char        *tzid;
       -
       +        IcalBlock block;
                IcalStack stack[ICAL_STACK_SIZE], *current;
        };
        
       -int        ical_parse(IcalParser *, FILE *);
       -int        ical_get_level(IcalParser *);
       -int        ical_get_time(IcalParser *, char *, time_t *);
       -int        ical_get_value(IcalParser *, char *, size_t *);
       -int        ical_error(IcalParser *, char const *);
       +extern char *ical_block_name[ICAL_BLOCK_OTHER + 1];
       +
       +int         ical_parse(IcalParser *, FILE *);
       +int         ical_get_level(IcalParser *);
       +int         ical_get_time(IcalParser *, char *, time_t *);
       +int         ical_get_value(IcalParser *, char *, size_t *);
       +int         ical_err(IcalParser *, char *);
        
        #endif
   DIR diff --git a/ics2tree.c b/ics2tree.c
       @@ -18,6 +18,7 @@ fn_entry_name(IcalParser *p, char *name)
        {
                print_ruler(ical_get_level(p));
                printf("name %s\n", name);
       +        fflush(stdout);
                return 0;
        }
        
       @@ -26,6 +27,7 @@ fn_block_begin(IcalParser *p, char *name)
        {
                print_ruler(ical_get_level(p) - 1);
                printf("begin %s\n", name);
       +        fflush(stdout);
                return 0;
        }
        
       @@ -34,6 +36,7 @@ fn_param_value(IcalParser *p, char *name, char *value)
        {
                print_ruler(ical_get_level(p) + 1);
                printf("param %s=%s\n", name, value);
       +        fflush(stdout);
                return 0;
        }
        
       @@ -45,21 +48,18 @@ fn_entry_value(IcalParser *p, char *name, char *value)
        
                if (ical_get_value(p, value, &len) < 0)
                        return -1;
       -
                print_ruler(ical_get_level(p) + 1);
       -
                if (strcasecmp(name, "DTSTART") == 0 ||
                    strcasecmp(name, "DTSTAMP") == 0 ||
                    strcasecmp(name, "DTEND") == 0) {
                        time_t t;
       -
                        if (ical_get_time(p, value, &t) != 0)
                                warn("%s: %s", p->errmsg, value);
                        printf("epoch %lld\n", t);
                } else {        
                        printf("value %s\n", value);
                }
       -
       +        fflush(stdout);
                return 0;
        }
        
   DIR diff --git a/ics2tsv.c b/ics2tsv.c
       @@ -0,0 +1,99 @@
       +#include <stdio.h>
       +#include <stdlib.h>
       +#include <string.h>
       +#include <strings.h>
       +
       +#include "ical.h"
       +#include "util.h"
       +
       +#define FIELDS_MAX 64
       +
       +typedef struct Event Event;
       +
       +struct Event {
       +        time_t beg, end;
       +        char *fields[FIELDS_MAX];
       +};
       +
       +static char *fields_time[] = {
       +        "DTSTART", "DTEND", "DTSTAMP", "DUE", "EXDATE", "RDATE"
       +};
       +
       +static char *fields_default[] = {
       +        "ATTENDEE", "CATEGORY", "DESCRIPTION", "LOCATION", "SUMMARY", "URL"
       +};
       +
       +static char **fields = fields_default;
       +
       +static int
       +fn_entry_name(IcalParser *p, char *name)
       +{
       +        printf("name %s\n", name);
       +        return 0;
       +}
       +
       +static int
       +fn_block_begin(IcalParser *p, char *name)
       +{
       +        printf("begin %s\n", name);
       +        return 0;
       +}
       +
       +static int
       +fn_param_value(IcalParser *p, char *name, char *value)
       +{
       +        printf("param %s=%s\n", name, value);
       +        return 0;
       +}
       +
       +static int
       +fn_entry_value(IcalParser *p, char *name, char *value)
       +{
       +        size_t len;
       +        (void)name;
       +
       +        if (ical_get_value(p, value, &len) < 0)
       +                return -1;
       +
       +        if (strcasecmp(name, "DTSTART") == 0 ||
       +            strcasecmp(name, "DTSTAMP") == 0 ||
       +            strcasecmp(name, "DTEND") == 0) {
       +                time_t t = 0;
       +                if (ical_get_time(p, value, &t) != 0)
       +                        warn("%s: %s", p->errmsg, value);
       +                printf("epoch %lld\n", t);
       +        } else {        
       +                printf("value %s\n", value);
       +        }
       +
       +        return 0;
       +}
       +
       +int
       +main(int argc, char **argv)
       +{
       +        IcalParser p = {0};
       +        arg0 = *argv++;
       +
       +        p.fn_entry_name = fn_entry_name;
       +        p.fn_block_begin = fn_block_begin;
       +        p.fn_param_value = fn_param_value;
       +        p.fn_entry_value = fn_entry_value;
       +
       +        if (*argv == NULL) {
       +                if (ical_parse(&p, stdin) < 0)
       +                        err("parsing stdin:%d: %s", p.linenum, p.errmsg);
       +        }
       +
       +        for (; *argv != NULL; argv++, argc--) {
       +                FILE *fp;
       +
       +                debug("converting \"%s\"", *argv);
       +                if ((fp = fopen(*argv, "r")) == NULL)
       +                        err("opening %s", *argv);
       +                if (ical_parse(&p, fp) < 0)
       +                        err("parsing %s:%d: %s", *argv, p.linenum, p.errmsg);
       +                fclose(fp);
       +        }
       +        return 0;
       +}
   DIR diff --git a/tcal.5 b/tcal.5
       @@ -36,17 +36,16 @@ end of line.
        .Bd -literal
        TZ+0200
        
       -2020-06-28 00:00
       -2020-06-05 00:00
       +2021-06-28 00:00
       +2021-06-05 00:00
         loc: 950-0994, Chuo Ward, Niigata, Japan
         sum: summer holidays
        
       -2020-06-29 13:30
       -2020-06-29 15:00
       - loc: online, irc.freenode.net, #bitreich-en
       +2021-06-29 13:30
       +2021-06-29 15:00
       + loc: online, irc.bitreich.org, #bitreich-en
         sum: bitreich irc invitation
         des: at this moment like all other moment, everyone invited on IRC
       -
        .Ed
        .
        .