!----------------------------------------------------------------------------
!
! parse_delimited -- Parse a line of delimited text into fields.
!
! For general application.
!
! Rev	Date		Notes
! 1.00	2001-jan-02	Original version.  By Dave Allured.
! 1.01	2001-jan-18	Remove all handling for EOL characters.
!			This is now assumed to be handled by caller.
!
! 2.00	2008-apr-24	Fortran 90 interface.  Add module definition.
!			This achieves proper bounds checking for arrays.
!
! 3.00	2011-mar-16	Add proper support for quoted fields; interface change.
!			Allow the main delimiter to be protected within quotes.
!			Support two consecutive quotes as an escape sequence,
!			  without terminating the quoted field.
!			Support multiple quote characters.
! 3.01	2011-mar-17	Simplify where null substrings are allowed.
!			Fix comments related to null substrings.
!
! Input, output:  See calling args below.
!
! Usage:
!
! Call with a line of text in "line".
! The individual fields are not actually broken out.
! Instead, two arrays containing the first and last
! character positions of each field are returned.
!
! Notes:
!
! End-of-line characters such as CR (carriage return) must be
! handled by the calling program.  An EOL character will be
! regarded here as part of the last field.
!
! For purposes of this parser, all lines are considered
! to end at the last non-space character, if any.
!
! Whenever BOL, delimiters, and/or EOL are adjacent,
! the result is null fields.  Null fields are indicated
! by last(n) < first(n).
!
! A blank or null line returns as a single null field.
!
! When the first or last fields are null strings, one of the
! returned field indices will be OUTSIDE of the range 1 to
! len_trim (line).
!
! The contents of first() and last() beyond n_fields
! are undefined.
!
! Range checking is not done.  The caller must ensure that
! output array dimensions are sufficient for any possible
! line of text.
!
! Quoting rules:
!
! * Field quoting is always optional, in this version.
!
! * Blanks before the leading quote, or after the trailing quote,
!   are allowed.
!
! * Within each field, all active quotes must be the same character.
!
! * Within each field, all quotes must be balanced.
!
! * A quoted field protects any included field delimiters.
!
! * Within a single quoted field, two consecutive quotes comprise
!   one complete escape sequence, and do not interrupt the field.
!
! * Within a single quoted field, multiple escape sequences are
!   allowed.
!
! * If any rule is violated, then all quotes are ignored, and the
!   field is re-parsed from the beginning to the first delimiter.
!
! Optimization:
!
! This version is optimized for mostly non-quoted fields, and no
! leading spaces.
!
! Best speedup is obtained by not including excess trailing
! blanks in the input line.
!
!----------------------------------------------------------------------------

module parse_delimited_mod

   implicit none
   private
   public :: parse_delimited

contains

!-----------------------------------------------------------------------
! parse_delimited -- Locate the fields of one line of delimited text.
!
! The line is not copied or modified.  For each field, the positions
! of its first and last characters within "line" are returned in
! first(n) and last(n).  A null field has last(n) < first(n).
!
! The line is taken to end at its last non-blank character.  A blank
! or zero-length line yields exactly one null field, with first(1) = 1
! and last(1) = 0.
!
! Quoted fields:  If the first non-blank character of a field is one
! of the characters in valid_quotes, the field is scanned as a quoted
! field.  Delimiters between the quotes are protected, and two
! consecutive quotes continue the quoted field.  The returned range
! of a quoted field runs from the start of the field (including any
! leading blanks and the quotes themselves) to the character just
! before the terminating delimiter.  If the closing quote is missing,
! or anything other than blanks sits between the closing quote and
! the next delimiter, the quotes are ignored and the field is
! re-scanned from its start as a plain field.
!
! Output array capacity:  Field positions are stored only while the
! field index is within BOTH first() and last().  Fields beyond that
! capacity are still parsed and counted, but not stored.  Therefore
! n_fields > min (size (first), size (last)) tells the caller that
! the arrays were too small.  Elements beyond the stored fields are
! not assigned.
!
! Note:  Blanks at the start of a field are skipped before the search
! for a quote or delimiter begins, so a blank delimiter is not seen
! within those leading blanks.
!-----------------------------------------------------------------------

pure subroutine parse_delimited (line, delim, valid_quotes, first, last, &
      n_fields)

   character(*), intent (in ) :: line           ! input text line to be parsed;
                                                !   null string is allowed
   character(1), intent (in ) :: delim          ! specified field delimiter
   character(*), intent (in ) :: valid_quotes   ! allowed quote characters;
                                                !   null = no quote handling
                                                ! e.g. ["] or ["'] or []
                                                ! blanks never match a quote

   integer,      intent (out) :: first(:)       ! first char pos of each field
   integer,      intent (out) :: last(:)        ! last char pos of each field
   integer,      intent (out) :: n_fields       ! number of fields found
                                                !   (always one or more)
! Local variables.

   character(1) :: qch          ! quote character for current field

   integer :: p                 ! char pointer within line
   integer :: j                 ! char offset when using index()
   integer :: eol               ! pointer to last character in line
   integer :: fi                ! field index into arrays
   integer :: fstart            ! first char position of current field
   integer :: cap               ! number of fields the arrays can hold

   logical :: valid             ! T = valid quoting, F = rule violation

   p   = 1                      ! point to first character in line
   eol = len_trim (line)        ! last non-blank char in line;
                                !   zero for blank or null lines
   fi  = 0                      ! no fields found yet

! Both output arrays are indexed together, so the usable capacity
! is the smaller of the two extents.

   cap = min (size (first), size (last))

!-----------------------------------------------------------------------
! Main field loop.  Do for each delimiter found, plus start of line.
!-----------------------------------------------------------------------

field_loop: &
   do
      fi = fi + 1                       ! start a new field at p
      fstart = p                        ! keep start locally for reversion
      if (fi <= cap) first(fi) = fstart ! store only if it fits

      if (p > eol) exit field_loop      ! all done: null field at end of line

! Scan past leading blanks in advance of a possible quoted field.
! The line ends on a non-blank character, so the scan stops by EOL;
! the explicit exit is kept as a safeguard.

      do while (line(p:p) == ' ')
         p = p + 1
         if (p > eol) exit field_loop
      end do

! Now p points to the first non-blank character of the field.

!-----------------------------------------------------------------------
! Special handling for a QUOTED field.
!-----------------------------------------------------------------------

      qch = line(p:p)                   ! candidate quote character; if it
                                        ! is a quote, it is the only quote
                                        ! character honored in this field
quoted_field: &
      if (index (valid_quotes, qch) > 0) then   ! zero for null valid_quotes

         valid = .true.                 ! cleared if quotes are unbalanced

! At the start of each iteration, p points to the leading quote of
! the current quoted substring:  "xxx""xxxxx""xx" etc.

substring_loop: &
         do
            j = index (line(p+1:eol), qch)      ! find the trailing quote;
                                                ! null substring if p >= eol

            if (j == 0) then            ! trailing quote is missing:
               valid = .false.          ! quotes are unbalanced,
               exit substring_loop      ! revert to unquoted
            end if

            p = p + j + 1                 ! next char after trailing quote
            if (p > eol) exit field_loop  ! all done: EOL after trailing quote

            if (line(p:p) /= qch) &     ! finished unless another quote
               exit substring_loop      !   follows at once (escape sequence)

         end do substring_loop

         if (valid) then

! Scan past trailing blanks following the quoted part.

            do while (line(p:p) == ' ')
               p = p + 1
               if (p > eol) exit field_loop   ! all done: only blanks remained
            end do

! A properly quoted field must be followed by the delimiter.

            if (line(p:p) == delim) then
               p = p + 1                ! next char following delimiter;
                                        ! may be 1 char past end of line
               if (fi <= cap) last(fi) = p - 2  ! char before the delimiter
               cycle field_loop         ! next iteration handles EOL
            end if

! Otherwise the quoted part has an unquoted suffix, which is a rule
! violation.  Fall through and revert to unquoted.

         end if

! All violations of the quoting rules arrive here.

         p = fstart                     ! revert; re-scan the entire field
                                        ! (inefficient, but also infrequent)
      end if quoted_field

!-----------------------------------------------------------------------
! Handle a NON-QUOTED field, including reversions.
!-----------------------------------------------------------------------

      j = index (line(p:eol), delim)    ! find the next delimiter

      if (j == 0) exit field_loop       ! no delimiter, last field in line

      p = p + j                         ! next char following delimiter;
                                        ! may be 1 char past end of line
      if (fi <= cap) last(fi) = p - 2   ! char before the delimiter

   end do field_loop

! Every exit from the main loop leaves the final field unterminated.
! It always ends at EOL; it is a null field if the delimiter was the
! last character on the line.

   if (fi <= cap) last(fi) = eol
   n_fields = fi                        ! total fields found; may exceed cap

end subroutine parse_delimited

end module parse_delimited_mod