#!/usr/local/bin/perl
#
# Convert a generic MIME object into a nonbinary representation
#

&mknonbin (@ARGV);

sub mknonbin {
# &mknonbin ($infile, $outfile)
# Convert MIME object in $infile to 7-bit representation, store in $outfile.
    my ($infile, $outfile) = @_;
    my ($buf, $inbuf, $outbuf, $blocksize, $state);
    my (@sepstack);
    my ($n, $i, $nlsize, $eof, $eob, $more);
    my (@header, @mime_fields);
    my ($val, $present, $param_val);
    my ($type_base, @type_params);

    open (MNBIN, $infile);
    open (MNBOUT, '>'.$outfile);
    @sepstack = ();
    $blocksize = 1024;
    $state = 0; # 0 = waiting for header
                # 1 = inside non-binary part
                # 2 = inside binary part
                # 3 = just before initial newline in binary part
    $eof = 0;
    sysread (MNBIN, $buf, $blocksize);
    while (!$eof || $buf ne '') {
	print 'sepstack: '.join (', ', @sepstack).", ";
	print ("state $state; buf = ".&encode_qp (substr ($buf, 0, 20)."\n"));
	$n = length $buf;
	if (!$eof && ($more || $n < $blocksize)) {
	    $n = sysread (MNBIN, $inbuf, $blocksize);
	    print "read $n\n";
	    if ($n == 0) { $eof = 1; }
	    $buf .= $inbuf;
	}
	$more = 0;
	if ($state == 0) {
	    # try to get header
	    if ($buf =~ /^\r?\n/s) {
		$i = 0;
		$nlsize = 0;
	    } else {
		$i = index ($buf, "\n\n");
		if ($i >= 0) {
		    $nlsize = 1;
		} else {
		    $i = index ($buf, "\r\n\r\n");
		    if ($i >= 0) {
			$nlsize = 2;
		    }
		}
	    }
	    if ($i >= 0) {
		# found the header, let's process
		@header = &split_header (substr ($buf, 0, $i + $nlsize));
		$buf = substr ($buf, $i + $nlsize);
		@mime_fields = &get_mime_fields (@header);
		$state = 1; # if not binary - override later if binary
		# find out if it's a multipart
		($val, $present) = &lookup_val ('Content-Type', @header);
		if ($present) {
		    ($type_base, @type_params) = &split_mime_params ($val);
		    if ($type_base =~ /^multipart\//i) {
			($val, $present) = &get_mime_param ('boundary',
							    @type_params);
			if ($present) {
			    push (@sepstack, $val);
			    print 'sepstack: '.join (', ', @sepstack)."\n";
			}
		    }
		}
		# find out if it's binary
		($val, $present) = &lookup_val ('Content-Transfer-Encoding',
						@header);
		if ($present) {
		    ($type_base, @type_params) = &split_mime_params ($val);
		    if (lc $type_base eq 'binary') {
			$state = 3;
			@header = &replace_field
			    ('Content-Transfer-Encoding: base64'."\n",
			     @header);
		    }
		}
		print MNBOUT (join ('', @header));
	    } elsif ($eof) {
		# didn't find a header - just dump to output
		print MNBOUT $buf;
		$buf = '';
	    } else {
		$more = 1;
	    }
	} else {
	    # in body - first, check for boundary
	    if ($#sepstack < 0) {
		$eob = $eof;
		$outbuf = $buf;
		$buf = '';
	    } else {
		$n = 6 + length $sepstack[$#sepstack];
		$i = index ($buf, "\r\n".'--'.$sepstack[$#sepstack]."\r\n");
		if ($i < 0) {
		    $n = 4 + length $sepstack[$#sepstack];
		    $i = index ($buf, "\n".'--'.$sepstack[$#sepstack]."\n");
		}
		if ($i >= 0) {
		    $eob = 1;
		    if ($i == 0) {
			print MNBOUT ("\n".'--'.$sepstack[$#sepstack]."\n");
			$buf = substr ($buf, $n);
			$outbuf = '';
			$state = 0;
		    } else {
			$outbuf = substr ($buf, 0, $i);
			$buf = substr ($buf, $i);
		    }
		} else {
		    $n = 6 + length $sepstack[$#sepstack];
		    $i = index ($buf, "\r\n".'--'.$sepstack[$#sepstack].'--'
				."\r\n");
		    if ($i < 0) {
			$n = 6 + length $sepstack[$#sepstack];
			$i = index ($buf, "\n".'--'.$sepstack[$#sepstack].'--'
				    ."\n");
		    }
		    if ($i >= 0) {
			$eob = 1;
			if ($i == 0) {
			    print MNBOUT ("\n".'--'.$sepstack[$#sepstack]
					  .'--'."\n");
			    $buf = substr ($buf, $n);
			    $outbuf = '';
			    pop (@sepstack);
			    $state = 0;
			} else {
			    $outbuf = substr ($buf, 0, $i);
			    $buf = substr ($buf, $i);
			}
		    } else {
			$n = (length $buf);
			if (!$eof) { $n -= 8 + length $sepstack[$#sepstack]; }
			$outbuf = substr ($buf, 0, $n);
			$buf = substr ($buf, $n);
		    }
		}
	    }
	    if ($outbuf ne '' && $state == 1) {
		print MNBOUT $outbuf;
		$outbuf = '';
	    } elsif ($outbuf ne '' && $state == 2) {
		if ($eob || length $outbuf >= 15 * 3) {
		    print MNBOUT (&encode_base64 (substr ($outbuf, 0, 15 * 3))
				  ."\n");
		    $outbuf = substr ($outbuf, 15 * 3);
		}
	    } elsif ($outbuf ne '' && $state == 3) {
		if ($outbuf =~ /^\n/s) {
		    $outbuf = substr ($outbuf, 1);
		    print MNBOUT "\n";
		} elsif ($buf =~ /^\r\n/s) {
		    $outbuf = substr ($outbuf, 2);
		    print MNBOUT "\n";
		}
		$state = 2;
	    }
	    $buf = $outbuf.$buf;
	} # if ($state == 0)
    } # while (!($eof && length $buf == 0))
    close (MNBIN);
    close (MNBOUT);
}

sub split_header {
# @header = &split_header ($header)
# Convert header from a single string into premail dict style (i.e. one
# key: value pair per list entry).
# 
# Canonicalize line ends to LF.
    my ($header) = @_;
    my (@header);

    @header = ();
    foreach $line (split (/\r?\n/, $header)) {
	if ($line =~ /^\S/) {
	    push (@header, $line."\n");
	} elsif ($line =~ /^\s/) {
	    push (@header, pop (@header).$line."\n");
	}
    }
    return (@header);
}

# The following routines are stolen from premail

sub lookup_val {
# ($val, $present) = &lookup_val ($key, @dict)
# Look up the key in the dict
# Return ($val, 1) if found, ("", 0) if not.
    my ($key, @dict) = @_;
    my ($field_key, $field_val);

    foreach $field (@dict) {
	($field_key, $field_val) = &parse_field ($field);
	if (lc $field_key eq lc $key) {
	    return ($field_val, 1);
	}
    }
    return ("", 0);
}

sub parse_field {
# ($key, $val) = &parse_field ($key)
    if ($_[0] =~ /^([!-9\;-\177]+)\:\s*(.*)\n$/s) { # RFC 822 field
	return ($1, $2);
    } else {
	die ("premail internal error (parse_field): field is:\n$field");
    }
}

sub delete_field {
# (@new_dict) = &delete_field ($key, @dict)
    my ($key, @dict) = @_;
    my (@new_dict);

    @new_dict = ();
    foreach $field (@dict) {
	($field_key, $field_val) = &parse_field ($field);
	if (lc $field_key ne lc $key) {
	    push (@new_dict, $field);
	}
    }
    return (@new_dict);
}

sub replace_field {
# (@new_dict) = &replace_field ($new_field, @dict)
# Delete the field if it already exists, and append to the end.
    my ($field, @dict) = @_;
    my ($key, $val);

    ($key, $val) = &parse_field ($field);
    @dict = &delete_field ($key, @dict);
    push (@dict, $field);
    return (@dict);
}

sub get_mime_fields {
# (@mime_fields) = &get_mime_fields (@header)
# Get the MIME fields (not including the MIME header). No distinction is
# made between MIMEless headers containing the MIME-Version field and
# all the default MIME fields - both return the empty list.
#
# If the field has a default value, does not put it in the header.
#
# This routine could perhaps use a little work.
    my (@header) = @_;
    my (@mime_fields);
    my ($val, $present, $param_val);
    my ($type_base, @type_params);

    ($val, $present) = &lookup_val ("MIME-Version", @header);
    if (!$present) { return (); }
    @mime_fields = ();
    ($val, $present) = &lookup_val ("Content-Type", @header);
    if ($present) {
	($type_base, @type_params) = &split_mime_params ($val);
	if (lc $type_base eq 'text/plain') {
	    ($param_val, $present) = &get_mime_param ('charset', @type_params);
	    if ($present && lc $param_val ne 'us-ascii') {
		push (@mime_fields, "Content-Type: $val\n");
	    }
	} else {
	    push (@mime_fields, "Content-Type: $val\n");
	}
    }
    ($val, $present) = &lookup_val ("Content-Transfer-Encoding", @header);
    if ($present) {
	if (lc $val ne '7bit') {
	    push (@mime_fields, "Content-Transfer-Encoding: $val\n");
	}
    }
    return (@mime_fields);
}

sub split_mime_params {
# ($baseval, @mime_params) = &split_mime_params ($val)
# Split the value portion of a MIME field into the base and the
# parameters.
#
# Not quite right yet; doesn't cope with quoted semicolons.
#
# Source: definition of content in RFC 1521
    my ($val) = @_;

    return split (/\s*\;\s*/, $val);
}

sub get_mime_param {
# ($val, $present) = &get_mime_param ($attribute, @mime_params)
# Get the mime parameter if present. Removes quoting if present.
#
# Source: definition of parameter, attribute, value in RFC 1521
    my ($attribute, @mime_params) = @_;
    my ($val, $present);

    foreach $param (@mime_params) {
	if ($param =~ /^([^\000- \(\)\<\>\@\,\;\:\\\"\/\[\]\?\=]+)\s*\=(.*)$/){
	    if (lc $attribute eq lc $1) {
		$val = $2;
		$val =~ s/^\s+//;
		if ($val =~ /\"(.*)\"/) {
		    $val = $1;
		    $val =~ s/\\(.)/$1/g;
		}
		return ($val, 1);
	    }
	}
    }
    return ('', 0);
}

sub encode_base64 {
# $encoded = &encode_base64 ($raw)
# Convert raw binary string into MIME base64 encoding (RFC 1521).
    my ($raw) = @_;
    my ($encoded);

    $encoded = pack ("u", $raw);
    chop $encoded;
    $encoded =~ s/^.//;
    $encoded =~ tr/\`\!-\_/A-Za-z0-9\+\//;
    if ((length $raw) % 3 == 1) { $encoded =~ s/..$/\=\=/; }
    elsif ((length $raw) % 3 == 2) { $encoded =~ s/.$/\=/; }
    return $encoded;
}

sub encode_qp_byte {
# $encoded = &encode_qp_byte ($char)
    return '='.uc sprintf ('%02x', unpack ('C', $_[0]));
}

sub encode_qp {
# $encoded = &encode_qp ($line, $type)
# Convert text line into MIME quoted-printable encoding (RFC 1521). Result
# may be multiple lines. Argument must be one line.
# $type argument should be one of the following:
# 'sign' - quote "From ", tabs
# otherwise minimal encoding needed to conform to spec.
    my ($line, $type) = @_;
    my ($before, $after);

    chop $line;
    if ($type eq 'sign') {
	$line =~ s/([^ -\<\>-\~])/&encode_qp_byte($1)/eg;
	$line =~ s/^From /\=46rom /;
	$line =~ s/^\.$/\=2E/;
    } else {
	$line =~ s/([^\t -\<\>-\~])/&encode_qp_byte($1)/eg;
    }
    $line =~ s/([ \t])$/&encode_qp_byte($1)/e;
    $before = '';
    while (length $line > 76) {
	$after = substr ($line, 75);
	$line = substr ($line, 0, 75);
	if ($line =~ /(\=.|\=)$/) {
	    $after = substr ($line, 75 - length $1). $after;
	    $line = substr ($line, 0, 75 - length $1);
	}
	$line = $line."\=\n";
	$before .= $line;
	$line = $after;
    }
    return $before.$line."\n";
}

