head	1.1;
access;
symbols;
locks
	raph:1.1; strict;
comment	@# @;


1.1
date	96.05.09.23.06.17;	author raph;	state Exp;
branches;
next	;


desc
@The utility to remove binary encodings from arbitrary MIME objects.
A separate file to play with before integrating into premail.
@


1.1
log
@Initial revision
@
text
@#!/usr/local/bin/perl
#
# Convert a generic MIME object into a nonbinary representation
#

&mknonbin (@@ARGV);

sub mknonbin {
# &mknonbin ($infile, $outfile)
# Convert MIME object in $infile to 7-bit representation, store in $outfile.
    my ($infile, $outfile) = @@_;
    my ($inbuf, $buf, $blocksize, $state);
    my (@@sepstack);
    my ($n, $eof, $i, $nlsize, $more);
    my (@@header, @@mime_fields);
    my ($val, $present, $param_val);
    my ($type_base, @@type_params);

    open (MNBIN, $infile);
    open (MNBOUT, '>'.$outfile);
    $buf = '';
    @@sepstack = ();
    $blocksize = 1024;
    $state = 0; # 0 = waiting for header
                # 1 = inside non-binary part
                # 2 = inside binary part
    $eof = 0;
    while (!($eof && length $buf == 0)) {
	if ($more && !$eof) {
	    $n = sysread (MNBIN, $inbuf, $blocksize);
	    if ($n == 0) { $eof = 1; }
	    $buf .= $inbuf;
	}
	if ($state == 0) {
	    # try to get header
	    $i = index ($buf, "\n\n");
	    if ($i >= 0) {
		$nlsize = 1;
	    } else {
		$i = index ($buf, "\r\n\r\n");
		if ($i >= 0) {
		    $nlsize = 2;
		}
	    }
	    if ($i >= 0) {
		# found the header, let's process
		@@header = &split_header (substr ($buf, 0, $i + $nlsize));
		$buf = substr ($buf, $i + 2 * $nlsize);
		@@mime_fields = &get_mime_fields (@@header);
		$state = 1; # if not binary - override later if binary
		# find out if it's binary
		($val, $present) = &lookup_val ('Content-Transfer-Encoding',
						@@header);
		if ($present) {
		    ($type_base, @@type_params) = &split_mime_params ($val);
		    if (lc $type_base eq 'binary') {
			$state = 2;
			@@header = &replace_field
			    ('Content-Transfer-Encoding: base64'."\n",
			     @@header);
		    }
		}
		print MNBOUT (join ('', @@header)."\n");
	    } elsif ($eof) {
		# didn't find a header - just dump to output
		print MNBOUT $buf;
		$buf = '';
	    } else {
		$more = 1;
	    }
	} else {
	    if ($state == 1) {
		print MNBOUT $buf;
		$buf = '';
	    } elsif ($state == 2) {
		if ($eof || length $buf >= 15 * 3) {
		    print MNBOUT (&encode_base64 (substr ($buf, 0, 15 * 3))
				  ."\n");
		    $buf = substr ($buf, 15 * 3);
		} else {
		    $more = 1;
		}
	    }
	} # if ($state == 0)
    } # while (!($eof && length $buf == 0))
    close (MNBIN);
    close (MNBOUT);
}

sub split_header {
# @@header = &split_header ($header)
# Convert header from a single string into premail dict style (i.e. one
# key: value pair per list entry).
# 
# Canonicalize line ends to LF.
    my ($header) = @@_;
    my (@@header);

    @@header = ();
    foreach $line (split (/\r?\n/, $header)) {
	if ($line =~ /^\S/) {
	    push (@@header, $line."\n");
	} elsif ($line =~ /^\s/) {
	    push (@@header, pop (@@header).$line."\n");
	}
    }
    return (@@header);
}

# The following routines are stolen from premail

sub lookup_val {
# ($val, $present) = &lookup_val ($key, @@dict)
# Look up the key in the dict
# Return ($val, 1) if found, ("", 0) if not.
    my ($key, @@dict) = @@_;
    my ($field_key, $field_val);

    foreach $field (@@dict) {
	($field_key, $field_val) = &parse_field ($field);
	if (lc $field_key eq lc $key) {
	    return ($field_val, 1);
	}
    }
    return ("", 0);
}

sub parse_field {
# ($key, $val) = &parse_field ($key)
    if ($_[0] =~ /^([!-9\;-\177]+)\:\s*(.*)\n$/s) { # RFC 822 field
	return ($1, $2);
    } else {
	&error ("premail internal error (parse_field): field is:\n$field");
    }
}

sub delete_field {
# (@@new_dict) = &delete_field ($key, @@dict)
    my ($key, @@dict) = @@_;
    my (@@new_dict);

    @@new_dict = ();
    foreach $field (@@dict) {
	($field_key, $field_val) = &parse_field ($field);
	if (lc $field_key ne lc $key) {
	    push (@@new_dict, $field);
	}
    }
    return (@@new_dict);
}

sub replace_field {
# (@@new_dict) = &replace_field ($new_field, @@dict)
# Delete the field if it already exists, and append to the end.
    my ($field, @@dict) = @@_;
    my ($key, $val);

    ($key, $val) = &parse_field ($field);
    @@dict = &delete_field ($key, @@dict);
    push (@@dict, $field);
    return (@@dict);
}

sub get_mime_fields {
# (@@mime_fields) = &get_mime_fields (@@header)
# Get the MIME fields (not including the MIME header). No distinction is
# made between MIMEless headers containing the MIME-Version field and
# all the default MIME fields - both return the empty list.
#
# If the field has a default value, does not put it in the header.
#
# This routine could perhaps use a little work.
    my (@@header) = @@_;
    my (@@mime_fields);
    my ($val, $present, $param_val);
    my ($type_base, @@type_params);

    ($val, $present) = &lookup_val ("MIME-Version", @@header);
    if (!$present) { return (); }
    @@mime_fields = ();
    ($val, $present) = &lookup_val ("Content-Type", @@header);
    if ($present) {
	($type_base, @@type_params) = &split_mime_params ($val);
	if (lc $type_base eq 'text/plain') {
	    ($param_val, $present) = &get_mime_param ('charset', @@type_params);
	    if ($present && lc $param_val ne 'us-ascii') {
		push (@@mime_fields, "Content-Type: $val\n");
	    }
	} else {
	    push (@@mime_fields, "Content-Type: $val\n");
	}
    }
    ($val, $present) = &lookup_val ("Content-Transfer-Encoding", @@header);
    if ($present) {
	if (lc $val ne '7bit') {
	    push (@@mime_fields, "Content-Transfer-Encoding: $val\n");
	}
    }
    return (@@mime_fields);
}

sub split_mime_params {
# ($baseval, @@mime_params) = &split_mime_params ($val)
# Split the value portion of a MIME field into the base and the
# parameters.
#
# Not quite right yet; doesn't cope with quoted semicolons.
#
# Source: definition of content in RFC 1521
    my ($val) = @@_;

    return split (/\s*\;\s*/, $val);
}

sub get_mime_param {
# ($val, $present) = &get_mime_param ($attribute, @@mime_params)
# Get the mime parameter if present. Removes quoting if present.
#
# Source: definition of parameter, attribute, value in RFC 1521
    my ($attribute, @@mime_params) = @@_;
    my ($val, $present);

    foreach $param (@@mime_params) {
	if ($param =~ /^([^\000- \(\)\<\>\@@\,\;\:\\\"\/\[\]\?\=]+)\s*\=(.*)$/){
	    if (lc $attribute eq lc $1) {
		$val = $2;
		$val =~ s/^\s+//;
		if ($val =~ /\"(.*)\"/) {
		    $val = $1;
		    $val =~ s/\\(.)/$1/g;
		}
		return ($val, 1);
	    }
	}
    }
    return ('', 0);
}

sub encode_base64 {
# $encoded = &encode_base64 ($raw)
# Convert raw binary string into MIME base64 encoding (RFC 1521).
    my ($raw) = @@_;
    my ($encoded);

    $encoded = pack ("u", $raw);
    chop $encoded;
    $encoded =~ s/^.//;
    $encoded =~ tr/\`\!-\_/A-Za-z0-9\+\//;
    if ((length $raw) % 3 == 1) { $encoded =~ s/..$/\=\=/; }
    elsif ((length $raw) % 3 == 2) { $encoded =~ s/.$/\=/; }
    return $encoded;
}

sub encode_qp_byte {
# $encoded = &encode_qp_byte ($char)
    return '='.uc sprintf ('%02x', unpack ('C', $_[0]));
}

sub encode_qp {
# $encoded = &encode_qp ($line, $type)
# Convert text line into MIME quoted-printable encoding (RFC 1521). Result
# may be multiple lines. Argument must be one line.
# $type argument should be one of the following:
# 'sign' - quote "From ", tabs
# otherwise minimal encoding needed to conform to spec.
    my ($line, $type) = @@_;
    my ($before, $after);

    chop $line;
    if ($type eq 'sign') {
	$line =~ s/([^ -\<\>-\~])/&encode_qp_byte($1)/eg;
	$line =~ s/^From /\=46rom /;
	$line =~ s/^\.$/\=2E/;
    } else {
	$line =~ s/([^\t -\<\>-\~])/&encode_qp_byte($1)/eg;
    }
    $line =~ s/([ \t])$/&encode_qp_byte($1)/e;
    $before = '';
    while (length $line > 76) {
	$after = substr ($line, 75);
	$line = substr ($line, 0, 75);
	if ($line =~ /(\=.|\=)$/) {
	    $after = substr ($line, 75 - length $1). $after;
	    $line = substr ($line, 0, 75 - length $1);
	}
	$line = $line."\=\n";
	$before .= $line;
	$line = $after;
    }
    return $before.$line."\n";
}

@
