r2179 - trunk/perl

bdubbs at linuxfromscratch.org bdubbs at linuxfromscratch.org
Thu Feb 25 18:13:00 PST 2010


Author: bdubbs
Date: 2010-02-25 19:13:00 -0700 (Thu, 25 Feb 2010)
New Revision: 2179

Added:
   trunk/perl/perl-5.10.1-utf8-1.patch
Log:
Bug fix for invalid utf-8 characters causing Perl to crash

Added: trunk/perl/perl-5.10.1-utf8-1.patch
===================================================================
--- trunk/perl/perl-5.10.1-utf8-1.patch	                        (rev 0)
+++ trunk/perl/perl-5.10.1-utf8-1.patch	2010-02-26 02:13:00 UTC (rev 2179)
@@ -0,0 +1,167 @@
+Submitted By: Robert Connolly <robert at linuxfromscratch dot org> (ashes)
+Date: 2010-02-24
+Initial Package Version: 5.10.1
+Upstream Status: From upstream
+Origin: Git
+http://perl5.git.perl.org/perl.git/patch/0abd0d78a73da1c4d13b1c700526b7e5d03b32d4
+Description: Bug fix for invalid utf-8 characters causing Perl to crash.
+
+http://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2009-3626
+
+From 0abd0d78a73da1c4d13b1c700526b7e5d03b32d4 Mon Sep 17 00:00:00 2001
+From: Yves Orton <demerphq at gmail.com>
+Date: Sun, 25 Oct 2009 20:37:08 +0100
+Subject: [PATCH] disable non-unicode case insensitive trie matching
+
+Also revert 8902bb05b18c9858efa90229ca1ee42b17277554 as it merely
+masked one symptom of the deeper problems.
+
+Also fixes RT #69973, which was a segfault which was exposed by
+8902bb05, see the ticket for further details.
+
+http://rt.perl.org/rt3//Public/Bug/Display.html?id=69973
+
+At the code of this is the problem that in unicode matching a bunch
+of code points have case folding rules beyond just A-Z/a-z. Since
+the case folding rules are decided at runtime by the string, we cant
+use the same TRIE tables for both unicode/non-unicode matching.
+
+Until this is reconciled or some other solution is found case insensitive
+matching only gets the TRIE optimisation when the pattern is uniocde.
+
+From CaseFolding.txt:
+
+00B5; C; 03BC; # MICRO SIGN
+00C0; C; 00E0; # LATIN CAPITAL LETTER A WITH GRAVE
+00C1; C; 00E1; # LATIN CAPITAL LETTER A WITH ACUTE
+00C2; C; 00E2; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+00C3; C; 00E3; # LATIN CAPITAL LETTER A WITH TILDE
+00C4; C; 00E4; # LATIN CAPITAL LETTER A WITH DIAERESIS
+00C5; C; 00E5; # LATIN CAPITAL LETTER A WITH RING ABOVE
+00C6; C; 00E6; # LATIN CAPITAL LETTER AE
+00C7; C; 00E7; # LATIN CAPITAL LETTER C WITH CEDILLA
+00C8; C; 00E8; # LATIN CAPITAL LETTER E WITH GRAVE
+00C9; C; 00E9; # LATIN CAPITAL LETTER E WITH ACUTE
+00CA; C; 00EA; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX
+00CB; C; 00EB; # LATIN CAPITAL LETTER E WITH DIAERESIS
+00CC; C; 00EC; # LATIN CAPITAL LETTER I WITH GRAVE
+00CD; C; 00ED; # LATIN CAPITAL LETTER I WITH ACUTE
+00CE; C; 00EE; # LATIN CAPITAL LETTER I WITH CIRCUMFLEX
+00CF; C; 00EF; # LATIN CAPITAL LETTER I WITH DIAERESIS
+00D0; C; 00F0; # LATIN CAPITAL LETTER ETH
+00D1; C; 00F1; # LATIN CAPITAL LETTER N WITH TILDE
+00D2; C; 00F2; # LATIN CAPITAL LETTER O WITH GRAVE
+00D3; C; 00F3; # LATIN CAPITAL LETTER O WITH ACUTE
+00D4; C; 00F4; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+00D5; C; 00F5; # LATIN CAPITAL LETTER O WITH TILDE
+00D6; C; 00F6; # LATIN CAPITAL LETTER O WITH DIAERESIS
+00D8; C; 00F8; # LATIN CAPITAL LETTER O WITH STROKE
+00D9; C; 00F9; # LATIN CAPITAL LETTER U WITH GRAVE
+00DA; C; 00FA; # LATIN CAPITAL LETTER U WITH ACUTE
+00DB; C; 00FB; # LATIN CAPITAL LETTER U WITH CIRCUMFLEX
+00DC; C; 00FC; # LATIN CAPITAL LETTER U WITH DIAERESIS
+00DD; C; 00FD; # LATIN CAPITAL LETTER Y WITH ACUTE
+00DE; C; 00FE; # LATIN CAPITAL LETTER THORN
+00DF; F; 0073 0073; # LATIN SMALL LETTER SHARP S
+---
+ ext/re/t/regop.t |   12 ++++++------
+ regcomp.c        |   17 +++++++++++------
+ regexec.c        |    9 ++-------
+ 3 files changed, 19 insertions(+), 19 deletions(-)
+
+diff --git a/ext/re/t/regop.t b/ext/re/t/regop.t
+index 9118bf6..46e6ec0 100644
+--- a/ext/re/t/regop.t
++++ b/ext/re/t/regop.t
+@@ -231,12 +231,12 @@ anchored "ABC" at 0
+ #Freeing REx: "(\\.COM|\\.EXE|\\.BAT|\\.CMD|\\.VBS|\\.VBE|\\.JS|\\.JSE|\\."......
+ %MATCHED%
+ floating ""$ at 3..4 (checking floating)
+-1:1[1] 3:2[1] 5:2[64] 45:83[1] 47:84[1] 48:85[0]
+-stclass EXACTF <.> minlen 3
+-Found floating substr ""$ at offset 30...
+-Does not contradict STCLASS...
+-Guessed: match at offset 26
+-Matching stclass EXACTF <.> against ".exe"
++#1:1[1] 3:2[1] 5:2[64] 45:83[1] 47:84[1] 48:85[0]
++#stclass EXACTF <.> minlen 3
++#Found floating substr ""$ at offset 30...
++#Does not contradict STCLASS...
++#Guessed: match at offset 26
++#Matching stclass EXACTF <.> against ".exe"
+ ---
+ #Compiling REx "[q]"
+ #size 12 nodes Got 100 bytes for offset annotations.
+diff --git a/regcomp.c b/regcomp.c
+index 6e9fa26..eb5f12f 100644
+--- a/regcomp.c
++++ b/regcomp.c
+@@ -2833,13 +2833,18 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
+                                 }
+                             } else {
+ /* 
+-    Currently we assume that the trie can handle unicode and ascii
+-    matches fold cased matches. If this proves true then the following
+-    define will prevent tries in this situation. 
+-    
+-    #define TRIE_TYPE_IS_SAFE (UTF || optype==EXACT)
+-*/
++    Currently we do not believe that the trie logic can
++    handle case insensitive matching properly when the
++    pattern is not unicode (thus forcing unicode semantics).
++
++    If/when this is fixed the following define can be swapped
++    in below to fully enable trie logic.
++
+ #define TRIE_TYPE_IS_SAFE 1
++
++*/
++#define TRIE_TYPE_IS_SAFE (UTF || optype==EXACT)
++
+                                 if ( last && TRIE_TYPE_IS_SAFE ) {
+                                     make_trie( pRExC_state, 
+                                             startbranch, first, cur, tail, count, 
+diff --git a/regexec.c b/regexec.c
+index 402ede3..ec09c28 100644
+--- a/regexec.c
++++ b/regexec.c
+@@ -1105,16 +1105,15 @@ Perl_re_intuit_start(pTHX_ REGEXP * const rx, SV *sv, char *strpos,
+ 
+ #define REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc, uscan, len,  \
+ uvc, charid, foldlen, foldbuf, uniflags) STMT_START {                       \
+-    UV uvc_unfolded = 0;						    \
+     switch (trie_type) {                                                    \
+     case trie_utf8_fold:                                                    \
+ 	if ( foldlen>0 ) {                                                  \
+-	    uvc_unfolded = uvc = utf8n_to_uvuni( uscan, UTF8_MAXLEN, &len, uniflags ); \
++	    uvc = utf8n_to_uvuni( uscan, UTF8_MAXLEN, &len, uniflags ); \
+ 	    foldlen -= len;                                                 \
+ 	    uscan += len;                                                   \
+ 	    len=0;                                                          \
+ 	} else {                                                            \
+-	    uvc_unfolded = uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN, &len, uniflags ); \
++	    uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN, &len, uniflags ); \
+ 	    uvc = to_uni_fold( uvc, foldbuf, &foldlen );                    \
+ 	    foldlen -= UNISKIP( uvc );                                      \
+ 	    uscan = foldbuf + UNISKIP( uvc );                               \
+@@ -1140,7 +1139,6 @@ uvc, charid, foldlen, foldbuf, uniflags) STMT_START {                       \
+ 	uvc = (UV)*uc;                                                      \
+ 	len = 1;                                                            \
+     }                                                                       \
+-									    \
+     if (uvc < 256) {                                                        \
+ 	charid = trie->charmap[ uvc ];                                      \
+     }                                                                       \
+@@ -1153,9 +1151,6 @@ uvc, charid, foldlen, foldbuf, uniflags) STMT_START {                       \
+ 		charid = (U16)SvIV(*svpp);                                  \
+ 	}                                                                   \
+     }                                                                       \
+-    if (!charid && trie_type == trie_utf8_fold && !UTF) {		    \
+-	charid = trie->charmap[uvc_unfolded];			    	    \
+-    }								    	    \
+ } STMT_END
+ 
+ #define REXEC_FBC_EXACTISH_CHECK(CoNd)                 \
+-- 
+1.6.5.2.74.g610f9.dirty
+




More information about the patches mailing list