Survey
* Your assessment is very important for improving the work of artificial intelligence, which forms the content of this project
Machine Learning Methods Applied to DNA Microarray
Data Can Improve the Diagnosis of Cancer
Eric Bair
Robert Tibshirani
Dept. of Statistics
Stanford University
Stanford, CA 94305-4065
Depts. of Health Research & Policy, and
Statistics
Stanford University
Stanford, CA 94305-4065
ebair@stanford.edu
tibs@stat.stanford.edu
ABSTRACT
!#"$%&%'"
% ')(
*+,
-'.%/
%*02130&$'4)5 %"67'.%%*8. 9' :+;<=%'=91>
/
&'.0?&@8%'"
0%ABB>+&C1*0
%D$EF(A &%
&&
(A G
H2%'.
'.
:2IKJMLONP0!6. ' QR4)%!G
'.%8S*
AS'.*%T
'.SUV&C1 '.'.%
W*0&"6 *K=AS'
3'
=XY& 97(
' Q)"65> T'.S &'.'Z[S*%*\S'.B] 9,
'.S!8(A &%)XY& ' :^#4 "6!Q6%&6. 2#<1%
*%*
'8P*0(
+$S8>&[S '2
G *0%"6&*%E[&4_A'.&"
/
%
' Q9%(#3H $"6 9%A3*7!%*%%(K9
*0'`
[
%*a4)&E11*0%P7'.S!V:)b]2 '.&%>2"6 *a19 &/
S*%*0 dc 'Z='.Se6&] 9.
'.fP3=3
'g'.S '.'Z/
,S*%*& & = *%%%*%*%8 *% "
9(A&&%)XY&& '%8
&
1% 9' :h0'19 S
'a`1>
90
*9#> Ai#1>4j/
!.[S*k9A*Y[K(AA'.%(73F. %( !:
Keywords
NP%&A. ' Q>'.Se6&+ &$.
0' Q3&*'.'.l30
1.
OVERVIEW
b\ TE13% 970'70
(AA'.B4)%T
&Qj"
%AS' *%%%
*
1&&'U]S'. WB'.'. '.'5V%'.eW
8&
'Z'.0'5
3
+%F)13% 9:h^M4 "A&Q '.1%MS&AS'
"
& '
%ml3 *0kQaAS8%*%%<]U&&%8%'.e])
0%<
%'-&C$. *F*%0Y:jRS
'j3#
11> #0%'Z%(
S%'.3*%
S3&M8%&
'. A1>87P3"68'Z0 *%*]XY& 9M!XY &'
PM13% 9:
;n5
'P*%
(@> oe$4)\35 !5%'UD(A &%]%'.
'.A:
$S' QY%K0'M A
*P>&*0% "A53# '.8XY&& 'M0U
&*0%0 *jAS A7M !D>+&C1*%m9]XY& '
%WG(A&&%]1
l*0G
=VS:OpM,
.S3&*%9Q#S9%*
& 9*9Q3ASK%*%57%&&*5A'.&"6g(
&%=7
e6&S1
H8S4j'#!C.& *7*0%Y:
%'H%'h3
(A%(Q94i "6&Q4)2j
"6&$h>IKJMLB%&A./
' :5NP0!6. '2 m'.%2S*
&AS'.*E
'.S!C1 'Z/
'.%
F*% "A *%'i
>AS'
3'`k(A 'i%7
(6%'.5:H$S' Q6&
"6#K
%*0<F8! &)%XY! &'j>&4i 7S
'
*% S*0g*0&"6 *q:0'g%'%*%*0S'Z.E0Era%(ASPsA:2pM3&K
%&
'. A1>
Q94iM1> 'hY*%1A7#
11>h>j0 9/
*q:H^M4 "A&Q(A &C1 '.'.%A+1l3*%%(8&"6
*%'j-4i
S
j<1>&')g
!S3
*%*P%'Z%&#)KA*% S*0-*% "6 *q:
%*0<20 9,7'.S!F'.S(
S1'i3
'j%1>.
9j%1*%/
SIGKDD Explorations.
ra%(ASKsAtaIMJKL\NP%&A. '` 0&$2XY& & 'h>&4i S'3PV
P& !
*%GS&+m%&A'.&A1>A:up#'Z/
%(# A$"6 9%A
*9%&
'. A10
3*'.%' Q
H*%1A7& *%*0'h%
(
S1'hLT
8v]
11> hM>` 9%
*q:`NP%&6. 'H3
*'.%'
'.4)'g32XY& 9=(A '2F
&%"6F
]%3&%"6%] '.
4iU(
S1' Qa0%0(53=&V 1&'. 92%'Z0&8%'.'.
'.S1> ' :
0
'-[
#=0(A
'.%'
P. 9-
H !:#wS11>A'.
A7'.S<1>
) &8%'2*%%e6 *]P&
'Z'.0?&F4)&
'2
/
&K'.S<1>%'#
:M=13% 9'-4)F"6=7%(AU%'.e5
&'Z
'.%'24
S*0D ]G>7.@
(A( '.'.%"6 *9QH4)&'
8
&g130&$'M AS*G>2(
0"A ]F*% '.'g0$"
'.%"6=. 9
x 8+. 9=*0*y!:U;n!F%'=P4 ]U0'Z%(AS%'.
>&<4 7&'.M'.S1> ' Q$*%*k1% 9'i4iAS*07 =>M(
%"6 7(A( '.'.%"6+. 9:P^#4 "6!Qa%'=0'=%(
*]S3 '../
*%AQ$> S'.M&S. 9H. 9'h[j
&Q$'.S!F
'i'.S(A!.
R! &
199Q"6h&C$. *M'.&"6&H'.h&XY &' : x ;<Mq&Q
'.Ai
&H130&$'h"Aj%8
'H- '.S*`> &1$9: y
;<4j S*02'.S& '.'Z[S*%*%20 9,gj1% 9'4)M%(
8%'.e
Y!
'Z
'.%'H
2kQA4j&AS*02(
%"6 zj11
10
. 9a4)%*%'.13%(K&h1% 9'R,
d`C0
S''.0
&XY !'))'.S!P. 94iAS*0P&$%*{:
%'j%'j '.'. 90
*%*+8 *0'.'.%l%
U1A*% 5:H|M%"6 +8$S8>&
[S ' x (
!C1 '.'.%A*% "A *%'y!Q
4i4)0'.=M1%&4)0
1>#
!#%'j1 '. 9)%581% 9:`N5
9F7
%M*%/
%(#19 S 'R3"6H> 8 "6&*0
1>[
%'R1>H
31A*0&5:
x w&AQ,
-&C
1*%AQR} ~3n: y
pM[.S3 *9Qh '.+&C0'Z%(]7
%+*%%(G19 S&'
j>K &*F
11*%%F=%&6.F$:HK$S8>&
#[S '7%'&C$. *D*0(AU 13 @V+S2>&2
A'.&"
%A' QRS'.%(+A'ZM7
%=*%0(P19 S 'M
q%*{:MNP "6&QYM%'#01>.
9)0 9,+F0 9%P4)0
Volume 5, Issue 2 - Page 48
(
&'=2>&'ZK1 %&
'M
HS
#1>A:MpM'.%(8&C$/
1 '.'.%AG*% "6 *`AS'3'K
iXY& 9M(A 'M77
e687/
(AA'.%'8%'g1&%
*H4)]&C%'Z%(+&!A*%
(
x 3V7 "6&#>['.%*%y!:#;<h4gG0 9%U'.7
*%*'.S'.!M`1&/
%&%"6(A ' Qa4i AS*ES'.&'.(A '=+
%'.F
9%>$% '
'.S
*%)[%2SA'Z%%(:L#*&%"6 *9Q9`7 2>)1>A'.'./
*%2P "6 *%
1E-JKL)/<
'.V(AA'Z% 'Z'KS'.0(PHi/<i
ig:
v`
PR '.M 9S ' Q$4&"6&Q
+A*%F>M
11*%0 F8
&*%"6 *]'.7*%*`$S8>&Kj(A& ' :+$S' QR%g%'g%1>
.$
0 9%74)%!P(A '-& '.'.71>!.[
'.S!P8 *0'.'.%/
l%
:
2.
PAM: A TOOL FOR CLASSIFYING TUMORS BASED ON MICROARRAY DATA
- &$. 1'=G *0
'.'.BS
'=S'.%(G%&6. B
"62S'.E'Z0'Z%*i&$'8} $H$h`
E.%l 0
*` S
*
&4i
e$'} {:=bm8 '.!0>
G
*&%"6&$53M1>&./
,
'-4 *%*a
V4)08"
%&<5
H1A*% ' :);n-0'M
*%'.F
'Z5
S3&'Z
P
+%9&1!:
YLMN0'g
'.GAD7 !09Se$4)E
'Vc< 'Z='.Se6 & 9.A0' : fubm2%*%*%S'Z.S0*%Gi%'&$G9G1/
1*%(5UP%&6. mP'.&g)'.7
*%*hASE*0S *%*
S
' x w$Mvj
)y2%*09$z} {:VE'.&F '.%'Z'
i '.S& 9'#H8(
=&C1&'.'.0
E*% "A *a
A7(A '
A
+13% 9' :r
SgXY& 9K1> 'M
iS
'-4&2&1/
&'. 9kt)v`Se$.#*1A7 x vay!QY4)%(7'&A7 x hbowy!Q
S
*0
'Z
7 x JKvy!Qa3G
YA86A'&A7 x KNUwy!:a
S
')%"$0 '.K13% 9'%$8Ag.
%%(2'. ')
P
&'Z
'.&' :pM'.%(-&S
*9&4i
e$' Q&M *0
'.'.l3gh 'Z
/
'.!"A0
'j4)]s
6 S&9:` -$&*kS'.P
2(A& ' :
%'.
9&a*q:j}sAl'ZR
3*?&K%'Rj'.&RS'.%()j ./
&'Zg 9.A0V *0
'.'.l3&: x rgP '.&%1%AG
`%'M 0$SAQ
'.&} {: yLo 'Z) &$.
0+ *0
'.'.l&-
*% S* 'jM%'Z
>!4& ]7(A%"6&V&'Z'
1*0 x 13% 9!y)7= *0
'.' 9.
!E
`8,ASg *0
'.'. ' :2 'ZK'
1*%80'&*'.'.l3E
*0
'.')[-4)%!P%'#%'Z
&0'jK'.7
*%*% 'Z:
M *0'.' 9.A0'
kMw#vj
T=#'.4)70Fra%(AS
x (53'y!:-bW V 'ZK &$.
0V&*'.'.l3&%'11*0%
U0'8Qh87
e6 '8P
*
)l"6&.
'8AmP5 'Z
'1*% ' :g%'M '.S*'.4)'M3 'ZK& 9.A0G&*'.'.l3&'
G>8'.S '.'Z,S*%*]11*0%5F%&6.U$:;<M3'M'. "9/
!
*A"A$(A 'R A1#!C%'Z%(-&$',
h *0'.'.%0(
%&A. ]$:F;]1.% S*Q=
m>2
'.%*D11*%%E
1A*0&'4)%P
K3P
*0
'.' :
^#4 "6!Q%' 09SP'Z0*%*-3'F'.&"6&
*# 4)
e' :z
&!09Sj3]&8
*q:G} `7
e6&'? &P 'Z=&.'8
E
'V]'.&:dNP
"6&Q)&'ZP &$.
0'9S '3
*%*KAAE(A '>5S'. @,
P *0
'.'.l3%A:\;<4iAS*0B>
'.
*0P "A *%A1mF *0
'.'.l3!84)](!2 S
&V3
S'. 'j[&4i&#(A ' :
3.
DESCRIPTION OF NEAREST SHRUNKEN CENTROIDS
F"6! Ag'.
. 0(A'#
h= 'ZK& 9.A05 *0
'Z/
'.l&Qa%'.
H!g
*q:+}%s H1
1>A'.EF$l30
Ei
&'Z 9.
*%(A
_e$4)'2c< 'Z'.SeA 2 &/
.
0' : f80F>&%3U 'Z'.Se6 U 9.
'M%'M
,A*%*%4)0(tHbm-
*% S*0K!7 *0'.'j 9.A07
'i4i#4iAS*0%F
&'Z#& 9.A0P *0
'.'.l&: P4ig%"$K!5& 9.A0P9
`4)%8 *0
'.'Z/n'Z
8 "$0%
=,
h!8(A A:0'(
%"6 '
(!4 %(A9-(A 'R4)A'.&C1 '.'.%A=0''Z*%A(#1/
SIGKDD Explorations.
% 9'i%-'
- *0
'.' :`&74iM11*%'.
,i '.A*0%(
8 '.S*%(+
7*%0?&E *0'.'=& 9.A0' :;ni7
*%%? 9.
@%''.7
*%*qQ7%''.&E? & x 3@ 5%'Z (6
[j- 7
%3&jR#
*% S*0%A3y!:ivH7'.
%(Q4#A$"$/
AS'.*PS KS2>&
H(A ')3#=S'.P%Pgl3*
1 %&%"6$&*{:`MM4
S*05
1>M3)%'j4AS*0P%1"6
7 S
&]
)$ *i'=4i *%*qQ`'.% 44
S*0] "6
. *% "
9-(A ' :
wS11>
'.2&7T1% 9'g
Pm(A& ' :Fbm24)%*%*i*%&=>
&
UU&C1 '.'.%AoVq\(A V
g+
\1% 9:
L#*%'.Q6'.S11>A'.&j) *0'.'. ' :H&i¡j¢- i%3% &'
)+¢P'
1*% '2%@ *0
'.'2£Y:] m7 A1>
98
)
9.
@ . '.1>A%(GGPq@(A 5%BP£9¤&*'.'%'
(A%"6&59
$$ \bar{x}_{ik} = \sum_{j \in C_k} \frac{x_{ij}}{n_k} \tag{1} $$
8i"A&
*%*& 9.A02 . '.1>A%(KMjq(A j%2
£9P *0
'.'-%'
$$ \bar{x}_{i} = \sum_{j=1}^{n} \frac{x_{ij}}{n} \tag{2} $$
!
$$ d_{ik} = \frac{\bar{x}_{ik} - \bar{x}_{i}}{m_k \, (s_i + s_0)} \tag{3} $$
4)& ´ 0'K21>9
*0 G4)%/n *0'.'='Z3G "$0%AG[
(A&.t
$$ s_i^2 = \frac{1}{n - K} \sum_{k=1}^{K} \sum_{j \in C_k} (x_{ij} - \bar{x}_{ik})^2 \tag{4} $$
$$ m_k = \sqrt{\frac{1}{n_k} - \frac{1}{n}} \tag{5} $$
x 89S3$< ´ ¶ %U8&A%
M%'K71>A'.%"68&A'Z9
% *%SEU1 "6 9g1>
'.'.%%*%%<m385(A 74)@U*%4
&C1&'.'.0
P*% "6&*k&AS*0F1$S =*(A ° ¢ 973
A:h;nj3'
`'
`"A*0S`,
H
*%*$(A& ' :HMi1>
'.'.0%*0<8%'a-*%& ´¶ 9S3*
M0
+
´ .¼ ' : y
H$S%A x 6y
+>K4j. U
'
$$ \bar{x}_{ik} = \bar{x}_{i} + m_k \, (s_i + s_0) \, d_{ik} \tag{6} $$
JM4Q4g11*+'.
./n '.
*0%(28 '. &$.
0' :`&
$$ d'_{ik} = \operatorname{sign}(d_{ik}) \, (|d_{ik}| - \Delta)_+ \tag{7} $$
where
$$ t_+ = \begin{cases} t & \text{if } t > 0 \\ 0 & \text{otherwise} \end{cases} \tag{8} $$
bmg9
'.8g
1%7
*a"
*%S=i¿Å9+&A'.'Z/n"
*%0%
: x vH
!q
S*Q
iYLKNu'.
4ji 'Z'RA)1>A'.'.%*%"
*%S&'R
3¿o(/
%(-,Ad-)i"
*%Si
`*(A 'ZH &$.
0=%g-'.&:
)
1%7
*3¿0'`!A'. >)"A*0S[i4)%j&
'.'Z/
"
*%0%
D0'.&*'.'.l30
D&.
=%'=%%%? k: yT 4g&l3MPc'.Se6&P 9.
'.f2>
$$ \bar{x}'_{ik} = \bar{x}_{i} + m_k \, (s_i + s_0) \, d'_{ik} \tag{9} $$
JM23g ° ½ ¢ ¦ +,
8
*%*`£V,
8P(
0"A ].QR m*%*`
'.Se6&+ &$.
0')?&&Q
+(A&KH9&'-
) 9.0S
2Ml3
*k *0
'.'.l3%A:
Volume 5, Issue 2 - Page 49
BL
NB
RMS
Gene
0
500
1000
1500
2000
EWS
-0.5
0.0
0.5
-0.5
0.0
0.5
-0.5
0.0
0.5
-0.5
0.0
0.5
Average Expression
ra%(
S2$t-
i 9.
' x (y)
5'.Se6 5& 9.A0' x 3y[M8w$Mvj
'.&:-="6&
*%*a& 9.A053'M> U'.S.&
AM 9.A0P
5 *0'.' :
%? 9
*RS')*%
(%A'-
!C1 '.'.%AkQ>
FK
&-
M(A ')0'-..9:
J#4z'.S11>
'.K3j4i3"6gGc< 'Z#130&$f=4)5&C1 '.'.%
*% "A *%'=YÇ ¦ x kÇ¯È k·Ç ÈÉ&É ÉÈ kÊ Ç y!:bm24)%'.]+ *0
'.'.,]kÇ8+
&*'.'-4)A'.Vc'.Se6 F 9.A0f%') 'Z) Ç :`k&
$$ \delta_k(x^*) = \sum_{i=1}^{p} \frac{(x^*_i - \bar{x}'_{ik})^2}{(s_i + s_0)^2} - 2 \log \pi_k \tag{10} $$
x ^M!AQ6ÌY¢#&1 '. 9'j1%
`1A
0*%2
Y *0'.'£>Q63`%' Q
`1A1>.0
g
*0
'.'£K%h1>A1S*0%
:a;nÌY¢0'RSe$4)kQ
#
V> 'Z%7P,
Í8$Q>
-4gU*0!KÌY¢ ¦ sÎ
,
M
*%*£Y: y&PK&*'.'.l30
US*%K%'K¡ x Ç y ¦TÏ 4)&
$$ \delta_\ell(x^*) = \min_k \delta_k(x^*) \tag{11} $$
;nh4g4)0'.U7 'Z%7gg1A3%*%5# Ç > *%
(A'#7
(
%"6 5 *0'.' Q47 +8'.%+K,A*%*%4)0(77
&t
$$ \hat{p}_k(x^*) = \frac{\exp\left(-\delta_k(x^*)/2\right)}{\sum_{\ell=1}^{K} \exp\left(-\delta_\ell(x^*)/2\right)} \tag{12} $$
x %'=%'=
*%A(
AS'=P2¸ 19 SS'.VP 'Z%7 *0
'.'
1A
%*%% '`%|K
S'.'.0*%0`0'.!0%
9h
*'.%' 9'. }
,
M&%*0' : y
%'.&%%39g'. 'g% x 6yK'.%%*0FA'.2S'.G0
*%% U%'.&%0
9P
3*'.%' x I#LKy!:jkIMLÔS'. 'FGN5
/
*0A%'&.%j A1S-0'Z ->&<4 FK(
0"A 'Z
'.&"
%
U
+K&*'.'#& 9.A0' x %5"6 !
)%A3y!t
$$ \delta_k^{\mathrm{LDA}}(x^*) = (x^* - \bar{x}_k)^T W^{-1} (x^* - \bar{x}_k) - 2 \log \pi_k \tag{13} $$
^#&8ÙÛ 1&'. 9')g1>9A*%P4)%/n *0'.'M"
0
Ü&"
/
g7.%CF&C1&'.'.0
U:I#Lz3')> 5'.S '.'Z/
,S*%*¤
11*%0 @EG4)0+"A%&@K1 %&%AB1
*% 'P} n:
SIGKDD Explorations.
^M4i "6&QAI#LB H> &*8
11*%0 8M(
!C1 '.'.%A
Q9'.% -j$S2>!h
k1%&
' x (A 'yh%'H2S!(!
3G2$S8>&K'1*% ' x 1% 9'y!:L-'gF&A'. $S &AQ
i7.C8ÙÝ%'&C$. *g*0(
A:h$S' Q6
9g'
1*% 'Z%7
`ÙÞ4)%*0*>'.%(AS*0Q>3F%')%$"6&'.K4)%*%*R>S3!l3 k:
JM 'ZH'.Se6&=& 9.A0'%'$S'a'.%%*0hKkIMLgQ4)8'. "9/
&*e6&V%XY! &' :;<K
'.'.S&'#3M8 "
0
27.C
Ùß%'50(AA
*q:àL#'F
\*%%&QK+4AS*0o>V%1>A'.'.%*0
1>&.,
Å& '.'.P
*% S*0%A'M4)AS#%'M'.'.S1/
%Ak:GL#*%'.QHkIMLS'. '=4_ *0
'.'2 9.A0' Q4)&'84
S'.2'.Se6 G 9.A0' :7L#G%1>
.
9g '.9S 2
i%'
q&=%'K3M&24)%*%*H>'.A8(A 'K[=4)%! ° ½ ¢ ¦ 7[
*%*R£Y:`w$SP(A& 'j4)%*%*R->KS'.+0F *0'.'.%l%
:
4. RESULTS ON THE SRBCT DATA
%'19 S84
'=
11*%%VFw#vj
»FK} {'. }s[h 1*%& '.S*' :hj"
*%Si
k¿%H9S30
x 6y
4j'KA'.&E9U11*%%( /{[A*0U&
'.'Z/<"
*%0%A:=v`
U
&
'.'Z/<"
*%0%AB&.
3]&'Z&.24!P%%%? E4) ¿ ¦ ~ É ~:-g&.M&S"6 '#g'.4)U%Vra%(AS=:)g!/
'.S*%(g'.SeA 9.
'#'.4)7%Fra%(ASM x 3'y!:
%'=$ *`1$S E? &U&
'.'Z/<"
*%0%A@&.
'2
]? &
'Z2&.
' :E;<=9Sm~AG(
' :E$S' Qh,
2%'U'.&Q
'ZR'.Se6 K& 9.A0'k1$S '
& S`1%&%A'RS'Z/
%(2&*%"6 *+,&4z(A ' :
ra%(AS8~F'.4)'#g~67(A '#M4i&=S'.P7 *0
'.'.mw$-/
vj
'A(
&!4)%=H"A*%S&%a'.Se6 g 9.
'a[
!UHK[
SK&*'.'. ' :-JMg)g(
')4)VA/n? &
1>A&$'j%U8(A%"6 + *0
'.'#g
*%
'Z#8SS
*%*7&C *%S'.%"6A:
ra%(ASF+'.4)'g 'Z%7E1A3%*%0&'8)> *%A(
%(P
Volume 5, Issue 2 - Page 50
ä
1
5
8
10
15
22
34
52
Size
81
133
206
339
598
1020
1668
2188
2308
ã
0.8
æ
ä
te te
tr
0.6
ä
æ
Error
0.4
æ
ä
å
te
0.2
te
æ
ä
å
cv æ
0.0
0
te te
æ
æ
cv
te
cv
te
te
te åcv ætr
tr
æ
tr
te te te te te te te
cv
cv
æ
æ
æ
å
tr
te te
tr
tr cv tr cv tr cv tr cv
å
â
2
á
4
6
Amount of Shrinkage Delta
ra%(
S$tg8&.
g S"6 ' x .%%(tM.!Ü( kQa&
'.'Z/<"
*%0%AtK "ÜYQR3U 'ZtMÜ*%Sy# '.S*%(F,
ç
11*0(+&'Z
'.Se6 U 9.
'#2w#vj
z$:8"
*%S2¿ ¦ ~ É ~+%0%? &'-8&
'.'Z/n"A*00%AG&.
MA:M;<#1$S 'MF'.&M
~A(A ' :
SIGKDD Explorations.
Volume 5, Issue 2 - Page 51
BL EWS NB RMS
813841
859359
207274
296448
898219
784224
796258
244618
789253
298062
461425
1409509
42558
769716
25725
44563
325182
812105
41591
810057
52076
866702
814260
43733
357031
1435862
770394
377461
1473131
295985
241412
80109
183337
233721
897788
563673
504791
212542
365826
204545
308163
21652
486110
tissue plasminogen activator
quinone oxidoreductase homolog
insulin-like growth factor 2
insulin-like growth factor 2 (somatomedin A)
homolog of mouse mesoderm specific transcript
fibroblast growth factor receptor 4
sarcoglycan alpha (dystrophin-associated glycoprotein)
EST
presenilin 2 (Alzheimer disease 4)
troponin T2, cardiac muscle isoforms
myosin MYL4
troponin T1, slow skeletal muscle isoforms
L-arginine:glycine amidinotransferase
neurofibromin 2 (mutated in neurofibromatosis type 2)
farnesyl-diphosphate farnesyltransferase 1
growth associated protein 43 (GAP43)
N-cadherin (neuronal)
ALL1-fused gene from chromosome 1q
meningioma 1 (disrupted in balanced translocation)
cold shock domain protein A
neuroblastoma protein (NOE1)
Fas-associated protein tyrosine phosphatase 1
follicular lymphoma variant translocation protein 1
glycogenin 2
tumor necrosis factor alpha-induced protein 6
MIC2 surface antigen (CD99)
IgG Fc fragment receptor transporter, alpha chain
caveolin 1 (caveolae protein)
transducin-like enhancer of split 2
EST
E74-like factor 1 (ets domain transcription factor)
major histocompatibility complex, class II, DQ alpha 1
major histocompatibility complex, class II, DM alpha
insulin-like growth factor binding protein 2
receptor type protein tyrosine phosphatase F
antiquitin 1
glutathione S-transferase A4
cDNA DKFZp586J2118
growth arrest-specific protein 1
EST
EST
alpha 1 catenin (cadherin-associated protein)
profilin 2
ra%(
S=~t)="
*%S 'M ° ½ ¢ [M~6(A '#,
#4)%!VK*%'ZKA ° ½ ¢ %'MA? &[M=w$Mv
:)J#
3#=(
&'
4)5A? &'.SeA +& 9. A0')%5!5&*'.'Mg
*%A'Z-8SS3*%*%7!C&*0S'.%"6A:
SIGKDD Explorations.
Volume 5, Issue 2 - Page 52
*0
'.'h[i!213% 9:hr
hA'ZH1% 9' QA 'Z%7
1A
%*%]
)> *%A(A%(U5.S *0
'.'=4j'8'.%(Al3$*
(!=
U8 'Z%7V1
3%*%%<G
>&*0
(
0(++
9
&K *0
'.' :;nK%'g
*%'.+09&&'Z0(+F&C
%g8 'Z%7
1A
%*%% '5
gl"65 'Z+'
1*% '73+4i&G
5w#vj
' :
x '.Ml"6M'1*% ')M7e64)5= *%A7#(
1: y
J#
j3`) 'Z%721A
%*%% 'i[i '.jl3"6j'. '
'.%(
l39*%]*%4i&=3
E 'Z%7 ]1
3
%*0% 'g[8
.S=w#vj
' :
5.
DIAGNOSIS OF CANCERS WHERE NO
SUBTYPES ARE KNOWN TO EXIST
b]3"6'. &]38&'Z2'.SeA ] &$.
0'=3
'1>
/
&$0*`5>+1>4i&.[S*`9
*`[0
(AA'.%(U &&:7bW '.&"6&
*R
&K'.S<1>&'#ge4)5&C0'ZQ3%#US'.g(A !C1 '.'.%AU%,
70
P0'Z%(AS%'.5>!4& P'.S<1>&'
S'.%(2'.7*%*'.&-h(A ' :
^#4 "6!QA 'Za'.Se6 &$.
0'%')'.S1>&"$%'.g*0 %(
19 SA:;nH
*=>)
11*%%2%'. 'H4)&j'.S1> '
&&=*A$Ee$4)VP!C%'Z:8pM[
.S3 *9Q+'.S!
'.S<1>&'#"A=>& V 9l3P,
7$F1> '-
` &:#;
'.S!W '. ' QM 'Z5'.SeA T &$.
0'P +>]11*0%
S*% '.')'.A1S0"AK'.S1> '5>0 9l3 k:
N59U1> 'M !g8'.S'.1> & VF>2
*0& S*0*V&./
!A(A& S' :ùrV0'Z
AQ=XYS'.m*0(A@vH/<& *%*8*1A7
x IKRvj
Ry20'P
'Z A
m[ú
K*1A7UA(
S*%' :ÔL-11C%7 *W~A6¨
IKRvj
O13% 9'P&'.1>A3
V &19m3] "6!:G7 7
%3&=4)0*%*jS'.S3
*%*
'.S S257%'.
'.A:F} HsAs!H;<K%'> *%% "6 VM%'%'Z/
! 13&D%'2F&'.S*
#"
0%A¤
(VIvj
'2
*% S*0#*% "A *q:
vHV%'.&*%<QR&'Z'.Se6 U& 9.A0'
K>8S'.5+/
(AA'.F+1% 94)]Ivj
H:;<`4+
g'.S1> 'K
IKRvj
z4i&5e$4)B]&C%'ZF
¤A5'.S1>+4i&5
(A( '.'.%"6235=
&' Q>4= AS*0Ul 'ZM'.Se6 & 9.A02 *0
'.'.l&`K&&%i4)%2'.S1>%'h1 '.&$h%2
(
%"6 P1% 9:h^M4i "6&Q='.SF'.S<1> '3"6#> F *%*
0 9%lk:
J# "6&. *% '.' QYh4=S'.g 'ZK'.Se6 5 &$.
0'#
(A&&
4)5S'.S1>&"$0'. P*%0(&$' Q%')1A*% > '
.&
*%A:8r
!C1*%AQh}s&`3
*? G70!6. G7'.&
&A'.%'Z%(F
h8&C1 '.'.%AV*% "6 *%'M`A6~+(
&'#[
FIY/
v
T130&$' :pM'.%(+%&%*i *%S'Z&%(m} {Q&V0&/
lG<451S%"62'.S(AS1'g
jIKRvj
hQ4)0]&c.|
vH/<*%%e6)Ivj
f3]c
&%"
vH/<*%%e6jIvj
H: fg&2
F13% 9'74)o|
v`/n*%%e6UIvj
3@m*%%"6U*%A(
&
51% 9'4)U
!0"
5v`/n*%0eAIRvj
h:
L-*AS(AP%')&'.S*-0')%9.%(AS%(Q3-3')*0%FS%*%%<5'-
0
(AA'Z%79A*q:5^#0!%
*` *%S'Z&%( x 2
9G
!8<1>
M&*0S'Z&%(6y8
BA*D>P11*0%DGG*0(A5(AS1@M1/
% 9' :Frg%'K
'.
Q&*0S'Z&%(59G'. *j=>S'.
&A'Z.S&i0
(
A'Z%)9A*q:H;<kK1% 9H0'i0
(
'.4)%
IKRvj
hQ`&*0%0&28S'Z>
*%`-1>&.[d-0
(
A'.%'hA
#%3%"$0S
*k13% 9:i^M#4
KS90**0(
8(AS1
a&)1% 9'jK0
(
A'. F4)+IKRvj
G'.83j#
11*%+ *%S'Z!0(22K 9(
AS1:
J#&'Z'.Se6& 9.A0'iF"6! Aj%'iû S*%<9:L),/
!21S%"6S
='.S1> '"AF>& m0 9l3 kQ
7
11*%o&'ZU'.Se6 W& 9.A0'P¤.&1P¤0(A
'.
4)%!'.S<1>i0'h1 '. 9H0
%3%"0S3
*$1% 9:abm`4AS*0
A1>2K2'.S"%"
*a% 'K8130&$'KXY&g>&<4 1%&+'.S<1>&' :
SIGKDD Explorations.
bm 'Z P%'#0A5gIvj
@
-}s&{:#&g4&
A713% 9' QY
H4)%!E9s=4i&2&*'.'.l3G
'K"%(+&%"A
v`/n*%%e67IRvj
W3¤s+4&F *0
'.'.l@
'=3"$0(V|
uv`/n*0%e6
Ivj
H:b]H3
*g%"$0M '.
-1% 9'R%9Mj.
%/
%('.!#s813% 9'-
P8&'Z)'.&-
s 13% 9' :b]Ml-
'.Se6& 9.A07 *0
'.'.l3&jggs.
%0(81% 9' :H
&
'.'Z/<"
*%0%A@&.
84j'2%%%? V4) B¿ ¦ s É A$:
%'+A1%7*#$ *MS'.W6](
' :u;nF1$S Wm&
'.'Z/
"
*%0%
U&.
'#
U8 'Z#&.
' :-NPg01>.
9*9Q4j/
"6!Q9130&$'H%j1%&7|
@v`/n*%%e6)IRvj
5&*'.'*%%"6
'.%(A%l
9*+*0
(
&)3
+130&$')%+K1 %&5&%"A
v`/n*%%e6KIvj
] *0
'.' x '. ra%(
Sg6y!:`$S' Q 'Z-'.SeA 9.
'+ \>VS'. ¤m&*01\0
(
A'.G &+ "6&T=
&-'.S1> ')e$4)+!C%'Z:
6. CONCLUSIONS
IKJMLü0!6. '5"AVE1> 90
*KD "6
*0S%A%? G
4j U4i0(A
'.
5. &&:;V&K7[S*%*%VS%*0%? &%g1> 90
*qQa4i "6&Q48S'Zg "6 *%
1G9
*0'g53*%? &5:hJM 'ZM'.Se6 F 9.A0'-%'-21>4&.,S*9
*,
#&C$/
.
!0(S'.&,S*k%[7%A5AÅ%&6. 5:iv`+0 /
,%(g#(A 'i3)# & '.'.78XY& 90#>&4i XY&&$<1> 'g
) &Qa%=m&*01ES'Pl3E 30&'
[h
%'.%($%>$0&'a[
H%2S
'Z
%%(:NP "6&QAAS!/
'.S*'M%'.8=1>A'.'.%0*%Gi& %(+0
(
'Z08 'Z'M
'.
AU-JKLz&C1 '.'.%AU*% "A *%' Q>1>&31'-S'.%(Hi/<`
i=:3ra0
*%*9Q
47"A'.4)m3=7&$A*%A(]%'='Z0*%*S'.&[S*`,
&'Z/
%7%(+'.S"$%"
*h
) !21% 9'4) E5'.S1> '
3"6j>& 0 9l3 k:hNPM S-0
(
'. 'i&*01&*0%/
% 0
'8V(A%"67!@13% 98+11A10F&
199Qa4)0
4)%*%*H%&
'.823 &'M3K=130&$M4)0*%*h'.S"$%"6AQ3
&*01=K'.131% 9,
»'.0j!XY &'
YS '.'.
. 9' :
7. ACKNOWLEDGMENTS
05v
74j'[SD9B
BJwr»|#AS3P- '.\r */
*%4)'.%1:H-A>&.0'.
>4j
'j1.0
*%*F'.S11>
.9JM;<^
(
9gM$s2
LKA6QR
3UJKwr@(
9MIgNUw$/<AA$s&~669:=w&"$/
&*a
hKl(AS 'M=&1$S +AÔ
U*%0!.% *%8$
-A>&.-%'.
k3-4
')1S*%%'.F%UHJMLKwV}%s {:
8. REFERENCES
}s!=L#'.uL=:L#*%%?A QFNP%3 *7v#:gh%'. Qg:g%¤I"$%' Q
iN5$Q-;? 0GwY:A'.'.
' QKL-3$
'7-A'. 94j*0kQ#ýA /
[&]
#:KviA*0%e>Q8^Mþ &mw>&Q=kSDQ=ÿM%iSkQ
ýAA;:)a4 *%*qQk%
( (QK|M!
*0zj:#N5.qQK
NP9
AQý6
&'^MS'.AkQ$ý
:%Q%'.&PkSQ$I"$0Fv-:&4)%' Q
#
>&.D%'.
qQP|K"$0_w$&*%9eYQ2bW%(
#:8
i3
kQ
09
#:
|# %&QIK %'I2:!bm %'. $S(A!QAýA
'a8:
L-
(
AQ#
(A&bDe6AQ-A
*0g "99Q bT
\bW%*/
'.AQNP%3 *>=:$|# "6&QýA
P
#:$v`$kQ$IK"$0vi'Z %Q
.%!e+8:v`4)kQ33FAS%'-NE:>w9
S3$Q
!#"%$'&(!)+*,- %./0 .1"2 3- )4&(56879*
$
:$ 3 (&
QJMS;<5 = x
A 6y!Q>
? >9sAs
:
} 8NP03 *>v#:6H%'. kQ$
S*>M:$w$1> *%*%7
Q6.%e8:Av`4)Q
3I"$vi
'Z %kQA@B C
,$D"E"9 E
F"E G HI#"E &( .1*IJB K87$
G"E,$ Qh9 &%(
'E
JK%
3
*3L#
=2w$ % &'LM x s
A6y!QYs&~6A
?> s ~6
A$:
} =M: g:|MA*%SkQPI2:8:2w*%A%5Q5a:8a
7 9QU
#:8^MSYQ
NE:`|KA
'.&>& e>Qiý:H:hNP '.">Qi^8:h
i
*%*0!Q)NE:HH:AkQ
Volume 5, Issue 2 - Page 53
Training Data
Probability
1.0
BL
•••••••••
••••••
•
•
0.8
EWS
••• ••••• •••
• •
•
NB
••
•
0.0
••
•
0.4
0.2
•
•
0.6
••••
RMS
•••• • •••••• •••
•
•
•
•
• •
•
•
• •••
•
•
•
• •
• • • • • •• • • •• •
•
•• •
•
•
•
•
•
•
•
•
•
•
•
•
•
•
•
•
•• • •• • • • • • • • • • • •• •• •• • • • • •• • • • • • • • • • • • • • • • • • •• •• • •• •• •• • • •• • • • • • • • • • •• ••
0
10
20
30
40
50
60
Sample
Test Data
BL
Probability
1.0
•
EWS
• • •
• •
NB
•
0.8
0.6
•
O
0.4
0.2
0.0
•
•
O
•
•
RMS
• • •
•
•
O
•
•
• •
•
O
•
•
O
••
•
•
•
••
•
•
•
• • • • ••
•
•
•
•
•
••
•
•• •• •
•
• •
•
• •
•
•
• • • • •
• •
•
• • • • • • • •
5
10
15
20
25
Sample
ra%(
S+$t8h'Z07E&*'.'81A
%*%% '=,
=7w$Mv
O$:5w
1*0&'g713.%%
]9U.S *0
'.' x S11>!!yK
3G1%&
&*'.' x *04i&!y!:HL#*%*>
#Ag.
%%(='
1*% 'j# *0
'.'.l3 + . &*9Q
')-M
g 'Zj'
1*% 'i3jMe$4)78>Mw#vj
' :
ra%"6-
Y 'Z'1*% '`#
&S
*%*jw$Mvj
' 9e684)7 *%A:hJ#
j3` 'Z%7 *0
'.'i1A
%*%% '
g'.0(
l3 9*P*%4&)
+1
3
%*0% '-
a
&j 'Z-130&$')%P 5 *0
'.' :
SIGKDD Explorations.
Volume 5, Issue 2 - Page 54
1.0
0.6
0.4
0.2
Survival Probability
0.8
low−risk patients
high−risk patients
0.0
p=0.046
0
20
40
60
80
Survival in Months
ra%(
SFtwS"%"
*h S"6 'g
j10!]'.S>&*'.'. '2jIvj
\:5ra'ZQ%&!0 * *%S'Z&%(54j'211*0%GV
s /n13% 9c<.0%(='.&: f]i4i20'Z%&'.S> *0
'.'. 'jRIvj
U4&-0 9l3Y:h% 9'%7A#'.S> *0
'.'A'.%(A%l
9*7>&.&
'.S"$0"
*h3E130&$'g%]
&='.S> *0
'.' :5M&2 '.'.S> *0
'.'. 'g4! 9l3YQP 'Z='.SeA E& 9.A0]$&*
4
'gl=S'.%(P '.5s P.
%%(5130&$' :+pM'.%(P%'g$ *qQ m130&$g%DTs/n'
1*%@c<&'Z8'.!f54j
'= *0
'.'.lD
'g !
c<*%4j/{%'.e$f7
Fc0(
/{%'.e>: f]Uc<%(A/n%'.e$f1% 9')3AF'.%(Al
9*P1>9&-'.S"$%"A*q:
ý:=:IK4)%(QYNE:3L=:Y
*%0(
%SqQ
#:3I2:vi*%9Al *0kQ3
3
j:w>:a
3&OQ N P8#"%$+#"E
3Q
"E "E$SR4+#"E
$!"E +#"E
-$8 ' A)U&(5V87$8
A. *
$E(T &
QYw&0& XWYZ x sA
6y!Q>
s>A:
} ~gL=:RI2:a|M
Q@B#"E
3"% Q`
i
17
[^K*%*{QHv`9
AkQ3L=QYs
A$:
#
} g "A
-^K'Z0
Q3-A>&.#%'.%qQ3IK"$05vi'Z %Q3
3
.%e vH4)k]
Q \?$ 0"%$ (&
87$8
$
Qk|M Av`0
*%A(
A W_T ^`a x
T sy!Qas >s$: } g "A
F^K'Z0
Q)#
>&.+0'.
qQM3¤ýA&AVr0 /
7
kQcb_0XP. d I"E
'
"9eP
"%$C&(R "EI"V.VC&Ef
$
A"E $
' Qw1%(A&.I/ gH&*0
(Q#JM!h
4 `
e>Q
iJ 8Q
A$sA: } ;(
0 ^M [
*%e>QçI"$0 IKS(
(6
kj
Q K0
(Û
i&Q
NP%!
*TM
7
!QNP%!
*Wv`%.&Q#%3úw/
AkQ`
S*-NP&*%?&&Qjv..B|MS'Z!'.AQjN5
*)H'Z&*0*%&Q
N5eG#XY *0klQ k>A3
e$%qQRL#gvi /nIK
QR`$4j
IK
S(A&.9Q`ýAS3VAA&QiSe
'2viS>&3.Q`bW%*%0
r *%AQAw$&[
0-H.*0S(6$Q$wl>-|#S"$>&(
&QJM%e$*0
'k
/
7
kQM'.e
)ýA
3'.'.
Q!
^ m eAFM*%'.'.AQvi&
þ.%FbW%*/
[
3YQ|MS02wS&QM*%*%/a:9g*%*0%
% qQL#e6#vi
(Q
3
ýA&X>!àk 9Qonpq*d87$
r$ 3PsHr0$8 I"E$
)$
8"E
V
"E5$Qj+JM&4H(A*0
Bý
AS
*)
KNP0&0
=;; x A$sy!QkA? >$~A:
} gý6"6
kQ9ýAS7wY:
bm qQ$N5e$S'H#%(
Q &t Q9R
^=:$wA
*qQ
N5TR
9qQFr
ezb] 'Z&7
Qr
e»vi&.A*0kQ
N5
,Bw$!94jQH
`%'Z%3Ug:aL-9A&'. SQH
'Z Ba&/
&'.
Q#
\
S*wY:NP *? &Q:@B#"E
3Q
"E u"E o '"S&9*
SIGKDD Explorations.
'/$8 ' "E$UCHC&v&(5187$
4$ *
3x (&v"E "%$ 3 '"9 5$"95qJ $8w%QJMSMNP 0&0 x
A$sy!Q36
S$> A
:
} gJMA/n^M$(
e$0k¼ '@k1A7z
i*'.'.l30
ÍhþZ !QAy
+ '
"95 "9 "E H0z,$5"E 5"9q %./0 .1"{
&E$ ?K+#"ET 3Q
"E *0 S &?w%q | 1 %./0 .1 " QRv`*09$ YL x s
6Ay!Q3A
A?9> A$s:
}sg#
>&.¤0'.
qQG "6T^M
'Z%AQVv*0
'.S
70
JK'.%3
kQ#
3o|M%*%>&.U
iSkQz'"S&E .V 5P
"%5$v1)60C$Cw}o$ &(q87$8
Q
h9 &%(
'+K+JK0
3*ML-A 8@=w % ':LL
x
AAAy!Q36
AS9> 6A
$:
}sAs!=ýAS*%%NE:5Hg A'.
Q@c$$
i"}$ "}0V H0{.1"E"S&(.
*0 S &?w%q| : E.i0 .v
" QHw$ %3'%@M A*%
(
oWM
x sA
6y!Q~6AS9> ~AsA:
Volume 5, Issue 2 - Page 55