From 60e284f24ed6e7355e821c2faa434cd8c57d4248 Mon Sep 17 00:00:00 2001
From: Giovanni Di Grezia
Date: Wed, 10 Dec 2014 13:00:02 +0100
Subject: [PATCH] power method e pagerank come funzioni esterne

---
 functions/external/costruzioneMatrice.m |  19 ++++
 functions/external/page_rank_start.m    |  28 +++++
 functions/external/surfer.m             | 145 ++++++++++++++++++++++++
 functions/external/unisa100.mat         | Bin 0 -> 3411 bytes
 functions/power_method.m                |  24 ++++
 5 files changed, 216 insertions(+)
 create mode 100644 functions/external/costruzioneMatrice.m
 create mode 100644 functions/external/page_rank_start.m
 create mode 100644 functions/external/surfer.m
 create mode 100644 functions/external/unisa100.mat
 create mode 100644 functions/power_method.m

diff --git a/functions/external/costruzioneMatrice.m b/functions/external/costruzioneMatrice.m
new file mode 100644
index 0000000..517d243
--- /dev/null
+++ b/functions/external/costruzioneMatrice.m
@@ -0,0 +1,19 @@
+function A=costruzioneMatrice(G,p)
+% COSTRUZIONEMATRICE  Build the dense PageRank (Google) matrix of a link graph.
+%   A = costruzioneMatrice(G,p) returns A = p*G*D + e*z, where G(i,j) ~= 0
+%   means that page j links to page i and p is the damping factor.
+%   D = diag(1./c) rescales every column that has out-links to sum to 1, and
+%   z(j) is (1-p)/n for those columns and 1/n for pages with no out-links,
+%   so each column of A sums to 1. Errors if G is not square.
+
+n = size(G,1);
+if size(G,2) ~= n
+    error('costruzioneMatrice:notSquare','G must be a square matrix.');
+end
+c = sum(G,1);                       % out-degree of each page (column sums)
+
+k = find(c~=0);                     % pages with at least one out-link
+D = sparse(k,k,1./c(k),n,n);
+
+z = ((1-p)*(c~=0) + (c==0))/n;      % teleportation weight of each column
+A = p*G*D + ones(n,1)*z;
diff --git a/functions/external/page_rank_start.m b/functions/external/page_rank_start.m
new file mode 100644
index 0000000..f9b264e
--- /dev/null
+++ b/functions/external/page_rank_start.m
@@ -0,0 +1,28 @@
+load unisa100.mat
+% U and G can be regenerated with:
+%[U,G] = surfer('http://www.xgiovio.com',100);
+p = 0.85;
+A=costruzioneMatrice(G,p);
+
+toll=1e-5;
+x=power_method(A,toll);
+x=x/sum(x);   % power_method scales to max 1; rescale so the ranks sum to 1
+% Bar chart of the page rank.
+
+shg
+bar(x)
+title('Page Rank')
+
+% Print the URLs in order of page rank.
+
+[~,q] = sort(-x);
+disp(' page-rank url')
+k = 1;
+n=length(x);
+while (k <= n) && (x(q(k)) >= .005)
+   j = q(k);
+   fprintf(' %3.0f %8.4f %s\n', ...
+      j,x(j),U{j});
+   k = k+1;
+end
+
diff --git a/functions/external/surfer.m b/functions/external/surfer.m
new file mode 100644
index 0000000..1f7cabb
--- /dev/null
+++ b/functions/external/surfer.m
@@ -0,0 +1,145 @@
+function [U,G] = surfer(root,n)
+% SURFER Create the adjacency graph of a portion of the Web.
+% [U,G] = surfer(root,n) starts at the URL root and follows
+% Web links until it forms an adjacency graph with n nodes.
+% U = a cell array of n strings, the URLs of the nodes.
+% G = an n-by-n sparse matrix with G(i,j)=1 if node j is linked to node i.
+%
+% Example: [U,G] = surfer('http://www.harvard.edu',500);
+% See also PAGERANK.
+%
+% This function currently has two defects. (1) The algorithm for
+% finding links is naive. We just look for the string 'http:'.
+% (2) An attempt to read from a URL that is accessible, but very slow,
+% might take an unacceptably long time to complete. In some cases,
+% it may be necessary to have the operating system terminate MATLAB.
+% Key words from such URLs can be added to the skip list in surfer.m.
+
+% Initialize the figure: plot area, two status lines, slow/quit toggles.
+
+clf
+shg
+set(gcf,'doublebuffer','on')
+axis([0 n 0 n])
+axis square
+axis ij
+box on
+set(gca,'position',[.12 .20 .78 .78])
+uicontrol('style','frame','units','normal','position',[.01 .09 .98 .07]);
+uicontrol('style','frame','units','normal','position',[.01 .01 .98 .07]);
+t1 = uicontrol('style','text','units','normal','position',[.02 .10 .94 .04], ...
+   'horiz','left');
+t2 = uicontrol('style','text','units','normal','position',[.02 .02 .94 .04], ...
+   'horiz','left');
+slow = uicontrol('style','toggle','units','normal', ...
+   'position',[.01 .24 .07 .05],'string','slow','value',0);
+quit = uicontrol('style','toggle','units','normal', ...
+   'position',[.01 .17 .07 .05],'string','quit','value',0);
+
+U = cell(n,1);
+hash = zeros(n,1);
+G = logical(sparse(n,n));
+m = 1;
+U{m} = root;
+hash(m) = hashfun(root);
+
+% Substrings that mark a link to be skipped (loop-invariant, built once).
+skips = {'.gif','.jpg','.jpeg','.pdf','.css','.asp','.mwc','.ram', ...
+   '.cgi','lmscadsi','cybernet','w3.org','google','yahoo', ...
+   'scripts','netscape','shockwave','webex','fansonly'};
+
+j = 1;
+while j < n && get(quit,'value') == 0
+   % Try to open a page; on failure report it and move to the next node.
+
+   try
+      set(t1,'string',sprintf('%5d %s',j,U{j}))
+      set(t2,'string','');
+      drawnow
+      page = urlread(U{j});
+   catch
+      set(t1,'string',sprintf('fail: %5d %s',j,U{j}))
+      drawnow
+      j = j+1;
+      continue
+   end
+   if get(slow,'value')
+      pause(.25)
+   end
+
+   % Follow the links from the open page.
+
+   for f = strfind(page,'http:')
+
+      % A link starts with 'http:' and ends with the next quote.
+
+      e = min([strfind(page(f:end),'"') strfind(page(f:end),'''')]);
+      if isempty(e), continue, end
+      url = deblank(page(f:f+e-2));
+      url(url<' ') = '!'; % Nonprintable characters
+      if url(end) == '/', url(end) = []; end
+
+      % Skip unwanted links; strfind only looks for the keyword inside url.
+
+      skip = any(url=='!') || any(url=='?');
+      k = 0;
+      while ~skip && (k < length(skips))
+         k = k+1;
+         skip = ~isempty(strfind(url,skips{k}));
+      end
+      if skip
+         if isempty(strfind(url,'.gif')) && isempty(strfind(url,'.jpg'))
+            set(t2,'string',sprintf('skip: %s',url))
+            drawnow
+            if get(slow,'value')
+               pause(.25)
+            end
+         end
+         continue
+      end
+
+      % Check if page is already in url list (hash first, then full compare).
+
+      i = 0;
+      hu = hashfun(url);
+      for k = find(hash(1:m) == hu)'
+         if isequal(U{k},url)
+            i = k;
+            break
+         end
+      end
+
+      % Add a new url to the graph if there are fewer than n.
+
+      if (i == 0) && (m < n)
+         m = m+1;
+         U{m} = url;
+         hash(m) = hu;
+         i = m;
+      end
+
+      % Add a new link.
+
+      if i > 0
+         G(i,j) = 1;
+         set(t2,'string',sprintf('%5d %s',i,url))
+         line(j,i,'marker','.','markersize',6)
+         drawnow
+         if get(slow,'value')
+            pause(.25)
+         end
+      end
+   end
+
+   j = j+1;
+end
+delete(t1)
+delete(t2)
+delete(slow)
+set(quit,'string','close','callback','close(gcf)','value',0)
+
+%------------------------
+
+function h = hashfun(url)
+% Almost unique numeric hash code for pages already visited.
+h = length(url) + 1024*sum(url);
diff --git a/functions/external/unisa100.mat b/functions/external/unisa100.mat
new file mode 100644
index 0000000000000000000000000000000000000000..692df24ad521fbfd345398c773c7f59660086c8d
GIT binary patch
literal 3411
zcmb7``9Bkm+!TTcd{|JfaqynfY_QlY4`;`@KuN0eBc$~ z7xFL|3b}d3`5Igua^<0~SA_39NC*-NaSA{|?0h01Iu{|@22dR%=*5c=oeSE!kpFkM zxUboYaB*=x;^*Q*7p=KaLr%efkv9>yEG+Uq+dYEBL--z1GI$g`E6+^421Uo8sW6@2 zn5kL^MUnb8N_cj;<(QLwY)KYC4YeP;e{jx-YGm}s0sK8V3s$h zC1e|?MsB{oMU=z$SsLz|XG$cMp!3~5lx8pTa%_^#av!dwy;kE{zHhHRE6uxUZQcUh zB8=Jy5~uAi3OrF^?|_fgcsbX(*;5kJ22of(>I$!cDtR3|jpRlu@9*Brkwj~yr@Ea| zBmZasQ{sgRx@NLN4dO6XFJ*-w+c&^lk*4GB zYeM0t?%_phvWJV;MvEMOZvBRi*x?Sg{}~oe10=SAah?2{vq#F2H^G{2&IQA_`8V&E zI4@iiI7#}GR|lzBSraC=j=zm{_ZKuQ8%{l1DJP!_oOzQv{DZgMEi3=3vc{DSLEYPW z$s(gqLLs-}k&70G2dbz&-T6x`;jr=u$6?K;%xs|zuAfceLqmGduF^2Wr| z^q%tnwo}F$4Nk$USih#8F&kE~dyRt>P4H1#^pQfpwuo1p2aCuXRXB%q?tcjo5b_(P zYg^R-9k}Qyd7VLI_J>W&n7AK?^?~1 zKSo~}DW}zHVr`}Lce}KcyUvt01L$MEv|aFgvs3r^nv>douV2P^_I-$?bLHSQiYOtwP> zrT3t}%``7dKfqNixI-FFr!LgnIw+=*9=NZ9rrph30Q9taT`#rsCo}#ak#HsBNr`c5gF1L`p{q>y0l6%4SZ#>?ewEL?&5wMTH>jeu z;Lo$eliTs%FKsZ6$(u;fo6z~zy5qs`ElC{W5-O-P&VHDKFnUU@QvWstD=W^Q*Ec-&^p=yjxRvHij0w>+I|7-DVmKbkf zF<(ESdc5qi@LK96QNX#z?hS`ptm^2EOl*>=wTEKJ1mSJULcK8MmWbG8``qpWKX!ra zu&O8bI8ZXzC{bzYM8VvN$xtA^)pIWWgiW=lj8LMdv7FH1_?TxWBcc=k431y1QlT6+ zXfb@_p!2Pk=CRXucop{x^{!P%IFwzIa(kmHfLgx_x%>N7sZH!a zDN`fP`XuY?uc!}xbuK=w^&SzEae?3C8l7A8Vn>VY)*iG@4_i+sN zunUjH^m-3Q3*fwKZD(;C{~mWOCji#&@K1txp8TWuJK{Lj@nQ+kDTnPB?=p#ohvIMI zLTy>u@Vk|aY
zI-3e>3=@LbDyK$2TMseY5~C;!GoMgjUPQ?a0f=FE^=3tip*P%0Y#yo9yyWk>eIlbp zh*vLJ>UJ1zb1t^pP+sQui(iS4ZL+`QUVf|8Q{dgOk**@5STME@@-SOKN^oeDub+^= zbTSZoU-nu|YYfkCY!^B>)u`ex-5RI8S>@r?Wi#irD6di$lxJ8Ae_i zx@A{+M4cBqi9}Kec9snrV8Z~+0#RRqrdgiu>t7}-BI=~kwz6IdUzK(ezRh|vc2$DY zE+BS4rn)lEI19`ScyrBroekHL@Zj}n??26wvl|NI?5?_}A@JaWI4QCEskJjL6;A%G|1@-LzMXOj_Dsy-}LFm`KCQ{deTKgHVYt&(2Yh zPS>XB2QC!6^VHNsy|+s43IjJ9Es3Iqn%6EXG&1d#!F&HlS&-ptVN11Y`KD0vMkq{64%aL^X$SyEnT0i3Us}x^;fgnp$ zRhPrQ5AG?L+ij(BcAqI4NybYrJKC4)9oHxtR+69Xx%Jh&EN?!(Uw!GFmF((RytOVt zQX{}h`p4KKcmzS(WW`GUP&?jS$V);|!b;Rf0wxq@;|X?rW_|2p=wkVWG^1DAT8Dm- zLxKX$i7}PB`dZ%n19!+3rB}vSl=qUIz_<#jCM=dd?en_YpWplPHavQ%ukL~I(_WJkns8?{)<9as0} z|G+!qG`bG|{rabAB-;S7&q&7l3lHRVsJ1NvVufV;!1~pR?XD@lN!kgwGS5^5c-xC) zcL5$Oa+xPM-03~PMo8upwDmcubYvkdDA$dMHJ22VQT)6h4|zgCCuaW8yziPcAlY+C z)c$dgZ%~32+D%TjdXvb4-<~-~(AQL{Q n-6s)ND%<-3=qWA>+3j)^M>)brNTwGPqvV{KucS4-f literal 0 HcmV?d00001
diff --git a/functions/power_method.m b/functions/power_method.m
new file mode 100644
index 0000000..6dc8d1c
--- /dev/null
+++ b/functions/power_method.m
@@ -0,0 +1,24 @@
+function [v,a] = power_method (U,toll,maxit)
+% POWER_METHOD  Dominant eigenvector of U by power iteration.
+%   [v,a] = power_method(U,toll) starts from v = ones(n,1), repeats
+%   v <- U*v/norm(U*v,inf) and stops once the inf-norm change is <= toll.
+%   v has inf-norm 1; a = (U*v)./v (taken before scaling) holds one
+%   estimate of the dominant eigenvalue per component.
+%   power_method(U,toll,maxit) stops after maxit iterations (default
+%   10000) with a warning, instead of looping forever.
+if nargin < 3, maxit = 10000; end
+v = ones(size(U,1),1);
+a = nan(size(v));       % stays defined even if the loop never runs
+err = inf; it = 0;
+while err > toll && it < maxit
+   w = U*v;
+   a = w./v;            % ratio before scaling: eigenvalue estimate
+   v1 = w./norm(w,inf);
+   err = norm(v1-v,inf);
+   v = v1;
+   it = it+1;
+end
+if err > toll
+   warning('power_method:noConvergence','No convergence after %d iterations.',it);
+end
+end