diff --git a/functions/external/costruzioneMatrice.m b/functions/external/costruzioneMatrice.m
new file mode 100644
index 0000000..517d243
--- /dev/null
+++ b/functions/external/costruzioneMatrice.m
@@ -0,0 +1,17 @@
+function A = costruzioneMatrice(G,p)
+% COSTRUZIONEMATRICE  Build the PageRank iteration matrix A = p*G*D + e*z.
+%   G : n-by-n adjacency matrix, G(i,j) = 1 if node j links to node i.
+%   p : damping factor (e.g. 0.85).
+
+n = size(G,1);           % G is square; avoids [n,n] = size(G) double-assign
+c = sum(G,1);            % out-degree of each node (column sums)
+
+% Scale column sums to be 1 (or 0 where there are no out links).
+k = find(c~=0);
+D = sparse(k,k,1./c(k),n,n);
+
+e = ones(n,1);
+
+% Dangling columns (c==0) get uniform weight 1/n; the rest get (1-p)/n.
+z = ((1-p)*(c~=0) + (c==0))/n;
+A = p*G*D + e*z;
diff --git a/functions/external/page_rank_start.m b/functions/external/page_rank_start.m
new file mode 100644
index 0000000..f9b264e
--- /dev/null
+++ b/functions/external/page_rank_start.m
@@ -0,0 +1,28 @@
+load unisa100.mat
+% The graph can be regenerated with:
+%[U,G] = surfer('http://www.xgiovio.com',100);
+p = 0.85;
+A = costruzioneMatrice(G,p);    % PageRank iteration matrix
+
+toll = 1e-5;
+x = power_method(A,toll);       % dominant eigenvector = page rank
+
+% Bar plot of the page rank.
+
+shg
+bar(x)
+title('Page Rank')
+
+% Print the URLs in page-rank order.
+
+[~,q] = sort(-x);
+disp(' page-rank url')
+k = 1;
+n = length(x);
+while (k <= n) && (x(q(k)) >= .005)
+   j = q(k);
+   fprintf(' %3.0f %8.4f %s\n', ...
+      j,x(j),U{j})
+   k = k+1;
+end
+
diff --git a/functions/external/surfer.m b/functions/external/surfer.m
new file mode 100644
index 0000000..1f7cabb
--- /dev/null
+++ b/functions/external/surfer.m
@@ -0,0 +1,145 @@
+function [U,G] = surfer(root,n)
+% SURFER Create the adjacency graph of a portion of the Web.
+% [U,G] = surfer(root,n) starts at the URL root and follows
+% Web links until it forms an adjacency graph with n nodes.
+% U = a cell array of n strings, the URLs of the nodes.
+% G = an n-by-n sparse matrix with G(i,j)=1 if node j is linked to node i.
+%
+% Example: [U,G] = surfer('http://www.harvard.edu',500);
+% See also PAGERANK.
+%
+% This function currently has two defects. 
(1) The algorithm for
+% finding links is naive.  We just look for the string 'http:'.
+% (2) An attempt to read from a URL that is accessible, but very slow,
+% might take an unacceptably long time to complete.  In some cases,
+% it may be necessary to have the operating system terminate MATLAB.
+% Key words from such URLs can be added to the skip list in surfer.m.
+
+% Initialize
+
+clf
+shg
+set(gcf,'doublebuffer','on')
+axis([0 n 0 n])
+axis square
+axis ij
+box on
+set(gca,'position',[.12 .20 .78 .78])
+uicontrol('style','frame','units','normal','position',[.01 .09 .98 .07]);
+uicontrol('style','frame','units','normal','position',[.01 .01 .98 .07]);
+t1 = uicontrol('style','text','units','normal','position',[.02 .10 .94 .04], ...
+   'horiz','left');
+t2 = uicontrol('style','text','units','normal','position',[.02 .02 .94 .04], ...
+   'horiz','left');
+slow = uicontrol('style','toggle','units','normal', ...
+   'position',[.01 .24 .07 .05],'string','slow','value',0);
+quit = uicontrol('style','toggle','units','normal', ...
+   'position',[.01 .17 .07 .05],'string','quit','value',0);
+
+U = cell(n,1);
+hash = zeros(n,1);
+G = logical(sparse(n,n));
+m = 1;
+U{m} = root;
+hash(m) = hashfun(root);
+
+j = 1;
+while j < n && get(quit,'value') == 0
+
+   % Try to open a page.
+
+   try
+      set(t1,'string',sprintf('%5d %s',j,U{j}))
+      set(t2,'string','');
+      drawnow
+      page = urlread(U{j});
+   catch
+      set(t1,'string',sprintf('fail: %5d %s',j,U{j}))
+      drawnow
+      j = j+1;
+      continue
+   end
+   if get(slow,'value')
+      pause(.25)
+   end
+
+   % Follow the links from the open page.
+
+   for f = findstr('http:',page);
+
+      % A link starts with 'http:' and ends with the next quote.
+
+      e = min([findstr('"',page(f:end)) findstr('''',page(f:end))]);
+      if isempty(e), continue, end
+      url = deblank(page(f:f+e-2));
+      url(url<' ') = '!';   % Nonprintable characters
+      if url(end) == '/', url(end) = []; end
+
+      % Look for links that should be skipped. 
+
+      skips = {'.gif','.jpg','.jpeg','.pdf','.css','.asp','.mwc','.ram', ...
+               '.cgi','lmscadsi','cybernet','w3.org','google','yahoo', ...
+               'scripts','netscape','shockwave','webex','fansonly'};
+      skip = any(url=='!') || any(url=='?');
+      k = 0;
+      while ~skip && (k < length(skips))
+         k = k+1;
+         skip = ~isempty(findstr(url,skips{k}));
+      end
+      if skip
+         if isempty(findstr(url,'.gif')) && isempty(findstr(url,'.jpg'))
+            set(t2,'string',sprintf('skip: %s',url))
+            drawnow
+            if get(slow,'value')
+               pause(.25)
+            end
+         end
+         continue
+      end
+
+      % Check if page is already in url list.
+
+      i = 0;
+      for k = find(hash(1:m) == hashfun(url))';
+         if isequal(U{k},url)
+            i = k;
+            break
+         end
+      end
+
+      % Add a new url to the graph if there are fewer than n.
+
+      if (i == 0) && (m < n)
+         m = m+1;
+         U{m} = url;
+         hash(m) = hashfun(url);
+         i = m;
+      end
+
+      % Add a new link.
+
+      if i > 0
+         G(i,j) = 1;
+         set(t2,'string',sprintf('%5d %s',i,url))
+         line(j,i,'marker','.','markersize',6)
+         drawnow
+         if get(slow,'value')
+            pause(.25)
+         end
+      end
+   end
+
+   j = j+1;
+end
+delete(t1)
+delete(t2)
+delete(slow)
+set(quit,'string','close','callback','close(gcf)','value',0)
+
+
+
+%------------------------
+
+function h = hashfun(url)
+% Almost unique numeric hash code for pages already visited.
+h = length(url) + 1024*sum(url);
diff --git a/functions/external/unisa100.mat b/functions/external/unisa100.mat
new file mode 100644
index 0000000..692df24
Binary files /dev/null and b/functions/external/unisa100.mat differ
diff --git a/functions/power_method.m b/functions/power_method.m
new file mode 100644
index 0000000..6dc8d1c
--- /dev/null
+++ b/functions/power_method.m
@@ -0,0 +1,21 @@
+function [v,a] = power_method (U,toll)
+% POWER_METHOD  Power iteration for the dominant eigenvector of U.
+%   v    : dominant eigenvector, normalized in the infinity norm.
+%   a    : componentwise ratio v1./v from the final iteration
+%          (a per-component estimate of the dominant eigenvalue).
+%   toll : stopping tolerance on norm(v1 - v, inf).
+
+% Start from the all-ones vector, preallocated in one statement
+% (was an element-by-element growing loop followed by a transpose).
+v = ones(size(U,1),1);
+
+err = inf;
+while err > toll
+   v1 = U*v;
+   v1 = v1./norm(v1,inf);
+   err = norm(v1-v,inf);
+   a = v1./v;
+   v = v1;
+end
+
+end