%%%%%%%%%%%%%%%%%%%%%%%% TSAPadj2Hmat.m %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Created by Amy Langville (edited by Rebecca Wills)
%% reading link data provided by Panayiotis Tsaparas
%% available at
%% http://www.cs.toronto.edu/~tsap/experiments/datasets/index.html
%% Loading: Expanded Datasets (instead of Refined or Regular)

function [H,numberofnodes,a] = TSAPadj2Hmat(filename)
% TSAPadj2Hmat  Build a row-normalized hyperlink matrix from an adjacency list file.
%
% EXAMPLE: [H,numberofnodes,a] = TSAPadj2Hmat('TSAP_abortion.txt');
%
% INPUT:
%   filename      = path of the adjacency-list text file to read
%
% OUTPUT:
%   H             = sparse hyperlink matrix; row i holds 1/k in each of the
%                   k columns listed on the i-th line of the file
%   numberofnodes = n (where H = n x n matrix)
%   a             = dangling node vector (n x 1), where a_i=1 if node i is
%                   dangling (row i of H is all zero), 0 o.w.
%
% SIDE EFFECT:
%   Writes H, numberofnodes and a to 'bb.mat' in the current folder
%   (the name is fixed; it is NOT derived from filename).
%
% ERRORS:
%   TSAPadj2Hmat:fileOpen   the file could not be opened
%   TSAPadj2Hmat:emptyFile  the file contains no lines
%
% NOTES:
%   * The adjacency list data sets label the nodes from 0 up.  This
%     function adds 1 so that the nodes run from 1 up.
%   * The i-th line read becomes row i of H; the leading token of each
%     line is discarded, and reading stops at the first blank line or at
%     end of file.
%   * An entire E matrix is not added.  H is sparse and contains 0 rows
%     for dangling nodes.

[fid, openmsg] = fopen(filename,'r');
if fid == -1
   error('TSAPadj2Hmat:fileOpen', 'Cannot open "%s": %s', filename, openmsg);
end
% Close the file however the function exits (normal return or error).
closer = onCleanup(@() fclose(fid));

dline = fgetl(fid);
if ~ischar(dline)
   % fgetl returns the number -1 (not a string) at end of file.
   error('TSAPadj2Hmat:emptyFile', 'Empty file.');
end

row=0;
rowindex=[];
colindex=[];
elements=[];
while ischar(dline) && ~isempty(dline)
   % Drop the leading token of the line; only the remainder is parsed.
   [lstr,rest]=strtok(dline); %#ok<ASGLU>
   % Drop the final 3 characters of the remainder.
   % NOTE(review): presumably a fixed end-of-list marker in the data
   % files -- confirm against the dataset format.
   rest=rest(1:end-3);
   % NOTE(review): str2num evaluates its argument as MATLAB code; only
   % use this loader on trusted data files.
   adjlist=str2num(rest); %#ok<ST2NM>
   adjlist=adjlist(:).'+1;          % shift 0-based labels to 1-based
   row=row+1;
   k=numel(adjlist);
   if k > 0
      rowindex(end+1:end+k)=row;     %#ok<AGROW>
      colindex(end+1:end+k)=adjlist; %#ok<AGROW>
      elements(end+1:end+k)=1/k;     %#ok<AGROW>
   end
   dline = fgetl(fid);
end

% A link may point at a node index beyond the number of lines read.
% Such nodes have no out-link line of their own, so they are kept as
% dangling nodes; this guarantees that H is square.
numberofnodes=max([row colindex]);

% sparse() sizes H explicitly (n x n) and, unlike spconvert on an empty
% triplet list, also works when every node is dangling.
H=sparse(rowindex,colindex,elements,numberofnodes,numberofnodes);

% All stored entries are positive, so a row sums to zero exactly when the
% node has no out-links.  Comparing with zero yields exact 0/1 values
% instead of the rounding residue left by 1 - rowsum.
a=double(full(sum(H,2)) == 0);

save ('bb','H','numberofnodes','a')
% creates bb.mat containing H, numberofnodes, a
% then to load the data, run: load('bb')