Twitter SAS program example

From sasCommunity
Jump to: navigation, search

Creating a report of Twitter hashtag activity

This example program is part of the online materials for Social Networking and SAS: Running PROCs on Your Facebook Friends, by Chris Hemedinger and Susan Slaughter.

The program uses the XML LIBNAME engine, FILENAME URL, SGPLOT procedure, and a simple PROC PRINT to create a report of recent Twitter activity around a specified hashtag.

Here is an example plot from a report on #analytics:

Analyticshash.png

This program can be run in SAS 9.2. Note: if your Internet connection requires a proxy server (common in corporate networks), be sure to specify the PROXY= option on the FILENAME statement; the program contains a commented-out placeholder showing where it goes.

/* This part needs to run just once to establish */
/* the XML map that we'll use to map the Twitter */
/* XML response into a SAS data set              */
/* TWSEARCH is a temporary fileref; the DATA step */
/* below writes the map text into it.             */
filename twsearch temp; 
 
/** this is the XML map that will convert the RSS search feed 
   into a SAS data set **/
/* The step creates no data set (_NULL_): it copies each       */
/* in-stream line below to the TWSEARCH fileref.               */
/* The map defines one table, "entry", built from /feed/entry, */
/* with columns id, published, title, content, updated,        */
/* authorName and authorUri. published and updated are read as */
/* numeric datetimes using IS8601DT; the rest are character.   */
data _null_; 
  /* TRUNCOVER: accept lines shorter than the 1000-character input field */
  infile datalines truncover; 
  file twsearch; 
  input line $1000.; 
  put line; 
/* DATALINES4: in-stream data runs until the line of four semicolons */
datalines4; 
<?xml version="1.0" encoding="windows-1252"?>
<!-- ############################################################ -->
<!-- 2008-10-03T11:35:31 -->
<!-- SAS XML Libname Engine Map -->
<!-- Generated by XML Mapper, 902000.2.1.20080911191346_v920 -->
<!-- ############################################################ -->
<SXLEMAP name="SXLEMAP" version="1.2">
    <!-- ############################################################ -->
    <TABLE name="entry">
        <TABLE-PATH syntax="XPath">/feed/entry</TABLE-PATH>
        <COLUMN name="id">
            <PATH syntax="XPath">/feed/entry/id</PATH>
            <TYPE>character</TYPE>
            <DATATYPE>string</DATATYPE>
            <LENGTH>50</LENGTH>
        </COLUMN>
        <COLUMN name="published">
            <PATH syntax="XPath">/feed/entry/published</PATH>
            <TYPE>numeric</TYPE>
            <DATATYPE>datetime</DATATYPE>
            <FORMAT width="19">IS8601DT</FORMAT>
            <INFORMAT width="19">IS8601DT</INFORMAT>
        </COLUMN>
        <COLUMN name="title">
            <PATH syntax="XPath">/feed/entry/title</PATH>
            <TYPE>character</TYPE>
            <DATATYPE>string</DATATYPE>
            <LENGTH>200</LENGTH>
        </COLUMN>
        <COLUMN name="content">
            <PATH syntax="XPath">/feed/entry/content</PATH>
            <TYPE>character</TYPE>
            <DATATYPE>string</DATATYPE>
            <LENGTH>800</LENGTH>
        </COLUMN>
        <COLUMN name="updated">
            <PATH syntax="XPath">/feed/entry/updated</PATH>
            <TYPE>numeric</TYPE>
            <DATATYPE>datetime</DATATYPE>
            <FORMAT width="19">IS8601DT</FORMAT>
            <INFORMAT width="19">IS8601DT</INFORMAT>
        </COLUMN>
        <COLUMN name="authorName">
            <PATH syntax="XPath">/feed/entry/author/name</PATH>
            <TYPE>character</TYPE>
            <DATATYPE>string</DATATYPE>
            <LENGTH>50</LENGTH>
        </COLUMN>
        <COLUMN name="authorUri">
            <PATH syntax="XPath">/feed/entry/author/uri</PATH>
            <TYPE>character</TYPE>
            <DATATYPE>string</DATATYPE>
            <LENGTH>50</LENGTH>
        </COLUMN>
    </TABLE>
</SXLEMAP>
;;;;
 
/** this macro makes it simple to get several "pages" worth of tweets **/
/*  Parameters:                                                        */
/*    pages=   number of search-result pages to fetch (default 5)      */
/*    hashtag= hashtag to search for, without the leading #            */
/*    scale=   HOURS, MINUTES or DAYS, in any letter case; selects     */
/*             which "ago" variable the bar chart is drawn over        */
/*  Result: WORK.FEED holds the fetched entries; a PROC SGPLOT bar     */
/*  chart and a PROC PRINT listing are produced from it.               */
/*  Requires the TWSEARCH fileref (the XML map) to be assigned.        */
%macro getTweets(pages=5,hashtag=sasgf11,scale=HOURS);
   /* keep the working macro variables out of any enclosing scope */
   %local scaleVar pgNo feed;

   /* normalise the keyword so that scale=hours works like scale=HOURS */
   %let scale = %qupcase(&scale);

   /* choose the chart variable; reject unknown scales before any   */
   /* network work is done, instead of failing later in PROC SGPLOT */
   %if &scale = HOURS %then
      %do;
         %let scaleVar = hoursAgo;
      %end;
   %else %if &scale = MINUTES %then
      %do;
         %let scaleVar = minutesAgo;
      %end;
   %else %if &scale = DAYS %then
      %do;
         %let scaleVar = daysAgo;
      %end;
   %else
      %do;
         %put ERROR: getTweets: scale=&scale is not valid, use HOURS, MINUTES or DAYS.;
         %return;
      %end;

   /* create initial dataset */
   /* (a single empty row; it is dropped later by the subsetting IF) */
   data work.feed;
   run;

%do pgNo=1 %to &pages;
  /* used %NRSTR() to escape the ampersands that occur in this URL query string */
  /* &hashtag and &pgNo are macro variables that are resolved at run time       */
  %let feed="http://search.twitter.com/search.atom?lang=en%nrstr(&q)=%23&hashtag.%nrstr(&page)=&pgNo";
  filename twit URL &feed
     /* if you need to specify a proxy server to get to the internet */
     /* proxy="http://your.proxy.com"  */
  ;
  /* use the XML library engine */
  libname tf XML xmlfileref=twit xmlmap=twsearch;

  /* append the entries of this page to what has been collected so far */
  data work.feed;
     /* when run in SAS Enterprise Guide, SYSECHO will */
     /* update the task status with this message       */
     sysecho "Fetching tweet page &pgNo of &pages";
     set work.feed tf.entry;
  run;

  /* release the libref and fileref so nothing stays assigned after the macro */
  libname tf clear;
  filename twit clear;

%end;

   /* derive the age of each tweet in days, hours and minutes */
   data work.feed;
      set work.feed;
      length hoursAgo 8 minutesAgo 8 daysAgo 8;
      label hoursAgo = "Hours ago"
            minutesAgo = "Minutes ago"
            daysAgo = "Days ago";
      /* keep only rows with a timestamp; this also drops the empty seed row */
      if published not = .;
      /* NOTE(review): gmtoff() presumably returns the local offset from GMT */
      /* in seconds, shifting the timestamp to local time - confirm          */
      published=published+gmtoff();
      daysAgo = datdif(datepart(published),today(),'act/act');
      hoursAgo = int( (datetime()-published) / 3600 );
      minutesAgo = int( (datetime()-published) / 60 );
   run;

   title "Report of #&hashtag. hashtag activity";
   title2 "as of %TRIM(%QSYSFUNC(DATE(), NLDATE20.)) at %TRIM(%SYSFUNC(TIME(), TIMEAMPM12.))";
   ods graphics / height=500 width=800;
   /* one bar per distinct value of the chosen "ago" variable */
   proc sgplot data=work.feed;
      vbar &scaleVar;
      yaxis LABEL="Number of tweets";
      xaxis discreteorder=data;
   run;

   /* listing of the individual tweets */
   proc print data=work.feed
      obs="Row Number"
      label
   ;
      format published dateampm20.;
      var published authorname title;
   run;

%mend;
 
/* Match the scale to the traffic: MINUTES or HOURS gives */
/* more interesting reports for high-volume topics.       */

/* Sample invocation: report on #SASGF11 tweets */
%getTweets(hashtag=sasgf11, scale=DAYS, pages=10);