# Source code for lnk.googl.stats

#!/usr/bin/env python
#! -*- coding: utf-8 -*-

"""Statistics and metrics retrieval for goo.gl."""

from __future__ import unicode_literals

import click
import ecstasy

import lnk.abstract
import lnk.beauty
import lnk.countries

from lnk.googl.command import Command

def echo(*args):
	"""
	Runs a stats command and prints what it produces.

	A fresh Stats instance is created with its default settings (so 'raw'
	is False) and the result of its fetch() method is written out with
	click.echo().

	Arguments:
		args (variadic): The positional arguments forwarded unchanged to
			Stats.fetch().
	"""
	output = Stats().fetch(*args)
	click.echo(output)

[docs]class Stats(Command):
	"""
	Class to retrieve statistics and info for one or more goo.gl links.

	The statistics for a link include its referrers (i.e. from where the link
	was opened), the countries from where the link was opened, the browsers in
	which the link was opened, the platforms (operating systems) from which the
	link was opened and of course the number of clicks. These statistics can be
	retrieved 'since-forever', but also for specific (possibly open-ended)
	time-ranges, such as for 'the last 5 months' or 'between 4 days and 2 minutes
	ago'. Additionally, these statistics can be paired with information about
	each link, retrieved from the 'info' command, thereby making the stats
	command the ultimate destination for link statistics *and* information.
	Output may, as always, be in raw format for internal use or in a pretty box.
	Multiple URLs are fully supported.

	Attributes:
		raw (bool): Whether to return the output in raw format for internal use,
					or in a pretty string-representation for outside-display.
	"""
	def __init__(self, raw=False):
		"""
		Initializes the command under the name 'stats'.

		Arguments:
			raw (bool): Stored on the instance. fetch() returns the plain
				list of results when it is True and a boxified string
				otherwise.
		"""
		# The command name 'stats' is handed to the Command initializer.
		super(Stats, self).__init__('stats')
		self.raw = raw

[docs]	def fetch(self, only, hide, times, forever, limit, add_info, full, urls):
		"""
		Fetches statistics for one or more URLs.

		Arguments:
			only (tuple): A tuple of strings representing the sets to include
						  in the response ('only' these will be included).
			hide (tuple): A tuple of strings representing the sets to hide
						  from the response (either from all possible sets
						  if only is empty, or else from those selected).
			times (tuple): A tuple of tuples of the schema (<span>, <unit>),
						   representing the timespans for which to fetch
						   statistics.
			forever (bool): Whether to include the statistics 'since forever'.
			limit (int): A limit on the number of items fetched per timespan.
			add_info (bool): Whether or not to add additional information for
							 each link.
			full (bool): Whether to show full country names, or short ISO
						 abbreviations.
			urls (tuple): A tuple of urls to fetch statistics for.

		Returns:
			A plain list of the raw lines if the 'raw' attribute is True,
			else a boxified, pretty string.
		"""
		sets = lnk.abstract.filter_sets(self.sets, only, hide)
		timespans = self.get_timespans(times, forever)

		results = []
		threads = []
		for url in urls:
			self.queue.put(url)
			args = (results, sets, timespans, add_info, full, limit)
			thread = self.new_thread(self.get_stats, *args)
			threads.append(thread)
		self.join(threads)

		return results if self.raw else lnk.beauty.boxify(results)

[docs]	def get_stats(self, results, sets, timespans, add_info, full, limit):
		"""
		Retrieves the statistics for a single url.

		The statistics returned are for all timespans supplied, filtered
		according to the sets of statistics wanted.

		Arguments:
			results (list): The list of results to which to append the
							retreived data.
			sets (tuple): The sets of statistics wanted in the response
						 (others are discarded).
			timespans (tuple): A tuple of tuples of the schema (<span>, <unit>),
							   representing the timespans for which to fetch
							   statistics.
			add_info (bool): Whether to add information about each url
							 alongside statistics.
			full (bool): Whether to show full country names or short ISO
						 abbreviations.
			limit (int): A limit to the number of data-points selected for each
						 timespan.
		"""
		data = self.request(add_info)
		lines = self.lineify(data, sets, timespans, full, limit)
		with self.lock:
			results.append(lines)

[docs]	def request(self, add_info):
		"""
		Requests statistics for a given configuration.

		The url for which to retrieve statistics is fetched from the queue
		attribute (thread-safe).

		Arguments:
			add_info (bool): Whether to add information about the URL as well.

		Returns:
			The dictionary containing the retrieved data. The format of this
			dictionary is that it contains a 'URL' key, as well as all key/value
			pairs connected to information. It also contains an 'analytics' key,
			whose value are the actual statistics for the URL. These statistics
			are a dictionary where the keys are the timespans and the values
			the data for each timespan. This data is then furthermore a
			dictionary which maps the category/set names to list of data-points.
			Each data-point's key is contained in the 'id' key (e.g. the name
			of a country) and the value (always the number of clicks) is
			found with the 'count' key.

		"""
		url = self.queue.get()
		what = "get information for '{0}'".format(url)
		data = self.get(url, 'FULL', what)

		data['URL'] = url
		del data['kind']
		del data['id']
		if not add_info:
			for i in ['created', 'longUrl', 'status']:
				del data[i]

		return data

[docs]	def lineify(self, data, sets, timespans, full, limit):
		"""
		Turns the data retrieved for a link into a list of lines.

		Information about the URL is formatted and turned into header-lines
		here, the statistics are entirely parsed in the listify() method.

		Arguments:
			data (dictionary): The data (statistics) to lineify.
			sets (tuple): The names of the sets to include in the output.
			timespans (tuple): The timespans for which to include data.
			full (bool): Whether to show full country names or short
						 ISO abbreviations (e.g. 'Germany' or 'DE').
			limit (int): A limit on the number of data-points retrieved per
						 timespan.
		Returns:
			A list of lines, ready for output.
		"""
		stats = data.pop('analytics')
		statistics = self.listify(stats, sets, timespans, full, limit)
		header = [self.format(key, value) for key, value in data.items()]

		return header + statistics

[docs]	def listify(self, data, sets, timespans, full, limit):
		"""
		Formats data with multiple data-points into a pretty list.

		Each category is formatted into a header. For each timespan of a given
		category, a list is created if the category holds multiple data-points.
		Lines containing timespans and data-points are formatted into list-items
		using ecstasy.

		Arguments:
			data (dictionary): The data (statistics) to lineify.
			sets (tuple): The names of the sets to include in the output.
			timespans (tuple): The timespans for which to include data.
			full (bool): Whether to show full country names or short
						 ISO abbreviations (e.g. 'Germany' or 'DE').
			limit (int): A limit on the number of data-points retrieved per
						 timespan.
		Returns:
			A list of lines, ready for output.
		"""
		lines = []
		for display, real in sets.items():
			lines.append('{0}:'.format(display.title()))
			for timespan, categories in data.items():
				# Ignore unwanted timespans
				if timespan not in timespans:
					continue
				lines.append(self.get_header(timespan))
				# The goo.gl API does not include categories with zero
				# clicks thus we first have to determine whether the
				# category is present at all
				if not categories.get(real):
					lines[-1] += ' None'
				elif display == 'clicks':
					lines[-1] += ' {0}'.format(categories[real])
				else:
					lines += self.sub_listify(display,
											  categories[real],
											  limit,
											  full)
		return lines

[docs]	def get_header(self, timespan):
		"""
		Formats timespan headers.

		Among other things, this method takes care of the 'forever' timespan,
		whose name to the goo.gl API is 'allTime'. Before returning a header,
		it is formatted using ecstasy into a proper, pretty list-item.
		"""
		if timespan == 'allTime':
			header = 'Since forever:'
		else:
			if timespan == 'twoHours':
				timespan = 'two hours'
			header = 'Last {0}:'.format(timespan)

		return self.list_item.format(header)

[docs]	def sub_listify(self, category, points, limit, full):
		"""
		Handles transforming data for a category into a level-2 list.

		While listify handles all data and takes care of formatting timespans,
		this method handles only data for a single category. Because this data
		is on the second level of each list (0th level is the category, 1st
		is the timespan and 2nd are the data-points), it is formatted
		differently than the data on level-1 (the level-1 bullet is a '+',
		the level-2 bullet a '-'). This method also handles special cases
		for key-names and also country-name expansion.

		Arguments:
			category (str): The name of the category of this data.
			points (list): The data points for this category (the list
						   contains dictionaries with the keys 'id' and
						   'count').
			limit (int): A limit on the number of data-points retrieved per
						 timespan.
			full (bool): Whether to show full country names or short
						 ISO abbreviations (e.g. 'Germany' or 'DE').
		"""
		lines = []
		for n, point in enumerate(points):
			if n == limit:
				break
			subject = point['id']
			if subject == 'unknown':
				subject = subject.title()
			if category == 'countries' and full:
				subject = lnk.countries.names[subject]
			clicks = point['count']
			line = ecstasy.beautify('   <-> {0}: {1}', ecstasy.Color.Yellow)
			lines.append(line.format(subject, clicks))

		return lines

[docs]	def get_timespans(self, times, forever):
		"""
		Parses timespans.

		The timespans passed to the command-line interface and ultimately
		to the fetch() method are of the schema '(<span>, <unit>)'. These
		are parsed such that special cases are handled, e.g. if the 'forever'
		flag is set, the timespan 'allTime' is added for the goo.gl API.
		If no timespans were passed to the method, the default timespan is
		retrieved from the command's configuration settings.

		Arguments:
			times (tuple): The timespans of the schema '(<span>, <unit>)'.
			forever (bool): Whether to include the 'since forever' timespan.

		Returns:
			A set of timespans (string-representations thereof).
		"""
		timespans = set(times)
		if not timespans and not forever:
			default = self.settings['timespan']
			if default == 'forever':
				timespans.add('allTime')
			elif default == 'two-hours':
				timespans.add('twoHours')
			else:
				timespans.add(default)
		if forever:
			timespans.add('allTime')
		if 'two-hours' in timespans:
			timespans.remove('two-hours')
			timespans.add('twoHours')

		return timespans

	@staticmethod
[docs]	def format(key, value):
		"""
		Formats a key/value pair for output.

		Handles special cases for key-names and formats the key-value pair.

		Arguments:
			key (str): The key of the data-point.
			value (str): The value of the data-point.

		Returns:
			A pretty key-value string-representation.
		"""
		if key == 'shortUrlClicks':
			key = 'clicks'
		elif key == 'longUrl':
			key = 'expanded'

		return '{0}: {1}'.format(key.title(), value)