# -*- coding: utf-8 -*-
# Copyright (c) 2013-2016 NASK. All rights reserved.
"""
.. note::

   For basic information on how to use the classes defined in this
   module -- please consult the :ref:`data_spec_class` chapter of the
   tutorial.
"""
import collections
from pyramid.decorator import reify
from n6sdk.data_spec.fields import (
Field,
AnonymizedIPv4Field,
ASNField,
CCField,
DateTimeField,
DomainNameField,
DomainNameSubstringField,
EmailSimplifiedField,
ExtendedAddressField,
IBANSimplifiedField,
IntegerField,
IPv4Field,
IPv4NetField,
IPv6Field,
IPv6NetField,
ListOfDictsField,
MD5Field,
PortField,
SHA1Field,
SourceField,
UnicodeEnumField,
UnicodeLimitedField,
URLField,
URLSubstringField,
)
from n6sdk.exceptions import (
FieldValueError,
ParamKeyCleaningError,
ParamValueCleaningError,
ResultKeyCleaningError,
ResultValueCleaningError,
_KeyCleaningErrorMixin,
)
#
# Constants
#: Legal values of the :attr:`DataSpec.restriction` field: network
#: incident data distribution restriction qualifiers.
RESTRICTION_ENUMS = (
    'public',
    'need-to-know',
    'internal',
)

#: Legal values of the :attr:`DataSpec.confidence` field: network
#: incident data confidence qualifiers.
CONFIDENCE_ENUMS = (
    'low',
    'medium',
    'high',
)

#: Legal values of the :attr:`DataSpec.category` field: network
#: incident category labels.
CATEGORY_ENUMS = (
    'amplifier', 'bots', 'backdoor', 'cnc',
    'dns-query', 'dos-attacker', 'dos-victim',
    'flow', 'flow-anomaly', 'fraud',
    'leak', 'malurl', 'malware-action',
    'phish', 'proxy',
    'sandbox-url', 'scanning', 'server-exploit', 'spam', 'spam-url',
    'tor', 'vulnerable', 'webinject',
    'other',
)

#: Legal values of the :attr:`DataSpec.proto` field: network incident
#: layer-#4-protocol labels.
PROTO_ENUMS = (
    'tcp',
    'udp',
    'icmp',
)

#: Legal values of the :attr:`DataSpec.origin` field: network incident
#: origin labels.
ORIGIN_ENUMS = (
    'c2', 'dropzone', 'proxy',
    'p2p-crawler', 'p2p-drone',
    'sinkhole', 'sandbox', 'honeypot', 'darknet',
    'av', 'ids', 'waf',
)

#: Legal values of the :attr:`DataSpec.status` field: black list item
#: status qualifiers.
STATUS_ENUMS = (
    'active',
    'delisted',
    'expired',
    'replaced',
)
#
# Auxiliary classes
class Ext(dict):

    """
    An auxiliary class of a :class:`dict`-like container -- to be used
    to extend field specifications in :class:`DataSpec` subclasses (for
    usage examples, see the descriptions of :class:`DataSpec` and
    :class:`AllSearchableDataSpec`).
    """

    def __repr__(self):
        # e.g. "Ext({'in_params': 'optional'})"
        return '{}({})'.format(self.__class__.__name__,
                               super(Ext, self).__repr__())

    def copy(self):
        """
        Return a shallow copy being an instance of the same class as
        `self` (unlike :meth:`dict.copy`, which returns a plain dict).
        """
        return self.__class__(self)

    def make_extended_field(self, field):
        """
        Make a new field: `field` extended with the content of `self`.

        Args:
            `field`:
                A field instance; its ``_init_kwargs`` attribute is
                read and its class is called to create the new field.

        Returns:
            A new instance of the class of `field`, created with the
            keyword arguments being the items of `self` completed (see:
            :meth:`nondestructive_update`) with ``field._init_kwargs``;
            items of `self` take precedence.
        """
        merged_init_kwargs = self.copy()
        merged_init_kwargs.nondestructive_update(field._init_kwargs)
        return field.__class__(**merged_init_kwargs)

    def nondestructive_update(self, other):
        """
        Update `self` with `other` without overwriting existing items.

        Args:
            `other`:
                A mapping or an iterable of ``(key, value)`` pairs.

        Keys missing from `self` are set to the given values.  A value
        already stored in `self` is kept intact -- unless it is an
        :class:`Ext` instance and the given value is:

        * a field -- then the stored value is replaced with the given
          field extended with the stored :class:`Ext` (see:
          :meth:`make_extended_field`);

        * a mapping -- then the stored value is replaced with a copy of
          the stored :class:`Ext` recursively updated (nondestructively)
          with the given mapping.
        """
        if isinstance(other, collections.Mapping):
            other = other.iteritems()
        for key, value in other:
            # (setdefault() never replaces an already stored value)
            stored_value = self.setdefault(key, value)
            if (stored_value is not value) and isinstance(stored_value, Ext):
                if isinstance(value, Field):
                    self[key] = stored_value.make_extended_field(value)
                elif isinstance(value, collections.Mapping):
                    # merging into a copy -- so that the stored Ext
                    # object itself (possibly shared) is not modified
                    merged_value = stored_value.copy()
                    merged_value.nondestructive_update(value)
                    self[key] = merged_value
#
# The abstract base class for any data specification classes
class BaseDataSpec(object):

    """
    The base class for data specification classes.

    Typically, you will not instantiate or subclass this class directly
    -- instead, you may want to use :class:`DataSpec` or, more likely, a
    subclass of it.
    """

    def __init__(self, **kwargs):
        # key -> field mappings, filled in by _set_fields()
        self._all_param_fields = {}
        self._required_param_fields = {}
        self._single_param_fields = {}
        self._all_result_fields = {}
        self._required_result_fields = {}

        self._set_fields()

        super(BaseDataSpec, self).__init__(**kwargs)

    #
    # public properties

    @reify
    def all_keys(self):
        """
        Instance property: a :class:`frozenset` of all keys.

        (Includes all legal parameter names and result keys.)
        """
        return self.all_param_keys | self.all_result_keys

    @reify
    def all_param_keys(self):
        """
        Instance property: a :class:`frozenset` of all legal parameter names.
        """
        return frozenset(self._all_param_fields)

    @reify
    def all_result_keys(self):
        """
        Instance property: a :class:`frozenset` of all legal result keys.
        """
        return frozenset(self._all_result_fields)

    #
    # public methods (possibly extendable)

    #: .. note::
    #:    The method should **never** modify the given dictionary (or any
    #:    of its values). It should always return a new dictionary.
    def clean_param_dict(self, params,
                         # optional keyword arguments:
                         ignored_keys=(),
                         forbidden_keys=(),
                         extra_required_keys=(),
                         discarded_keys=()):
        """
        Validate the keys of `params` and clean its values.

        Args:
            `params`:
                A dictionary that maps parameter names to non-empty
                lists of raw values.

        Kwargs:
            `ignored_keys`:
                Keys of `params` to be completely skipped (neither
                validated nor included in the result).
            `forbidden_keys`:
                Keys to be treated as illegal even if they are keys of
                the parameter fields of this data specification.
            `extra_required_keys`:
                Keys to be treated as required in addition to the keys
                of the required parameter fields.
            `discarded_keys`:
                Keys to be validated but omitted from the result.

        Returns:
            A new dictionary that maps the cleaned keys to lists of
            cleaned values (keys for which no value has been cleaned
            successfully are omitted).

        Raises:
            :exc:`ParamKeyCleaningError` -- if there are any illegal or
            missing keys; :exc:`ParamValueCleaningError` -- if any value
            could not be cleaned.
        """
        keys = self._clean_keys(
            params.viewkeys() - frozenset(ignored_keys),
            self._all_param_fields.viewkeys() - frozenset(forbidden_keys),
            self._required_param_fields.viewkeys() | frozenset(extra_required_keys),
            frozenset(discarded_keys),
            exc_class=ParamKeyCleaningError)
        return dict(self._iter_clean_param_items(params, keys))

    #: .. note::
    #:    The method should **never** modify the given dictionary (or any
    #:    of its values).
    def clean_param_keys(self, params,
                         # optional keyword arguments:
                         ignored_keys=(),
                         forbidden_keys=(),
                         extra_required_keys=(),
                         discarded_keys=()):
        """
        Validate the keys of `params` (the values are not examined).

        The arguments have the same meaning as for
        :meth:`clean_param_dict`.

        Returns:
            A set of the cleaned keys (without `ignored_keys` and
            `discarded_keys`).

        Raises:
            :exc:`ParamKeyCleaningError` -- if there are any illegal or
            missing keys.
        """
        return self._clean_keys(
            params.viewkeys() - frozenset(ignored_keys),
            self._all_param_fields.viewkeys() - frozenset(forbidden_keys),
            self._required_param_fields.viewkeys() | frozenset(extra_required_keys),
            frozenset(discarded_keys),
            exc_class=ParamKeyCleaningError)

    def param_field_specs(self, which='all', multi=True, single=True):
        """
        Get the specifications (field objects) of parameter fields.

        Kwargs:
            `which` (default: ``'all'``):
                One of: ``'all'``, ``'required'``, ``'optional'``.
            `multi` (default: :obj:`True`):
                If false, fields that are not *single-param* ones are
                excluded.
            `single` (default: :obj:`True`):
                If false, *single-param* fields are excluded.

        Returns:
            A new dictionary that maps parameter names to field objects.

        Raises:
            :exc:`~exceptions.ValueError` -- if `which` is not one of
            the legal values.
        """
        field_items = self._filter_by_which(which,
                                            self._all_param_fields,
                                            self._required_param_fields)
        # note: on a dict view the augmented operators below do not work
        # in-place -- they just rebind `field_items` to a new set
        if not multi:
            field_items &= self._single_param_fields.viewitems()
        if not single:
            field_items -= self._single_param_fields.viewitems()
        return dict(field_items)

    #: .. note::
    #:    The method should **never** modify the given dictionary (or any
    #:    of its values). It should always return a new dictionary.
    def clean_result_dict(self, result,
                          # optional keyword arguments:
                          ignored_keys=(),
                          forbidden_keys=(),
                          extra_required_keys=(),
                          discarded_keys=()):
        """
        Validate the keys of `result` and clean its values.

        Args:
            `result`:
                A dictionary that maps result keys to raw values.

        Kwargs:
            `ignored_keys`:
                Keys of `result` to be completely skipped (neither
                validated nor included in the returned dictionary).
            `forbidden_keys`:
                Keys to be treated as illegal even if they are keys of
                the result fields of this data specification.
            `extra_required_keys`:
                Keys to be treated as required in addition to the keys
                of the required result fields.
            `discarded_keys`:
                Keys to be validated but omitted from the returned
                dictionary.

        Returns:
            A new dictionary that maps the cleaned keys to cleaned
            values.

        Raises:
            :exc:`ResultKeyCleaningError` -- if there are any illegal or
            missing keys; :exc:`ResultValueCleaningError` -- if any
            value could not be cleaned.
        """
        keys = self._clean_keys(
            result.viewkeys() - frozenset(ignored_keys),
            self._all_result_fields.viewkeys() - frozenset(forbidden_keys),
            self._required_result_fields.viewkeys() | frozenset(extra_required_keys),
            frozenset(discarded_keys),
            exc_class=ResultKeyCleaningError)
        return dict(self._iter_clean_result_items(result, keys))

    #: .. note::
    #:    The method should **never** modify the given dictionary (or any
    #:    of its values).
    def clean_result_keys(self, result,
                          # optional keyword arguments:
                          ignored_keys=(),
                          forbidden_keys=(),
                          extra_required_keys=(),
                          discarded_keys=()):
        """
        Validate the keys of `result` (the values are not examined).

        The arguments have the same meaning as for
        :meth:`clean_result_dict`.

        Returns:
            A set of the cleaned keys (without `ignored_keys` and
            `discarded_keys`).

        Raises:
            :exc:`ResultKeyCleaningError` -- if there are any illegal or
            missing keys.
        """
        return self._clean_keys(
            result.viewkeys() - frozenset(ignored_keys),
            self._all_result_fields.viewkeys() - frozenset(forbidden_keys),
            self._required_result_fields.viewkeys() | frozenset(extra_required_keys),
            frozenset(discarded_keys),
            exc_class=ResultKeyCleaningError)

    def result_field_specs(self, which='all'):
        """
        Get the specifications (field objects) of result fields.

        Kwargs:
            `which` (default: ``'all'``):
                One of: ``'all'``, ``'required'``, ``'optional'``.

        Returns:
            A new dictionary that maps result keys to field objects.

        Raises:
            :exc:`~exceptions.ValueError` -- if `which` is not one of
            the legal values.
        """
        return dict(self._filter_by_which(which,
                                          self._all_result_fields,
                                          self._required_result_fields))

    #
    # overridable/extendable methods

    def get_adjusted_field(self, key, field, ext=None):
        """
        Get the field object to be actually used for the given key.

        Args:
            `key`:
                The field key (not used by this default implementation).
            `field`:
                The field object found for the key.
            `ext` (default: :obj:`None`):
                An :class:`Ext` instance to extend the field with, or
                :obj:`None`.

        Returns:
            `field` itself if `ext` is :obj:`None`; otherwise the result
            of ``ext.make_extended_field(field)``.
        """
        if ext is not None:
            field = ext.make_extended_field(field)
        return field

    #
    # non-public internals

    def _set_fields(self):
        # Fill in the key-to-field mappings created in __init__().
        key_to_field = {}
        for key, field in self._iter_all_field_specs():
            key = key.decode('ascii')
            key_to_field[key] = field
            if field.in_params is not None:
                self._all_param_fields[key] = field
                if field.in_params == 'required':
                    self._required_param_fields[key] = field
                else:
                    assert field.in_params == 'optional'
                # NOTE(review): this check is placed inside the
                # `in_params is not None` branch -- the nesting has been
                # reconstructed from de-indented source; please confirm
                if field.single_param:
                    self._single_param_fields[key] = field
            if field.in_result is not None:
                self._all_result_fields[key] = field
                if field.in_result == 'required':
                    self._required_result_fields[key] = field
                else:
                    assert field.in_result == 'optional'
        # making all fields (including those Ext-updated)
        # accessible also as instance attributes
        vars(self).update(key_to_field)

    def _iter_all_field_specs(self):
        # Yield (key, field) pairs for all fields (including "extra
        # params") found in the instance and in the classes from the
        # MRO of its class.
        key_to_ext = collections.defaultdict(Ext)
        seen_keys = set()
        attr_containers = (self,) + self.__class__.__mro__
        for ac in attr_containers:
            for key, obj in vars(ac).iteritems():
                if isinstance(obj, Ext):
                    # collecting Ext objects -- to be applied to the
                    # first Field found for the key further in the MRO
                    key_to_ext[key].nondestructive_update(obj)
                    continue
                if key in seen_keys:
                    # the attribute is shadowed by a non-Ext attribute
                    # of an earlier container (e.g., of a subclass)
                    continue
                seen_keys.add(key)
                if isinstance(obj, Field):
                    # (.get() -- not to create an empty Ext needlessly)
                    field_ext = key_to_ext.get(key)
                    field = self.get_adjusted_field(key, obj, field_ext)
                    yield key, field
                    for extra in self._iter_extra_param_specs(key, field):
                        yield extra

    def _iter_extra_param_specs(self, key, parent_field):
        # Yield ('<key>.<suffix>', field) pairs for the items of the
        # `extra_params` attribute of the given field (recursively).
        for key_suffix, xfield in parent_field.extra_params.iteritems():
            if xfield is None:
                # field was masked ("removed") using Ext, e.g. in a subclass
                continue
            if not isinstance(xfield, Field):
                raise TypeError('{!r} is not a {!r} instance'
                                .format(xfield, Field))
            xkey = '{}.{}'.format(key, key_suffix)
            xfield = self.get_adjusted_field(xkey, xfield)
            yield xkey, xfield
            # recursive yielding:
            for extra in self._iter_extra_param_specs(xkey, xfield):
                yield extra

    @staticmethod
    def _clean_keys(keys, legal_keys, required_keys, discarded_keys,
                    exc_class):
        # Check the given keys against the legal and required ones;
        # return a set of the decoded keys (without the discarded ones).
        illegal_keys = keys - legal_keys
        missing_keys = required_keys - keys
        if illegal_keys or missing_keys:
            assert issubclass(exc_class, _KeyCleaningErrorMixin)
            raise exc_class(illegal_keys, missing_keys)
        return {key.decode('ascii') for key in (keys - discarded_keys)}

    def _iter_clean_param_items(self, params, keys):
        # Yield (key, <list of cleaned values>) pairs; all errors are
        # collected to be reported together after all keys are processed.
        error_info_seq = []
        for key in keys:
            assert key in self._all_param_fields
            assert key in params
            field = self._all_param_fields[key]
            param_values = params[key]
            assert param_values and type(param_values) is list
            assert hasattr(field, 'single_param')
            if field.single_param and len(param_values) > 1:
                error_info_seq.append((
                    key,
                    param_values,
                    FieldValueError(public_message=(
                        u'Multiple values for a single-value-only field.'))
                ))
            else:
                cleaned_values = []
                for value in param_values:
                    try:
                        cleaned_val = field.clean_param_value(value)
                    except Exception as exc:
                        error_info_seq.append((key, value, exc))
                    else:
                        cleaned_values.append(cleaned_val)
                if cleaned_values:
                    yield key, cleaned_values
        if error_info_seq:
            raise ParamValueCleaningError(error_info_seq)

    def _iter_clean_result_items(self, result, keys):
        # Yield (key, <cleaned value>) pairs; all errors are collected
        # to be reported together after all keys are processed.
        error_info_seq = []
        for key in keys:
            assert key in self._all_result_fields
            assert key in result
            field = self._all_result_fields[key]
            value = result[key]
            try:
                cleaned_value = field.clean_result_value(value)
            except Exception as exc:
                error_info_seq.append((key, value, exc))
            else:
                # yielding outside the `try` block -- so that an
                # exception thrown into the generator at this point is
                # not mistaken for a value cleaning error
                yield key, cleaned_value
        if error_info_seq:
            raise ResultValueCleaningError(error_info_seq)

    @staticmethod
    def _filter_by_which(which, all_fields, required_fields):
        # select fields that match the `which` argument
        if which == 'all':
            return all_fields.viewitems()
        elif which == 'required':
            return required_fields.viewitems()
        elif which == 'optional':
            return all_fields.viewitems() - required_fields.viewitems()
        else:
            raise ValueError("{!r} is not one of: 'all', 'required', 'optional'"
                             .format(which))
#
# Concrete data specification base classes
class DataSpec(BaseDataSpec):

    """
    The basic, ready-to-use, data specification class.

    Typically, you will want to create a subclass of it (note that, by
    default, all fields are *disabled as query parameters*, so you may
    want to *enable* some of them).  For example::

        class MyDataSpec(DataSpec):

            # enable `source` as a query parameter
            source = Ext(in_params='optional')

            # enable the `time.min` and `time.until` query parameters
            # (leaving `time.max` still disabled)
            time = Ext(
                extra_params=Ext(
                    min=Ext(in_params='optional'),
                    until=Ext(in_params='optional'),
                ),
            )

            # enable `fqdn` and `fqdn.sub` as query parameters
            # and add a new query parameter: `fqdn.prefix`
            fqdn = Ext(
                in_params='optional',
                extra_params=Ext(
                    sub=Ext(in_params='optional'),
                    prefix=DomainNameSubstringField(in_params='optional'),
                ),
            )

            # completely disable the `modified` field
            modified = None

            # add a new field
            weekday = UnicodeEnumField(
                in_params='optional',
                in_result='optional',
                enum_values=(
                    'Monday', 'Tuesday', 'Wednesday', 'Thursday',
                    'Friday', 'Saturday', 'Sunday',
                ),
            )

    .. seealso::

        Compare this class with :class:`AllSearchableDataSpec`.
    """

    #
    # Fields that are always *required in results*

    id = UnicodeLimitedField(
        in_result='required',
        max_length=64,
    )

    source = SourceField(
        in_result='required',
    )

    restriction = UnicodeEnumField(
        in_result='required',
        enum_values=RESTRICTION_ENUMS,
    )

    confidence = UnicodeEnumField(
        in_result='required',
        enum_values=CONFIDENCE_ENUMS,
    )

    category = UnicodeEnumField(
        in_result='required',
        enum_values=CATEGORY_ENUMS,
    )

    time = DateTimeField(
        in_params=None,  # <- should be None even in subclasses
        in_result='required',

        extra_params=dict(
            min=DateTimeField(          # `time.min`
                single_param=True,
            ),
            max=DateTimeField(          # `time.max`
                single_param=True,
            ),
            until=DateTimeField(        # `time.until`
                single_param=True,
            ),
        ),
    )

    #
    # Fields related to `address`

    # an `address` is a list of dicts -- each containing either
    # `ip` or `ipv6` (but not both) and optionally some or all of:
    # `asn`, `cc`, `dir`, `rdns`
    address = ExtendedAddressField(
        in_params=None,  # <- should be None even in subclasses
        in_result='optional',
    )

    # query params related to the components of `address` items

    ip = IPv4Field(
        in_result=None,  # <- should be None even in subclasses

        extra_params=dict(
            net=IPv4NetField(),         # `ip.net`
        ),
    )

    ipv6 = IPv6Field(
        in_result=None,  # <- should be None even in subclasses

        extra_params=dict(
            net=IPv6NetField(),         # `ipv6.net`
        ),
    )

    asn = ASNField(
        in_result=None,  # <- should be None even in subclasses
    )

    cc = CCField(
        in_result=None,  # <- should be None even in subclasses
    )

    #
    # Fields related only to black list events

    active = Field(
        in_params=None,  # <- should be None even in subclasses
        in_result=None,  # <- typically will be None even in subclasses

        extra_params=dict(
            min=DateTimeField(          # `active.min`
                single_param=True,
            ),
            max=DateTimeField(          # `active.max`
                single_param=True,
            ),
            until=DateTimeField(        # `active.until`
                single_param=True,
            ),
        ),
    )

    expires = DateTimeField(
        in_params=None,  # <- should be None even in subclasses
        in_result='optional',
    )

    replaces = UnicodeLimitedField(
        in_result='optional',
        max_length=64,
    )

    status = UnicodeEnumField(
        in_result='optional',
        enum_values=STATUS_ENUMS,
    )

    #
    # Fields related only to aggregated (high frequency) events

    count = IntegerField(
        in_params=None,  # <- should be None even in subclasses
        in_result='optional',
        min_value=0,
        max_value=(2 ** 15 - 1),
    )

    until = DateTimeField(
        in_params=None,  # <- should be None even in subclasses
        in_result='optional',
    )

    #
    # Other fields

    action = UnicodeLimitedField(
        in_result='optional',
        max_length=32,
    )

    adip = AnonymizedIPv4Field(
        in_result='optional',
    )

    dip = IPv4Field(
        in_result='optional',
    )

    dport = PortField(
        in_result='optional',
    )

    email = EmailSimplifiedField(
        in_result='optional',
    )

    fqdn = DomainNameField(
        in_result='optional',

        extra_params=dict(
            sub=DomainNameSubstringField(),  # `fqdn.sub`
        ),
    )

    iban = IBANSimplifiedField(
        in_result='optional',
    )

    injects = ListOfDictsField(
        in_params=None,  # <- should be None even in subclasses
        in_result='optional',
    )

    md5 = MD5Field(
        in_result='optional',
    )

    modified = DateTimeField(
        in_params=None,  # <- should be None even in subclasses
        in_result='optional',

        extra_params=dict(
            min=DateTimeField(          # `modified.min`
                single_param=True,
            ),
            max=DateTimeField(          # `modified.max`
                single_param=True,
            ),
            until=DateTimeField(        # `modified.until`
                single_param=True,
            ),
        ),
    )

    name = UnicodeLimitedField(
        in_result='optional',
        max_length=255,
    )

    origin = UnicodeEnumField(
        in_result='optional',
        enum_values=ORIGIN_ENUMS,
    )

    phone = UnicodeLimitedField(
        in_result='optional',
        max_length=20,
    )

    proto = UnicodeEnumField(
        in_result='optional',
        enum_values=PROTO_ENUMS,
    )

    registrar = UnicodeLimitedField(
        in_result='optional',
        max_length=100,
    )

    sha1 = SHA1Field(
        in_result='optional',
    )

    sport = PortField(
        in_result='optional',
    )

    target = UnicodeLimitedField(
        in_result='optional',
        max_length=100,
    )

    url = URLField(
        in_result='optional',

        extra_params=dict(
            sub=URLSubstringField(),    # `url.sub`
        ),
    )

    url_pattern = UnicodeLimitedField(
        in_result='optional',
        max_length=255,
        disallow_empty=True,
    )

    username = UnicodeLimitedField(
        in_result='optional',
        max_length=64,
    )

    x509fp_sha1 = SHA1Field(
        in_result='optional',
    )
class AllSearchableDataSpec(DataSpec):

    """
    A :class:`DataSpec` subclass with most of its fields marked as searchable.

    You may want to use this class instead of :class:`DataSpec` if your
    data backend makes it easy to search by various event attributes
    (all relevant ones or most of them).

    Typically, you will want to create your own subclass of
    :class:`AllSearchableDataSpec` (especially to *disable* some
    searchable parameters).  For example::

        class MyDataSpec(AllSearchableDataSpec):

            # disable `source` as a query parameter
            source = Ext(in_params=None)

            # disable the `time.max` query parameter
            # (leaving `time.min` and `time.until` still enabled)
            time = Ext(
                extra_params=Ext(
                    max=Ext(in_params=None),
                ),
            )

            # disable the `fqdn.sub` query parameter and, at the
            # same time, add a new query parameter: `fqdn.prefix`
            fqdn = Ext(
                extra_params=Ext(
                    sub=Ext(in_params=None),
                    prefix=DomainNameSubstringField(in_params='optional'),
                ),
            )

            # completely disable the `modified` field (together with the
            # related "extra params": `modified.min` etc.)
            modified = None

            # add a new field
            weekday = UnicodeEnumField(
                in_params='optional',
                in_result='optional',
                enum_values=(
                    'Monday', 'Tuesday', 'Wednesday', 'Thursday',
                    'Friday', 'Saturday', 'Sunday',
                ),
            )

    .. seealso::

        Compare this class with :class:`DataSpec`.
    """

    #
    # Fields that are always *required in results*

    id = Ext(in_params='optional')

    source = Ext(in_params='optional')

    restriction = Ext(in_params='optional')

    confidence = Ext(in_params='optional')

    category = Ext(in_params='optional')

    time = Ext(
        extra_params=Ext(
            min=Ext(in_params='optional'),
            max=Ext(in_params='optional'),
            until=Ext(in_params='optional'),
        ),
    )

    #
    # Fields related to `address`

    # (the `address` field from the superclass remains unchanged)

    ip = Ext(
        in_params='optional',
        extra_params=Ext(
            net=Ext(in_params='optional'),
        ),
    )

    ipv6 = Ext(
        in_params='optional',
        extra_params=Ext(
            net=Ext(in_params='optional'),
        ),
    )

    asn = Ext(in_params='optional')

    cc = Ext(in_params='optional')

    #
    # Fields related only to black list events

    active = Ext(
        extra_params=Ext(
            min=Ext(in_params='optional'),
            max=Ext(in_params='optional'),
            until=Ext(in_params='optional'),
        ),
    )

    # (the `expires` field from the superclass remains unchanged)

    replaces = Ext(in_params='optional')

    status = Ext(in_params='optional')

    #
    # Fields related only to aggregated (high frequency) events

    # (the `count` field from the superclass remains unchanged)

    # (the `until` field from the superclass remains unchanged)

    #
    # Other fields

    action = Ext(in_params='optional')

    # (the `adip` field from the superclass remains unchanged)

    dip = Ext(in_params='optional')

    dport = Ext(in_params='optional')

    email = Ext(in_params='optional')

    fqdn = Ext(
        in_params='optional',
        extra_params=Ext(
            sub=Ext(in_params='optional'),
        ),
    )

    iban = Ext(in_params='optional')

    # (the `injects` field from the superclass remains unchanged)

    md5 = Ext(in_params='optional')

    modified = Ext(
        extra_params=Ext(
            min=Ext(in_params='optional'),
            max=Ext(in_params='optional'),
            until=Ext(in_params='optional'),
        ),
    )

    name = Ext(in_params='optional')

    origin = Ext(in_params='optional')

    phone = Ext(in_params='optional')

    proto = Ext(in_params='optional')

    registrar = Ext(in_params='optional')

    sha1 = Ext(in_params='optional')

    sport = Ext(in_params='optional')

    target = Ext(in_params='optional')

    url = Ext(
        in_params='optional',
        extra_params=Ext(
            sub=Ext(in_params='optional'),
        ),
    )

    url_pattern = Ext(in_params='optional')

    username = Ext(in_params='optional')

    x509fp_sha1 = Ext(in_params='optional')