Ensures normalization on waitfe/fc-xml operations

[anna.git] / include / anna / diameter / codec / Message.hpp
diff --git a/include/anna/diameter/codec/Message.hpp b/include/anna/diameter/codec/Message.hpp

index 133c4af..2ec338b 100644 (file)
--- a/include/anna/diameter/codec/Message.hpp
+++ b/include/anna/diameter/codec/Message.hpp
@@ -163,18 +163,40 @@ public:
  
    /**
    * Default constructor
+  * @param engine Codec engine used
    */
-  Message();
+  Message(Engine *engine = NULL);
  
    /**
    * Identified constructor
    * @param id Command identifier as pair (code,request-indicator).
+  * @param engine Codec engine used
    */
-  Message(CommandId id);
+  Message(CommandId id, Engine *engine = NULL);
  
  
-  /** Sets the codec engine */
-  void setEngine(Engine *engine) throw() { a_engine = engine; }
+  /**
+  * Sets the codec engine.
+  *
+  * Once assigned (here or at constructor), this method SHALL NOT be used anymore.
+  * Also, the associated dictionary SHOULD NOT BE CHANGED through the engine,
+  * unless you know what you are doing.
+  * Setting a different engine with a different stack, or even the same engine whose
+  * stack has been dynamically changed, could cause bad behaviour depending on the
+  * changes: in general, if the dictionary grows, nothing bad will happen, but if
+  * you remove or modify some elements which were processed with a certain format,
+  * they will be interpreted as 'unknown' with the new dictionary, and then some problems
+  * may occur. Adding elements (vendors, avps, messages) is not a problem.
+  *
+  * IMPORTANT NOTES:
+  * 1) if you want to reuse the message, it is recommended that you #clear the
+  * message. That way, the next operation will automatically adjust the needed engine.
+  * 2) if you have dedicated message objects for each interface (application id), then
+  * you could set the corresponding engine on constructor (or setEngine), and forget
+  * about #clear. The needed cleanup will be done automatically from decoding and xml
+  * loading procedures, and initialized engine will be kept along message operations.
+  */
+  void setEngine(Engine *engine) throw();
  
  
    // Length references
@@ -201,7 +223,8 @@ public:
    /**
    * Destructor
    */
-  ~Message();
+  virtual ~Message();
+
    // Virtual destructors are useful when you can delete an instance of a derived class through a pointer to base class:
    // This destructor is not virtual, then a pointer to base class (even pointing to a children one) will invoke this destructor, not the derived one.
    // My current solution: virtualizing method 'clear'
@@ -215,9 +238,8 @@ public:
       Sets the command identifier and clear the former content.
  
       @param id Command identifier as pair (code,request-indicator).
-     @param _clear Message will be cleared when updating the command identifier (default behaviour).
    */
-  void setId(CommandId id, bool _clear = true) throw(anna::RuntimeException);
+  void setId(CommandId id) throw(anna::RuntimeException);
  
    /**
       Same as #setId but providing dictionary logical name for Avp searched
@@ -242,24 +264,26 @@ public:
    /**
       Sets/unsets E bit activation.
       Application should not have to use this because dictionary information is used in order to configure flags when Message identifier is stored.
+     This flag MUST NOT be set in request messages (in this case, it will be ignored).
  
       @param activate Activates/deactivates the bit. True by default.
    */
-  void setErrorBit(bool activate = true) throw() { if(activate) a_flags |= EBitMask; else a_flags &= (~EBitMask); }
+  void setErrorBit(bool activate = true) throw() { if(isRequest()) return; if(activate) a_flags |= EBitMask; else a_flags &= (~EBitMask); }
  
    /**
       Sets/unsets T bit activation.
       Application should not have to use this because dictionary information is used in order to configure flags when Message identifier is stored.
+     This flag MUST NOT be set in answer messages (in this case, it will be ignored).
  
       @param activate Activates/deactivates the bit. True by default.
    */
-  void setPotentiallyReTransmittedMessageBit(bool activate = true) throw() { if(activate) a_flags |= TBitMask; else a_flags &= (~TBitMask); }
+  void setPotentiallyReTransmittedMessageBit(bool activate = true) throw() { if(isAnswer()) return; if(activate) a_flags |= TBitMask; else a_flags &= (~TBitMask); }
  
    /**
       Sets the message application id.
  
       The codec engine could be configured to force a stack selection based in this field value: see #selectStackWithApplicationId.
-     In multistack applications (which also shall be monothreaded), you only have to take care about how to apply this method: the thing
+     In multistack applications (when they are monothreaded), you only have to take care about how to apply this method: the thing
       is that you must not interleave message builds which belongs to different stacks. For example, you could think about setting the
       message header for message A using stack A. Then, start to add the message header fields for a second message B using another stack B.
       Following you would add the message A avps, but then, the stack is not going to be automatically changed (this is only done through this
@@ -270,7 +294,7 @@ public:
  
       @param aid Application-id.
    */
-  void setApplicationId(U32 aid) throw();
+  void setApplicationId(U32 aid) throw(anna::RuntimeException);
  
    /**
       Sets the message hop-by-hop
@@ -293,10 +317,12 @@ public:
  
       @warning Request provided must be a request, in other case method do nothing.
    */
-  void setHeaderToAnswer(const Message &request) throw() {
+  void setHeaderToAnswer(const Message &request) throw(anna::RuntimeException) {
      if(!request.getId().second) return;
  
-    setId(CommandId(request.getId().first, !request.getId().second), false /* don't clear */);
+    a_engine = request.getEngine(); // we know this will be
+
+    setId(CommandId(request.getId().first, !request.getId().second));
      setVersion(request.getVersion());
      setApplicationId(request.getApplicationId());
      setHopByHop(request.getHopByHop()); // The same Hop-by-Hop Identifier in the request is used in the answer (RFC 6733 Section 6.2).
@@ -371,7 +397,7 @@ public:
  
     @warning Request provided must be a request, in other case method do nothing.
    */
-  void setStandardToAnswer(const Message &request, const std::string &originHost, const std::string &originRealm, int resultCode = helpers::base::AVPVALUES__Result_Code::DIAMETER_SUCCESS) throw();
+  void setStandardToAnswer(const Message &request, const std::string &originHost, const std::string &originRealm, int resultCode = helpers::base::AVPVALUES__Result_Code::DIAMETER_SUCCESS) throw(anna::RuntimeException);
  
  
    /**
@@ -439,12 +465,15 @@ public:
  
    /**
       Adds an avp child providing a persistent pointer (must be maintained by application).
+     It is not allowed to add an avp with no codec engine configured, nor if the engine
+     is not the same.
  
       @param avp Avp external pointer. If NULL provided, nothing is done and NULL returned.
+     Also NULL returned for bad engine configuration.
  
       @return Pointer to the added avp (again).
    */
-  Avp * addAvp(Avp * avp) throw() { if(!avp) return NULL; addChild(avp); return avp; }
+  Avp * addAvp(Avp * avp) throw();
  
  
    /**
@@ -467,17 +496,24 @@ public:
  
    /**
    * Clears and initializes Message class information.
-  * Application must clear auxiliary message objects before adding Avps in a new context.
+  * Application must clear auxiliary message objects before adding Avps in a new context if the same object is reused.
    * Application don't need to clear a message object before decode operation (decode invokes #clear before any other task).
    * Any reimplementation must first invoke base class method.
+  *
+  * @param resetEngine Sets the codec engine to NULL (true, default) or respects its current value (false). If you are going
+  * to reuse the message instance it is better to clear all the information (default) to manage different stacks, because if
+  * you don't initialize the engine to NULL, the second use of the message will keep the same engine deduced from the first
+  * decoding/loading operation, which could be wrong if the second message belongs to a different application identifier.
    */
-  virtual void clear() throw(anna::RuntimeException);
+  virtual void clear(bool resetEngine = true) throw(anna::RuntimeException);
  
    /**
       Decodes buffer provided over class content. If an error ocurred, decoding will stop launching exception (fatal error) or a warning trace (perhaps the achieved
       message is valid against all odds then validation will go on). In case that validation is enabled (codec::Engine::ValidationMode) an exception will be launched
       in a moment which depends on validation depth (codec::Engine::ValidationDepth).
  
+     You could decode multiple times over the same object. A basic cleanup is done respecting the codec engine.
+
       @param db buffer data block processed. Before decoding, the whole message instance will be cleared (no need to invoke #clear before #decode).
       @param ptrAnswer Answer set by application (could be empty or not), who is responsible for its memory reservation,
       and automatically built regarding standard. If message analyzed realizes to be an answer, internal reference becomes
@@ -502,87 +538,34 @@ public:
    */
    bool valid(Message *ptrAnswer = NULL) const throw(anna::RuntimeException);
  
-
    /**
       Interpret xml data in order to dump over the class content.
-     \param messageNode Message root node
+     You could apply this multiple times over the same object. A basic cleanup is done respecting the codec engine.
+     \param messageNode Message root node obtained from @functions::xmlFileTo
    */
    void fromXML(const anna::xml::Node* messageNode) throw(anna::RuntimeException);
  
    /**
-     Interpret xml string representation in order to dump over the class content.
-     DTD validation is used in the same way that #loadXML does.
-     \param xmlString XML string representation with relevant information for this instance
-  */
-  void fromXMLString(const std::string &xmlString) throw(anna::RuntimeException);
+   * Interprets an xml file in order to create a diameter message
+   * You could apply this multiple times over the same object. A basic cleanup is done respecting the codec engine.
+   *
+   * @see functions::messageXmlDocumentFromXmlFile
+   * @see fromXML
+   *
+   * @param xmlPathFile Complete path file to the xml document which represents the diameter message
+   */
+  void loadXMLFile(const std::string &xmlPathFile) throw(anna::RuntimeException);
  
    /**
-     Loads an xml file based on this message DTD (could be accumulative, no initialization will be performed by this method).
-
-     <pre>
-     <!ELEMENT message (avp*)>
-     <!ELEMENT avp (avp*)>
-
-     <!ATTLIST message version CDATA #IMPLIED name CDATA #IMPLIED code CDATA #IMPLIED flags CDATA #IMPLIED application-id CDATA #REQUIRED hop-by-hop-id CDATA #IMPLIED end-by-end-id CDATA #IMPLIED>
-     <!--
-        version: Diameter version. Sets '1' by default
-        name:    Command name within working stack (dictionary identifier)
-
-        In order to get more coding capabilities, command code and flags could be established instead of former command name,
-         but neither of them are allowed if 'name' is provided (and vice versa):
-
-        code:    Command code
-        flags:   Command flags byte value (0-255) where standard bit set for flags is 'RPET rrrr': (R)equest, (P)roxiable, (E)rror, Potentially re-(T)ransmitted message and (r)eserved
-
-
-        application-id:   Message application id
-        hop-by-hop-id:    Message hop by hop id. Sets '0' by default
-        end-by-end-id:    Message end by end id. Sets '0' by default
-     -->
-
-     <!ATTLIST avp name CDATA #IMPLIED code CDATA #IMPLIED vendor-code CDATA #IMPLIED flags CDATA #IMPLIED data CDATA #IMPLIED hex-data CDATA #IMPLIED>
-     <!--
-        name:   Avp name within working stack (dictionary identifier)
-
-        In order to get more coding capabilities, avp code, vendor-id and flags could be established instead of former avp name,
-         but neither of them are allowed if 'name' is provided (and vice versa):
-
-        code:          Avp code
-        vendor-code:   Avp vendor code
-        flags:         Avp flags byte value (0-255) where standard bit set for flags is 'VMPr rrrr': (V)endor-specific, (M)andatory, end to end encry(P)tion and r(eserved)
-
-
-        data:          Natural string representation for avp data. Specially applicable with numbers and printable strings, but also
-                        useful for certain formats which could be easily understandable in such friendly/smart representation. We will
-                        achieve different human-readable strings depending on data format:
-
-                          [ OctetString ] (if printable, but not recommended)
-                          [ Integer32, Integer64, Unsigned32, Unsigned64, Float32, Float64 ] (normal number representation)
-                          [ Time ] (NTP timestamp, normal number representation)
-                          [ Address ] (auto detects IPv4 or IPv6 address version, then only ip address is specified: IPv4 with dots, IPv6 with colons)
-                          [ UTF8String, DiameterIdentity, DiameterURI ] (printable)
-                          [ IPFilterRule, QoSFilterRule ] (uses ASCII charset, printable)
-
-                          New application formats must define specific natural representation for internal raw data
-
-        hex-data:      Hexadecimal octet sequence representation (i.e. 'af012fb3', with even number of digits). Suitable for whatever kind
-                        of diameter format, but mandatory for non printable information. OctetString usually transport non human-readable
-                        data and should better be encoded within this field although being printable. Unknown avps (which fails identifying
-                        provided name or code/vendor-code) must always use this representation.
-
-        Xml representation for decoded messages shows natural content except for 'OctetString' format and unknown avps. Anyway, when printable,
-         OctetString could show such information at data field apart from hex-data, because many implementations use this format to transport
-         readable-string data. In general, one of the data fields is mandatory except for 'Grouped' type (its data is another level of avps).
-        Application-specific formats must decide the way to represent its contents, being recommended to use a natural representation if possible,
-         because xml is read by humans with testing and monitoring purposes.
-     -->
-     </pre>
-
-     @param xmlPathFile Complete path file to the xml document which represents the diameter message
-     @see fromXMLString
-  */
-  void loadXML(const std::string & xmlPathFile) throw(anna::RuntimeException);
-
+   * Interprets an xml string in order to create a diameter message
+   * You could apply this multiple times over the same object. A basic cleanup is done respecting the codec engine.
+   *
+   * @see functions::messageXmlDocumentFromXmlString
+   * @see fromXML
+   *
+   * @param xmlString xml representation of the diameter message
+   */
+  void loadXMLString(const std::string &xmlString) throw(anna::RuntimeException);
  
  
    // getters
@@ -732,9 +715,12 @@ public:
  
    /**
       Class xml string representation
+     @param normalize Optional normalization which sorts attribute names and removes
+     newlines in the xml representation in order to ease regexp matching.
+
       \return XML string representation with relevant information for this instance.
    */
-  std::string asXMLString() const throw();
+  std::string asXMLString(bool normalize = false) const throw();
  
    /**
       Comparison operator by mean serialization
@@ -747,69 +733,59 @@ public:
    friend bool operator == (const Message & m1, const Message & m2) throw() { return (m1.asXMLString() == m2.asXMLString()); }
  
    /**
-     Match a regular expression (string pattern) regarding xml string serialization for this message.
-     Using a complex pattern (many avps, grouped ones) it could be necessary to fix the message before
-     using the method in order to perform a more controlled comparison. In the same way, flags could be
-     ignored to simplify message xml presentation.
-     This powerful tool could be used to program traffic analysis and decide future behaviour (routing,
-     traslation, etc.).
+     Matches a regular expression (string pattern) against the xml string serialization of this message.
+     The message xml representation is internally normalized (attribute names are sorted and newlines
+     are removed) in order to ease regexp matching.
  
-     <pre>
-     Examples:
+     You could use simple regular expressions.
+     For example, the pattern '<avp data="(.)*32251@3gpp.org" name="Service-Context-Id"/>' detects
+     PS charging contexts because of data suffix specification '32251@3gpp.org' for that AVP.
+     The pattern '<message(.)* name="Capabilities-Exchange-Request"' detects a CER message. And so on.
  
-     The pattern '<avp name="Service-Context-Id" data="(.)*32251@3gpp.org"/>' detects PS charging contexts
-     because of data suffix specification '32251@3gpp.org' for that AVP.
+     It may seem strange or 'creative' to use regular expressions within a hex string representation,
+     but anyway you could also do such kind of things to check non-printable data parts within the message:
+     for example, the pattern '<avp hex-data="0a[A-Fa-f0-9]{2}0a0a" name="Framed-IP-Address"/>'
+     matches IP addresses for '10.x.10.10' where x = [0..255].
  
-     The pattern '<message version="1" name="Capabilities-Exchange-Request"' detects a CER message.
+     Normally only printable 'data' fields are used for matching issues.
  
-     The pattern (string including carriage returns):
+     Now imagine 'message.xml' containing this avp:
  
-     '<avp name="Subscription-Id">
-        <avp name="Subscription-Id-Type" data="0" alias="END_USER_E164"/>
-        <avp name="Subscription-Id-Data" data="606000106"/>
+     <pre>
+     ...
+     <avp name="Subscription-Id">
+        <avp alias="END_USER_E164" data="0" name="Subscription-Id-Type"/>
+        <avp data="616[0-9]{6}" name="Subscription-Id-Data"/>
       </avp>'
+     ...
+     </pre>
  
-     detects MSISDN (not IMSI) equal to 606000106
+     You could also extract AVP xml normalized representation in this way:
  
-     It would seems strange or 'creative' to use regular expressions within an hex string representation,
-     but anyway you could also do such kind of things to check non-printable data parts within the message:
-     for example, the pattern '<avp name="Framed-IP-Address" hex-data="0a[A-Fa-f0-9][A-Fa-f0-9]0a0a"/>'
-     matchs IP addresses for '10.x.10.10' where x = [0..255].
+     <pre>
+     anna::diameter::codec::Message myMessage;
+     myMessage.loadXMLFile("message.xml");
+     std::string subscriptionId = myMessage.getAvp("Subscription-Id")->getAvp("Subscription-Id-Data")->asXMLString(true);
+     // Former is '<avp data="616[0-9]{6}" name="Subscription-Id-Data"/>'
+     </pre>
+
+     And then use to match incoming messages:
  
-     Note that string pattern could also be generated via #loadXML and then #asXML, that is to say, you
-     could get patterns through xml files which act as conditional triggers over message. In that case,
-     it is not possible to specify regular expressions within xml 'hex-data' fields because parser will fail
-     during hexadecimal read. Normally only printable 'data' fields are used for matching issues.
-
-     For example, imagine a 'pattern.xml' file like:
-     <message version="1" name="Credit-Control-Request" application-id="16777236" hop-by-hop-id="0" end-by-end-id="0">
-        <avp name="Subscription-Id">
-           <avp name="Subscription-Id-Type" data="0" alias="END_USER_E164"/>
-           <avp name="Subscription-Id-Data" data="616[0-9]{6,6}"/>
-        </avp>
-     </message>
-
-     Then you could do:
-
-     anna::diameter::codec::Message patternMessage;
-     patternMessage.loadXML("pattern.xml");
-     std::string pattern = patternMessage.getAvp("Subscription-Id")->getAvp("Subscription-Id-Type")->asXMLString();
-     // Former is '<avp name="Subscription-Id-Data" data="616[0-9]{6,6}"/>'
-     bool match = incomingMessage.isLike(pattern);
-
-     Then, messages having MSISDN numbers starting with '616' will match the pattern.
-     Note, that any other message codes (and not only Credit-Control-Request ones), could pass the test...
-     You could also build that string manually:
-
-     Example 1:
-     std::string pattern = "<avp name=\"Subscription-Id\">\n";
-     pattern += ANNA_XML_COMPILER_TAB; pattern += "<avp name=\"Subscription-Id-Type\" data=\"0\" alias=\"END_USER_E164\"/>\n"
-     pattern += ANNA_XML_COMPILER_TAB; pattern += "<avp name=\"Subscription-Id-Data\" data=\"616[0-9]{6,6}\"/>"
-
-     Example 2:
-     std::string pattern = "name=\"Subscription-Id\"(.)*name=\"Subscription-Id-Type\" data=\"0\"(.)*name=\"Subscription-Id-Data\" data=\"616[0-9]{6,6}\"";
+     <pre>
+     bool match = incomingMessage.isLike(subscriptionId);
       </pre>
  
+     Using a complex pattern (many avps, grouped ones) is possible; indeed, the ADML testing engine supports 'waitfe/fc-xml'
+     operations which load entire diameter messages to be used as a whole regular expression (hop-by-hop, end-to-end and
+     the Origin-State-Id avp are automatically replaced by '[0-9]+' to make the comparison possible).
+
+     Those operations do all the work, but if you use the API directly, you should take into account:
+
+     - Respect indentation for inner Message xml representation (normally 3 spaces).
+     - Sort alphabetically the attribute names in every xml node.
+     - Remove all the newlines in the xml representation as normalization stage.
+     - Ignore flags and set the fix mode for the message.
+
       \return Returns the match result
    */
    bool isLike(const std::string &pattern) const throw();
@@ -825,3 +801,4 @@ public:
  
  
  #endif
+