A DataPoint represents a data resource and is an abstraction of a URL. More...
#include <DataPoint.h>
Public Types | |
enum | DataPointAccessLatency { ACCESS_LATENCY_ZERO, ACCESS_LATENCY_SMALL, ACCESS_LATENCY_LARGE } |
enum | DataPointInfoType { INFO_TYPE_MINIMAL = 0, INFO_TYPE_NAME = 1, INFO_TYPE_TYPE = 2, INFO_TYPE_TIMES = 4, INFO_TYPE_CONTENT = 8, INFO_TYPE_ACCESS = 16, INFO_TYPE_STRUCT = 32, INFO_TYPE_REST = 64, INFO_TYPE_ALL = 127 } |
Public Member Functions | |
virtual | ~DataPoint () |
virtual const URL & | GetURL () const |
virtual const UserConfig & | GetUserConfig () const |
virtual bool | SetURL (const URL &url) |
virtual std::string | str () const |
virtual | operator bool () const |
virtual bool | operator! () const |
virtual DataStatus | PrepareReading (unsigned int timeout, unsigned int &wait_time) |
virtual DataStatus | PrepareWriting (unsigned int timeout, unsigned int &wait_time) |
virtual DataStatus | StartReading (DataBuffer &buffer)=0 |
virtual DataStatus | StartWriting (DataBuffer &buffer, DataCallback *space_cb=NULL)=0 |
virtual DataStatus | StopReading ()=0 |
virtual DataStatus | StopWriting ()=0 |
virtual DataStatus | FinishReading (bool error=false) |
virtual DataStatus | FinishWriting (bool error=false) |
virtual DataStatus | Check ()=0 |
virtual DataStatus | Remove ()=0 |
virtual DataStatus | Stat (FileInfo &file, DataPointInfoType verb=INFO_TYPE_ALL)=0 |
virtual DataStatus | Stat (std::list< FileInfo > &files, const std::list< DataPoint * > &urls, DataPointInfoType verb=INFO_TYPE_ALL)=0 |
virtual DataStatus | List (std::list< FileInfo > &files, DataPointInfoType verb=INFO_TYPE_ALL)=0 |
virtual DataStatus | CreateDirectory (bool with_parents=false)=0 |
virtual void | ReadOutOfOrder (bool v)=0 |
virtual bool | WriteOutOfOrder ()=0 |
virtual void | SetAdditionalChecks (bool v)=0 |
virtual bool | GetAdditionalChecks () const =0 |
virtual void | SetSecure (bool v)=0 |
virtual bool | GetSecure () const =0 |
virtual void | Passive (bool v)=0 |
virtual DataStatus | GetFailureReason (void) const |
virtual void | Range (unsigned long long int start=0, unsigned long long int end=0)=0 |
virtual DataStatus | Resolve (bool source)=0 |
virtual DataStatus | Resolve (bool source, const std::list< DataPoint * > &urls)=0 |
virtual bool | Registered () const =0 |
virtual DataStatus | PreRegister (bool replication, bool force=false)=0 |
virtual DataStatus | PostRegister (bool replication)=0 |
virtual DataStatus | PreUnregister (bool replication)=0 |
virtual DataStatus | Unregister (bool all)=0 |
virtual bool | CheckSize () const |
virtual void | SetSize (const unsigned long long int val) |
virtual unsigned long long int | GetSize () const |
virtual bool | CheckCheckSum () const |
virtual void | SetCheckSum (const std::string &val) |
virtual const std::string & | GetCheckSum () const |
virtual const std::string | DefaultCheckSum () const |
virtual bool | CheckCreated () const |
virtual void | SetCreated (const Time &val) |
virtual const Time & | GetCreated () const |
virtual bool | CheckValid () const |
virtual void | SetValid (const Time &val) |
virtual const Time & | GetValid () const |
virtual void | SetAccessLatency (const DataPointAccessLatency &latency) |
virtual DataPointAccessLatency | GetAccessLatency () const |
virtual long long int | BufSize () const =0 |
virtual int | BufNum () const =0 |
virtual bool | Cache () const |
virtual bool | Local () const =0 |
virtual int | GetTries () const |
virtual void | SetTries (const int n) |
virtual void | NextTry (void) |
virtual bool | IsIndex () const =0 |
virtual bool | IsStageable () const |
virtual bool | AcceptsMeta () const =0 |
virtual bool | ProvidesMeta () const =0 |
virtual void | SetMeta (const DataPoint &p) |
virtual bool | CompareMeta (const DataPoint &p) const |
virtual std::vector< URL > | TransferLocations () const |
virtual const URL & | CurrentLocation () const =0 |
virtual const std::string & | CurrentLocationMetadata () const =0 |
virtual DataPoint * | CurrentLocationHandle () const =0 |
virtual DataStatus | CompareLocationMetadata () const =0 |
virtual bool | NextLocation ()=0 |
virtual bool | LocationValid () const =0 |
virtual bool | LastLocation ()=0 |
virtual bool | HaveLocations () const =0 |
virtual DataStatus | AddLocation (const URL &url, const std::string &meta)=0 |
virtual DataStatus | RemoveLocation ()=0 |
virtual DataStatus | RemoveLocations (const DataPoint &p)=0 |
virtual DataStatus | ClearLocations ()=0 |
virtual int | AddCheckSumObject (CheckSum *cksum)=0 |
virtual const CheckSum * | GetCheckSumObject (int index) const =0 |
virtual void | SortLocations (const std::string &pattern, const URLMap &url_map)=0 |
virtual void | AddURLOptions (const std::map< std::string, std::string > &options) |
Protected Member Functions | |
DataPoint (const URL &url, const UserConfig &usercfg, PluginArgument *parg) | |
Protected Attributes | |
std::set< std::string > | valid_url_options |
A DataPoint represents a data resource and is an abstraction of a URL.
DataPoint uses ARC's Plugin mechanism to dynamically load the required Data Manager Component (DMC) when necessary. A DMC typically defines a subclass of DataPoint (e.g. DataPointHTTP) and is responsible for a specific protocol (e.g. http). DataPoints should not be used directly; instead, the DataHandle wrapper class should be used, which automatically loads the correct DMC.
DataPoint defines methods for access to the data resource. To transfer data between two DataPoints, DataMover::Transfer() can be used.
There are two subclasses of DataPoint, DataPointDirect and DataPointIndex. None of these three classes can be instantiated directly. DataPointDirect and its subclasses handle "physical" resources through protocols such as file, http and gsiftp. These classes implement methods such as StartReading() and StartWriting(). DataPointIndex and its subclasses handle resources such as indexes and catalogs and implement methods like Resolve() and PreRegister().
When creating a new DMC, a subclass of either DataPointDirect or DataPointIndex should be created, and the appropriate methods implemented. DataPoint itself has no direct external dependencies, but plugins may rely on third-party components. The new DMC must also add itself to the list of available plugins and provide an Instance() method which returns a new instance of itself, if the supplied arguments are valid for the protocol. Here is an example implementation of a new DMC for protocol MyProtocol which represents a physical resource accessible through protocol my://
#include <arc/data/DataPointDirect.h> namespace Arc { class DataPointMyProtocol : public DataPointDirect { public: DataPointMyProtocol(const URL& url, const UserConfig& usercfg); static Plugin* Instance(PluginArgument *arg); virtual DataStatus StartReading(DataBuffer& buffer); ... }; DataPointMyProtocol::DataPointMyProtocol(const URL& url, const UserConfig& usercfg) { ... } DataPointMyProtocol::StartReading(DataBuffer& buffer) { ... } ... Plugin* DataPointMyProtocol::Instance(PluginArgument *arg) { DataPointPluginArgument *dmcarg = dynamic_cast<DataPointPluginArgument*>(arg); if (!dmcarg) return NULL; if (((const URL &)(*dmcarg)).Protocol() != "my") return NULL; return new DataPointMyProtocol(*dmcarg, *dmcarg); } } // namespace Arc Arc::PluginDescriptor PLUGINS_TABLE_NAME[] = { { "my", "HED:DMC", 0, &Arc::DataPointMyProtocol::Instance }, { NULL, NULL, 0, NULL } };
Describes the latency to access this URL.
For now this value is one of a small set specified by the enumeration. In the future with more sophisticated protocols or information it could be replaced by a more fine-grained list of possibilities such as an int value.
Describes type of information about URL to request.
Arc::DataPoint::DataPoint | ( | const URL & | url, | |
const UserConfig & | usercfg, | |||
PluginArgument * | parg | |||
) | [protected] |
Constructor.
Constructor is protected because DataPoints should not be created directly. Subclasses should however call this in their constructors to set various common attributes.
url | The URL representing the DataPoint | |
usercfg | User configuration object |
virtual int Arc::DataPoint::AddCheckSumObject | ( | CheckSum * | cksum | ) | [pure virtual] |
Add a checksum object which will compute checksum during transmission.
cksum | object which will compute checksum. Should not be destroyed before the DataPoint itself is. |
Implemented in Arc::DataPointDirect, and Arc::DataPointIndex.
virtual DataStatus Arc::DataPoint::AddLocation | ( | const URL & | url, | |
const std::string & | meta | |||
) | [pure virtual] |
Add URL to list.
url | Location URL to add. | |
meta | Location meta information. |
Implemented in Arc::DataPointDirect, and Arc::DataPointIndex.
virtual void Arc::DataPoint::AddURLOptions | ( | const std::map< std::string, std::string > & | options | ) | [virtual] |
Add URL options to this DataPoint's URL object. Invalid options for the DataPoint instance will not be added.
virtual DataStatus Arc::DataPoint::Check | ( | ) | [pure virtual] |
Query the DataPoint to check if object is accessible.
If possible this method will also try to provide meta information about the object. It returns positive response if object's content can be retrieved.
Implemented in Arc::DataPointIndex.
virtual DataStatus Arc::DataPoint::CompareLocationMetadata | ( | ) | const [pure virtual] |
Compare metadata of DataPoint and current location.
Returns inconsistency error or error encountered during operation, or success
Implemented in Arc::DataPointDirect, and Arc::DataPointIndex.
virtual bool Arc::DataPoint::CompareMeta | ( | const DataPoint & | p | ) | const [virtual] |
Compare meta information from another object.
Undefined values are not used for comparison.
p | object to which to compare. |
virtual DataStatus Arc::DataPoint::CreateDirectory | ( | bool | with_parents = false |
) | [pure virtual] |
Create a directory.
If the protocol supports it, this method creates the last directory in the path to the URL. It assumes the last component of the path is a file-like object and not a directory itself, unless the path ends in a directory separator. If with_parents is true then all missing parent directories in the path will also be created.
with_parents | If true then all missing directories in the path are created |
virtual const std::string& Arc::DataPoint::CurrentLocationMetadata | ( | ) | const [pure virtual] |
Returns meta information used to create current URL.
Usage differs between different indexing services.
Implemented in Arc::DataPointDirect, and Arc::DataPointIndex.
virtual DataStatus Arc::DataPoint::FinishReading | ( | bool | error = false |
) | [virtual] |
Finish reading from the URL.
Must be called after transfer of physical file has completed and if PrepareReading() was called, to free resources, release requests that were made during preparation etc.
error | If true then action is taken depending on the error. |
Reimplemented in Arc::DataPointIndex.
virtual DataStatus Arc::DataPoint::FinishWriting | ( | bool | error = false |
) | [virtual] |
Finish writing to the URL.
Must be called after transfer of physical file has completed and if PrepareWriting() was called, to free resources, release requests that were made during preparation etc.
error | If true then action is taken depending on the error. |
Reimplemented in Arc::DataPointIndex.
virtual DataStatus Arc::DataPoint::GetFailureReason | ( | void | ) | const [virtual] |
Returns reason of transfer failure, as reported by callbacks. This could be different from the failure returned by the methods themselves.
virtual DataStatus Arc::DataPoint::List | ( | std::list< FileInfo > & | files, | |
DataPointInfoType | verb = INFO_TYPE_ALL | |||
) | [pure virtual] |
List hierarchical content of this object.
If the DataPoint represents a directory or something similar its contents will be listed.
files | will contain list of file names and requested attributes. There may be more attributes than requested. There may be fewer if the object can't provide particular information. | |
verb | defines attribute types which method must try to retrieve. It is not a failure if some attributes could not be retrieved due to limitation of protocol or access control. |
virtual bool Arc::DataPoint::NextLocation | ( | ) | [pure virtual] |
Switch to next location in list of URLs.
At last location switch to first if number of allowed retries is not exceeded. Returns false if no retries left.
Implemented in Arc::DataPointDirect, and Arc::DataPointIndex.
virtual void Arc::DataPoint::Passive | ( | bool | v | ) | [pure virtual] |
Request passive transfers for FTP-like protocols.
v | true to request passive transfers. |
Implemented in Arc::DataPointDirect, and Arc::DataPointIndex.
virtual DataStatus Arc::DataPoint::PostRegister | ( | bool | replication | ) | [pure virtual] |
Index Service postregistration.
Used for same purpose as PreRegister. Should be called after actual transfer of file successfully finished.
replication | if true, the file is being replicated between two locations registered in Indexing Service under same name. |
Implemented in Arc::DataPointDirect.
virtual DataStatus Arc::DataPoint::PrepareReading | ( | unsigned int | timeout, | |
unsigned int & | wait_time | |||
) | [virtual] |
Prepare DataPoint for reading.
This method should be implemented by protocols which require preparation or staging of physical files for reading. It can act synchronously or asynchronously (if protocol supports it). In the first case the method will block until the file is prepared or the specified timeout has passed. In the second case the method can return with a ReadPrepareWait status before the file is prepared. The caller should then wait some time (a hint from the remote service may be given in wait_time) and call PrepareReading() again to poll for the preparation status, until the file is prepared. In this case it is also up to the caller to decide when the request has taken too long and if so cancel it by calling FinishReading(). When file preparation has finished, the physical file(s) to read from can be found from TransferLocations().
timeout | If non-zero, this method will block until either the file has been prepared successfully or the timeout has passed. A zero value means that the caller would like to call and poll for status. | |
wait_time | If timeout is zero (caller would like asynchronous operation) and ReadPrepareWait is returned, a hint for how long to wait before a subsequent call may be given in wait_time. |
Reimplemented in Arc::DataPointIndex.
virtual DataStatus Arc::DataPoint::PrepareWriting | ( | unsigned int | timeout, | |
unsigned int & | wait_time | |||
) | [virtual] |
Prepare DataPoint for writing.
This method should be implemented by protocols which require preparation of physical files for writing. It can act synchronously or asynchronously (if protocol supports it). In the first case the method will block until the file is prepared or the specified timeout has passed. In the second case the method can return with a WritePrepareWait status before the file is prepared. The caller should then wait some time (a hint from the remote service may be given in wait_time) and call PrepareWriting() again to poll for the preparation status, until the file is prepared. In this case it is also up to the caller to decide when the request has taken too long and if so cancel or abort it by calling FinishWriting(true). When file preparation has finished, the physical file(s) to write to can be found from TransferLocations().
timeout | If non-zero, this method will block until either the file has been prepared successfully or the timeout has passed. A zero value means that the caller would like to call and poll for status. | |
wait_time | If timeout is zero (caller would like asynchronous operation) and WritePrepareWait is returned, a hint for how long to wait before a subsequent call may be given in wait_time. |
Reimplemented in Arc::DataPointIndex.
virtual DataStatus Arc::DataPoint::PreRegister | ( | bool | replication, | |
bool | force = false | |||
) | [pure virtual] |
Index service preregistration.
This function registers the physical location of a file into an indexing service. It should be called *before* the actual transfer to that location happens.
replication | if true, the file is being replicated between two locations registered in the indexing service under same name. | |
force | if true, perform registration of a new file even if it already exists. Should be used to fix failures in Indexing Service. |
Implemented in Arc::DataPointDirect.
virtual DataStatus Arc::DataPoint::PreUnregister | ( | bool | replication | ) | [pure virtual] |
Index Service preunregistration.
Should be called if file transfer failed. It removes changes made by PreRegister.
replication | if true, the file is being replicated between two locations registered in Indexing Service under same name. |
Implemented in Arc::DataPointDirect.
virtual bool Arc::DataPoint::ProvidesMeta | ( | ) | const [pure virtual] |
Returns true if the endpoint can provide at least some meta information directly.
Implemented in Arc::DataPointDirect, and Arc::DataPointIndex.
virtual void Arc::DataPoint::Range | ( | unsigned long long int | start = 0, | |
unsigned long long int | end = 0 | |||
) | [pure virtual] |
Set range of bytes to retrieve.
Default values correspond to whole file.
Implemented in Arc::DataPointDirect, and Arc::DataPointIndex.
virtual void Arc::DataPoint::ReadOutOfOrder | ( | bool | v | ) | [pure virtual] |
Allow/disallow DataPoint to produce scattered data during *reading* operation.
v | true if allowed (default is false). |
Implemented in Arc::DataPointDirect, and Arc::DataPointIndex.
virtual bool Arc::DataPoint::Registered | ( | ) | const [pure virtual] |
Check if file is registered in Indexing Service.
Proper value is obtainable only after Resolve.
Implemented in Arc::DataPointDirect, and Arc::DataPointIndex.
virtual DataStatus Arc::DataPoint::Resolve | ( | bool | source, | |
const std::list< DataPoint * > & | urls | |||
) | [pure virtual] |
Resolves several index service URLs.
Can use bulk calls if protocol allows. The protocols and hosts of all the DataPoints in urls must be the same and the same as this DataPoint's protocol and host. This method can be called on any of the urls, for example urls.front()->Resolve(true, urls);
source | true if DataPoint objects represent source of information | |
urls | List of DataPoints to resolve. Protocols and hosts must match and match this DataPoint's protocol and host. |
Implemented in Arc::DataPointDirect.
virtual DataStatus Arc::DataPoint::Resolve | ( | bool | source | ) | [pure virtual] |
Resolves index service URL into list of ordinary URLs.
Also obtains meta information about the file.
source | true if DataPoint object represents source of information. |
Implemented in Arc::DataPointDirect.
virtual void Arc::DataPoint::SetAdditionalChecks | ( | bool | v | ) | [pure virtual] |
Allow/disallow additional checks.
Check for existence of remote file (and probably other checks too) before initiating reading and writing operations.
v | true if allowed (default is true). |
Implemented in Arc::DataPointDirect, and Arc::DataPointIndex.
virtual void Arc::DataPoint::SetMeta | ( | const DataPoint & | p | ) | [virtual] |
Copy meta information from another object.
Already defined values are not overwritten.
p | object from which information is taken. |
Reimplemented in Arc::DataPointIndex.
virtual void Arc::DataPoint::SetSecure | ( | bool | v | ) | [pure virtual] |
Allow/disallow heavy security during data transfer.
v | true if allowed (default depends on protocol). |
Implemented in Arc::DataPointDirect, and Arc::DataPointIndex.
virtual bool Arc::DataPoint::SetURL | ( | const URL & | url | ) | [virtual] |
Assigns new URL. Main purpose of this method is to reuse an existing connection for accessing a different object on the same server. Implementations are not required to support this method. If the supplied URL is not suitable or the method is not implemented, false is returned.
virtual void Arc::DataPoint::SortLocations | ( | const std::string & | pattern, | |
const URLMap & | url_map | |||
) | [pure virtual] |
Sort locations according to the specified pattern.
pattern | a set of strings, separated by |, to match against. |
Implemented in Arc::DataPointDirect, and Arc::DataPointIndex.
virtual DataStatus Arc::DataPoint::StartReading | ( | DataBuffer & | buffer | ) | [pure virtual] |
Start reading data from URL.
Separate thread to transfer data will be created. No other operation can be performed while reading is in progress.
buffer | operation will use this buffer to put information into. Should not be destroyed before StopReading() was called and returned. |
Implemented in Arc::DataPointIndex.
virtual DataStatus Arc::DataPoint::StartWriting | ( | DataBuffer & | buffer, | |
DataCallback * | space_cb = NULL | |||
) | [pure virtual] |
Start writing data to URL.
Separate thread to transfer data will be created. No other operation can be performed while writing is in progress.
buffer | operation will use this buffer to get information from. Should not be destroyed before StopWriting() was called and returned. | |
space_cb | callback which is called if there is not enough space to store data. May not be implemented for all protocols. |
Implemented in Arc::DataPointIndex.
virtual DataStatus Arc::DataPoint::Stat | ( | std::list< FileInfo > & | files, | |
const std::list< DataPoint * > & | urls, | |||
DataPointInfoType | verb = INFO_TYPE_ALL | |||
) | [pure virtual] |
Retrieve information about several DataPoints.
If a DataPoint represents a directory or something similar, information about the object itself and not its contents will be obtained. This method can use bulk operations if the protocol supports it. The protocols and hosts of all the DataPoints in urls must be the same and the same as this DataPoint's protocol and host. This method can be called on any of the urls, for example urls.front()->Stat(files, urls); Calling this method with an empty list of urls returns success if the protocol supports bulk Stat, and an error if it does not.
files | will contain objects' names and requested attributes. There may be more attributes than requested. There may be fewer if objects can't provide particular information. The order of this list matches the order of urls. If a stat of any url fails then the corresponding FileInfo in this list will evaluate to false. | |
urls | list of DataPoints to stat. Protocols and hosts must match and match this DataPoint's protocol and host. | |
verb | defines attribute types which method must try to retrieve. It is not a failure if some attributes could not be retrieved due to limitation of protocol or access control. |
Implemented in Arc::DataPointDirect.
virtual DataStatus Arc::DataPoint::Stat | ( | FileInfo & | file, | |
DataPointInfoType | verb = INFO_TYPE_ALL | |||
) | [pure virtual] |
Retrieve information about this object.
If the DataPoint represents a directory or something similar, information about the object itself and not its contents will be obtained.
file | will contain object name and requested attributes. There may be more attributes than requested. There may be fewer if the object can't provide particular information. | |
verb | defines attribute types which method must try to retrieve. It is not a failure if some attributes could not be retrieved due to limitation of protocol or access control. |
virtual DataStatus Arc::DataPoint::StopReading | ( | ) | [pure virtual] |
Stop reading.
Must be called after the corresponding StartReading() call, either after all data is transferred or to cancel the transfer. Use the buffer object to find out when data is transferred. Must return failure if any occurred during the transfer.
Implemented in Arc::DataPointIndex.
virtual DataStatus Arc::DataPoint::StopWriting | ( | ) | [pure virtual] |
Stop writing.
Must be called after the corresponding StartWriting() call, either after all data is transferred or to cancel the transfer. Use the buffer object to find out when data is transferred. Must return failure if any occurred during the transfer.
Implemented in Arc::DataPointIndex.
virtual std::vector<URL> Arc::DataPoint::TransferLocations | ( | ) | const [virtual] |
Returns physical file(s) to read/write, if different from CurrentLocation().
To be used with protocols which re-direct to different URLs such as Transport URLs (TURLs). The list is initially filled by PrepareReading and PrepareWriting. If this list is non-empty then real transfer should use a URL from this list. It is up to the caller to choose the best URL and instantiate new DataPoint for handling it. For consistency protocols which do not require redirections return original URL. For protocols which need redirection calling StartReading and StartWriting will use first URL in the list.
Reimplemented in Arc::DataPointIndex.
virtual DataStatus Arc::DataPoint::Unregister | ( | bool | all | ) | [pure virtual] |
Index Service unregistration.
Remove information about file registered in Indexing Service.
all | if true, information about the file itself (LFN) is removed. Otherwise only the particular physical instance is unregistered. |
Implemented in Arc::DataPointDirect.
virtual bool Arc::DataPoint::WriteOutOfOrder | ( | ) | [pure virtual] |
Returns true if URL can accept scattered data for *writing* operation.
Implemented in Arc::DataPointDirect, and Arc::DataPointIndex.
std::set<std::string> Arc::DataPoint::valid_url_options [protected] |
Subclasses should add their own specific options to this set.