diff --git a/docs/README.md b/docs/README.md
index 74bc5835..1f312411 100644
--- a/docs/README.md
+++ b/docs/README.md
@@ -2,6 +2,7 @@
 This directory contains some documentation about the driftctl codebase, aimed at readers who are interested in making code contributions.
 
+- [Add new remote provider](new-remote-provider.md)
 - [Add new resources](new-resource.md)
 - [Testing](testing.md)
 
diff --git a/docs/media/generalflow.png b/docs/media/generalflow.png
new file mode 100644
index 00000000..d5e31ec7
Binary files /dev/null and b/docs/media/generalflow.png differ
diff --git a/docs/media/generalflow.puml b/docs/media/generalflow.puml
new file mode 100644
index 00000000..c30cbf04
--- /dev/null
+++ b/docs/media/generalflow.puml
@@ -0,0 +1,26 @@
+@startuml
+hnote across: Retrieve Resources
+Driftctl -> IACSupplier: Resource()
+IACSupplier --> Driftctl: stateResources []Resource
+Driftctl -> RemoteSupplier: Resource()
+RemoteSupplier --> Driftctl: remoteResources []Resource
+
+hnote across: Run Middlewares
+Driftctl --> Driftctl: List Middlewares
+loop on every middleware
+Driftctl -> Middleware: Execute(remoteResources, stateResources)
+Middleware --> Driftctl: remoteResources, stateResources []Resource
+end
+
+hnote across: Run Filters
+Driftctl -> FilterEngine: Run(remoteResources)
+FilterEngine --> Driftctl: remoteResources []Resource
+Driftctl -> FilterEngine: Run(stateResources)
+FilterEngine --> Driftctl: stateResources []Resource
+
+hnote across: Analyze
+Driftctl -> Analyzer: Analyze(remoteResources, stateResources)
+Analyzer --> Driftctl: Analysis
+
+
+@enduml
diff --git a/docs/media/resource.png b/docs/media/resource.png
index 1514ff56..a536cbb3 100644
Binary files a/docs/media/resource.png and b/docs/media/resource.png differ
diff --git a/docs/media/resource.puml b/docs/media/resource.puml
index 1fad54f0..3daba02c 100644
--- a/docs/media/resource.puml
+++ b/docs/media/resource.puml
@@ -1,17 +1,23 @@
 @startuml
 Driftctl -> Scanner: Resource()
-Scanner -> Scanner: List Suppliers
+Scanner -> Scanner: List Enumerators
 loop
-Scanner -> Supplier: Resources()
-Supplier -> RemoteSDK: List resource
-RemoteSDK --> Supplier: []ResourcesIds
-loop
-Supplier -> TerraformProvider: ReadResource()
-TerraformProvider --> Supplier: CTYRessource
-Supplier -> CTYDeserializer: Deserialize()
-CTYDeserializer --> Supplier: Resource
+Scanner -> Enumerator: Enumerate()
+Enumerator -> RemoteSDK: List resources
+RemoteSDK --> Enumerator: []remoteRes
+loop optionally retrieve needed resource attributes
+Enumerator -> RemoteSDK: Retrieve needed\nattributes
+RemoteSDK --> Enumerator: Attrs
+end
+Enumerator --> Scanner: []Resources with\nlimited attributes
+loop if deep mode
+Scanner -> DetailsFetcher: ReadDetails(res)
+DetailsFetcher -> TerraformProvider: ReadResource()
+TerraformProvider --> DetailsFetcher: CTYValue
+DetailsFetcher -> Deserializer: Deserialize()
+Deserializer -> DetailsFetcher: Resource
+DetailsFetcher -> Scanner: Resource with\nfull attributes
 end
-Supplier --> Scanner: []Resource
 end
 Scanner --> Driftctl: []Resource
-@enduml
\ No newline at end of file
+@enduml
diff --git a/docs/new-remote-provider.md b/docs/new-remote-provider.md
new file mode 100644
index 00000000..fb239298
--- /dev/null
+++ b/docs/new-remote-provider.md
@@ -0,0 +1,161 @@
+# Add a new remote provider
+
+A remote provider in Driftctl is a cloud provider like AWS, Github, GCP or Azure.
+The current architecture allows you to add a new provider in a few steps.
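+Each remote is identified by a short key (for example `aws+tf` for AWS resources managed through Terraform). Assuming the standard driftctl CLI, this is the value users pass when selecting a remote at scan time, typically `driftctl scan --to aws+tf`.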
+
+## Declaring the new remote provider
+
+First you need to create a new directory in `pkg/remote/`. It will sit next to already implemented ones like `pkg/remote/aws`.
+
+Inside this directory you will create an `init.go` file. The first thing to do is to define the remote name constant:
+```go
+const RemoteAWSTerraform = "aws+tf"
+```
+
+You will then create a function to init the provider and all the future resource enumerators. The best way to do this is to copy the function signature from another provider:
+```go
+func Init(
+	// Version required by the user
+	version string,
+	// Util to send alerts
+	alerter *alerter.Alerter,
+	// Library that contains all providers
+	providerLibrary *terraform.ProviderLibrary,
+	// Library that contains the enumerators and details fetchers for each supported resource
+	remoteLibrary *common.RemoteLibrary,
+	// Progress display
+	progress output.Progress,
+	// Repository for all resource schemas
+	resourceSchemaRepository *resource.SchemaRepository,
+	// Factory used to create driftctl resources
+	factory resource.ResourceFactory,
+	// Driftctl config directory (in which the terraform provider is downloaded)
+	configDir string) error {
+
+	// Define the default version of the terraform provider to be used when the user does not require a specific one
+	if version == "" {
+		version = "3.19.0"
+	}
+
+	// This is the actual terraform provider creation
+	provider, err := NewAWSTerraformProvider(version, progress, configDir)
+	if err != nil {
+		return err
+	}
+	// And then its initialisation
+	err = provider.Init()
+	if err != nil {
+		return err
+	}
+
+	// You'll need to create a new cache that will be used to cache fetched resource lists
+	repositoryCache := cache.New(100)
+
+	// The deserializer is used to convert cty values returned by the terraform provider into driftctl AbstractResources
+	deserializer := resource.NewDeserializer(factory)
+
+	// Adding the provider to the library
+	providerLibrary.AddProvider(terraform.AWS, provider)
+
+	// Enumerators and details fetchers for each resource will be registered here
+	// later on (see "Add new resources"), using remoteLibrary, deserializer and repositoryCache.
+
+	return nil
+}
+```
+
+When it's done, you'll create a `provider.go` file to contain your terraform provider representation. Again, you should look at other implementations:
+```go
+// Define your actual provider representation. It is required to compose with terraform.TerraformProvider and to have a name and a version.
+// Please note that the name should match the real terraform provider name.
+type AWSTerraformProvider struct {
+	*terraform.TerraformProvider
+	session *session.Session
+	name    string
+	version string
+}
+
+func NewAWSTerraformProvider(version string, progress output.Progress, configDir string) (*AWSTerraformProvider, error) {
+	// Just pass your version and name
+	p := &AWSTerraformProvider{
+		version: version,
+		name:    "aws",
+	}
+	// Use the terraform provider installer to retrieve the provider if needed
+	installer, err := tf.NewProviderInstaller(tf.ProviderConfig{
+		Key:       p.name,
+		Version:   version,
+		ConfigDir: configDir,
+	})
+	if err != nil {
+		return nil, err
+	}
+	p.session = session.Must(session.NewSessionWithOptions(session.Options{
+		SharedConfigState: session.SharedConfigEnable,
+	}))
+
+	// Config is dependent on the terraform provider's needs.
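+	// Illustration only: the shape of the value returned by GetProviderConfig below
+	// is specific to each terraform provider. For AWS it is a small struct whose
+	// fields are read by the provider, for example (field names here are assumptions,
+	// check an existing implementation for the real definition):
+	//
+	//	type awsConfig struct {
+	//		Region     string
+	//		MaxRetries int
+	//	}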
+	tfProvider, err := terraform.NewTerraformProvider(installer, terraform.TerraformProviderConfig{
+		Name:         p.name,
+		DefaultAlias: *p.session.Config.Region,
+		GetProviderConfig: func(alias string) interface{} {
+			return awsConfig{
+				Region:     alias,
+				MaxRetries: 10,
+			}
+		},
+	}, progress)
+	if err != nil {
+		return nil, err
+	}
+	p.TerraformProvider = tfProvider
+	return p, err
+}
+
+func (a *AWSTerraformProvider) Name() string {
+	return a.name
+}
+
+func (p *AWSTerraformProvider) Version() string {
+	return p.version
+}
+```
+
+You are now almost done. You'll need to make driftctl aware of this provider, so in `pkg/remote/remote.go` add your new constant to `supportedRemotes`:
+```go
+var supportedRemotes = []string{
+	aws.RemoteAWSTerraform,
+	github.RemoteGithubTerraform,
+}
+```
+
+And don't forget to modify the `Activate` function to be able to activate your new provider. You'll need to add a new case to the switch:
+```go
+func Activate(remote, version string, alerter *alerter.Alerter,
+	providerLibrary *terraform.ProviderLibrary,
+	remoteLibrary *common.RemoteLibrary,
+	progress output.Progress,
+	resourceSchemaRepository *resource.SchemaRepository,
+	factory resource.ResourceFactory,
+	configDir string) error {
+	switch remote {
+	case aws.RemoteAWSTerraform:
+		return aws.Init(version, alerter, providerLibrary, remoteLibrary, progress, resourceSchemaRepository, factory, configDir)
+	case github.RemoteGithubTerraform:
+		return github.Init(version, alerter, providerLibrary, remoteLibrary, progress, resourceSchemaRepository, factory, configDir)
+	default:
+		return errors.Errorf("unsupported remote '%s'", remote)
+	}
+}
+```
+
+Your provider is now set up!
+
+## Prepare Driftctl to support new resources
+
+New resources for the newly added provider will be located in `pkg/resource/<provider>/`. You should create this directory and a `metadata.go` file.
+Inside this file add a new function:
+```go
+func InitResourcesMetadata(resourceSchemaRepository resource.SchemaRepositoryInterface) {
+}
+```
+
+And add a call to it in the `remote/<provider>/init.go` you created in the first step.
+
+The last step will be to create tests for the new resources you will implement.
+Please use `TestCreateNewSchema`, located in `test/schemas/schemas_test.go`, to generate a schema file that will be used for the mocked provider.
+
+Everything is now ready, you should [start adding new resources](new-resource.md)!
diff --git a/docs/new-resource.md b/docs/new-resource.md
index f03967a3..6148d826 100644
--- a/docs/new-resource.md
+++ b/docs/new-resource.md
@@ -1,149 +1,215 @@
 # Add new resources
 
+First you need to understand how the driftctl scan works. Here you'll find a global overview of the steps that compose a scan:
+
+![Diagram](media/generalflow.png)
+
+And here you'll see a more detailed flow of the resource retrieval sequence:
+
 ![Diagram](media/resource.png)
 
 ## Defining the resource
 
-First step is to implement a new resource. To do that you need to define a go struct representing all fields that need to be monitored for this kind of resource.
-
-You can find several examples in already implemented resources like aws.S3Bucket:
+The first step is to add a file under `pkg/resource/<provider>/resourcetype.go`.
+This file will define a const string that will be the resource type identifier in driftctl.
+Optionally, if your resource is to be supported by driftctl's experimental deep mode, you can add a function that will be
+applied to this resource when it's created. This allows driftctl to avoid displaying useless diffs.
+You can also add some metadata to fields so they are compared or displayed differently.
+For example, this defines the aws_iam_role resource:
 ```go
-type AwsS3Bucket struct {
-	AccelerationStatus *string `cty:"acceleration_status"`
-	Acl                *string `cty:"acl" diff:"-"`
-	Arn                *string `cty:"arn"`
-	Bucket             *string `cty:"bucket"`
-...
-```
+const AwsIamRoleResourceType = "aws_iam_role"
+
-Your new type will need to implement `resource.Resource` interface in order for driftctl to retrieve its type and a unique identifier for it.
-
-```go
-type Resource interface {
-	TerraformId() string
-	TerraformType() string
+func initAwsIAMRoleMetaData(resourceSchemaRepository resource.SchemaRepositoryInterface) {
+	// assume_role_policy drifts will be displayed as json
+	resourceSchemaRepository.UpdateSchema(AwsIamRoleResourceType, map[string]func(attributeSchema *resource.AttributeSchema){
+		"assume_role_policy": func(attributeSchema *resource.AttributeSchema) {
+			attributeSchema.JsonString = true
+		},
+	})
+	// force_detach_policies should not be compared, so it will be removed before the comparison
+	resourceSchemaRepository.SetNormalizeFunc(AwsIamRoleResourceType, func(res *resource.AbstractResource) {
+		val := res.Attrs
+		val.SafeDelete([]string{"force_detach_policies"})
+	})
 }
 ```
 
-Some resources are read differently by the terraform state reader and the supplier. You can optionally implement `resource.NormalizedResource` to add a normalization step before the comparison is made.
-
+When it's done, you'll have to add this function to the metadata initialisation located in `pkg/resource/<provider>/metadatas.go`:
 ```go
-type NormalizedResource interface {
-	NormalizeForState() (Resource, error)
-	NormalizeForProvider() (Resource, error)
+func InitResourcesMetadata(resourceSchemaRepository resource.SchemaRepositoryInterface) {
+	initAwsAmiMetaData(resourceSchemaRepository)
 }
 ```
 
-For example S3Bucket policy is encoded in json but the formatting (newline and tabs) differs when read using the state reader. S3Bucket implements `resource.NormalizedResource`:
-
+In order for your new resource to be supported by our terraform state reader, you should add it to the `supportedTypes` map in `pkg/resource/resource_types.go`.
 ```go
-func (s S3Bucket) NormalizeForState() (resource.Resource, error) {
-	err := normalizePolicy(&s)
-	return &s, err
-}
-
-func (s S3Bucket) NormalizeForProvider() (resource.Resource, error) {
-	err := normalizePolicy(&s)
-	return &s, err
-}
-
-func normalizePolicy(s *S3Bucket) error {
-	if s.Policy.Policy != nil {
-		jsonString, err := structure.NormalizeJsonString(*s.Policy.Policy)
-		if err != nil {
-			return err
-		}
-		s.Policy.Policy = &jsonString
-	}
-	return nil
+var supportedTypes = map[string]struct{}{
+	"aws_ami": {},
 }
 ```
-You can implement different normalization for the state representation and the supplier one.
 
-## Supplier and Deserializer
+All resources inside driftctl are `resource.AbstractResource`, except in some unit tests. This struct has an id and a type;
+all the other attributes are represented inside a `map[string]interface{}`.
+
+## Repository, Enumerator and DetailsFetcher
 
 Then you will have to implement two interfaces:
 
-- `resource.supplier` is used to read resources list. It will call the cloud provider SDK to get the list of resources, and
-  the terraform provider to get the details for each of these resources
-- `remote.CTYDeserializer` is used to transform terraform cty output into your resource
+- Repositories are the way we decided to hide direct calls to the SDK and the pagination logic. It's a common pattern.
+- `remote.common.Enumerator` is used to read the resource list. It will call the cloud provider SDK to get the list of resources.
+  For some resources it may make other calls to enrich the resources with needed fields when driftctl is used in non-deep mode.
+- `remote.common.DetailsFetcher` is used to make a call to the terraform provider to read the resource.
+  This implementation is optional and is only needed if your resource type is to be supported by the experimental deep mode.
+  Please also note that there is a generic implementation, `remote.common.GenericDetailsFetcher`, that can be used with most resource types.
 
-### Supplier
-This is used to read resources list. It will call the cloud provider SDK to get the list of resources, and the
-terraform provider to get the details for each of these resources.
-You can use an already implemented resource as example.
+### Repository
+
+This will be the struct that hides all the logic linked to your provider SDK. All providers have different ways to implement pagination or to name functions in their API.
+Here we will name all listing functions `ListAll...` and they all return a slice of the listed resources.
+
+For AWS we decided to split repositories following Amazon's own grouping, so you'll find repositories for EC2, S3 and so on.
+Some providers do not have this grouping logic. Keep in mind that, like all our files and structs, repositories should not be too big,
+so it might be useful to create your own grouping logic.
+
+For our Github implementation the number of listing functions was small, so we created a single repository for everything:
+
+```go
+type GithubRepository interface {
+	ListRepositories() ([]string, error)
+	ListTeams() ([]Team, error)
+	ListMembership() ([]string, error)
+	ListTeamMemberships() ([]string, error)
+	ListBranchProtection() ([]string, error)
+}
+
+type githubRepository struct {
+	client GithubGraphQLClient
+	ctx    context.Context
+	config githubConfig
+	cache  cache.Cache
+}
+
+func NewGithubRepository(config githubConfig, c cache.Cache) *githubRepository {
+	ctx := context.Background()
+	ts := oauth2.StaticTokenSource(
+		&oauth2.Token{AccessToken: config.Token},
+	)
+	oauthClient := oauth2.NewClient(ctx, ts)
+
+	repo := &githubRepository{
+		client: githubv4.NewClient(oauthClient),
+		ctx:    context.Background(),
+		config: config,
+		cache:  c,
+	}
+
+	return repo
+}
+```
+
+As you can see, this contains the logic to create the github client (it might be created outside the repository if it
+makes sense to share it between multiple repositories). It also receives a cache, so every request is cached.
+Driftctl sometimes needs to retrieve a list of resources more than once, so we cache every request to avoid unnecessary calls.
+
+### Enumerator
+
+This is used to read the resource list. Enumerators are located in `pkg/remote/<provider>/<resource>_enumerator.go`. They call the cloud provider SDK to get the list of resources.
+
+Note that at this point resources should not be entirely fetched.
+Most of the resources returned by an enumerator have empty attributes: they only represent the type and the terraform id.
+There are exceptions to this:
+- Sometimes you will need some more information about resources to retrieve them using the provider; this information should be added to the resource attribute map.
+- For some more complex cases, middlewares need more information than the id and the type; in order to keep a classic driftctl run coherent with a run with deep mode activated,
+this information should be fetched manually by the enumerator using the remote SDK.
+
+
+Note that we use the classic repository to hide calls to the provider SDK.
+You will probably need to at least add a listing function to list your new resource.
+
+You should use an already implemented enumerator as an example.
+
+For example, when implementing the ec2_instance resource you will need to add a ListAllInstances() function to `repository.EC2Repository`.
+It will be called by the enumerator to retrieve the instances list.
 Supplier constructor could use these arguments:
-- an instance of `ParallelRunner` that you will use to parallelize your call to the supplier:
-
-```go
-results := make(map[string][]cty.Value)
-for _, bucket := range response.Buckets {
-	b := *bucket
-	s.runner.Run(func() error {
-		return s.readBucket(b, results)
-	})
-}
-if err := s.runner.Wait(); err != nil {
-	return nil, err
-}
-```
-
-- an instance of `terraform.ResourceReader` that you can use to read resource using the supplier:
-
-```go
-s3Bucket, err := s.reader.ReadResource(aws.AwsS3BucketResourceType, name)
-if err != nil {
-	logrus.Warnf("Error reading bucket %s[%s]: %+v", name, aws.AwsS3BucketResourceType, err)
-	return err
-}
-appendValueIntoMap(results, aws.AwsS3BucketResourceType, s3Bucket)
-```
-
-- an instance of the cloud provider SDK that you will use to retrieve resources list
-
-### Deserializer
-
-The deserializer is used when reading resource from the terraform provider or from the state.
-The interface contains a `Deserialize(values []cty.Value) ([]resource.Resource, error)` method that you'll implement.
-
-You should then deserialize the obtained cty values into your resource and return the list.
-
-Example: [aws_s3_bucket_deserializer.go](https://github.com/cloudskiff/driftctl/blob/main/pkg/resource/aws/deserializer/s3_bucket_deserializer.go)
-
-## Adding your resource
-
-There are two files you are going to edit to make driftctl aware of your new resource.
-
-For the state reader you will need to add your `CTYDeserializer` implementation into `iac/deserializers.go`.
-Just add an instance in the list:
-
-```go
-func Deserializers() []remote.CTYDeserializer {
-	return []remote.CTYDeserializer{
-		aws.NewS3BucketDeserializer(),
-		...
-	}
-}
-```
-
-Then in the cloud provider's init file (e.g. in `remote/aws/init.go`), add your new implementation for `resource.Supplier`:
-
-```go
-func Init() error {
-	provider, err := NewTerraFormProvider()
-	if err != nil {
-		return err
-	}
-	terraform.AddProvider(terraform.AWS, provider)
-	resource.AddSupplier(NewS3BucketSupplier(provider.Runner().SubRunner(), s3.New(provider.session)))
-	...
-}
-```
-
-Don't forget to add unit tests after adding a new resource.
-You can also add acceptance tests if you think it makes sense.
+- an instance of `Repository` that you will use to retrieve information about the resource
+- the global resource factory that should always be used to create a new `resource.Resource`
+
+The enumerator then needs to implement:
+- `SupportedType() resource.ResourceType`, which will return the constant you defined in the type file in the first step
+- `Enumerate() ([]resource.Resource, error)`, which will return the resource listing. Note that at this point resources should not be entirely fetched.
+Most of the resources returned by an enumerator have empty attributes: they only represent the type and the terraform id.
+
+```go
+type EC2InstanceEnumerator struct {
+	repository repository.EC2Repository
+	factory    resource.ResourceFactory
+}
+
+func NewEC2InstanceEnumerator(repo repository.EC2Repository, factory resource.ResourceFactory) *EC2InstanceEnumerator {
+	return &EC2InstanceEnumerator{
+		repository: repo,
+		factory:    factory,
+	}
+}
+
+func (e *EC2InstanceEnumerator) SupportedType() resource.ResourceType {
+	return aws.AwsInstanceResourceType
+}
+
+func (e *EC2InstanceEnumerator) Enumerate() ([]resource.Resource, error) {
+	instances, err := e.repository.ListAllInstances()
+	if err != nil {
+		return nil, remoteerror.NewResourceListingError(err, string(e.SupportedType()))
+	}
+
+	results := make([]resource.Resource, 0, len(instances))
+
+	for _, instance := range instances {
+		results = append(
+			results,
+			e.factory.CreateAbstractResource(
+				string(e.SupportedType()),
+				*instance.InstanceId,
+				map[string]interface{}{},
+			),
+		)
+	}
+
+	return results, err
+}
+```
+
+As you can see, listing errors are treated in a particular way. Instead of failing and stopping the scan, they will be handled and an alert will be created.
+So please don't forget to wrap these errors inside a NewResourceListingError.
+For some providers error handling is not that coherent, so you might need to check `pkg/remote/resource_enumeration_error_handler.go` and add a new case for your error.
+
+Once the enumerator is written you have to add it to the remote init located in `pkg/remote/<provider>/init.go`:
+```go
+	s3Repository := repository.NewS3Repository(client.NewAWSClientFactory(provider.session), repositoryCache)
+	remoteLibrary.AddEnumerator(NewS3BucketEnumerator(s3Repository, factory, provider.Config))
+```
+
+### DetailsFetcher
+
+DetailsFetchers are only used by driftctl's experimental deep mode.
+
+This is the component that calls the terraform provider to retrieve the full attributes for each resource.
+We do not want to reimplement what has already been done in every terraform provider, so you should not call the remote SDK to do this.
+
+If `common.GenericDetailsFetcher` satisfies your needs, you should always prefer using it instead of implementing DetailsFetcher in a new struct.
+
+The DetailsFetcher should also be added to `pkg/remote/<provider>/init.go`, even if you use the generic version:
+```go
+	remoteLibrary.AddDetailsFetcher(aws.AwsEbsVolumeResourceType, common.NewGenericDetailsFetcher(aws.AwsEbsVolumeResourceType, provider, deserializer))
+```
+
+***Don't forget to add unit tests after adding a new resource.***
+
+You can also find examples of "integration" tests in `pkg/remote/<provider>_scanner_test.go`.
+
+You should also add acceptance tests if you think it makes sense; they are located next to the resource definition described in the first step.
diff --git a/pkg/remote/aws/cloudfront_distribution_enumerator.go b/pkg/remote/aws/cloudfront_distribution_enumerator.go
index e66d5df5..60f8cc84 100644
--- a/pkg/remote/aws/cloudfront_distribution_enumerator.go
+++ b/pkg/remote/aws/cloudfront_distribution_enumerator.go
@@ -41,6 +41,5 @@ func (e *CloudfrontDistributionEnumerator) Enumerate() ([]*resource.Resource, er
 			),
 		)
 	}
-
 	return results, err
 }
diff --git a/pkg/remote/aws/init.go b/pkg/remote/aws/init.go
index e38bc597..90d28350 100644
--- a/pkg/remote/aws/init.go
+++ b/pkg/remote/aws/init.go
@@ -36,7 +36,6 @@ func Init(version string, alerter *alerter.Alerter,
 	if err != nil {
 		return err
 	}
-
 	repositoryCache := cache.New(100)
 
 	s3Repository := repository.NewS3Repository(client.NewAWSClientFactory(provider.session), repositoryCache)
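To tie the documentation above together, here is a hedged sketch of how the registrations for a hypothetical `aws_foo` resource could look in `pkg/remote/aws/init.go`. The names `NewFooRepository`, `NewFooEnumerator` and `AwsFooResourceType` are placeholders invented for this example, not existing driftctl code; only the registration pattern mirrors the snippets shown in the docs above.

```go
	// Repository hiding the AWS SDK calls, reusing the shared session and cache
	fooRepository := repository.NewFooRepository(provider.session, repositoryCache)

	// Enumerator returning aws_foo resources with limited attributes
	remoteLibrary.AddEnumerator(NewFooEnumerator(fooRepository, factory))

	// Optional: full attributes in deep mode, reusing the generic details fetcher
	remoteLibrary.AddDetailsFetcher(aws.AwsFooResourceType, common.NewGenericDetailsFetcher(aws.AwsFooResourceType, provider, deserializer))
```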